Refactor library

This commit is contained in:
2023-07-19 16:16:15 +02:00
parent 168cc368ee
commit 7f5ea1ab1e
11 changed files with 125 additions and 34462 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,37 +0,0 @@
# distutils: language = c++
# cython: language_level = 3
from libcpp.vector cimport vector
from libcpp.string cimport string
from libcpp.map cimport map
import numpy as np
# Declares the C++ class TAN from "cpp/TAN.h" (namespace "bayesnet") so the
# CTAN extension type in this file can create and call it.
cdef extern from "cpp/TAN.h" namespace "bayesnet":
cdef cppclass TAN:
# "except +" asks Cython to turn a C++ exception thrown here into a Python exception.
TAN() except +
# Called by CTAN.fit with: feature columns, labels, feature names, class name, states map.
void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
# Called by CTAN.predict with the feature columns.
vector[int] predict(vector[vector[int]]&)
# Declared here but not called by the CTAN wrapper in this file.
vector[vector[double]] predict_proba(vector[vector[int]]&)
# Called by CTAN.score with the feature columns and the labels.
float score(const vector[vector[int]]&, const vector[int]&)
# Called by CTAN.graph; returns a vector of strings.
vector[string] graph()
cdef class CTAN:
    """Extension type that owns one heap-allocated C++ ``TAN`` object.

    Every method forwards to the C++ object; Cython converts the Python
    arguments to the C++ container types declared in the extern block.
    """
    cdef TAN *thisptr

    def __cinit__(self):
        # Allocate the wrapped C++ object when the Python object is created.
        self.thisptr = new TAN()

    def __dealloc__(self):
        # Release the wrapped C++ object together with the Python object.
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Forward the training data to the C++ ``fit`` and return ``self``.

        ``X`` is indexed as ``X[:, i]`` and must expose ``shape``; it is
        handed over as a list holding one entry per column. Feature names
        and the class name are encoded to bytes; ``states`` is passed on
        unchanged.
        """
        columns = [X[:, col] for col in range(X.shape[1])]
        encoded_features = [name.encode() for name in features]
        self.thisptr.fit(columns, y, encoded_features, className.encode(), states)
        return self

    def predict(self, X):
        """Return the C++ ``predict`` result for the columns of ``X``."""
        columns = [X[:, col] for col in range(X.shape[1])]
        return self.thisptr.predict(columns)

    def score(self, X, y):
        """Return the C++ ``score`` result for the columns of ``X`` and ``y``."""
        columns = [X[:, col] for col in range(X.shape[1])]
        return self.thisptr.score(columns, y)

    def graph(self):
        """Return the C++ ``graph`` result converted to a Python object."""
        return self.thisptr.graph()

    def __reduce__(self):
        # Pickle support: only the class and an empty argument tuple are
        # returned, so unpickling constructs a brand-new CTAN().
        return (CTAN, ())

File diff suppressed because it is too large Load Diff

View File

@@ -2,6 +2,7 @@
# cython: language_level = 3 # cython: language_level = 3
from libcpp.vector cimport vector from libcpp.vector cimport vector
from libcpp.string cimport string from libcpp.string cimport string
from libcpp.map cimport map
import numpy as np import numpy as np
cdef extern from "cpp/Network.h" namespace "bayesnet": cdef extern from "cpp/Network.h" namespace "bayesnet":
@@ -76,3 +77,101 @@ cdef class CMetrics:
def __reduce__(self): def __reduce__(self):
return (CMetrics, ()) return (CMetrics, ())
# Cython declaration of the C++ class used by the TAN extension type in this module.
# NOTE(review): inside a `namespace "bayesnet"` extern block this name resolves to
# bayesnet::CTAN - confirm that "cpp/TAN.h" defines a class with exactly that name.
cdef extern from "cpp/TAN.h" namespace "bayesnet":
cdef cppclass CTAN:
# "except +" asks Cython to turn a C++ exception thrown here into a Python exception.
CTAN() except +
# Called by TAN.fit with: feature columns, labels, feature names, class name, states map.
void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
# Called by TAN.predict with the feature columns.
vector[int] predict(vector[vector[int]]&)
# Declared here but not called by the TAN wrapper in this module.
vector[vector[double]] predict_proba(vector[vector[int]]&)
# Called by TAN.score with the feature columns and the labels.
float score(const vector[vector[int]]&, const vector[int]&)
# Called by TAN.graph; returns a vector of strings.
vector[string] graph()
# Cython declaration of the C++ class from "cpp/KDB.h" (namespace "bayesnet").
# It is declared as KDB because the CKDB extension type in this module holds a
# `KDB *` and constructs it with `new KDB(k)`; declaring it as CKDB would both
# leave KDB undeclared and clash with the extension type's own name.
# NOTE(review): this resolves to bayesnet::KDB - confirm against "cpp/KDB.h".
cdef extern from "cpp/KDB.h" namespace "bayesnet":
    cdef cppclass KDB:
        # Constructor takes one int; "except +" asks Cython to turn a C++
        # exception thrown here into a Python exception.
        KDB(int) except +
        # Called by the wrapper with: feature columns, labels, feature names,
        # class name, states map.
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()
# Cython declaration of the C++ class from "cpp/AODE.h" (namespace "bayesnet").
# It is declared as AODE because the CAODE extension type in this module holds
# an `AODE *` and constructs it with `new AODE()`; declaring it as CAODE would
# both leave AODE undeclared and clash with the extension type's own name.
# NOTE(review): this resolves to bayesnet::AODE - confirm against "cpp/AODE.h".
cdef extern from "cpp/AODE.h" namespace "bayesnet":
    cdef cppclass AODE:
        # "except +" asks Cython to turn a C++ exception thrown here into a
        # Python exception.
        AODE() except +
        # Called by the wrapper with: feature columns, labels, feature names,
        # class name, states map.
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()
cdef class TAN:
    """Extension type that owns one heap-allocated C++ ``CTAN`` object.

    Every method forwards to the C++ object; Cython converts the Python
    arguments to the C++ container types declared in the extern block.
    """
    cdef CTAN *thisptr

    def __cinit__(self):
        # Allocate the wrapped C++ object when the Python object is created.
        self.thisptr = new CTAN()

    def __dealloc__(self):
        # Release the wrapped C++ object together with the Python object.
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Forward the training data to the C++ ``fit`` and return ``self``.

        ``X`` is indexed as ``X[:, i]`` and must expose ``shape``; it is
        handed over as a list holding one entry per column. Feature names,
        the class name and the keys of ``states`` are encoded to bytes. The
        entry for the class itself is filled in (or overwritten) with the
        sorted distinct values of ``y``.
        """
        columns = [X[:, col] for col in range(X.shape[1])]
        encoded_features = [name.encode() for name in features]
        encoded_states = dict(
            (name.encode(), values) for name, values in states.items()
        )
        encoded_states[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(columns, y, encoded_features, className.encode(), encoded_states)
        return self

    def predict(self, X):
        """Return the C++ ``predict`` result for the columns of ``X``."""
        columns = [X[:, col] for col in range(X.shape[1])]
        return self.thisptr.predict(columns)

    def score(self, X, y):
        """Return the C++ ``score`` result for the columns of ``X`` and ``y``."""
        columns = [X[:, col] for col in range(X.shape[1])]
        return self.thisptr.score(columns, y)

    def graph(self):
        """Return the C++ ``graph`` result converted to a Python object."""
        return self.thisptr.graph()

    def __reduce__(self):
        # Pickle support: only the class and an empty argument tuple are
        # returned, so unpickling constructs a brand-new TAN().
        return (TAN, ())
cdef class CKDB:
    """Extension type that owns one heap-allocated C++ ``KDB`` object.

    Every method forwards to the C++ object; Cython converts the Python
    arguments to the C++ container types declared in the extern block.
    """
    cdef KDB *thisptr
    # Constructor argument, kept so that __reduce__ can rebuild the object.
    cdef int k

    def __cinit__(self, k):
        # Remember k, then allocate the wrapped C++ object with it.
        self.k = k
        self.thisptr = new KDB(k)

    def __dealloc__(self):
        # Release the wrapped C++ object together with the Python object.
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Forward the training data to the C++ ``fit`` and return ``self``.

        ``X`` is indexed as ``X[:, i]`` and must expose ``shape``; it is
        handed over as a list holding one entry per column. Feature names,
        the class name and the keys of ``states`` are encoded to bytes. The
        entry for the class itself is filled in (or overwritten) with the
        sorted distinct values of ``y``.
        """
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self

    def predict(self, X):
        """Return the C++ ``predict`` result for the columns of ``X``."""
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)

    def score(self, X, y):
        """Return the C++ ``score`` result for the columns of ``X`` and ``y``."""
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)

    def graph(self):
        """Return the C++ ``graph`` result converted to a Python object."""
        return self.thisptr.graph()

    def __reduce__(self):
        # Pickle support. __cinit__ requires k, so it must be part of the
        # constructor arguments; an empty tuple would make unpickling and
        # copy.copy() fail with a TypeError. Only k is carried over: the
        # rebuilt object is a brand-new CKDB(k).
        return (CKDB, (self.k,))
cdef class CAODE:
    """Extension type that owns one heap-allocated C++ ``AODE`` object.

    Every method forwards to the C++ object; Cython converts the Python
    arguments to the C++ container types declared in the extern block.
    """
    cdef AODE *thisptr

    def __cinit__(self):
        # Allocate the wrapped C++ object when the Python object is created.
        self.thisptr = new AODE()

    def __dealloc__(self):
        # Release the wrapped C++ object together with the Python object.
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Forward the training data to the C++ ``fit`` and return ``self``.

        ``X`` is indexed as ``X[:, i]`` and must expose ``shape``; it is
        handed over as a list holding one entry per column. Feature names,
        the class name and the keys of ``states`` are encoded to bytes. The
        entry for the class itself is filled in (or overwritten) with the
        sorted distinct values of ``y``.
        """
        columns = [X[:, col] for col in range(X.shape[1])]
        encoded_features = [name.encode() for name in features]
        encoded_states = dict(
            (name.encode(), values) for name, values in states.items()
        )
        encoded_states[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(columns, y, encoded_features, className.encode(), encoded_states)
        return self

    def predict(self, X):
        """Return the C++ ``predict`` result for the columns of ``X``."""
        columns = [X[:, col] for col in range(X.shape[1])]
        return self.thisptr.predict(columns)

    def score(self, X, y):
        """Return the C++ ``score`` result for the columns of ``X`` and ``y``."""
        columns = [X[:, col] for col in range(X.shape[1])]
        return self.thisptr.score(columns, y)

    def graph(self):
        """Return the C++ ``graph`` result converted to a Python object."""
        return self.thisptr.graph()

    def __reduce__(self):
        # Pickle support: only the class and an empty argument tuple are
        # returned, so unpickling constructs a brand-new CAODE().
        return (CAODE, ())

View File

@@ -8,7 +8,6 @@ namespace bayesnet {
BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
BaseClassifier& BaseClassifier::build(vector<string>& features, string className, map<string, vector<int>>& states) BaseClassifier& BaseClassifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
{ {
dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1); dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1);
this->features = features; this->features = features;
this->className = className; this->className = className;
@@ -116,4 +115,13 @@ namespace bayesnet {
} }
model.addNode(className, states[className].size()); model.addNode(className, states[className].size());
} }
/// Returns the node count of the model: one per feature plus one for the
/// class. Yields 0 while the classifier has not been fitted.
int BaseClassifier::getNumberOfNodes()
{
    if (!fitted) {
        return 0;
    }
    // The feature list does not contain the class, so it is counted separately.
    const auto featureCount = model.getFeatures().size();
    return static_cast<int>(featureCount + 1);
}
/// Returns how many edges the model reports through getEdges().
/// Yields 0 while the classifier has not been fitted.
int BaseClassifier::getNumberOfEdges()
{
    if (!fitted) {
        return 0;
    }
    const auto edgeCount = model.getEdges().size();
    return static_cast<int>(edgeCount);
}
} }

View File

@@ -30,6 +30,8 @@ namespace bayesnet {
virtual ~BaseClassifier() = default; virtual ~BaseClassifier() = default;
BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states); BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
void addNodes(); void addNodes();
int getNumberOfNodes();
int getNumberOfEdges();
Tensor predict(Tensor& X); Tensor predict(Tensor& X);
vector<int> predict(vector<vector<int>>& X); vector<int> predict(vector<vector<int>>& X);
float score(Tensor& X, Tensor& y); float score(Tensor& X, Tensor& y);

File diff suppressed because it is too large Load Diff

View File

@@ -275,5 +275,17 @@ namespace bayesnet {
output.push_back("}\n"); output.push_back("}\n");
return output; return output;
} }
/// Collects every (head, tail) pair of the network.
/// For each entry of `nodes`, the entry's key is the head and the name of
/// each of that node's children is a tail. Pairs are produced in the
/// iteration order of `nodes`, then in the order returned by getChildren().
vector<pair<string, string>> Network::getEdges()
{
    vector<pair<string, string>> result;
    for (const auto& entry : nodes) {
        const auto& headName = entry.first;
        for (const auto& childNode : entry.second->getChildren()) {
            result.emplace_back(headName, childNode->getName());
        }
    }
    return result;
}
} }

View File

@@ -36,6 +36,7 @@ namespace bayesnet {
map<string, std::unique_ptr<Node>>& getNodes(); map<string, std::unique_ptr<Node>>& getNodes();
vector<string> getFeatures(); vector<string> getFeatures();
int getStates(); int getStates();
vector<pair<string, string>> getEdges();
int getClassNumStates(); int getClassNumStates();
string getClassName(); string getClassName();
void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&); void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);

File diff suppressed because it is too large Load Diff

View File

@@ -33,23 +33,7 @@ setup(
"bayesclass/cpp/Network.cc", "bayesclass/cpp/Network.cc",
"bayesclass/cpp/Node.cc", "bayesclass/cpp/Node.cc",
"bayesclass/cpp/Metrics.cc", "bayesclass/cpp/Metrics.cc",
"bayesclass/cpp/Mst.cc", "bayesclass/cpp/utils.cc",
"bayesclass/cpp/BaseClassifier.cc",
"bayesclass/cpp/Ensemble.cc",
"bayesclass/cpp/TAN.cc",
"bayesclass/cpp/KDB.cc",
"bayesclass/cpp/SPODE.cc",
"bayesclass/cpp/AODE.cc",
],
include_dirs=include_paths(),
),
CppExtension(
name="bayesclass.BayesModels",
sources=[
"bayesclass/BayesModels.pyx",
"bayesclass/cpp/Network.cc",
"bayesclass/cpp/Node.cc",
"bayesclass/cpp/Metrics.cc",
"bayesclass/cpp/Mst.cc", "bayesclass/cpp/Mst.cc",
"bayesclass/cpp/BaseClassifier.cc", "bayesclass/cpp/BaseClassifier.cc",
"bayesclass/cpp/Ensemble.cc", "bayesclass/cpp/Ensemble.cc",