Refactor library

This commit is contained in:
2023-07-19 16:16:15 +02:00
parent 168cc368ee
commit 7f5ea1ab1e
11 changed files with 125 additions and 34462 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,37 +0,0 @@
# distutils: language = c++
# cython: language_level = 3
from libcpp.vector cimport vector
from libcpp.string cimport string
from libcpp.map cimport map
import numpy as np
# Declarations of the C++ class bayesnet::TAN (header cpp/TAN.h) used by the wrapper below.
cdef extern from "cpp/TAN.h" namespace "bayesnet":
    cdef cppclass TAN:
        # `except +` makes Cython turn a C++ exception thrown by the constructor
        # into a Python exception.
        TAN() except +
        # Called by the wrapper as fit(columns of X, y, feature names, class name, states).
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        # Called by the wrapper with the columns of X.
        vector[int] predict(vector[vector[int]]&)
        # Declared for completeness; the wrapper in this file does not call it.
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        # Called by the wrapper as score(columns of X, y).
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()
cdef class CTAN:
    """Python-facing wrapper that owns one heap-allocated C++ ``TAN`` object."""
    cdef TAN *thisptr

    def __cinit__(self):
        # The native object lives as long as this wrapper; see __dealloc__.
        self.thisptr = new TAN()

    def __dealloc__(self):
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Forward the training data to the C++ ``fit`` and return ``self``.

        ``X`` is handed over column by column (``X[:, i]``); feature names and
        the class name are encoded to bytes; ``states`` is passed unchanged.
        """
        columns = []
        for idx in range(X.shape[1]):
            columns.append(X[:, idx])
        encoded_features = []
        for name in features:
            encoded_features.append(name.encode())
        encoded_class = className.encode()
        self.thisptr.fit(columns, y, encoded_features, encoded_class, states)
        return self

    def predict(self, X):
        """Return the C++ ``predict`` result for the columns of ``X``."""
        columns = []
        for idx in range(X.shape[1]):
            columns.append(X[:, idx])
        return self.thisptr.predict(columns)

    def score(self, X, y):
        """Return the C++ ``score`` result for the columns of ``X`` and ``y``."""
        columns = []
        for idx in range(X.shape[1]):
            columns.append(X[:, idx])
        return self.thisptr.score(columns, y)

    def graph(self):
        """Return whatever the C++ ``graph`` call produces."""
        return self.thisptr.graph()

    def __reduce__(self):
        # Pickle support: the object is rebuilt by calling CTAN() with no arguments.
        return (CTAN, ())

File diff suppressed because it is too large Load Diff

View File

@@ -2,6 +2,7 @@
# cython: language_level = 3
from libcpp.vector cimport vector
from libcpp.string cimport string
from libcpp.map cimport map
import numpy as np
cdef extern from "cpp/Network.h" namespace "bayesnet":
@@ -76,3 +77,101 @@ cdef class CMetrics:
def __reduce__(self):
    # Pickle support: the object is rebuilt by calling CMetrics() with no arguments.
    return (CMetrics, ())
# C++ API of the TAN model, exposed to Cython under the alias CTAN so the name
# TAN stays free for the Python-level wrapper class.
# Without an explicit C name Cython would emit `bayesnet::CTAN`. An explicit C
# name replaces the namespace prefix, so it has to be fully qualified.
# NOTE(review): assumes cpp/TAN.h still defines the class as bayesnet::TAN
# (the header is not part of this change) - confirm.
cdef extern from "cpp/TAN.h" namespace "bayesnet":
    cdef cppclass CTAN "bayesnet::TAN":
        # `except +` makes Cython turn a C++ exception thrown by the constructor
        # into a Python exception.
        CTAN() except +
        # Called by the wrapper as fit(columns of X, y, feature names, class name, states).
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()
# C++ API of the KDB model, exposed to Cython under the alias CKDB.
# Without an explicit C name Cython would emit `bayesnet::CKDB`. An explicit C
# name replaces the namespace prefix, so it has to be fully qualified.
# NOTE(review): assumes cpp/KDB.h still defines the class as bayesnet::KDB
# (the header is not part of this change) - confirm.
cdef extern from "cpp/KDB.h" namespace "bayesnet":
    cdef cppclass CKDB "bayesnet::KDB":
        # Takes one int; `except +` makes Cython turn a C++ exception thrown by
        # the constructor into a Python exception.
        CKDB(int) except +
        # Called by the wrapper as fit(columns of X, y, feature names, class name, states).
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()
# C++ API of the AODE model, exposed to Cython under the alias CAODE.
# Without an explicit C name Cython would emit `bayesnet::CAODE`. An explicit C
# name replaces the namespace prefix, so it has to be fully qualified.
# NOTE(review): assumes cpp/AODE.h still defines the class as bayesnet::AODE
# (the header is not part of this change) - confirm.
cdef extern from "cpp/AODE.h" namespace "bayesnet":
    cdef cppclass CAODE "bayesnet::AODE":
        # `except +` makes Cython turn a C++ exception thrown by the constructor
        # into a Python exception.
        CAODE() except +
        # Called by the wrapper as fit(columns of X, y, feature names, class name, states).
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        vector[string] graph()
cdef class TAN:
    """Python-facing wrapper that owns one heap-allocated C++ ``CTAN`` object."""
    cdef CTAN *thisptr

    def __cinit__(self):
        # The native object lives as long as this wrapper; see __dealloc__.
        self.thisptr = new CTAN()

    def __dealloc__(self):
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Forward the training data to the C++ ``fit`` and return ``self``.

        ``X`` is handed over column by column (``X[:, i]``). Feature names, the
        class name and the keys of ``states`` are encoded to bytes. The states
        of the class variable are added as the unique values found in ``y``.
        """
        columns = []
        for idx in range(X.shape[1]):
            columns.append(X[:, idx])
        encoded_features = []
        for name in features:
            encoded_features.append(name.encode())
        encoded_states = {}
        for key, value in states.items():
            encoded_states[key.encode()] = value
        encoded_class = className.encode()
        # The class variable needs its own entry next to the feature states.
        encoded_states[encoded_class] = np.unique(y).tolist()
        self.thisptr.fit(columns, y, encoded_features, encoded_class, encoded_states)
        return self

    def predict(self, X):
        """Return the C++ ``predict`` result for the columns of ``X``."""
        columns = []
        for idx in range(X.shape[1]):
            columns.append(X[:, idx])
        return self.thisptr.predict(columns)

    def score(self, X, y):
        """Return the C++ ``score`` result for the columns of ``X`` and ``y``."""
        columns = []
        for idx in range(X.shape[1]):
            columns.append(X[:, idx])
        return self.thisptr.score(columns, y)

    def graph(self):
        """Return whatever the C++ ``graph`` call produces."""
        return self.thisptr.graph()

    def __reduce__(self):
        # Pickle support: the object is rebuilt by calling TAN() with no arguments.
        return (TAN, ())
# Named KDB (not CKDB): this module already declares `cdef cppclass CKDB` for
# the C++ side, so a Python class with the same name is a redeclaration. This
# follows the TAN / CTAN pairing used by the sibling wrapper. The pointer type
# is the declared CKDB; a bare `KDB` C++ type is not declared in this module.
cdef class KDB:
    """Python-facing wrapper that owns one heap-allocated C++ ``CKDB`` object."""
    cdef CKDB *thisptr
    # Constructor argument, kept so __reduce__ can rebuild the object.
    cdef int k

    def __cinit__(self, k):
        self.k = k
        # The native object lives as long as this wrapper; see __dealloc__.
        self.thisptr = new CKDB(k)

    def __dealloc__(self):
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Forward the training data to the C++ ``fit`` and return ``self``.

        ``X`` is handed over column by column (``X[:, i]``). Feature names, the
        class name and the keys of ``states`` are encoded to bytes. The states
        of the class variable are added as the unique values found in ``y``.
        """
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        # The class variable needs its own entry next to the feature states.
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self

    def predict(self, X):
        """Return the C++ ``predict`` result for the columns of ``X``."""
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)

    def score(self, X, y):
        """Return the C++ ``score`` result for the columns of ``X`` and ``y``."""
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)

    def graph(self):
        """Return whatever the C++ ``graph`` call produces."""
        return self.thisptr.graph()

    def __reduce__(self):
        # __cinit__ requires k, so it must be part of the reconstruction
        # arguments; an empty tuple would make unpickling raise TypeError.
        return (KDB, (self.k,))
# Named AODE (not CAODE): this module already declares `cdef cppclass CAODE`
# for the C++ side, so a Python class with the same name is a redeclaration.
# This follows the TAN / CTAN pairing used by the sibling wrapper. The pointer
# type is the declared CAODE; a bare `AODE` C++ type is not declared in this module.
cdef class AODE:
    """Python-facing wrapper that owns one heap-allocated C++ ``CAODE`` object."""
    cdef CAODE *thisptr

    def __cinit__(self):
        # The native object lives as long as this wrapper; see __dealloc__.
        self.thisptr = new CAODE()

    def __dealloc__(self):
        del self.thisptr

    def fit(self, X, y, features, className, states):
        """Forward the training data to the C++ ``fit`` and return ``self``.

        ``X`` is handed over column by column (``X[:, i]``). Feature names, the
        class name and the keys of ``states`` are encoded to bytes. The states
        of the class variable are added as the unique values found in ``y``.
        """
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        states_dict = {key.encode(): value for key, value in states.items()}
        # The class variable needs its own entry next to the feature states.
        states_dict[className.encode()] = np.unique(y).tolist()
        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
        return self

    def predict(self, X):
        """Return the C++ ``predict`` result for the columns of ``X``."""
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)

    def score(self, X, y):
        """Return the C++ ``score`` result for the columns of ``X`` and ``y``."""
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)

    def graph(self):
        """Return whatever the C++ ``graph`` call produces."""
        return self.thisptr.graph()

    def __reduce__(self):
        # Pickle support: the object is rebuilt by calling AODE() with no arguments.
        return (AODE, ())

View File

@@ -8,7 +8,6 @@ namespace bayesnet {
// Builds an unfitted classifier: the `model` member is initialised from the by-value
// argument, m and n start at 0, metrics starts from a default-constructed Metrics and
// fitted is false.
BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
BaseClassifier& BaseClassifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
{
dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1);
this->features = features;
this->className = className;
@@ -116,4 +115,13 @@ namespace bayesnet {
}
model.addNode(className, states[className].size());
}
// Number of nodes of the fitted model: one per feature plus the class node.
// Yields 0 while the classifier has not been fitted.
int BaseClassifier::getNumberOfNodes()
{
    if (!fitted) {
        return 0;
    }
    // getFeatures() leaves out the class variable, so it is counted separately.
    const auto featureCount = model.getFeatures().size();
    return static_cast<int>(featureCount + 1);
}
// Number of edges reported by the model's getEdges(); 0 while the classifier
// has not been fitted.
int BaseClassifier::getNumberOfEdges()
{
    if (!fitted) {
        return 0;
    }
    const auto edgeCount = model.getEdges().size();
    return static_cast<int>(edgeCount);
}
}

View File

@@ -30,6 +30,8 @@ namespace bayesnet {
virtual ~BaseClassifier() = default;
BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
void addNodes();
// Node count of the fitted model (features plus the class node); 0 before fitting.
int getNumberOfNodes();
// Edge count of the fitted model; 0 before fitting.
int getNumberOfEdges();
Tensor predict(Tensor& X);
vector<int> predict(vector<vector<int>>& X);
float score(Tensor& X, Tensor& y);

File diff suppressed because it is too large Load Diff

View File

@@ -275,5 +275,17 @@ namespace bayesnet {
output.push_back("}\n");
return output;
}
// Collects one (node key, child name) pair for every child link stored in the network.
vector<pair<string, string>> Network::getEdges()
{
    vector<pair<string, string>> result;
    for (const auto& entry : nodes) {
        const auto& parentKey = entry.first;
        for (const auto& childNode : entry.second->getChildren()) {
            result.emplace_back(parentKey, childNode->getName());
        }
    }
    return result;
}
}

View File

@@ -36,6 +36,7 @@ namespace bayesnet {
map<string, std::unique_ptr<Node>>& getNodes();
vector<string> getFeatures();
int getStates();
// Returns one (node key, child name) pair for every child link in the network.
vector<pair<string, string>> getEdges();
int getClassNumStates();
string getClassName();
void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);

File diff suppressed because it is too large Load Diff

View File

@@ -33,23 +33,7 @@ setup(
"bayesclass/cpp/Network.cc",
"bayesclass/cpp/Node.cc",
"bayesclass/cpp/Metrics.cc",
"bayesclass/cpp/Mst.cc",
"bayesclass/cpp/BaseClassifier.cc",
"bayesclass/cpp/Ensemble.cc",
"bayesclass/cpp/TAN.cc",
"bayesclass/cpp/KDB.cc",
"bayesclass/cpp/SPODE.cc",
"bayesclass/cpp/AODE.cc",
],
include_dirs=include_paths(),
),
CppExtension(
name="bayesclass.BayesModels",
sources=[
"bayesclass/BayesModels.pyx",
"bayesclass/cpp/Network.cc",
"bayesclass/cpp/Node.cc",
"bayesclass/cpp/Metrics.cc",
"bayesclass/cpp/utils.cc",
"bayesclass/cpp/Mst.cc",
"bayesclass/cpp/BaseClassifier.cc",
"bayesclass/cpp/Ensemble.cc",