Refactor library

2025-08-20 18:15:57 +00:00 · 2023-07-19 16:16:15 +02:00
parent 168cc368ee
commit 7f5ea1ab1e
11 changed files with 125 additions and 34462 deletions
--- a/bayesclass/BayesModels.cpp
+++ b/bayesclass/BayesModels.cpp
--- a/bayesclass/BayesModels.pyx
+++ b/bayesclass/BayesModels.pyx
@@ -1,37 +0,0 @@
-# distutils: language = c++
-# cython: language_level = 3
-from libcpp.vector cimport vector
-from libcpp.string cimport string
-from libcpp.map cimport map
-import numpy as np
-
-cdef extern from "cpp/TAN.h" namespace "bayesnet":
-    cdef cppclass TAN:
-        TAN() except + 
-        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
-        vector[int] predict(vector[vector[int]]&)
-        vector[vector[double]] predict_proba(vector[vector[int]]&)
-        float score(const vector[vector[int]]&, const vector[int]&)
-        vector[string] graph()
-        
-cdef class CTAN:
-    cdef TAN *thisptr
-    def __cinit__(self):
-        self.thisptr = new TAN() 
-    def __dealloc__(self):
-        del self.thisptr
-    def fit(self, X, y, features, className, states):
-        X_ = [X[:, i] for i in range(X.shape[1])]
-        features_bytes = [x.encode() for x in features]
-        self.thisptr.fit(X_, y, features_bytes, className.encode(), states)
-        return self
-    def predict(self, X):
-        X_ = [X[:, i] for i in range(X.shape[1])]
-        return self.thisptr.predict(X_)
-    def score(self, X, y):
-        X_ = [X[:, i] for i in range(X.shape[1])]
-        return self.thisptr.score(X_, y)
-    def graph(self):
-        return self.thisptr.graph()
-    def __reduce__(self):
-        return (CTAN, ())
--- a/bayesclass/BayesNetwork.cpp
+++ b/bayesclass/BayesNetwork.cpp
--- a/bayesclass/BayesNetwork.pyx
+++ b/bayesclass/BayesNetwork.pyx
@@ -2,6 +2,7 @@
 # cython: language_level = 3
 from libcpp.vector cimport vector
 from libcpp.string cimport string
+from libcpp.map cimport map
 import numpy as np

 cdef extern from "cpp/Network.h" namespace "bayesnet":
@@ -76,3 +77,101 @@ cdef class CMetrics:
    def __reduce__(self):
        return (CMetrics, ())

+# Binding for the C++ TAN classifier declared in cpp/TAN.h. Cython knows it as
+# ``CTAN`` so that the Python wrapper class can be named ``TAN``. The explicit
+# C name is needed because, without it, Cython would emit ``bayesnet::CTAN``
+# in the generated C++ instead of the class the header provides.
+cdef extern from "cpp/TAN.h" namespace "bayesnet":
+    cdef cppclass CTAN "bayesnet::TAN":
+        CTAN() except +
+        # Called by the wrapper with: feature columns, labels, feature names,
+        # class name and the states map (name -> list of ints).
+        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
+        vector[int] predict(vector[vector[int]]&)
+        vector[vector[double]] predict_proba(vector[vector[int]]&)
+        float score(const vector[vector[int]]&, const vector[int]&)
+        vector[string] graph()
+
+# Binding for the C++ KDB classifier declared in cpp/KDB.h. It is declared
+# under the name ``KDB`` because the ``CKDB`` wrapper class in this module
+# refers to it by that name (``cdef KDB *thisptr`` / ``new KDB(k)``); naming
+# the cppclass ``CKDB`` as well would clash with the wrapper class itself.
+cdef extern from "cpp/KDB.h" namespace "bayesnet":
+    cdef cppclass KDB:
+        # The int is the ``k`` value the wrapper receives in __cinit__.
+        KDB(int) except +
+        # Called by the wrapper with: feature columns, labels, feature names,
+        # class name and the states map (name -> list of ints).
+        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
+        vector[int] predict(vector[vector[int]]&)
+        vector[vector[double]] predict_proba(vector[vector[int]]&)
+        float score(const vector[vector[int]]&, const vector[int]&)
+        vector[string] graph()
+
+# Binding for the C++ AODE classifier declared in cpp/AODE.h. It is declared
+# under the name ``AODE`` because the ``CAODE`` wrapper class in this module
+# refers to it by that name (``cdef AODE *thisptr`` / ``new AODE()``); naming
+# the cppclass ``CAODE`` as well would clash with the wrapper class itself.
+cdef extern from "cpp/AODE.h" namespace "bayesnet":
+    cdef cppclass AODE:
+        AODE() except +
+        # Called by the wrapper with: feature columns, labels, feature names,
+        # class name and the states map (name -> list of ints).
+        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string, map[string, vector[int]]&)
+        vector[int] predict(vector[vector[int]]&)
+        vector[vector[double]] predict_proba(vector[vector[int]]&)
+        float score(const vector[vector[int]]&, const vector[int]&)
+        vector[string] graph()
+        
+        
+cdef class TAN:
+    """Python-facing wrapper that owns one C++ ``CTAN`` object."""
+    cdef CTAN *thisptr
+    def __cinit__(self):
+        # Allocated here, released in __dealloc__.
+        self.thisptr = new CTAN()
+    def __dealloc__(self):
+        del self.thisptr
+    def fit(self, X, y, features, className, states):
+        """Fit the C++ model and return ``self``.
+
+        ``X`` is split column by column, feature names and state keys are
+        encoded to bytes, and the states of the class variable are taken from
+        the unique values found in ``y``.
+        """
+        columns = []
+        for col in range(X.shape[1]):
+            columns.append(X[:, col])
+        encoded_features = [name.encode() for name in features]
+        encoded_states = {}
+        for name, values in states.items():
+            encoded_states[name.encode()] = values
+        # The class variable gets its own entry in the states map.
+        class_values = np.unique(y).tolist()
+        class_key = className.encode()
+        encoded_states[class_key] = class_values
+        self.thisptr.fit(columns, y, encoded_features, class_key, encoded_states)
+        return self
+    def predict(self, X):
+        """Return the C++ model's predictions for the columns of ``X``."""
+        columns = []
+        for col in range(X.shape[1]):
+            columns.append(X[:, col])
+        return self.thisptr.predict(columns)
+    def score(self, X, y):
+        """Return the value of the C++ ``score`` for ``X`` and ``y``."""
+        columns = []
+        for col in range(X.shape[1]):
+            columns.append(X[:, col])
+        return self.thisptr.score(columns, y)
+    def graph(self):
+        """Return the strings produced by the C++ ``graph`` method."""
+        return self.thisptr.graph()
+    def __reduce__(self):
+        # Pickling recreates an empty instance; no fitted state is carried over.
+        return TAN, ()
+
+cdef class CKDB:
+    """Python-facing wrapper that owns one C++ ``KDB`` object built with ``k``."""
+    cdef KDB *thisptr
+    # Kept so that __reduce__ can rebuild the object with the same argument.
+    cdef int k
+    def __cinit__(self, k):
+        self.k = k
+        # Allocated here, released in __dealloc__.
+        self.thisptr = new KDB(self.k)
+    def __dealloc__(self):
+        del self.thisptr
+    def fit(self, X, y, features, className, states):
+        """Fit the C++ model and return ``self``.
+
+        ``X`` is split column by column, feature names and state keys are
+        encoded to bytes, and the states of the class variable are taken from
+        the unique values found in ``y``.
+        """
+        X_ = [X[:, i] for i in range(X.shape[1])]
+        features_bytes = [x.encode() for x in features]
+        states_dict = {key.encode(): value for key, value in states.items()}
+        states_dict[className.encode()] = np.unique(y).tolist()
+        self.thisptr.fit(X_, y, features_bytes, className.encode(), states_dict)
+        return self
+    def predict(self, X):
+        """Return the C++ model's predictions for the columns of ``X``."""
+        X_ = [X[:, i] for i in range(X.shape[1])]
+        return self.thisptr.predict(X_)
+    def score(self, X, y):
+        """Return the value of the C++ ``score`` for ``X`` and ``y``."""
+        X_ = [X[:, i] for i in range(X.shape[1])]
+        return self.thisptr.score(X_, y)
+    def graph(self):
+        """Return the strings produced by the C++ ``graph`` method."""
+        return self.thisptr.graph()
+    def __reduce__(self):
+        # __cinit__ requires ``k``; an empty argument tuple would make
+        # unpickling/copying fail with a TypeError. As with the other
+        # wrappers, no fitted state is carried over.
+        return (CKDB, (self.k,))
+
+cdef class CAODE:
+    """Python-facing wrapper that owns one C++ ``AODE`` object."""
+    cdef AODE *thisptr
+    def __cinit__(self):
+        # Allocated here, released in __dealloc__.
+        self.thisptr = new AODE()
+    def __dealloc__(self):
+        del self.thisptr
+    def fit(self, X, y, features, className, states):
+        """Fit the C++ model and return ``self``.
+
+        ``X`` is split column by column, feature names and state keys are
+        encoded to bytes, and the states of the class variable are taken from
+        the unique values found in ``y``.
+        """
+        n_columns = X.shape[1]
+        by_column = [X[:, j] for j in range(n_columns)]
+        feature_names = [feature.encode() for feature in features]
+        state_table = dict((label.encode(), levels) for label, levels in states.items())
+        # The class variable gets its own entry in the states map.
+        class_levels = np.unique(y).tolist()
+        target = className.encode()
+        state_table[target] = class_levels
+        self.thisptr.fit(by_column, y, feature_names, target, state_table)
+        return self
+    def predict(self, X):
+        """Return the C++ model's predictions for the columns of ``X``."""
+        n_columns = X.shape[1]
+        by_column = [X[:, j] for j in range(n_columns)]
+        return self.thisptr.predict(by_column)
+    def score(self, X, y):
+        """Return the value of the C++ ``score`` for ``X`` and ``y``."""
+        n_columns = X.shape[1]
+        by_column = [X[:, j] for j in range(n_columns)]
+        return self.thisptr.score(by_column, y)
+    def graph(self):
+        """Return the strings produced by the C++ ``graph`` method."""
+        return self.thisptr.graph()
+    def __reduce__(self):
+        # Pickling recreates an empty instance; no fitted state is carried over.
+        return CAODE, ()
--- a/bayesclass/cpp/BaseClassifier.cc
+++ b/bayesclass/cpp/BaseClassifier.cc
@@ -8,7 +8,6 @@ namespace bayesnet {
    BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
    BaseClassifier& BaseClassifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
    {
-
        dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1);
        this->features = features;
        this->className = className;
@@ -116,4 +115,13 @@ namespace bayesnet {
        }
        model.addNode(className, states[className].size());
    }
+    // Number of nodes in the model, or 0 while the classifier is not fitted.
+    int BaseClassifier::getNumberOfNodes()
+    {
+        if (!fitted) {
+            return 0;
+        }
+        // The feature list leaves out the class, so it is counted separately.
+        return static_cast<int>(model.getFeatures().size() + 1);
+    }
+    // Number of edges in the model, or 0 while the classifier is not fitted.
+    int BaseClassifier::getNumberOfEdges()
+    {
+        if (!fitted) {
+            return 0;
+        }
+        return static_cast<int>(model.getEdges().size());
+    }
 }
--- a/bayesclass/cpp/BaseClassifier.h
+++ b/bayesclass/cpp/BaseClassifier.h
@@ -30,6 +30,8 @@ namespace bayesnet {
        virtual ~BaseClassifier() = default;
        BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
        void addNodes();
+        // Number of model features plus one for the class node; 0 while not fitted.
+        int getNumberOfNodes();
+        // Number of edges reported by the model; 0 while not fitted.
+        int getNumberOfEdges();
        Tensor predict(Tensor& X);
        vector<int> predict(vector<vector<int>>& X);
        float score(Tensor& X, Tensor& y);
--- a/bayesclass/cpp/BayesNetwork.cpp
+++ b/bayesclass/cpp/BayesNetwork.cpp
--- a/bayesclass/cpp/Network.cc
+++ b/bayesclass/cpp/Network.cc
@@ -275,5 +275,17 @@ namespace bayesnet {
        output.push_back("}\n");
        return output;
    }
+    // Lists every parent -> child link of the network as a pair made of the
+    // parent's key in `nodes` and the child's name.
+    vector<pair<string, string>> Network::getEdges()
+    {
+        vector<pair<string, string>> result;
+        for (const auto& entry : nodes) {
+            const auto& parentKey = entry.first;
+            for (const auto& childNode : entry.second->getChildren()) {
+                result.emplace_back(parentKey, childNode->getName());
+            }
+        }
+        return result;
+    }

 }
--- a/bayesclass/cpp/Network.h
+++ b/bayesclass/cpp/Network.h
@@ -36,6 +36,7 @@ namespace bayesnet {
        map<string, std::unique_ptr<Node>>& getNodes();
        vector<string> getFeatures();
        int getStates();
+        // One (node key, child name) pair for each parent -> child link in the network.
+        vector<pair<string, string>> getEdges();
        int getClassNumStates();
        string getClassName();
        void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
--- a/bayesclass/cpp/cSelectFeatures.cpp
+++ b/bayesclass/cpp/cSelectFeatures.cpp
--- a/setup.py
+++ b/setup.py
@@ -33,23 +33,7 @@ setup(
                "bayesclass/cpp/Network.cc",
                "bayesclass/cpp/Node.cc",
                "bayesclass/cpp/Metrics.cc",
-                "bayesclass/cpp/Mst.cc",
-                "bayesclass/cpp/BaseClassifier.cc",
-                "bayesclass/cpp/Ensemble.cc",
-                "bayesclass/cpp/TAN.cc",
-                "bayesclass/cpp/KDB.cc",
-                "bayesclass/cpp/SPODE.cc",
-                "bayesclass/cpp/AODE.cc",
-            ],
-            include_dirs=include_paths(),
-        ),
-        CppExtension(
-            name="bayesclass.BayesModels",
-            sources=[
-                "bayesclass/BayesModels.pyx",
-                "bayesclass/cpp/Network.cc",
-                "bayesclass/cpp/Node.cc",
-                "bayesclass/cpp/Metrics.cc",
+                "bayesclass/cpp/utils.cc",
                "bayesclass/cpp/Mst.cc",
                "bayesclass/cpp/BaseClassifier.cc",
                "bayesclass/cpp/Ensemble.cc",