Fix KDB algorithm argsort

This commit is contained in:
2023-07-13 16:59:37 +02:00
parent 64f1500176
commit 99083ceede
11 changed files with 603 additions and 257 deletions


@@ -0,0 +1,93 @@
#include "BaseClassifier.h"
namespace bayesnet {
using namespace std;
using namespace torch;
BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0) {}
BaseClassifier& BaseClassifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
{
// append y as the last column of the dataset (one row per sample)
dataset = torch::cat({ X, y.view({ y.size(0), 1 }) }, 1);
this->features = features;
this->className = className;
this->states = states;
checkFitParameters();
train();
return *this;
}
BaseClassifier& BaseClassifier::fit(Tensor& X, Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = X;
this->y = y;
return build(features, className, states);
}
BaseClassifier& BaseClassifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
for (int i = 0; i < X.size(); ++i) {
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
}
this->y = torch::tensor(y, kInt64);
return build(features, className, states);
}
void BaseClassifier::checkFitParameters()
{
auto sizes = X.sizes();
m = sizes[0];
n = sizes[1];
if (m != y.size(0)) {
throw invalid_argument("X and y must have the same number of samples");
}
if (n != features.size()) {
throw invalid_argument("X and features must have the same number of features");
}
if (states.find(className) == states.end()) {
throw invalid_argument("className not found in states");
}
for (const auto& feature : features) {
if (states.find(feature) == states.end()) {
throw invalid_argument("feature [" + feature + "] not found in states");
}
}
}
vector<vector<int>> tensorToVector(const torch::Tensor& tensor)
{
// convert an m x n tensor into an n x m vector of columns
// (the accessor below assumes the tensor holds 32-bit ints)
vector<vector<int>> result;
auto tensor_accessor = tensor.accessor<int, 2>();
// Iterate over columns and rows of the tensor
for (int j = 0; j < tensor.size(1); ++j) {
vector<int> column;
for (int i = 0; i < tensor.size(0); ++i) {
column.push_back(tensor_accessor[i][j]);
}
result.push_back(column);
}
return result;
}
Tensor BaseClassifier::predict(Tensor& X)
{
auto m_ = X.size(0);
auto n_ = X.size(1);
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
// copy column i, converting to 32-bit ints so data_ptr<int>() is valid even for kInt64 input
auto temp = X.index({ "...", i }).to(torch::kInt32).contiguous();
Xd[i] = vector<int>(temp.data_ptr<int>(), temp.data_ptr<int>() + m_);
}
auto yp = model.predict(Xd);
auto ypred = torch::tensor(yp, torch::kInt64);
return ypred;
}
float BaseClassifier::score(Tensor& X, Tensor& y)
{
Tensor y_pred = predict(X);
return (y_pred == y).sum().item<float>() / y.size(0);
}
void BaseClassifier::show()
{
model.show();
}
}
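
Both fit() overloads above share one layout convention: samples are rows and features are columns, so each X[i] in the vector overload becomes column i of the tensor, with tensorToVector as its inverse. A minimal standalone sketch of that round trip (hypothetical data; kInt32 chosen to match the accessor above):

#include <torch/torch.h>
#include <cassert>
#include <vector>

int main()
{
    // two features, three samples, stored the same way fit() stores them
    std::vector<std::vector<int>> X = { { 0, 1, 2 }, { 3, 4, 5 } };
    auto t = torch::zeros({ 3, 2 }, torch::kInt32);
    for (int i = 0; i < 2; ++i) {
        t.index_put_({ "...", i }, torch::tensor(X[i], torch::kInt32));
    }
    // feature i of sample j lives at t[j][i]
    assert(t.index({ 2, 1 }).item<int>() == 5); // sample 2, feature 1
    return 0;
}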


@@ -0,0 +1,40 @@
#ifndef CLASSIFIERS_H
#define CLASSIFIERS_H
#include <torch/torch.h>
#include "Network.h"
using namespace std;
using namespace torch;
namespace bayesnet {
class BaseClassifier {
private:
BaseClassifier& build(vector<string>& features, string className, map<string, vector<int>>& states);
protected:
Network model;
int m, n; // m: number of samples, n: number of features
Tensor X;
Tensor y;
Tensor dataset;
vector<string> features;
string className;
map<string, vector<int>> states;
void checkFitParameters();
virtual void train() = 0;
public:
BaseClassifier(Network model);
Tensor& getX();
vector<string>& getFeatures();
string& getClassName();
BaseClassifier& fit(Tensor& X, Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states);
BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
Tensor predict(Tensor& X);
float score(Tensor& X, Tensor& y);
void show();
};
}
#endif

File diff suppressed because it is too large.


@@ -18,6 +18,7 @@ cdef extern from "Network.h" namespace "bayesnet":
int getStates()
string getClassName()
string version()
void show()
cdef class BayesNetwork:
cdef Network *thisptr
@@ -52,6 +53,8 @@ cdef class BayesNetwork:
return self.thisptr.getClassName().decode()
def getClassNumStates(self):
return self.thisptr.getClassNumStates()
def show(self):
self.thisptr.show()
def __reduce__(self):
return (BayesNetwork, ())

bayesclass/KDB.cc Normal file

@@ -0,0 +1,110 @@
#include "KDB.h"
#include "Metrics.hpp"
namespace bayesnet {
using namespace std;
using namespace torch;
// return the indices that sort nums in descending order
vector<int> argsort(vector<float>& nums)
{
int n = nums.size();
vector<int> indices(n);
iota(indices.begin(), indices.end(), 0);
sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
return indices;
}
KDB::KDB(int k, float theta) : BaseClassifier(Network()), k(k), theta(theta) {}
void KDB::train()
{
/*
1. For each feature Xi, compute mutual information, I(Xi;C),
where C is the class.
2. Compute class conditional mutual information I(Xi;Xj|C), for each
pair of features Xi and Xj, where i != j.
3. Let the used variable list, S, be empty.
4. Let the DAG network being constructed, BN, begin with a single
class node, C.
5. Repeat until S includes all domain features
5.1. Select feature Xmax which is not in S and has the largest value
I(Xmax;C).
5.2. Add a node to BN representing Xmax.
5.3. Add an arc from C to Xmax in BN.
5.4. Add m = min(|S|, k) arcs from m distinct features Xj in S with
the highest value for I(Xmax;Xj|C).
5.5. Add Xmax to S.
Compute the conditional probability inferred by the structure of BN by
using counts from DB, and output BN.
*/
// 1. For each feature Xi, compute mutual information, I(Xi;C),
// where C is the class.
cout << "Computing mutual information between features and class" << endl;
auto n_classes = states[className].size();
auto metrics = Metrics(dataset, features, className, n_classes);
vector <float> mi;
for (auto i = 0; i < features.size(); i++) {
Tensor firstFeature = X.index({ "...", i });
mi.push_back(metrics.mutualInformation(firstFeature, y));
cout << "Mutual information between " << features[i] << " and " << className << " is " << mi[i] << endl;
}
// 2. Compute class conditional mutual information I(Xi;Xj|C) for each pair of features
auto conditionalEdgeWeights = metrics.conditionalEdge();
cout << "Conditional edge weights" << endl;
cout << conditionalEdgeWeights << endl;
// 3. Let the used variable list, S, be empty.
vector<int> S;
// 4. Let the DAG network being constructed, BN, begin with a single
// class node, C.
model.addNode(className, states[className].size());
cout << "Adding node " << className << " to the network" << endl;
// 5. Repeat until S includes all domain features
// 5.1. Select feature Xmax which is not in S and has the largest value
// I(Xmax;C).
auto order = argsort(mi);
for (auto idx : order) {
cout << idx << " " << mi[idx] << endl;
// 5.2. Add a node to BN representing Xmax.
model.addNode(features[idx], states[features[idx]].size());
// 5.3. Add an arc from C to Xmax in BN.
model.addEdge(className, features[idx]);
// 5.4. Add m = min(lSl,/c) arcs from m distinct features Xj in S with
// the highest value for I(Xmax;X,jC).
add_m_edges(idx, S, conditionalEdgeWeights);
// 5.5. Add Xmax to S.
S.push_back(idx);
}
}
void KDB::add_m_edges(int idx, vector<int>& S, Tensor& weights)
{
auto n_edges = min(k, static_cast<int>(S.size()));
auto cond_w = clone(weights);
cout << "Conditional edge weights cloned for idx " << idx << endl;
cout << cond_w << endl;
bool exit_cond = n_edges == 0; // nothing to add when k == 0 or S is empty
int num = 0;
while (!exit_cond) {
auto max_minfo = argmax(cond_w.index({ idx, "..." })).item<int>();
auto belongs = find(S.begin(), S.end(), max_minfo) != S.end();
if (belongs && cond_w.index({ idx, max_minfo }).item<float>() > theta) {
try {
model.addEdge(features[max_minfo], features[idx]);
num++;
}
catch (const invalid_argument& e) {
// Loops are not allowed
}
}
cond_w.index_put_({ idx, max_minfo }, -1);
cout << "Conditional edge weights cloned for idx " << idx << " After -1" << endl;
cout << cond_w << endl;
cout << "cond_w.index({ idx, '...'})" << endl;
cout << cond_w.index({ idx, "..." }) << endl;
auto candidates_mask = cond_w.index({ idx, "..." }).gt(theta);
auto candidates = candidates_mask.nonzero();
cout << "Candidates mask" << endl;
cout << candidates_mask << endl;
cout << "Candidates: " << endl;
cout << candidates << endl;
cout << "Candidates size: " << candidates.size(0) << endl;
exit_cond = num == n_edges || candidates.size(0) == 0;
}
}
}
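
As a quick check of the argsort fix, the helper can be exercised on its own; with a descending sort the most informative feature enters the network first, mirroring the np.argsort(-mutual) change in the Python implementation further below (the values here are hypothetical):

#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    // hypothetical I(Xi;C) values for three features
    std::vector<float> mi = { 0.10f, 0.90f, 0.35f };
    std::vector<int> indices(mi.size());
    std::iota(indices.begin(), indices.end(), 0);
    // same comparator as KDB's argsort: descending by mutual information
    std::sort(indices.begin(), indices.end(), [&mi](int i, int j) { return mi[i] > mi[j]; });
    for (int idx : indices) std::cout << idx << " "; // prints: 1 2 0
    std::cout << std::endl;
    return 0;
}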

bayesclass/KDB.h Normal file

@@ -0,0 +1,18 @@
#ifndef KDB_H
#define KDB_H
#include "BaseClassifier.h"
namespace bayesnet {
using namespace std;
using namespace torch;
class KDB : public BaseClassifier {
private:
int k;
float theta;
void add_m_edges(int idx, vector<int>& S, Tensor& weights);
protected:
void train();
public:
KDB(int k, float theta=0.03);
};
}
#endif
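
A minimal usage sketch for the class above, assuming already-discretized integer data (all names and values are hypothetical):

#include "KDB.h"
#include <map>
#include <string>
#include <vector>

int main()
{
    // two binary features, four samples
    std::vector<std::vector<int>> X = { { 0, 1, 1, 0 }, { 1, 1, 0, 0 } };
    std::vector<int> y = { 0, 1, 1, 0 };
    std::vector<std::string> features = { "f1", "f2" };
    std::string className = "class";
    std::map<std::string, std::vector<int>> states = {
        { "f1", { 0, 1 } }, { "f2", { 0, 1 } }, { "class", { 0, 1 } }
    };
    auto clf = bayesnet::KDB(2); // k = 2, theta defaults to 0.03
    clf.fit(X, y, features, className, states);
    clf.show(); // print the learned structure
    return 0;
}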


@@ -30,7 +30,7 @@ namespace bayesnet {
}
return result;
}
-vector<float> Metrics::conditionalEdgeWeights()
+torch::Tensor Metrics::conditionalEdge()
{
auto result = vector<double>();
auto source = vector<string>(features);
@@ -65,6 +65,11 @@ namespace bayesnet {
matrix[x][y] = result[i];
matrix[y][x] = result[i];
}
return matrix;
}
vector<float> Metrics::conditionalEdgeWeights()
{
auto matrix = conditionalEdge();
std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
return v;
}
@@ -89,7 +94,7 @@ namespace bayesnet {
totalWeight += 1;
}
if (totalWeight == 0)
-return 0;
+throw invalid_argument("Total weight should not be zero");
double entropyValue = 0;
for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
double p_f = featureCounts[value].item<double>() / totalWeight;
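
For context, the quantities KDB consumes come from these helpers: mutualInformation() is presumably the standard identity

I(X;C) = H(X) - H(X|C)

built from entropy() and conditionalEntropy(), and conditionalEdge() assembles the symmetric matrix of class-conditional mutual information I(Xi;Xj|C) over all feature pairs, which train() uses as edge weights.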


@@ -14,11 +14,12 @@ namespace bayesnet {
vector<pair<string, string>> doCombinations(const vector<string>&);
double entropy(torch::Tensor&);
double conditionalEntropy(torch::Tensor&, torch::Tensor&);
-double mutualInformation(torch::Tensor&, torch::Tensor&);
public:
+double mutualInformation(torch::Tensor&, torch::Tensor&);
Metrics(torch::Tensor&, vector<string>&, string&, int);
Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int);
vector<float> conditionalEdgeWeights();
torch::Tensor conditionalEdge();
};
}
#endif


@@ -245,5 +245,16 @@ namespace bayesnet {
}
return result;
}
void Network::show()
{
// Draw the network
for (const auto& node : nodes) {
cout << node.first << " -> ";
for (auto child : node.second->getChildren()) {
cout << child->getName() << ", ";
}
cout << endl;
}
}
}
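
Given a structure like the hypothetical two-feature KDB example above, show() would print one adjacency line per node, children comma-separated with a trailing separator, roughly:

class -> f1, f2,
f1 -> f2,
f2 ->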


@@ -4,7 +4,6 @@
#include <map>
#include <vector>
namespace bayesnet {
class Network {
private:
@@ -45,6 +44,7 @@ namespace bayesnet {
torch::Tensor conditionalEdgeWeight();
vector<vector<double>> predict_proba(const vector<vector<int>>&);
double score(const vector<vector<int>>&, const vector<int>&);
void show();
inline string version() { return "0.1.0"; }
};
}


@@ -423,7 +423,7 @@ class KDB(BayesBase):
self.model_.addNode(self.class_name_, self.n_classes_)
# 5. Repeat until S includes all domain features
# 5.1 Select feature Xmax which is not in S and has the largest value
-for idx in np.argsort(mutual):
+for idx in np.argsort(-mutual):
# 5.2 Add a node to BN representing Xmax.
feature = self.feature_names_in_[idx]
self.model_.addNode(feature, num_states[feature])