mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-18 17:15:53 +00:00
Compare commits
60 Commits
SHA1 (author and date columns were not captured in this mirror):

d1cafc230b 99083ceede 64f1500176 aef22306ef 2ff38f73e7 1af3edd050
8b6624e08a 36cc875615 260997c872 8a9c86a22d 4bad5ccfee 5866e19fae
61e4c176eb ea473fc604 9d7e787f6c d7425e5af0 30cc744033 0094d500d4
99321043ec fbaa5eb7d3 0b27d9d9b0 212f7e5584 a797381c00 3812d271e5
923a06b3be c906d6a361 f0f7c43944 f9b35f61f0 74cd8a6aa2 9843f5f8db
c6390d9da9 c9afafbf60 3af05c9511 80b1ab3699 5a772b0bca ea251aca05
7b66097728 ea8c5b805e 2ffc06b232 a5244f1c7f 42ac57eb79 63a2feef3a
3e049ac89d 2a6547c71d de45a94c9b 9019b878f0 bba9255605 41ca6fad5e
c88591dd64 8089e4fd57 6f9488f281 e837c6cef7 a4edc74e8d 4d416959ad
bdd3f483d9 8fd796155d d08aea4681 dd2e0a3b7e 65d41488cb e7300366ca
CMakeLists.txt (new file, 7 lines)
@@ -0,0 +1,7 @@
cmake_minimum_required(VERSION 3.20)
project(feature)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_BUILD_TYPE Debug)

add_executable(feature bayesclass/cpp/FeatureSelect.cpp)
@@ -1 +1,5 @@
 include README.md LICENSE
+include bayesclass/FeatureSelect.h
+include bayesclass/Node.h
+include bayesclass/Network.h
+include bayesclass/Metrics.hpp
Makefile (10 changes)
@@ -16,6 +16,10 @@ lint: ## Lint and static-check
 	flake8 bayesclass
 	mypy bayesclass
+
+feature: ## compile FeatureSelect
+	cmake -B build feature
+
 push: ## Push code with tags
 	git push && git push --tags

@@ -37,6 +41,12 @@ doc-clean: ## Update documentation
 audit: ## Audit pip
 	pip-audit
+
+version:
+	@echo "Current Python version .....: $(shell python --version)"
+	@echo "Current Bayesclass version .: $(shell python -c "from bayesclass import _version; print(_version.__version__)")"
+	@echo "Installed Bayesclass version: $(shell pip show bayesclass | grep Version | cut -d' ' -f2)"
+	@echo "Installed pgmpy version ....: $(shell pip show pgmpy | grep Version | cut -d' ' -f2)"
+
 help: ## Show help message
 	@IFS=$$'\n' ; \
 	help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
bayesclass/BaseClassifier.cc (new file, 93 lines)
@@ -0,0 +1,93 @@
#include "BaseClassifier.h"

namespace bayesnet {
    using namespace std;
    using namespace torch;

    BaseClassifier::BaseClassifier(Network model) : model(model), m(0), n(0) {}
    BaseClassifier& BaseClassifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
    {
        dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1);
        this->features = features;
        this->className = className;
        this->states = states;
        checkFitParameters();
        train();
        return *this;
    }
    BaseClassifier& BaseClassifier::fit(Tensor& X, Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
    {
        this->X = X;
        this->y = y;
        return build(features, className, states);
    }
    BaseClassifier& BaseClassifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
    {
        this->X = torch::zeros({ static_cast<int64_t>(X[0].size()), static_cast<int64_t>(X.size()) }, kInt64);
        for (int i = 0; i < X.size(); ++i) {
            this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt64));
        }
        this->y = torch::tensor(y, kInt64);
        return build(features, className, states);
    }
    void BaseClassifier::checkFitParameters()
    {
        auto sizes = X.sizes();
        m = sizes[0];
        n = sizes[1];
        if (m != y.size(0)) {
            throw invalid_argument("X and y must have the same number of samples");
        }
        if (n != features.size()) {
            throw invalid_argument("X and features must have the same number of features");
        }
        if (states.find(className) == states.end()) {
            throw invalid_argument("className not found in states");
        }
        for (auto feature : features) {
            if (states.find(feature) == states.end()) {
                throw invalid_argument("feature [" + feature + "] not found in states");
            }
        }
    }
    vector<vector<int>> tensorToVector(const torch::Tensor& tensor)
    {
        // convert mxn tensor to nxm vector
        vector<vector<int>> result;
        auto tensor_accessor = tensor.accessor<int, 2>();

        // Iterate over columns and rows of the tensor
        for (int j = 0; j < tensor.size(1); ++j) {
            vector<int> column;
            for (int i = 0; i < tensor.size(0); ++i) {
                column.push_back(tensor_accessor[i][j]);
            }
            result.push_back(column);
        }

        return result;
    }
    Tensor BaseClassifier::predict(Tensor& X)
    {
        auto m_ = X.size(0);
        auto n_ = X.size(1);
        vector<vector<int>> Xd(n_, vector<int>(m_, 0));
        for (auto i = 0; i < n_; i++) {
            auto temp = X.index({ "...", i });
            Xd[i] = vector<int>(temp.data_ptr<int>(), temp.data_ptr<int>() + m_);
        }
        auto yp = model.predict(Xd);
        auto ypred = torch::tensor(yp, torch::kInt64);
        return ypred;
    }
    float BaseClassifier::score(Tensor& X, Tensor& y)
    {
        Tensor y_pred = predict(X);
        return (y_pred == y).sum().item<float>() / y.size(0);
    }
    vector<string> BaseClassifier::show()
    {
        return model.show();
    }
}
bayesclass/BaseClassifier.h (new file, 39 lines)
@@ -0,0 +1,39 @@
#ifndef CLASSIFIERS_H
#define CLASSIFIERS_H
#include <torch/torch.h>
#include "Network.h"
using namespace std;
using namespace torch;

namespace bayesnet {
    class BaseClassifier {
    private:
        BaseClassifier& build(vector<string>& features, string className, map<string, vector<int>>& states);
    protected:
        Network model;
        int m, n; // m: number of samples, n: number of features
        Tensor X;
        Tensor y;
        Tensor dataset;
        vector<string> features;
        string className;
        map<string, vector<int>> states;
        void checkFitParameters();
        virtual void train() = 0;
    public:
        BaseClassifier(Network model);
        Tensor& getX();
        vector<string>& getFeatures();
        string& getClassName();
        BaseClassifier& fit(Tensor& X, Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states);
        BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states);
        Tensor predict(Tensor& X);
        float score(Tensor& X, Tensor& y);
        vector<string> show();
    };
}
#endif
bayesclass/BayesNetwork.cpp (new file, 7663 lines)
File diff suppressed because it is too large.
bayesclass/BayesNetwork.pyx (new file, 78 lines)
@@ -0,0 +1,78 @@
# distutils: language = c++
# cython: language_level = 3
from libcpp.vector cimport vector
from libcpp.string cimport string
import numpy as np

cdef extern from "Network.h" namespace "bayesnet":
    cdef cppclass Network:
        Network(float, float) except +
        void fit(vector[vector[int]]&, vector[int]&, vector[string]&, string)
        vector[int] predict(vector[vector[int]]&)
        vector[vector[double]] predict_proba(vector[vector[int]]&)
        float score(const vector[vector[int]]&, const vector[int]&)
        void addNode(string, int)
        void addEdge(string, string) except +
        vector[string] getFeatures()
        int getClassNumStates()
        int getStates()
        string getClassName()
        string version()
        void show()

cdef class BayesNetwork:
    cdef Network *thisptr
    def __cinit__(self, maxThreads=0.8, laplaceSmooth=1.0):
        self.thisptr = new Network(maxThreads, laplaceSmooth)
    def __dealloc__(self):
        del self.thisptr
    def fit(self, X, y, features, className):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        self.thisptr.fit(X_, y, features_bytes, className.encode())
        return self
    def predict(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict(X_)
    def predict_proba(self, X):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.predict_proba(X_)
    def score(self, X, y):
        X_ = [X[:, i] for i in range(X.shape[1])]
        return self.thisptr.score(X_, y)
    def addNode(self, name, states):
        self.thisptr.addNode(str.encode(name), states)
    def addEdge(self, source, destination):
        self.thisptr.addEdge(str.encode(source), str.encode(destination))
    def getFeatures(self):
        res = self.thisptr.getFeatures()
        return [x.decode() for x in res]
    def getStates(self):
        return self.thisptr.getStates()
    def getClassName(self):
        return self.thisptr.getClassName().decode()
    def getClassNumStates(self):
        return self.thisptr.getClassNumStates()
    def show(self):
        return self.thisptr.show()
    def __reduce__(self):
        return (BayesNetwork, ())

cdef extern from "Metrics.hpp" namespace "bayesnet":
    cdef cppclass Metrics:
        Metrics(vector[vector[int]], vector[int], vector[string]&, string&, int) except +
        vector[float] conditionalEdgeWeights()

cdef class CMetrics:
    cdef Metrics *thisptr
    def __cinit__(self, X, y, features, className, classStates):
        X_ = [X[:, i] for i in range(X.shape[1])]
        features_bytes = [x.encode() for x in features]
        self.thisptr = new Metrics(X_, y, features_bytes, className.encode(), classStates)
    def __dealloc__(self):
        del self.thisptr
    def conditionalEdgeWeights(self, n_vars):
        return np.reshape(self.thisptr.conditionalEdgeWeights(), (n_vars, n_vars))
    def __reduce__(self):
        return (CMetrics, ())
bayesclass/FeatureSelect.cpp (new file, 118 lines)
@@ -0,0 +1,118 @@
#include "FeatureSelect.h"
// needed for sort/unique, log/log2, invalid_argument/logic_error and unordered_map
#include <algorithm>
#include <cmath>
#include <stdexcept>
#include <unordered_map>
namespace features {
    SelectKBestWeighted::SelectKBestWeighted(samples_t& samples, labels_t& labels, weights_t& weights, int k, bool nat)
        : samples(samples), labels(labels), weights(weights), k(k), nat(nat)
    {
        if (samples.size() == 0 || samples[0].size() == 0)
            throw invalid_argument("features must be a non-empty matrix");
        if (samples.size() != labels.size())
            throw invalid_argument("number of samples and labels must be equal");
        if (samples.size() != weights.size())
            throw invalid_argument("number of samples and weights must be equal");
        if (k < 1 || k > static_cast<int>(samples[0].size()))
            throw invalid_argument("k must be between 1 and number of features");
        numFeatures = 0;
        numClasses = 0;
        numSamples = 0;
        fitted = false;
    }
    void SelectKBestWeighted::fit()
    {
        auto labelsCopy = labels;
        numFeatures = samples[0].size();
        numSamples = samples.size();
        // compute number of classes
        sort(labelsCopy.begin(), labelsCopy.end());
        auto last = unique(labelsCopy.begin(), labelsCopy.end());
        labelsCopy.erase(last, labelsCopy.end());
        numClasses = labelsCopy.size();
        // compute scores
        scores.reserve(numFeatures);
        for (int i = 0; i < numFeatures; ++i) {
            scores.push_back(MutualInformation(i));
            features.push_back(i);
        }
        // sort & reduce scores and features
        sort(features.begin(), features.end(), [&](int i, int j)
            { return scores[i] > scores[j]; });
        sort(scores.begin(), scores.end(), greater<precision_t>());
        features.resize(k);
        scores.resize(k);
        fitted = true;
    }
    precision_t SelectKBestWeighted::entropyLabel()
    {
        return entropy(labels);
    }
    precision_t SelectKBestWeighted::entropy(const sample_t& data)
    {
        precision_t ventropy = 0, totalWeight = 0;
        score_t counts(numClasses + 1, 0);
        for (auto i = 0; i < static_cast<int>(data.size()); ++i) {
            counts[data[i]] += weights[i];
            totalWeight += weights[i];
        }
        for (auto count : counts) {
            precision_t p = count / totalWeight;
            if (p > 0) {
                if (nat) {
                    ventropy -= p * log(p);
                } else {
                    ventropy -= p * log2(p);
                }
            }
        }
        return ventropy;
    }
    // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
    precision_t SelectKBestWeighted::conditionalEntropy(const int feature)
    {
        unordered_map<value_t, precision_t> featureCounts;
        unordered_map<value_t, unordered_map<value_t, precision_t>> jointCounts;
        featureCounts.clear();
        jointCounts.clear();
        precision_t totalWeight = 0;
        for (auto i = 0; i < numSamples; i++) {
            featureCounts[samples[i][feature]] += weights[i];
            jointCounts[samples[i][feature]][labels[i]] += weights[i];
            totalWeight += weights[i];
        }
        if (totalWeight == 0)
            throw invalid_argument("Total weight should not be zero");
        precision_t entropy = 0;
        for (auto& [feat, count] : featureCounts) {
            auto p_f = count / totalWeight;
            precision_t entropy_f = 0;
            for (auto& [label, jointCount] : jointCounts[feat]) {
                auto p_l_f = jointCount / count;
                if (p_l_f > 0) {
                    if (nat) {
                        entropy_f -= p_l_f * log(p_l_f);
                    } else {
                        entropy_f -= p_l_f * log2(p_l_f);
                    }
                }
            }
            entropy += p_f * entropy_f;
        }
        return entropy;
    }
    // I(X;Y) = H(Y) - H(Y|X)
    precision_t SelectKBestWeighted::MutualInformation(const int i)
    {
        return entropyLabel() - conditionalEntropy(i);
    }
    score_t SelectKBestWeighted::getScores() const
    {
        if (!fitted)
            throw logic_error("score not fitted");
        return scores;
    }
    //Return the indices of the selected features
    labels_t SelectKBestWeighted::getFeatures() const
    {
        if (!fitted)
            throw logic_error("score not fitted");
        return features;
    }
}
bayesclass/FeatureSelect.h (new file, 38 lines)
@@ -0,0 +1,38 @@
#ifndef SELECT_K_BEST_WEIGHTED_H
#define SELECT_K_BEST_WEIGHTED_H
#include <map>
#include <vector>
#include <string>
using namespace std;
namespace features {
    typedef float precision_t;
    typedef int value_t;
    typedef vector<value_t> sample_t;
    typedef vector<sample_t> samples_t;
    typedef vector<value_t> labels_t;
    typedef vector<precision_t> score_t, weights_t;

    class SelectKBestWeighted {
    private:
        const samples_t samples;
        const labels_t labels;
        const weights_t weights;
        const int k;
        bool nat; // use natural log or log2
        int numFeatures, numClasses, numSamples;
        bool fitted;
        score_t scores; // scores of the features
        labels_t features; // indices of the selected features
        precision_t entropyLabel();
        precision_t entropy(const sample_t&);
        precision_t conditionalEntropy(const int);
        precision_t MutualInformation(const int);
    public:
        SelectKBestWeighted(samples_t&, labels_t&, weights_t&, int, bool);
        void fit();
        score_t getScores() const;
        labels_t getFeatures() const; //Return the indices of the selected features
        static inline string version() { return "0.1.0"; };
    };
}
#endif
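As a quick orientation to the API the two FeatureSelect files above define, here is a minimal usage sketch. It is not part of the diff: the `main` function, the toy matrix, and the variable names are invented for illustration, and the include path assumes compilation from the bayesclass/ source directory. Samples are row-major (one inner vector per sample), matching the `samples.size() != labels.size()` check in the constructor.

#include <iostream>
#include "FeatureSelect.h"

int main()
{
    // Four samples of three binary features; feature 0 mirrors the label.
    features::samples_t X = { {0, 0, 1}, {0, 1, 0}, {1, 0, 0}, {1, 1, 1} };
    features::labels_t y = { 0, 0, 1, 1 };
    features::weights_t w = { 1.0, 1.0, 1.0, 1.0 }; // uniform sample weights
    // keep the k = 2 best features; nat = false -> scores in bits (log2)
    features::SelectKBestWeighted selector(X, y, w, 2, false);
    selector.fit();
    for (auto idx : selector.getFeatures())
        std::cout << "selected feature index: " << idx << '\n';
    for (auto score : selector.getScores())
        std::cout << "mutual information score: " << score << '\n';
    return 0;
}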
bayesclass/KDB.cc (new file, 110 lines)
@@ -0,0 +1,110 @@
#include "KDB.h"
#include "Metrics.hpp"

namespace bayesnet {
    using namespace std;
    using namespace torch;
    vector<int> argsort(vector<float>& nums)
    {
        int n = nums.size();
        vector<int> indices(n);
        iota(indices.begin(), indices.end(), 0);
        sort(indices.begin(), indices.end(), [&nums](int i, int j) { return nums[i] > nums[j]; });
        return indices;
    }
    KDB::KDB(int k, float theta) : BaseClassifier(Network()), k(k), theta(theta) {}
    void KDB::train()
    {
        /*
        1. For each feature Xi, compute mutual information, I(Xi;C),
           where C is the class.
        2. Compute class conditional mutual information I(Xi;Xj|C), for each
           pair of features Xi and Xj, where i != j.
        3. Let the used variable list, S, be empty.
        4. Let the DAG network being constructed, BN, begin with a single
           class node, C.
        5. Repeat until S includes all domain features
           5.1. Select feature Xmax which is not in S and has the largest value
                I(Xmax;C).
           5.2. Add a node to BN representing Xmax.
           5.3. Add an arc from C to Xmax in BN.
           5.4. Add m = min(|S|, k) arcs from m distinct features Xj in S with
                the highest value for I(Xmax;Xj|C).
           5.5. Add Xmax to S.
        Compute the conditional probability inferred by the structure of BN by
        using counts from DB, and output BN.
        */
        // 1. For each feature Xi, compute mutual information, I(Xi;C),
        // where C is the class.
        cout << "Computing mutual information between features and class" << endl;
        auto n_classes = states[className].size();
        auto metrics = Metrics(dataset, features, className, n_classes);
        vector<float> mi;
        for (auto i = 0; i < features.size(); i++) {
            Tensor firstFeature = X.index({ "...", i });
            mi.push_back(metrics.mutualInformation(firstFeature, y));
            cout << "Mutual information between " << features[i] << " and " << className << " is " << mi[i] << endl;
        }
        // 2. Compute class conditional mutual information I(Xi;Xj|C) for each pair of features
        auto conditionalEdgeWeights = metrics.conditionalEdge();
        cout << "Conditional edge weights" << endl;
        cout << conditionalEdgeWeights << endl;
        // 3. Let the used variable list, S, be empty.
        vector<int> S;
        // 4. Let the DAG network being constructed, BN, begin with a single
        // class node, C.
        model.addNode(className, states[className].size());
        cout << "Adding node " << className << " to the network" << endl;
        // 5. Repeat until S includes all domain features
        // 5.1. Select feature Xmax which is not in S and has the largest value
        // I(Xmax;C).
        auto order = argsort(mi);
        for (auto idx : order) {
            cout << idx << " " << mi[idx] << endl;
            // 5.2. Add a node to BN representing Xmax.
            model.addNode(features[idx], states[features[idx]].size());
            // 5.3. Add an arc from C to Xmax in BN.
            model.addEdge(className, features[idx]);
            // 5.4. Add m = min(|S|, k) arcs from m distinct features Xj in S with
            // the highest value for I(Xmax;Xj|C).
            add_m_edges(idx, S, conditionalEdgeWeights);
            // 5.5. Add Xmax to S.
            S.push_back(idx);
        }
    }
    void KDB::add_m_edges(int idx, vector<int>& S, Tensor& weights)
    {
        auto n_edges = min(k, static_cast<int>(S.size()));
        auto cond_w = clone(weights);
        cout << "Conditional edge weights cloned for idx " << idx << endl;
        cout << cond_w << endl;
        bool exit_cond = k == 0;
        int num = 0;
        while (!exit_cond) {
            auto max_minfo = argmax(cond_w.index({ idx, "..." })).item<int>();
            auto belongs = find(S.begin(), S.end(), max_minfo) != S.end();
            if (belongs && cond_w.index({ idx, max_minfo }).item<float>() > theta) {
                try {
                    model.addEdge(features[max_minfo], features[idx]);
                    num++;
                }
                catch (const invalid_argument& e) {
                    // Loops are not allowed
                }
            }
            cond_w.index_put_({ idx, max_minfo }, -1);
            cout << "Conditional edge weights cloned for idx " << idx << " After -1" << endl;
            cout << cond_w << endl;
            cout << "cond_w.index({ idx, '...'})" << endl;
            cout << cond_w.index({ idx, "..." }) << endl;
            auto candidates_mask = cond_w.index({ idx, "..." }).gt(theta);
            auto candidates = candidates_mask.nonzero();
            cout << "Candidates mask" << endl;
            cout << candidates_mask << endl;
            cout << "Candidates: " << endl;
            cout << candidates << endl;
            cout << "Candidates size: " << candidates.size(0) << endl;
            exit_cond = num == n_edges || candidates.size(0) == 0;
        }
    }
}
bayesclass/KDB.h (new file, 18 lines)
@@ -0,0 +1,18 @@
#ifndef KDB_H
#define KDB_H
#include "BaseClassifier.h"
namespace bayesnet {
    using namespace std;
    using namespace torch;
    class KDB : public BaseClassifier {
    private:
        int k;
        float theta;
        void add_m_edges(int idx, vector<int>& S, Tensor& weights);
    protected:
        void train() override;
    public:
        KDB(int k, float theta = 0.03);
    };
}
#endif
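Putting `BaseClassifier` and `KDB` together, a fit call looks roughly like the sketch below (hypothetical `main` and toy data, not part of the diff). Note that the `vector<vector<int>>` overload of `fit` expects column-major data, one inner vector per feature, as the `torch::zeros({ X[0].size(), X.size() })` line in BaseClassifier.cc shows; `states` maps each variable name, class included, to the list of values it can take.

#include <iostream>
#include "KDB.h"

int main()
{
    using namespace std;
    // Column-major data: X[j] holds the values of feature j for all 4 samples.
    vector<vector<int>> X = { {0, 0, 1, 1}, {0, 1, 0, 1}, {1, 1, 0, 0} };
    vector<int> y = { 0, 0, 1, 1 };
    vector<string> features = { "f0", "f1", "f2" };
    string className = "class";
    map<string, vector<int>> states = {
        { "f0", {0, 1} }, { "f1", {0, 1} }, { "f2", {0, 1} }, { "class", {0, 1} }
    };
    bayesnet::KDB clf(2); // at most k = 2 feature parents per node
    clf.fit(X, y, features, className, states);
    for (const auto& line : clf.show()) // print the learned structure
        cout << line << '\n';
    return 0;
}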
bayesclass/Metrics.cc (new file, 119 lines)
@@ -0,0 +1,119 @@
#include "Metrics.hpp"
using namespace std;
namespace bayesnet {
    Metrics::Metrics(torch::Tensor& samples, vector<string>& features, string& className, int classNumStates)
        : samples(samples)
        , features(features)
        , className(className)
        , classNumStates(classNumStates)
    {
    }
    Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates)
        : features(features)
        , className(className)
        , classNumStates(classNumStates)
    {
        samples = torch::zeros({ static_cast<int64_t>(vsamples[0].size()), static_cast<int64_t>(vsamples.size() + 1) }, torch::kInt64);
        for (int i = 0; i < vsamples.size(); ++i) {
            samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt64));
        }
        samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
    }
    vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
    {
        vector<pair<string, string>> result;
        for (int i = 0; i < source.size(); ++i) {
            string temp = source[i];
            for (int j = i + 1; j < source.size(); ++j) {
                result.push_back({ temp, source[j] });
            }
        }
        return result;
    }
    torch::Tensor Metrics::conditionalEdge()
    {
        auto result = vector<double>();
        auto source = vector<string>(features);
        source.push_back(className);
        auto combinations = doCombinations(source);
        // Compute class prior
        auto margin = torch::zeros({ classNumStates });
        for (int value = 0; value < classNumStates; ++value) {
            auto mask = samples.index({ "...", -1 }) == value;
            margin[value] = mask.sum().item<float>() / samples.sizes()[0];
        }
        for (auto [first, second] : combinations) {
            int64_t index_first = find(features.begin(), features.end(), first) - features.begin();
            int64_t index_second = find(features.begin(), features.end(), second) - features.begin();
            double accumulated = 0;
            for (int value = 0; value < classNumStates; ++value) {
                auto mask = samples.index({ "...", -1 }) == value;
                auto first_dataset = samples.index({ mask, index_first });
                auto second_dataset = samples.index({ mask, index_second });
                auto mi = mutualInformation(first_dataset, second_dataset);
                auto pb = margin[value].item<float>();
                accumulated += pb * mi;
            }
            result.push_back(accumulated);
        }
        long n_vars = source.size();
        auto matrix = torch::zeros({ n_vars, n_vars });
        auto indices = torch::triu_indices(n_vars, n_vars, 1);
        for (auto i = 0; i < result.size(); ++i) {
            auto x = indices[0][i];
            auto y = indices[1][i];
            matrix[x][y] = result[i];
            matrix[y][x] = result[i];
        }
        return matrix;
    }
    vector<float> Metrics::conditionalEdgeWeights()
    {
        auto matrix = conditionalEdge();
        std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
        return v;
    }
    double Metrics::entropy(torch::Tensor& feature)
    {
        torch::Tensor counts = feature.bincount();
        int totalWeight = counts.sum().item<int>();
        torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
        torch::Tensor logProbs = torch::log(probs);
        torch::Tensor entropy = -probs * logProbs;
        return entropy.nansum().item<double>();
    }
    // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
    double Metrics::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
    {
        int numSamples = firstFeature.sizes()[0];
        torch::Tensor featureCounts = secondFeature.bincount();
        unordered_map<int, unordered_map<int, double>> jointCounts;
        double totalWeight = 0;
        for (auto i = 0; i < numSamples; i++) {
            jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1;
            totalWeight += 1;
        }
        if (totalWeight == 0)
            throw invalid_argument("Total weight should not be zero");
        double entropyValue = 0;
        for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
            double p_f = featureCounts[value].item<double>() / totalWeight;
            double entropy_f = 0;
            for (auto& [label, jointCount] : jointCounts[value]) {
                double p_l_f = jointCount / featureCounts[value].item<double>();
                if (p_l_f > 0) {
                    entropy_f -= p_l_f * log(p_l_f);
                } else {
                    entropy_f = 0;
                }
            }
            entropyValue += p_f * entropy_f;
        }
        return entropyValue;
    }
    // I(X;Y) = H(Y) - H(Y|X)
    double Metrics::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
    {
        return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
    }
}
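In LaTeX form, the quantities `Metrics` implements are the standard definitions restated from the comments above (`entropy` uses natural logarithms via `torch::log`); `conditionalEdge` fills a symmetric matrix whose entry for the pair (X_i, X_j) is a class-conditional mutual information weight:

\begin{aligned}
H(X) &= -\sum_{x} p(x)\,\log p(x) \\
H(Y \mid X) &= \sum_{x} p(x)\, H(Y \mid X = x) \\
I(X;Y) &= H(Y) - H(Y \mid X) \\
w_{ij} &= \sum_{c} p(C = c)\; I(X_i; X_j \mid C = c)
\end{aligned}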
bayesclass/Metrics.hpp (new file, 25 lines)
@@ -0,0 +1,25 @@
#ifndef BAYESNET_METRICS_H
#define BAYESNET_METRICS_H
#include <torch/torch.h>
#include <vector>
#include <string>
using namespace std;
namespace bayesnet {
    class Metrics {
    private:
        torch::Tensor samples;
        vector<string> features;
        string className;
        int classNumStates;
        vector<pair<string, string>> doCombinations(const vector<string>&);
        double entropy(torch::Tensor&);
        double conditionalEntropy(torch::Tensor&, torch::Tensor&);
    public:
        double mutualInformation(torch::Tensor&, torch::Tensor&);
        Metrics(torch::Tensor&, vector<string>&, string&, int);
        Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int);
        vector<float> conditionalEdgeWeights();
        torch::Tensor conditionalEdge();
    };
}
#endif
bayesclass/Network.cc (new file, 262 lines)
@@ -0,0 +1,262 @@
#include <thread>
#include <mutex>
#include "Network.h"
namespace bayesnet {
    Network::Network() : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(0.8) {}
    Network::Network(float maxT) : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT) {}
    Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT) {}
    Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads())
    {
        for (auto& pair : other.nodes) {
            nodes[pair.first] = new Node(*pair.second);
        }
    }
    Network::~Network()
    {
        for (auto& pair : nodes) {
            delete pair.second;
        }
    }
    float Network::getmaxThreads()
    {
        return maxThreads;
    }
    torch::Tensor& Network::getSamples()
    {
        return samples;
    }
    void Network::addNode(string name, int numStates)
    {
        if (nodes.find(name) != nodes.end()) {
            // if node exists update its number of states
            nodes[name]->setNumStates(numStates);
            return;
        }
        nodes[name] = new Node(name, numStates);
    }
    vector<string> Network::getFeatures()
    {
        return features;
    }
    int Network::getClassNumStates()
    {
        return classNumStates;
    }
    int Network::getStates()
    {
        int result = 0;
        for (auto node : nodes) {
            result += node.second->getNumStates();
        }
        return result;
    }
    string Network::getClassName()
    {
        return className;
    }
    bool Network::isCyclic(const string& nodeId, unordered_set<string>& visited, unordered_set<string>& recStack)
    {
        if (visited.find(nodeId) == visited.end()) // if node hasn't been visited yet
        {
            visited.insert(nodeId);
            recStack.insert(nodeId);
            for (Node* child : nodes[nodeId]->getChildren()) {
                if (visited.find(child->getName()) == visited.end() && isCyclic(child->getName(), visited, recStack))
                    return true;
                else if (recStack.find(child->getName()) != recStack.end())
                    return true;
            }
        }
        recStack.erase(nodeId); // remove node from recursion stack before function ends
        return false;
    }
    void Network::addEdge(const string parent, const string child)
    {
        if (nodes.find(parent) == nodes.end()) {
            throw invalid_argument("Parent node " + parent + " does not exist");
        }
        if (nodes.find(child) == nodes.end()) {
            throw invalid_argument("Child node " + child + " does not exist");
        }
        // Temporarily add edge to check for cycles
        nodes[parent]->addChild(nodes[child]);
        nodes[child]->addParent(nodes[parent]);
        unordered_set<string> visited;
        unordered_set<string> recStack;
        if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle
        {
            // remove problematic edge
            nodes[parent]->removeChild(nodes[child]);
            nodes[child]->removeParent(nodes[parent]);
            throw invalid_argument("Adding this edge forms a cycle in the graph.");
        }
    }
    map<string, Node*>& Network::getNodes()
    {
        return nodes;
    }
    void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<string>& featureNames, const string& className)
    {
        features = featureNames;
        this->className = className;
        dataset.clear();

        // Build dataset & tensor of samples
        samples = torch::zeros({ static_cast<int64_t>(input_data[0].size()), static_cast<int64_t>(input_data.size() + 1) }, torch::kInt64);
        for (int i = 0; i < featureNames.size(); ++i) {
            dataset[featureNames[i]] = input_data[i];
            samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt64));
        }
        dataset[className] = labels;
        samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt64));
        classNumStates = *max_element(labels.begin(), labels.end()) + 1;
        int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
        if (maxThreadsRunning < 1) {
            maxThreadsRunning = 1;
        }
        vector<thread> threads;
        mutex mtx;
        condition_variable cv;
        int activeThreads = 0;
        int nextNodeIndex = 0;

        while (nextNodeIndex < nodes.size()) {
            unique_lock<mutex> lock(mtx);
            cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });

            if (nextNodeIndex >= nodes.size()) {
                break; // No more work remaining
            }

            threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads]() {
                while (true) {
                    unique_lock<mutex> lock(mtx);
                    if (nextNodeIndex >= nodes.size()) {
                        break; // No more work remaining
                    }
                    auto& pair = *std::next(nodes.begin(), nextNodeIndex);
                    ++nextNodeIndex;
                    lock.unlock();

                    pair.second->computeCPT(dataset, laplaceSmoothing);

                    lock.lock();
                    nodes[pair.first] = pair.second;
                    lock.unlock();
                }
                lock_guard<mutex> lock(mtx);
                --activeThreads;
                cv.notify_one();
            });

            ++activeThreads;
        }
        for (auto& thread : threads) {
            thread.join();
        }
    }

    vector<int> Network::predict(const vector<vector<int>>& tsamples)
    {
        vector<int> predictions;
        vector<int> sample;
        for (int row = 0; row < tsamples[0].size(); ++row) {
            sample.clear();
            for (int col = 0; col < tsamples.size(); ++col) {
                sample.push_back(tsamples[col][row]);
            }
            vector<double> classProbabilities = predict_sample(sample);
            // Find the class with the maximum posterior probability
            auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
            int predictedClass = distance(classProbabilities.begin(), maxElem);
            predictions.push_back(predictedClass);
        }
        return predictions;
    }
    vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples)
    {
        vector<vector<double>> predictions;
        vector<int> sample;
        for (int row = 0; row < tsamples[0].size(); ++row) {
            sample.clear();
            for (int col = 0; col < tsamples.size(); ++col) {
                sample.push_back(tsamples[col][row]);
            }
            predictions.push_back(predict_sample(sample));
        }
        return predictions;
    }
    double Network::score(const vector<vector<int>>& tsamples, const vector<int>& labels)
    {
        vector<int> y_pred = predict(tsamples);
        int correct = 0;
        for (int i = 0; i < y_pred.size(); ++i) {
            if (y_pred[i] == labels[i]) {
                correct++;
            }
        }
        return (double)correct / y_pred.size();
    }
    vector<double> Network::predict_sample(const vector<int>& sample)
    {
        // Ensure the sample size is equal to the number of features
        if (sample.size() != features.size()) {
            throw invalid_argument("Sample size (" + to_string(sample.size()) +
                ") does not match the number of features (" + to_string(features.size()) + ")");
        }
        map<string, int> evidence;
        for (int i = 0; i < sample.size(); ++i) {
            evidence[features[i]] = sample[i];
        }
        return exactInference(evidence);
    }
    double Network::computeFactor(map<string, int>& completeEvidence)
    {
        double result = 1.0;
        for (auto node : getNodes()) {
            result *= node.second->getFactorValue(completeEvidence);
        }
        return result;
    }
    vector<double> Network::exactInference(map<string, int>& evidence)
    {
        vector<double> result(classNumStates, 0.0);
        vector<thread> threads;
        mutex mtx;
        for (int i = 0; i < classNumStates; ++i) {
            threads.emplace_back([this, &result, &evidence, i, &mtx]() {
                auto completeEvidence = map<string, int>(evidence);
                completeEvidence[getClassName()] = i;
                double factor = computeFactor(completeEvidence);
                lock_guard<mutex> lock(mtx);
                result[i] = factor;
            });
        }
        for (auto& thread : threads) {
            thread.join();
        }

        // Normalize result
        double sum = accumulate(result.begin(), result.end(), 0.0);
        for (double& value : result) {
            value /= sum;
        }
        return result;
    }
    vector<string> Network::show()
    {
        vector<string> result;
        // Draw the network
        for (auto node : nodes) {
            string line = node.first + " -> ";
            for (auto child : node.second->getChildren()) {
                line += child->getName() + ", ";
            }
            result.push_back(line);
        }
        return result;
    }
}
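For orientation: `predict_sample` delegates to `exactInference`, which enumerates the class states; `computeFactor` multiplies one CPT entry per node for the completed evidence, and the factors are then normalized into a posterior. Restated as a formula (a sketch of what the code computes, with e denoting the evidence x extended with C = c, respectively C = c', and the product running over every node in the network including C):

P(C = c \mid x) \;=\; \frac{\prod_{v} P\bigl(v = e_v \mid \mathrm{pa}(v)\bigr)}{\sum_{c'} \prod_{v} P\bigl(v = e_v \mid \mathrm{pa}(v)\bigr)}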
bayesclass/Network.h (new file, 51 lines)
@@ -0,0 +1,51 @@
#ifndef NETWORK_H
#define NETWORK_H
#include "Node.h"
#include <map>
#include <vector>

namespace bayesnet {
    class Network {
    private:
        map<string, Node*> nodes;
        map<string, vector<int>> dataset;
        float maxThreads;
        int classNumStates;
        vector<string> features;
        string className;
        int laplaceSmoothing;
        torch::Tensor samples;
        bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
        vector<double> predict_sample(const vector<int>&);
        vector<double> exactInference(map<string, int>&);
        double computeFactor(map<string, int>&);
        double mutual_info(torch::Tensor&, torch::Tensor&);
        double entropy(torch::Tensor&);
        double conditionalEntropy(torch::Tensor&, torch::Tensor&);
        double mutualInformation(torch::Tensor&, torch::Tensor&);
    public:
        Network();
        Network(float, int);
        Network(float);
        Network(Network&);
        ~Network();
        torch::Tensor& getSamples();
        float getmaxThreads();
        void addNode(string, int);
        void addEdge(const string, const string);
        map<string, Node*>& getNodes();
        vector<string> getFeatures();
        int getStates();
        int getClassNumStates();
        string getClassName();
        void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
        vector<int> predict(const vector<vector<int>>&);
        //Computes the conditional edge weight of variable index u and v conditioned on class_node
        torch::Tensor conditionalEdgeWeight();
        vector<vector<double>> predict_proba(const vector<vector<int>>&);
        double score(const vector<vector<int>>&, const vector<int>&);
        vector<string> show();
        inline string version() { return "0.1.0"; }
    };
}
#endif
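The `Network` API above can also be driven directly, without a classifier. A minimal sketch (hypothetical `main` and toy data, not part of the diff) builds a naive Bayes structure by hand, fits the CPTs, and scores the training set; as in the classifiers, the data are column-major, one inner vector per feature:

#include <iostream>
#include "Network.h"

int main()
{
    using namespace std;
    bayesnet::Network net; // defaults: laplaceSmoothing = 1, maxThreads = 0.8
    // Naive Bayes structure: class -> each feature
    net.addNode("class", 2);
    net.addNode("f0", 2);
    net.addNode("f1", 2);
    net.addEdge("class", "f0");
    net.addEdge("class", "f1");
    vector<vector<int>> X = { {0, 0, 1, 1}, {0, 1, 0, 1} };
    vector<int> y = { 0, 0, 1, 1 };
    vector<string> features = { "f0", "f1" };
    net.fit(X, y, features, "class"); // estimates one CPT per node, in parallel
    cout << "training accuracy: " << net.score(X, y) << '\n';
    for (const auto& line : net.show()) // one "parent -> children" line per node
        cout << line << '\n';
    return 0;
}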
bayesclass/Node.cc (new file, 114 lines)
@@ -0,0 +1,114 @@
#include "Node.h"

namespace bayesnet {

    Node::Node(const std::string& name, int numStates)
        : name(name), numStates(numStates), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
    {
    }

    string Node::getName() const
    {
        return name;
    }

    void Node::addParent(Node* parent)
    {
        parents.push_back(parent);
    }
    void Node::removeParent(Node* parent)
    {
        parents.erase(std::remove(parents.begin(), parents.end(), parent), parents.end());
    }
    void Node::removeChild(Node* child)
    {
        children.erase(std::remove(children.begin(), children.end(), child), children.end());
    }
    void Node::addChild(Node* child)
    {
        children.push_back(child);
    }
    vector<Node*>& Node::getParents()
    {
        return parents;
    }
    vector<Node*>& Node::getChildren()
    {
        return children;
    }
    int Node::getNumStates() const
    {
        return numStates;
    }
    void Node::setNumStates(int numStates)
    {
        this->numStates = numStates;
    }
    torch::Tensor& Node::getCPT()
    {
        return cpTable;
    }
    /*
     The MinFill criterion is a heuristic for variable elimination: it selects
     the variable that minimizes the number of edges that need to be added to
     the graph to make it triangulated. This is done by counting the number of
     edges that would need to be added to the graph if the variable were
     eliminated; the variable with the minimum count is chosen. Here this is
     done by computing the number of combinations of the node's neighbors
     taken 2 by 2.
    */
    unsigned Node::minFill()
    {
        unordered_set<string> neighbors;
        for (auto child : children) {
            neighbors.emplace(child->getName());
        }
        for (auto parent : parents) {
            neighbors.emplace(parent->getName());
        }
        auto source = vector<string>(neighbors.begin(), neighbors.end());
        return combinations(source).size();
    }
    vector<pair<string, string>> Node::combinations(const vector<string>& source)
    {
        vector<pair<string, string>> result;
        for (int i = 0; i < source.size(); ++i) {
            string temp = source[i];
            for (int j = i + 1; j < source.size(); ++j) {
                result.push_back({ temp, source[j] });
            }
        }
        return result;
    }
    void Node::computeCPT(map<string, vector<int>>& dataset, const int laplaceSmoothing)
    {
        // Get dimensions of the CPT
        dimensions.push_back(numStates);
        for (auto father : getParents()) {
            dimensions.push_back(father->getNumStates());
        }
        auto length = dimensions.size();
        // Create a tensor of zeros with the dimensions of the CPT
        cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
        // Fill table with counts
        for (int n_sample = 0; n_sample < dataset[name].size(); ++n_sample) {
            torch::List<c10::optional<torch::Tensor>> coordinates;
            coordinates.push_back(torch::tensor(dataset[name][n_sample]));
            for (auto father : getParents()) {
                coordinates.push_back(torch::tensor(dataset[father->getName()][n_sample]));
            }
            // Increment the count of the corresponding coordinate
            cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + 1);
        }
        // Normalize the counts
        cpTable = cpTable / cpTable.sum(0);
    }
    float Node::getFactorValue(map<string, int>& evidence)
    {
        torch::List<c10::optional<torch::Tensor>> coordinates;
        // following predetermined order of indices in the cpTable (see Node.h)
        coordinates.push_back(torch::tensor(evidence[name]));
        for (auto parent : getParents()) {
            coordinates.push_back(torch::tensor(evidence[parent->getName()]));
        }
        return cpTable.index({ coordinates }).item<float>();
    }
}
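Since `minFill` returns the number of unordered neighbor pairs, its value for a node v with neighbor set ne(v) works out to

\mathrm{minFill}(v) = \binom{|\mathrm{ne}(v)|}{2} = \frac{|\mathrm{ne}(v)|\,\bigl(|\mathrm{ne}(v)|-1\bigr)}{2},

which is an upper bound on the true fill-in, since the code counts every neighbor pair rather than only the pairs not already connected by an edge.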
bayesclass/Node.h (new file, 35 lines)
@@ -0,0 +1,35 @@
#ifndef NODE_H
#define NODE_H
#include <torch/torch.h>
#include <unordered_set>
#include <vector>
#include <string>
namespace bayesnet {
    using namespace std;
    class Node {
    private:
        string name;
        vector<Node*> parents;
        vector<Node*> children;
        int numStates; // number of states of the variable
        torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
        vector<int64_t> dimensions; // dimensions of the cpTable
    public:
        vector<pair<string, string>> combinations(const vector<string>&);
        Node(const std::string&, int);
        void addParent(Node*);
        void addChild(Node*);
        void removeParent(Node*);
        void removeChild(Node*);
        string getName() const;
        vector<Node*>& getParents();
        vector<Node*>& getChildren();
        torch::Tensor& getCPT();
        void computeCPT(map<string, vector<int>>&, const int);
        int getNumStates() const;
        void setNumStates(int);
        unsigned minFill();
        float getFactorValue(map<string, int>&);
    };
}
#endif
@@ -16,4 +16,8 @@ __all__ = [
     "TAN",
     "KDB",
     "AODE",
+    "KDBNew",
+    "AODENew",
+    "BoostAODE",
+    "BoostSPODE",
 ]
@@ -1 +1 @@
-__version__ = "0.1.0"
+__version__ = "0.2.0"
bayesclass/cSelectFeatures.cpp (new file, 4717 lines)
File diff suppressed because it is too large.
bayesclass/cSelectFeatures.pyx (new file, 33 lines)
@@ -0,0 +1,33 @@
# distutils: language = c++
# cython: language_level = 3
from libcpp.vector cimport vector
from libcpp.string cimport string
from libcpp cimport bool


cdef extern from "FeatureSelect.h" namespace "features":
    ctypedef float precision_t
    cdef cppclass SelectKBestWeighted:
        SelectKBestWeighted(vector[vector[int]]&, vector[int]&, vector[precision_t]&, int, bool) except +
        void fit()
        string version()
        vector[precision_t] getScores()
        vector[int] getFeatures()

cdef class CSelectKBestWeighted:
    cdef SelectKBestWeighted *thisptr
    def __cinit__(self, X, y, weights, k, natural=False):  # log or log2
        self.thisptr = new SelectKBestWeighted(X, y, weights, k, natural)
    def __dealloc__(self):
        del self.thisptr
    def fit(self):
        self.thisptr.fit()
        return self
    def get_scores(self):
        return self.thisptr.getScores()
    def get_features(self):
        return self.thisptr.getFeatures()
    def get_version(self):
        return self.thisptr.version()
    def __reduce__(self):
        return (CSelectKBestWeighted, ())
@@ -1,19 +1,29 @@
|
|||||||
import random
|
import random
|
||||||
|
import warnings
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from scipy.stats import mode
|
from scipy.stats import mode
|
||||||
from sklearn.base import ClassifierMixin, BaseEstimator
|
from sklearn.base import clone, ClassifierMixin, BaseEstimator
|
||||||
from sklearn.ensemble import BaseEnsemble
|
from sklearn.ensemble import BaseEnsemble
|
||||||
|
from sklearn.feature_selection import mutual_info_classif
|
||||||
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
|
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
|
||||||
from sklearn.utils.multiclass import unique_labels
|
from sklearn.utils.multiclass import unique_labels
|
||||||
from sklearn.feature_selection import mutual_info_classif
|
from sklearn.feature_selection import mutual_info_classif
|
||||||
import networkx as nx
|
import networkx as nx
|
||||||
from pgmpy.estimators import TreeSearch, BayesianEstimator
|
from pgmpy.estimators import TreeSearch, BayesianEstimator
|
||||||
from pgmpy.models import BayesianNetwork
|
from pgmpy.models import BayesianNetwork
|
||||||
|
from pgmpy.base import DAG
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
from fimdlp.mdlp import FImdlp
|
||||||
|
from .cppSelectFeatures import CSelectKBestWeighted
|
||||||
|
from .BayesNet import BayesNetwork, CMetrics
|
||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
|
|
||||||
|
def default_feature_names(num_features):
|
||||||
|
return [f"feature_{i}" for i in range(num_features)]
|
||||||
|
|
||||||
|
|
||||||
class BayesBase(BaseEstimator, ClassifierMixin):
|
class BayesBase(BaseEstimator, ClassifierMixin):
|
||||||
def __init__(self, random_state, show_progress):
|
def __init__(self, random_state, show_progress):
|
||||||
self.random_state = random_state
|
self.random_state = random_state
|
||||||
@@ -23,7 +33,7 @@ class BayesBase(BaseEstimator, ClassifierMixin):
|
|||||||
return {
|
return {
|
||||||
"requires_positive_X": True,
|
"requires_positive_X": True,
|
||||||
"requires_positive_y": True,
|
"requires_positive_y": True,
|
||||||
"preserve_dtype": [np.int64, np.int32],
|
"preserve_dtype": [np.int32, np.int64],
|
||||||
"requires_y": True,
|
"requires_y": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -32,35 +42,68 @@ class BayesBase(BaseEstimator, ClassifierMixin):
|
|||||||
"""Return the version of the package."""
|
"""Return the version of the package."""
|
||||||
return __version__
|
return __version__
|
||||||
|
|
||||||
def nodes_leaves(self):
|
def nodes_edges(self):
|
||||||
"""To keep compatiblity with the benchmark platform"""
|
if hasattr(self, "dag_"):
|
||||||
|
return len(self.dag_), len(self.dag_.edges())
|
||||||
return 0, 0
|
return 0, 0
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def default_class_name():
|
||||||
|
return "class"
|
||||||
|
|
||||||
|
def build_dataset(self):
|
||||||
|
self.dataset_ = pd.DataFrame(
|
||||||
|
self.X_, columns=self.feature_names_in_, dtype=np.int32
|
||||||
|
)
|
||||||
|
self.dataset_[self.class_name_] = self.y_
|
||||||
|
if self.sample_weight_ is not None:
|
||||||
|
self.dataset_["_weight"] = self.sample_weight_
|
||||||
|
|
||||||
    def _check_params_fit(self, X, y, expected_args, kwargs):
        """Check the common parameters passed to fit"""
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
+       X = self._validate_data(X, reset=True)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)
        self.n_classes_ = self.classes_.shape[0]
        # Default values
-       self.class_name_ = "class"
-       self.features_ = [f"feature_{i}" for i in range(X.shape[1])]
+       self.weighted_ = False
+       self.sample_weight_ = None
+       self.class_name_ = self.default_class_name()
+       self.features_ = default_feature_names(X.shape[1])
        for key, value in kwargs.items():
            if key in expected_args:
                setattr(self, f"{key}_", value)
            else:
                raise ValueError(f"Unexpected argument: {key}")
+       self.feature_names_in_ = self.features_
+       # used for local discretization
+       self.indexed_features_ = {
+           feature: i for i, feature in enumerate(self.features_)
+       }
        if self.random_state is not None:
            random.seed(self.random_state)
-       if len(self.features_) != X.shape[1]:
+       if len(self.feature_names_in_) != X.shape[1]:
            raise ValueError(
                "Number of features does not match the number of columns in X"
            )
+       self.n_features_in_ = X.shape[1]
        return X, y

+   @property
+   def states_(self):
+       if hasattr(self, "fitted_"):
+           return self.states_computed_
+       return 0
+
+   @property
+   def depth_(self):
+       return self.states_
    def fit(self, X, y, **kwargs):
-       """A reference implementation of a fitting function for a classifier.
+       """Fit classifier

        Parameters
        ----------
@@ -97,29 +140,54 @@ class BayesBase(BaseEstimator, ClassifierMixin):
        >>> model.fit(train_data, train_y, features=features, class_name='E')
        TAN(random_state=17)
        """
-       X_, y_ = self._check_params(X, y, kwargs)
+       self.X_, self.y_ = self._check_params(X, y, kwargs)
        # Store the information needed to build the model
-       self.X_ = X_
-       self.y_ = y_
-       self.dataset_ = pd.DataFrame(self.X_, columns=self.features_)
-       self.dataset_[self.class_name_] = self.y_
+       self.build_dataset()
        # Build the DAG
-       self._build()
+       self._build(kwargs)
        # Train the model
-       self._train()
+       self._train(kwargs)
        self.fitted_ = True
+       # To keep compatibility with the benchmark platform
+       self.nodes_leaves = self.nodes_edges
        # Return the classifier
        return self
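Following the docstring example kept above, a hedged end-to-end sketch of the new fit flow; train_data, train_y and features are assumed inputs already discretized to integers:

from bayesclass.clfs import TAN

model = TAN(random_state=17)
# features is a list of column names; 'E' is the class column, as in the docstring
model.fit(train_data, train_y, features=features, class_name="E")
y_pred = model.predict(train_data)  # fit also aliases nodes_leaves to nodes_edges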
-   def _train(self):
-       self.model_ = BayesianNetwork(
-           self.dag_.edges(), show_progress=self.show_progress
-       )
-       self.model_.fit(
-           self.dataset_,
-           estimator=BayesianEstimator,
-           prior_type="K2",
-       )
+   def _build(self, kwargs):
+       self.model_ = BayesNetwork()
+       features = kwargs["features"]
+       states = kwargs["state_names"]
+       for feature in features:
+           self.model_.addNode(feature, len(states[feature]))
+       class_name = kwargs["class_name"]
+       self.model_.addNode(class_name, max(self.y_) + 1)
+
+   def _train(self, kwargs):
+       """Build and train a BayesianNetwork from the DAG and the dataset
+
+       Parameters
+       ----------
+       kwargs : dict
+           fit parameters
+       """
+       # self.model_ = BayesianNetwork(
+       #     self.dag_.edges(), show_progress=self.show_progress
+       # )
+       # states = dict(state_names=kwargs.pop("state_names", []))
+       # self.model_.fit(
+       #     self.dataset_,
+       #     estimator=BayesianEstimator,
+       #     prior_type="K2",
+       #     weighted=self.weighted_,
+       #     **states,
+       # )
+       features = kwargs["features"]
+       class_name = kwargs["class_name"]
+       for source, destination in self.edges_:
+           self.model_.addEdge(source, destination)
+       self.model_.fit(self.X_, self.y_, features, class_name)
+       self.states_computed_ = self.model_.getStates()
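The addNode/addEdge/fit/getStates calls above belong to the new C++ BayesNetwork binding this changeset introduces. A minimal sketch of the call sequence on a toy problem; the import path and exact binding behavior are assumptions, only the method names come from the diff:

net = BayesNetwork()                   # wrapped C++ network from this changeset
net.addNode("class", 2)                # node name, number of states
net.addNode("feature_0", 3)
net.addEdge("class", "feature_0")      # arc: source -> destination
net.fit(X, y, ["feature_0"], "class")  # learn CPTs from integer-coded data
total_states = net.getStates()         # what states_computed_ stores above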
    def predict(self, X):
        """A reference implementation of a prediction for a classifier.
@@ -169,13 +237,16 @@ class BayesBase(BaseEstimator, ClassifierMixin):
        """
        # Check if fit had been called
        check_is_fitted(self, ["X_", "y_", "fitted_"])
        # Input validation
        X = check_array(X)
-       dataset = pd.DataFrame(X, columns=self.features_, dtype="int16")
-       return self.model_.predict(dataset).values.ravel()
+       # dataset = pd.DataFrame(
+       #     X, columns=self.feature_names_in_, dtype=np.int32
+       # )
+       # return self.model_.predict(dataset).values.ravel()
+       return self.model_.predict(X)

    def plot(self, title="", node_size=800):
+       warnings.simplefilter("ignore", UserWarning)
        nx.draw_circular(
            self.model_,
            with_labels=True,
@@ -208,7 +279,7 @@ class TAN(BayesBase):
        The classes seen at :meth:`fit`.
    class_name_ : str
        The name of the class column
-   features_ : list
+   feature_names_in_ : list
        The list of feature names
    head_ : int
        The index of the node used as head for the initial DAG
@@ -227,21 +298,47 @@ class TAN(BayesBase):

    def _check_params(self, X, y, kwargs):
        self.head_ = 0
-       expected_args = ["class_name", "features", "head"]
+       expected_args = ["class_name", "features", "head", "state_names"]
        X, y = self._check_params_fit(X, y, expected_args, kwargs)
        if self.head_ == "random":
-           self.head_ = random.randint(0, len(self.features_) - 1)
-       if self.head_ is not None and self.head_ >= len(self.features_):
+           self.head_ = random.randint(0, self.n_features_in_ - 1)
+       if self.head_ is not None and self.head_ >= self.n_features_in_:
            raise ValueError("Head index out of range")
        return X, y

-   def _build(self):
-       est = TreeSearch(self.dataset_, root_node=self.features_[self.head_])
+   def _build(self, kwargs):
+       est = TreeSearch(
+           self.dataset_, root_node=self.feature_names_in_[self.head_]
+       )
        self.dag_ = est.estimate(
            estimator_type="tan",
            class_node=self.class_name_,
            show_progress=self.show_progress,
        )
+       # Code taken from pgmpy
+       # n_jobs = -1
+       # weights = TreeSearch._get_conditional_weights(
+       #     self.dataset_,
+       #     self.class_name_,
+       #     "mutual_info",
+       #     n_jobs,
+       #     self.show_progress,
+       # )
+       # # Step 4.2: Construct chow-liu DAG on {data.columns - class_node}
+       # class_node_idx = np.where(self.dataset_.columns == self.class_name_)[
+       #     0
+       # ][0]
+       # weights = np.delete(weights, class_node_idx, axis=0)
+       # weights = np.delete(weights, class_node_idx, axis=1)
+       # reduced_columns = np.delete(self.dataset_.columns, class_node_idx)
+       # D = TreeSearch._create_tree_and_dag(
+       #     weights, reduced_columns, self.feature_names_in_[self.head_]
+       # )
+       # # Step 4.3: Add edges from class_node to all other nodes.
+       # D.add_edges_from(
+       #     [(self.class_name_, node) for node in reduced_columns]
+       # )
+       # self.dag_ = D
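For context, the TAN structure search above is pgmpy's TreeSearch estimator. A hedged standalone sketch, where df is an assumed discrete DataFrame whose class column is named "class":

from pgmpy.estimators import TreeSearch

est = TreeSearch(df, root_node="feature_0")
dag = est.estimate(
    estimator_type="tan", class_node="class", show_progress=False
)
print(dag.edges())  # feature tree rooted at feature_0 plus class -> Xi arcs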
class KDB(BayesBase):
@@ -253,121 +350,625 @@ class KDB(BayesBase):
        )

    def _check_params(self, X, y, kwargs):
-       expected_args = ["class_name", "features"]
+       expected_args = [
+           "class_name",
+           "features",
+           "state_names",
+           "sample_weight",
+           "weighted",
+       ]
        return self._check_params_fit(X, y, expected_args, kwargs)
-   def _build(self):
+   def _add_m_edges(self, idx, S_nodes, conditional_weights):
+       n_edges = min(self.k, len(S_nodes))
+       cond_w = conditional_weights.copy()
+       exit_cond = self.k == 0
+       num = 0
+       while not exit_cond:
+           max_minfo = np.argmax(cond_w[idx, :])
+           if max_minfo in S_nodes and cond_w[idx, max_minfo] > self.theta:
+               try:
+                   self.model_.addEdge(
+                       self.feature_names_in_[max_minfo],
+                       self.feature_names_in_[idx],
+                   )
+                   num += 1
+               except ValueError:
+                   # Loops are not allowed
+                   pass
+           cond_w[idx, max_minfo] = -1
+           exit_cond = num == n_edges or np.all(cond_w[idx, :] <= self.theta)
+
+   def _build(self, kwargs):
        """
        1. For each feature Xi, compute mutual information, I(Xi;C),
           where C is the class.
        2. Compute class conditional mutual information I(Xi;Xj|C), for each
           pair of features Xi and Xj, where i != j.
        3. Let the used variable list, S, be empty.
-       4. Let the Bayesian network being constructed, BN, begin with a single
+       4. Let the DAG network being constructed, BN, begin with a single
           class node, C.
        5. Repeat until S includes all domain features
        5.1. Select feature Xmax which is not in S and has the largest value
             I(Xmax;C).
        5.2. Add a node to BN representing Xmax.
        5.3. Add an arc from C to Xmax in BN.
        5.4. Add m = min(|S|, k) arcs from m distinct features Xj in S with
             the highest value for I(Xmax;Xj|C).
        5.5. Add Xmax to S.
        Compute the conditional probability inferred by the structure of BN
        by using counts from DB, and output BN.
        """
-
-       def add_m_edges(dag, idx, S_nodes, conditional_weights):
-           n_edges = min(self.k, len(S_nodes))
-           cond_w = conditional_weights.copy()
-           exit_cond = self.k == 0
-           num = 0
-           while not exit_cond:
-               max_minfo = np.argmax(cond_w[idx, :])
-               if (
-                   max_minfo in S_nodes
-                   and cond_w[idx, max_minfo] > self.theta
-               ):
-                   try:
-                       dag.add_edge(
-                           self.features_[max_minfo], self.features_[idx]
-                       )
-                       num += 1
-                   except ValueError:
-                       # Loops are not allowed
-                       pass
-               cond_w[idx, max_minfo] = -1
-               exit_cond = num == n_edges or np.all(cond_w[idx, :] <= 0)
-
        # 1. get the mutual information between each feature and the class
        mutual = mutual_info_classif(self.X_, self.y_, discrete_features=True)
        # 2. symmetric matrix where each element represents I(X, Y| class_node)
-       conditional_weights = TreeSearch(
-           self.dataset_
-       )._get_conditional_weights(
-           self.dataset_, self.class_name_, show_progress=self.show_progress
-       )
-       # 3.
+       metrics = CMetrics(
+           self.X_,
+           self.y_,
+           self.features_,
+           self.class_name_,
+           self.n_classes_,
+       )
+       conditional_weights = metrics.conditionalEdgeWeights(
+           self.n_features_in_ + 1
+       )
+       # 3. Let the used variable list, S, be empty.
        S_nodes = []
-       # 4.
-       dag = BayesianNetwork()
-       dag.add_node(self.class_name_)  # , state_names=self.classes_)
-       # 5. 5.1
-       for idx in np.argsort(mutual):
-           # 5.2
-           feature = self.features_[idx]
-           dag.add_node(feature)
-           # 5.3
-           dag.add_edge(self.class_name_, feature)
-           # 5.4
-           add_m_edges(dag, idx, S_nodes, conditional_weights)
-           # 5.5
-           S_nodes.append(idx)
-       self.dag_ = dag
+       num_states = {
+           feature: len(states)
+           for feature, states in kwargs["state_names"].items()
+       }
+       # 4. Let the DAG being constructed, BN, begin with a single class node
+       self.model_ = BayesNetwork()
+       self.model_.addNode(self.class_name_, self.n_classes_)
+       # 5. Repeat until S includes all domain features
+       # 5.1 Select feature Xmax which is not in S and has the largest value
+       for idx in np.argsort(-mutual):
+           # 5.2 Add a node to BN representing Xmax.
+           feature = self.feature_names_in_[idx]
+           self.model_.addNode(feature, num_states[feature])
+           # 5.3 Add an arc from C to Xmax in BN.
+           self.model_.addEdge(self.class_name_, feature)
+           # 5.4 Add m = min(|S|, k) arcs from m distinct features Xj in S
+           self._add_m_edges(idx, S_nodes, conditional_weights)
+           # 5.5 Add Xmax to S.
+           S_nodes.append(idx)
+       self.edges_ = []
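A toy sketch of the ranking-and-edge-selection loop (steps 5.1 to 5.5) in plain numpy, independent of the C++ CMetrics/BayesNetwork bindings; every number below is invented for illustration:

import numpy as np

k = 2
mutual = np.array([0.9, 0.2, 0.7])           # I(Xi;C) per feature
cond_mi = np.array([[0.0, 0.1, 0.5],
                    [0.1, 0.0, 0.3],
                    [0.5, 0.3, 0.0]])        # I(Xi;Xj|C), symmetric
S, edges = [], []
for idx in np.argsort(-mutual):              # 5.1: best I(Xi;C) first
    # 5.4: up to k parents among already-used features, by highest I(Xi;Xj|C)
    for j in sorted(S, key=lambda s: -cond_mi[idx, s])[:k]:
        edges.append((j, idx))
    S.append(idx)                            # 5.5
print(edges)                                 # [(0, 2), (2, 1), (0, 1)]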
-class AODE(BayesBase, BaseEnsemble):
-   def __init__(self, show_progress=False, random_state=None):
+def build_spodes(features, class_name):
+    """Build SPODE estimators (Super Parent One Dependent Estimator)"""
+    class_edges = [(class_name, f) for f in features]
+    for idx in range(len(features)):
+        feature_edges = [
+            (features[idx], f) for f in features if f != features[idx]
+        ]
+        feature_edges.extend(class_edges)
+        model = BayesianNetwork(feature_edges, show_progress=False)
+        yield model
+
+
+class SPODE(BayesBase):
+    def _check_params(self, X, y, kwargs):
+        expected_args = [
+            "class_name",
+            "features",
+            "state_names",
+            "sample_weight",
+            "weighted",
+        ]
+        return self._check_params_fit(X, y, expected_args, kwargs)
+
+
+class AODE(ClassifierMixin, BaseEnsemble):
+    def __init__(
+        self,
+        show_progress=False,
+        random_state=None,
+        estimator=None,
+    ):
+        self.show_progress = show_progress
+        self.random_state = random_state
+        super().__init__(estimator=estimator)
+
+    def _validate_estimator(self) -> None:
+        """Check the estimator and set the estimator_ attribute."""
+        super()._validate_estimator(
+            default=SPODE(
+                random_state=self.random_state,
+                show_progress=self.show_progress,
+            )
+        )
+
+    def fit(self, X, y, **kwargs):
+        self.n_features_in_ = X.shape[1]
+        self.feature_names_in_ = kwargs.get(
+            "features", default_feature_names(self.n_features_in_)
+        )
+        self.class_name_ = kwargs.get("class_name", "class")
+        # build estimator
+        self._validate_estimator()
+        self.X_ = X
+        self.y_ = y
+        self.n_samples_ = X.shape[0]
+        self.estimators_ = []
+        self._train(kwargs)
+        self.fitted_ = True
+        # To keep compatibility with the benchmark platform
+        self.nodes_leaves = self.nodes_edges
+        return self
+    def _train(self, kwargs):
+        for dag in build_spodes(self.feature_names_in_, self.class_name_):
+            estimator = clone(self.estimator_)
+            estimator.dag_ = estimator.model_ = dag
+            estimator.fit(self.X_, self.y_, **kwargs)
+            self.estimators_.append(estimator)
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        n_samples = X.shape[0]
+        n_estimators = len(self.estimators_)
+        result = np.empty((n_samples, n_estimators))
+        for index, estimator in enumerate(self.estimators_):
+            result[:, index] = estimator.predict(X)
+        return mode(result, axis=1, keepdims=False).mode.ravel()
+
+    def version(self):
+        if hasattr(self, "fitted_"):
+            return self.estimator_.version()
+        return SPODE(None, False).version()
+
+    @property
+    def states_(self):
+        if hasattr(self, "fitted_"):
+            return sum(
+                [
+                    len(item)
+                    for model in self.estimators_
+                    for _, item in model.model_.states.items()
+                ]
+            ) / len(self.estimators_)
+        return 0
+
+    @property
+    def depth_(self):
+        return self.states_
+
+    def nodes_edges(self):
+        nodes = 0
+        edges = 0
+        if hasattr(self, "fitted_"):
+            nodes = sum([len(x.dag_) for x in self.estimators_])
+            edges = sum([len(x.dag_.edges()) for x in self.estimators_])
+        return nodes, edges
+
+    def plot(self, title=""):
+        warnings.simplefilter("ignore", UserWarning)
+        for idx, model in enumerate(self.estimators_):
+            model.plot(title=f"{idx} {title}")
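The ensemble predict above is a plain majority vote across the SPODEs via scipy.stats.mode. A standalone sketch with invented votes:

import numpy as np
from scipy.stats import mode

# Three estimators voting on four samples (votes invented for illustration).
result = np.array([[0, 0, 1],
                   [1, 1, 1],
                   [2, 0, 0],
                   [1, 2, 2]])
y_pred = mode(result, axis=1, keepdims=False).mode.ravel()
print(y_pred)  # [0 1 0 2]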
+class TANNew(TAN):
+    def __init__(
+        self,
+        show_progress=False,
+        random_state=None,
+        discretizer_depth=1e6,
+        discretizer_length=3,
+        discretizer_cuts=0,
+    ):
+        self.discretizer_depth = discretizer_depth
+        self.discretizer_length = discretizer_length
+        self.discretizer_cuts = discretizer_cuts
        super().__init__(
            show_progress=show_progress, random_state=random_state
        )

-   def _check_params(self, X, y, kwargs):
-       expected_args = ["class_name", "features"]
-       return self._check_params_fit(X, y, expected_args, kwargs)
-
-   def _build(self):
-       self.dag_ = None
-
-   def _train(self):
-       """Build SPODE estimators (Super Parent One Dependent Estimator)"""
-       self.models_ = []
-       class_edges = [(self.class_name_, f) for f in self.features_]
-       for idx in range(len(self.features_)):
-           feature_edges = [
-               (self.features_[idx], f)
-               for f in self.features_
-               if f != self.features_[idx]
-           ]
-           feature_edges.extend(class_edges)
-           model = BayesianNetwork(
-               feature_edges, show_progress=self.show_progress
-           )
-           model.fit(
-               self.dataset_,
-               estimator=BayesianEstimator,
-               prior_type="K2",
-           )
-           self.models_.append(model)
-
-   def plot(self, title=""):
-       for idx, model in enumerate(self.models_):
-           self.model_ = model
-           super().plot(title=f"{idx} {title}")
+   def fit(self, X, y, **kwargs):
+       self.estimator_ = Proposal(self)
+       self.estimator_.fit(X, y, **kwargs)
+       return self
+
+   def predict(self, X):
+       return self.estimator_.predict(X)
+
+
+class KDBNew(KDB):
+    def __init__(
+        self,
+        k=2,
+        show_progress=False,
+        random_state=None,
+        discretizer_depth=1e6,
+        discretizer_length=3,
+        discretizer_cuts=0,
+    ):
+        self.discretizer_depth = discretizer_depth
+        self.discretizer_length = discretizer_length
+        self.discretizer_cuts = discretizer_cuts
+        super().__init__(
+            k=k, show_progress=show_progress, random_state=random_state
+        )
+
+    def fit(self, X, y, **kwargs):
+        self.estimator_ = Proposal(self)
+        self.estimator_.fit(X, y, **kwargs)
+        return self
+
+    def predict(self, X):
+        return self.estimator_.predict(X)
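The *New classes are thin delegators: fit hands everything to a Proposal wrapper (defined further below), which discretizes first and then calls the parent class's fit on the discretized data. A hedged usage sketch, with X, y and features as assumed inputs:

model = TANNew(random_state=17)
model.fit(X, y, features=features, class_name="class")  # delegates to Proposal
y_pred = model.predict(X)                                # also via Proposal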
+class SPODENew(SPODE):
+    """This class implements a classifier for the SPODE algorithm similar to
+    TANNew and KDBNew"""
+
+    def __init__(
+        self,
+        random_state,
+        show_progress,
+        discretizer_depth=1e6,
+        discretizer_length=3,
+        discretizer_cuts=0,
+    ):
+        super().__init__(
+            random_state=random_state, show_progress=show_progress
+        )
+        self.discretizer_depth = discretizer_depth
+        self.discretizer_length = discretizer_length
+        self.discretizer_cuts = discretizer_cuts
+
+
+class AODENew(AODE):
+    def __init__(
+        self,
+        random_state=None,
+        show_progress=False,
+        discretizer_depth=1e6,
+        discretizer_length=3,
+        discretizer_cuts=0,
+    ):
+        self.discretizer_depth = discretizer_depth
+        self.discretizer_length = discretizer_length
+        self.discretizer_cuts = discretizer_cuts
+        super().__init__(
+            random_state=random_state,
+            show_progress=show_progress,
+            estimator=Proposal(
+                SPODENew(
+                    random_state=random_state,
+                    show_progress=show_progress,
+                    discretizer_depth=discretizer_depth,
+                    discretizer_length=discretizer_length,
+                    discretizer_cuts=discretizer_cuts,
+                )
+            ),
+        )
+
+    def _train(self, kwargs):
+        for dag in build_spodes(self.feature_names_in_, self.class_name_):
+            proposal = clone(self.estimator_)
+            proposal.estimator.dag_ = proposal.estimator.model_ = dag
+            self.estimators_.append(proposal.fit(self.X_, self.y_, **kwargs))
+        self.n_estimators_ = len(self.estimators_)
    def predict(self, X: np.ndarray) -> np.ndarray:
        check_is_fitted(self, ["X_", "y_", "fitted_"])
        # Input validation
-       X = self._validate_data(X, reset=False)
-       n_samples = X.shape[0]
-       n_estimators = len(self.models_)
-       result = np.empty((n_samples, n_estimators))
-       dataset = pd.DataFrame(X, columns=self.features_, dtype="int16")
-       for index, model in enumerate(self.models_):
-           result[:, index] = model.predict(dataset).values.ravel()
+       X = check_array(X)
+       result = np.empty((X.shape[0], self.n_estimators_))
+       for index, model in enumerate(self.estimators_):
+           result[:, index] = model.predict(X)
+       return mode(result, axis=1, keepdims=False).mode.ravel()
+
+   @property
+   def states_(self):
+       if hasattr(self, "fitted_"):
+           return sum(
+               [
+                   len(item)
+                   for model in self.estimators_
+                   for _, item in model.estimator.model_.states.items()
+               ]
+           ) / len(self.estimators_)
+       return 0
+
+   @property
+   def depth_(self):
+       return self.states_
+
+   def nodes_edges(self):
+       nodes = 0
+       edges = 0
+       if hasattr(self, "fitted_"):
+           nodes = sum([len(x.estimator.dag_) for x in self.estimators_])
+           edges = sum(
+               [len(x.estimator.dag_.edges()) for x in self.estimators_]
+           )
+       return nodes, edges
+
+   def plot(self, title=""):
+       warnings.simplefilter("ignore", UserWarning)
+       for idx, model in enumerate(self.estimators_):
+           model.estimator.plot(title=f"{idx} {title}")
+
+   def version(self):
+       if hasattr(self, "fitted_"):
+           return self.estimator_.estimator.version()
+       return SPODENew(None, False).version()
+class Proposal(BaseEstimator):
+    def __init__(self, estimator):
+        self.estimator = estimator
+        self.class_type = estimator.__class__
+
+    def fit(self, X, y, **kwargs):
+        # Check parameters
+        self.estimator._check_params(X, y, kwargs)
+        # Discretize train data
+        self.discretizer_ = FImdlp(
+            n_jobs=1,
+            max_depth=self.estimator.discretizer_depth,
+            min_length=self.estimator.discretizer_length,
+            max_cuts=self.estimator.discretizer_cuts,
+        )
+        self.Xd = self.discretizer_.fit_transform(X, y)
+        kwargs = self.update_kwargs(y, kwargs)
+        # Build the model
+        super(self.class_type, self.estimator).fit(self.Xd, y, **kwargs)
+        # Local discretization based on the model
+        self._local_discretization()
+        # self.check_integrity("fit", self.Xd)
+        self.fitted_ = True
+        return self
+
+    def predict(self, X):
+        # Check if fit had been called
+        check_is_fitted(self, ["fitted_"])
+        # Input validation
+        X = check_array(X)
+        Xd = self.discretizer_.transform(X)
+        # self.check_integrity("predict", Xd)
+        return super(self.class_type, self.estimator).predict(Xd)
+
+    def update_kwargs(self, y, kwargs):
+        features = (
+            kwargs["features"]
+            if "features" in kwargs
+            else default_feature_names(self.Xd.shape[1])
+        )
+        states = {
+            features[i]: self.discretizer_.get_states_feature(i)
+            for i in range(self.Xd.shape[1])
+        }
+        class_name = (
+            kwargs["class_name"]
+            if "class_name" in kwargs
+            else self.estimator.default_class_name()
+        )
+        states[class_name] = np.unique(y).tolist()
+        kwargs["state_names"] = states
+        self.state_names_ = states
+        self.features_ = features
+        kwargs["features"] = features
+        kwargs["class_name"] = class_name
+        return kwargs
+
+    def _local_discretization(self):
+        """Discretize each feature with its fathers and the class"""
+        upgrade = False
+        # order of local discretization is important; plain 0, 1, 2... order
+        # is not valid
+        ancestral_order = list(nx.topological_sort(self.estimator.dag_))
+        for feature in ancestral_order:
+            if feature == self.estimator.class_name_:
+                continue
+            idx = self.estimator.indexed_features_[feature]
+            fathers = self.estimator.dag_.get_parents(feature)
+            if len(fathers) > 1:
+                # First remove the class name as it will be added later
+                fathers.remove(self.estimator.class_name_)
+                # Get the fathers indices
+                features = [
+                    self.estimator.indexed_features_[f] for f in fathers
+                ]
+                # Update the discretization of the feature
+                self.Xd[:, idx] = self.discretizer_.join_fit(
+                    # each feature has to use previous discretization data
+                    target=idx,
+                    features=features,
+                    data=self.Xd,
+                )
+                upgrade = True
+        if upgrade:
+            # Update the dataset
+            self.estimator.X_ = self.Xd
+            self.estimator.build_dataset()
+            self.state_names_ = {
+                key: self.discretizer_.get_states_feature(value)
+                for key, value in self.estimator.indexed_features_.items()
+            }
+            states = {"state_names": self.state_names_}
+            # Update the model
+            self.estimator.model_.fit(
+                self.estimator.dataset_,
+                estimator=BayesianEstimator,
+                prior_type="K2",
+                **states,
+            )
+
+    # def check_integrity(self, source, X):
+    #     # print(f"Checking integrity of {source} data")
+    #     for i in range(X.shape[1]):
+    #         if not set(np.unique(X[:, i]).tolist()).issubset(
+    #             set(self.state_names_[self.features_[i]])
+    #         ):
+    #             print(
+    #                 "i",
+    #                 i,
+    #                 "features[i]",
+    #                 self.features_[i],
+    #                 "np.unique(X[:, i])",
+    #                 np.unique(X[:, i]),
+    #                 "np.array(state_names[features[i]])",
+    #                 np.array(self.state_names_[self.features_[i]]),
+    #             )
+    #             raise ValueError("Discretization error")
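The walk in _local_discretization visits features in topological order so a parent is always re-discretized before any child that conditions on it. A hedged standalone sketch of just the ordering step, using networkx with an invented DAG:

import networkx as nx

# Toy DAG: class -> f0, class -> f1, f0 -> f1.
dag = nx.DiGraph([("class", "f0"), ("class", "f1"), ("f0", "f1")])
for feature in nx.topological_sort(dag):
    if feature == "class":
        continue
    parents = list(dag.predecessors(feature))
    print(feature, "discretized jointly with parents", parents)
# f0 discretized jointly with parents ['class']
# f1 discretized jointly with parents ['class', 'f0']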
+class BoostSPODE(BayesBase):
+    def _check_params(self, X, y, kwargs):
+        expected_args = [
+            "class_name",
+            "features",
+            "state_names",
+            "sample_weight",
+            "weighted",
+            "sparent",
+        ]
+        return self._check_params_fit(X, y, expected_args, kwargs)
+
+    def _build(self, _):
+        class_edges = [(self.class_name_, f) for f in self.feature_names_in_]
+        feature_edges = [
+            (self.sparent_, f)
+            for f in self.feature_names_in_
+            if f != self.sparent_
+        ]
+        feature_edges.extend(class_edges)
+        self.dag_ = DAG(feature_edges)
+
+    def _train(self, kwargs):
+        states = dict(state_names=kwargs.get("state_names", []))
+        self.model_ = BayesianNetwork(self.dag_.edges(), show_progress=False)
+        self.model_.fit(
+            self.dataset_,
+            estimator=BayesianEstimator,
+            prior_type="K2",
+            weighted=self.weighted_,
+            **states,
+        )
+class BoostAODE(ClassifierMixin, BaseEnsemble):
+    def __init__(
+        self,
+        show_progress=False,
+        random_state=None,
+        estimator=None,
+    ):
+        self.show_progress = show_progress
+        self.random_state = random_state
+        super().__init__(estimator=estimator)
+
+    def _validate_estimator(self) -> None:
+        """Check the estimator and set the estimator_ attribute."""
+        super()._validate_estimator(
+            default=BoostSPODE(
+                random_state=self.random_state,
+                show_progress=self.show_progress,
+            )
+        )
+
+    def fit(self, X, y, **kwargs):
+        self.n_features_in_ = X.shape[1]
+        self.feature_names_in_ = kwargs.get(
+            "features", default_feature_names(self.n_features_in_)
+        )
+        self.class_name_ = kwargs.get("class_name", "class")
+        self.X_ = X
+        self.y_ = y
+        self.n_samples_ = X.shape[0]
+        self.estimators_ = []
+        self._validate_estimator()
+        self._train(kwargs)
+        self.fitted_ = True
+        # To keep compatibility with the benchmark platform
+        self.nodes_leaves = self.nodes_edges
+        return self
+
+    def version(self):
+        if hasattr(self, "fitted_"):
+            return self.estimator_.version()
+        return SPODE(None, False).version()
+
+    @property
+    def states_(self):
+        if hasattr(self, "fitted_"):
+            return sum(
+                [
+                    len(item)
+                    for model in self.estimators_
+                    for _, item in model.model_.states.items()
+                ]
+            ) / len(self.estimators_)
+        return 0
+
+    @property
+    def depth_(self):
+        return self.states_
+
+    def nodes_edges(self):
+        nodes = 0
+        edges = 0
+        if hasattr(self, "fitted_"):
+            nodes = sum([len(x.dag_) for x in self.estimators_])
+            edges = sum([len(x.dag_.edges()) for x in self.estimators_])
+        return nodes, edges
+
+    def plot(self, title=""):
+        warnings.simplefilter("ignore", UserWarning)
+        for idx, model in enumerate(self.estimators_):
+            model.plot(title=f"{idx} {title}")
+
+    def _train(self, kwargs):
+        """Build boosted SPODEs"""
+        weights = [1 / self.n_samples_] * self.n_samples_
+        selected_features = []
+        # Step 0: Set the finish condition
+        for _ in range(self.n_features_in_):
+            # Step 1: Build ranking with mutual information
+            features = (
+                CSelectKBestWeighted(
+                    self.X_, self.y_, weights, k=self.n_features_in_
+                )
+                .fit()
+                .get_features()
+            )
+            # Step 1.1: Select the feature to become the sparent
+            for n_feature in features:
+                if n_feature not in selected_features:
+                    selected_features.append(n_feature)
+                    break
+            feature = self.feature_names_in_[n_feature]
+            # Step 2: Build & train spode with the first feature as sparent
+            estimator = clone(self.estimator_)
+            _args = kwargs.copy()
+            _args["sparent"] = feature
+            _args["sample_weight"] = weights
+            _args["weighted"] = True
+            # Step 2.1: build dataset
+            # Step 2.2: Train the model
+            estimator.fit(self.X_, self.y_, **_args)
+            # Step 3: Compute errors (epsilon sub m & alpha sub m)
+            # Explanation in https://medium.datadriveninvestor.com/understanding-adaboost-and-scikit-learns-algorithm-c8d8af5ace10
+            y_pred = estimator.predict(self.X_)
+            em = np.sum(weights * (y_pred != self.y_)) / np.sum(weights)
+            am = np.log((1 - em) / em) + np.log(estimator.n_classes_ - 1)
+            # Step 3.2: Update weights for next classifier
+            weights = [
+                wm * np.exp(am * (ym != yp))
+                for wm, ym, yp in zip(weights, self.y_, y_pred)
+            ]
+            # Step 4: Add the new model
+            self.estimators_.append(estimator)
+        self.weights_ = weights
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        n_samples = X.shape[0]
+        n_estimators = len(self.estimators_)
+        result = np.empty((n_samples, n_estimators))
+        for index, estimator in enumerate(self.estimators_):
+            result[:, index] = estimator.predict(X)
+        return mode(result, axis=1, keepdims=False).mode.ravel()
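The error and weight computation in _train above is the multi-class (SAMME) variant of the AdaBoost update. A worked numeric sketch, all values invented:

import numpy as np

# 4 samples, 3 classes, one misclassified sample (index 2).
weights = np.array([0.25, 0.25, 0.25, 0.25])
y_true = np.array([0, 1, 2, 1])
y_pred = np.array([0, 1, 1, 1])
em = np.sum(weights * (y_pred != y_true)) / np.sum(weights)  # 0.25
am = np.log((1 - em) / em) + np.log(3 - 1)                   # log(3) + log(2)
weights = weights * np.exp(am * (y_true != y_pred))
print(weights)  # [0.25 0.25 1.5 0.25]: the mistake now carries 6x the weight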
bayesclass/feature_selection.py (new file, 93 lines)
@@ -0,0 +1,93 @@
# import numpy as np
# from sklearn.feature_selection import mutual_info_classif
# from sklearn.utils.validation import check_X_y, check_is_fitted
# from sklearn.feature_selection._univariate_selection import (
#     _BaseFilter,
#     _clean_nans,
# )

# """
# Compute the weighted mutual information between each feature and the
# target.
# Based on
# Silviu Guiaşu,
# Weighted entropy,
# Reports on Mathematical Physics,
# Volume 2, Issue 3,
# 1971,
# Pages 165-179,
# ISSN 0034-4877,
# https://doi.org/10.1016/0034-4877(71)90002-4.
# (https://www.sciencedirect.com/science/article/pii/0034487771900024)
# Abstract: Weighted entropy is the measure of information supplied by a
# probabilistic experiment whose elementary events are characterized both by
# their objective probabilities and by some qualitative (objective or
# subjective) weights. The properties, the axiomatics and the maximum value
# of the weighted entropy are given.
# """


# class SelectKBestWeighted(_BaseFilter):
#     def __init__(self, *, k=10):
#         super().__init__(score_func=mutual_info_classif)
#         self.k = k

#     def _check_params(self, X, y):
#         if self.k > X.shape[1] or self.k < 1:
#             raise ValueError(
#                 f"k must be between 1 and {X.shape[1]} got {self.k}."
#             )

#     def _get_support_mask(self):
#         check_is_fitted(self)
#         if self.k == "all":
#             return np.ones(self.scores_.shape, dtype=bool)
#         elif self.k == 0:
#             return np.zeros(self.scores_.shape, dtype=bool)
#         else:
#             scores = _clean_nans(self.scores_)
#             mask = np.zeros(scores.shape, dtype=bool)

#             # Request a stable sort. Mergesort takes more memory (~40MB per
#             # megafeature on x86-64).
#             mask[np.argsort(scores, kind="mergesort")[-self.k :]] = 1
#             return mask

#     def fit(self, X, y, sample_weight):
#         self.X_, self.y_ = check_X_y(X, y)
#         self._check_params(X, y)
#         self.n_features_in_ = X.shape[1]
#         self.sample_weight_ = sample_weight
#         # Compute the entropy of the target variable
#         entropy_y = -np.sum(
#             np.multiply(
#                 np.bincount(y, weights=sample_weight),
#                 np.log(np.bincount(y, weights=sample_weight)),
#             )
#         )

#         # Compute the mutual information between each feature and the target
#         mi = self.score_func(X, y)

#         # Compute the weighted entropy of each feature
#         entropy_weighted = []
#         for i in range(X.shape[1]):
#             # Compute the weighted frequency of each unique value of the
#             # feature
#             freq_weighted = np.bincount(X[:, i], weights=sample_weight)
#             freq_weighted = freq_weighted[freq_weighted != 0]

#             # Compute the weighted entropy of the feature
#             entropy_weighted.append(
#                 -np.sum(np.multiply(freq_weighted, np.log(freq_weighted)))
#                 / np.sum(sample_weight)
#             )

#         # Compute the weighted mutual information between each feature and
#         # the target
#         mi_weighted = mi * entropy_weighted / entropy_y

#         # Return the weighted mutual information scores
#         self.scores_ = mi_weighted
#         return self
Binary file not shown (baseline test image). After: 55 KiB
Binary file not shown (baseline test image). Before: 50 KiB, After: 49 KiB
Binary file not shown (baseline test image). After: 49 KiB
Binary file not shown (baseline test image). After: 44 KiB
bayesclass/tests/conftest.py (new file, 38 lines)
@@ -0,0 +1,38 @@
import pytest
from sklearn.datasets import load_iris
from fimdlp.mdlp import FImdlp


@pytest.fixture
def iris():
    dataset = load_iris()
    X = dataset["data"]
    y = dataset["target"]
    features = dataset["feature_names"]
    # To make the iris dataset have the same values as our iris.arff dataset
    patch = {(34, 3): (0.2, 0.1), (37, 1): (3.6, 3.1), (37, 2): (1.4, 1.5)}
    for key, value in patch.items():
        X[key] = value[1]
    return X, y, features


@pytest.fixture
def data(iris):
    return iris[0], iris[1]


@pytest.fixture
def features(iris):
    return iris[2]


@pytest.fixture
def class_name():
    return "class"


@pytest.fixture
def data_disc(data):
    clf = FImdlp()
    X, y = data
    return clf.fit_transform(X, y), y
@@ -1,6 +1,5 @@
import pytest
import numpy as np
-from sklearn.datasets import load_iris
from sklearn.preprocessing import KBinsDiscretizer
from matplotlib.testing.decorators import image_comparison
from matplotlib.testing.conftest import mpl_test_settings
@@ -10,28 +9,21 @@ from bayesclass.clfs import AODE
from .._version import __version__


-@pytest.fixture
-def data():
-    X, y = load_iris(return_X_y=True)
-    enc = KBinsDiscretizer(encode="ordinal")
-    return enc.fit_transform(X), y
-
-
@pytest.fixture
def clf():
-   return AODE()
+   return AODE(random_state=17)


-def test_AODE_default_hyperparameters(data, clf):
+def test_AODE_default_hyperparameters(data_disc, clf):
    # Test default values of hyperparameters
    assert not clf.show_progress
-   assert clf.random_state is None
-   clf = AODE(show_progress=True, random_state=17)
-   assert clf.show_progress
    assert clf.random_state == 17
-   clf.fit(*data)
+   clf = AODE(show_progress=True)
+   assert clf.show_progress
+   assert clf.random_state is None
+   clf.fit(*data_disc)
    assert clf.class_name_ == "class"
-   assert clf.features_ == [
+   assert clf.feature_names_in_ == [
        "feature_0",
        "feature_1",
        "feature_2",
@@ -42,50 +34,66 @@ def test_AODE_default_hyperparameters(data, clf):
@image_comparison(
    baseline_images=["line_dashes_AODE"], remove_text=True, extensions=["png"]
)
-def test_AODE_plot(data, clf):
+def test_AODE_plot(data_disc, features, clf):
    # mpl_test_settings will automatically clean these internal side effects
    mpl_test_settings
-   dataset = load_iris(as_frame=True)
-   clf.fit(*data, features=dataset["feature_names"])
+   clf.fit(*data_disc, features=features)
    clf.plot("AODE Iris")


-def test_AODE_version(clf):
+def test_AODE_version(clf, features, data_disc):
    """Check AODE version."""
    assert __version__ == clf.version()
+   clf.fit(*data_disc, features=features)
+   assert __version__ == clf.version()


-def test_AODE_nodes_leaves(clf):
-   assert clf.nodes_leaves() == (0, 0)
+def test_AODE_nodes_edges(clf, data_disc):
+   assert clf.nodes_edges() == (0, 0)
+   clf.fit(*data_disc)
+   assert clf.nodes_leaves() == (20, 28)


-def test_AODE_classifier(data, clf):
-   clf.fit(*data)
-   attribs = ["classes_", "X_", "y_", "features_", "class_name_"]
+def test_AODE_states(clf, data_disc):
+   assert clf.states_ == 0
+   clf.fit(*data_disc)
+   assert clf.states_ == 19
+   assert clf.depth_ == clf.states_
+
+
+def test_AODE_classifier(data_disc, clf):
+   clf.fit(*data_disc)
+   attribs = [
+       "feature_names_in_",
+       "class_name_",
+       "n_features_in_",
+       "X_",
+       "y_",
+   ]
    for attr in attribs:
        assert hasattr(clf, attr)
-   X = data[0]
-   y = data[1]
+   X = data_disc[0]
+   y = data_disc[1]
    y_pred = clf.predict(X)
    assert y_pred.shape == (X.shape[0],)
-   assert sum(y == y_pred) == 147
+   assert sum(y == y_pred) == 146


-def test_AODE_wrong_num_features(data, clf):
+def test_AODE_wrong_num_features(data_disc, clf):
    with pytest.raises(
        ValueError,
        match="Number of features does not match the number of columns in X",
    ):
-       clf.fit(*data, features=["feature_1", "feature_2"])
+       clf.fit(*data_disc, features=["feature_1", "feature_2"])


-def test_AODE_wrong_hyperparam(data, clf):
+def test_AODE_wrong_hyperparam(data_disc, clf):
    with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
-       clf.fit(*data, wrong_param="wrong_param")
+       clf.fit(*data_disc, wrong_param="wrong_param")


-def test_AODE_error_size_predict(data, clf):
-   X, y = data
+def test_AODE_error_size_predict(data_disc, clf):
+   X, y = data_disc
    clf.fit(X, y)
    with pytest.raises(ValueError):
        X_diff_size = np.ones((10, X.shape[1] + 1))
bayesclass/tests/test_AODENew.py (new file, 123 lines)
@@ -0,0 +1,123 @@
import pytest
import numpy as np
from matplotlib.testing.decorators import image_comparison
from matplotlib.testing.conftest import mpl_test_settings


from bayesclass.clfs import AODENew
from .._version import __version__


@pytest.fixture
def clf():
    return AODENew(random_state=17)


def test_AODENew_default_hyperparameters(data, clf):
    # Test default values of hyperparameters
    assert not clf.show_progress
    assert clf.random_state == 17
    clf = AODENew(show_progress=True)
    assert clf.show_progress
    assert clf.random_state is None
    clf.fit(*data)
    assert clf.class_name_ == "class"
    assert clf.feature_names_in_ == [
        "feature_0",
        "feature_1",
        "feature_2",
        "feature_3",
    ]


@image_comparison(
    baseline_images=["line_dashes_AODENew"],
    remove_text=True,
    extensions=["png"],
)
def test_AODENew_plot(data, features, clf):
    # mpl_test_settings will automatically clean these internal side effects
    mpl_test_settings
    clf.fit(*data, features=features)
    clf.plot("AODE Iris")


def test_AODENew_version(clf, data):
    """Check AODENew version."""
    assert __version__ == clf.version()
    clf.fit(*data)
    assert __version__ == clf.version()


def test_AODENew_nodes_edges(clf, data):
    assert clf.nodes_edges() == (0, 0)
    clf.fit(*data)
    assert clf.nodes_leaves() == (20, 28)


def test_AODENew_states(clf, data):
    assert clf.states_ == 0
    clf.fit(*data)
    assert clf.states_ == 17.75
    assert clf.depth_ == clf.states_


def test_AODENew_classifier(data, clf):
    clf.fit(*data)
    attribs = [
        "feature_names_in_",
        "class_name_",
        "n_features_in_",
        "X_",
        "y_",
    ]
    for attr in attribs:
        assert hasattr(clf, attr)
    X = data[0]
    y = data[1]
    y_pred = clf.predict(X)
    assert y_pred.shape == (X.shape[0],)
    assert sum(y == y_pred) == 146


def test_AODENew_local_discretization(clf, data_disc):
    expected_data = [
        [-1, [0, -1], [0, -1], [0, -1]],
        [[1, -1], -1, [1, -1], [1, -1]],
        [[2, -1], [2, -1], -1, [2, -1]],
        [[3, -1], [3, -1], [3, -1], -1],
    ]
    clf.fit(*data_disc)
    for idx, estimator in enumerate(clf.estimators_):
        expected = expected_data[idx]
        for feature in range(4):
            computed = estimator.discretizer_.target_[feature]
            if type(computed) == list:
                for j, k in zip(expected[feature], computed):
                    assert j == k
            else:
                assert (
                    expected[feature]
                    == estimator.discretizer_.target_[feature]
                )


def test_AODENew_wrong_num_features(data, clf):
    with pytest.raises(
        ValueError,
        match="Number of features does not match the number of columns in X",
    ):
        clf.fit(*data, features=["feature_1", "feature_2"])


def test_AODENew_wrong_hyperparam(data, clf):
    with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
        clf.fit(*data, wrong_param="wrong_param")


def test_AODENew_error_size_predict(data, clf):
    X, y = data
    clf.fit(X, y)
    with pytest.raises(ValueError):
        X_diff_size = np.ones((10, X.shape[1] + 1))
        clf.predict(X_diff_size)
bayesclass/tests/test_BoostAODE.py (new file, 100 lines)
@@ -0,0 +1,100 @@
import pytest
import numpy as np
from sklearn.preprocessing import KBinsDiscretizer
from matplotlib.testing.decorators import image_comparison
from matplotlib.testing.conftest import mpl_test_settings


from bayesclass.clfs import BoostAODE
from .._version import __version__


@pytest.fixture
def clf():
    return BoostAODE(random_state=17)


def test_BoostAODE_default_hyperparameters(data_disc, clf):
    # Test default values of hyperparameters
    assert not clf.show_progress
    assert clf.random_state == 17
    clf = BoostAODE(show_progress=True)
    assert clf.show_progress
    assert clf.random_state is None
    clf.fit(*data_disc)
    assert clf.class_name_ == "class"
    assert clf.feature_names_in_ == [
        "feature_0",
        "feature_1",
        "feature_2",
        "feature_3",
    ]


# @image_comparison(
#     baseline_images=["line_dashes_AODE"], remove_text=True,
#     extensions=["png"]
# )
# def test_BoostAODE_plot(data_disc, features, clf):
#     # mpl_test_settings will automatically clean these internal side effects
#     mpl_test_settings
#     clf.fit(*data_disc, features=features)
#     clf.plot("AODE Iris")


# def test_BoostAODE_version(clf, features, data_disc):
#     """Check AODE version."""
#     assert __version__ == clf.version()
#     clf.fit(*data_disc, features=features)
#     assert __version__ == clf.version()


# def test_BoostAODE_nodes_edges(clf, data_disc):
#     assert clf.nodes_edges() == (0, 0)
#     clf.fit(*data_disc)
#     assert clf.nodes_leaves() == (20, 28)


# def test_BoostAODE_states(clf, data_disc):
#     assert clf.states_ == 0
#     clf.fit(*data_disc)
#     assert clf.states_ == 19
#     assert clf.depth_ == clf.states_


# def test_BoostAODE_classifier(data_disc, clf):
#     clf.fit(*data_disc)
#     attribs = [
#         "feature_names_in_",
#         "class_name_",
#         "n_features_in_",
#         "X_",
#         "y_",
#     ]
#     for attr in attribs:
#         assert hasattr(clf, attr)
#     X = data_disc[0]
#     y = data_disc[1]
#     y_pred = clf.predict(X)
#     assert y_pred.shape == (X.shape[0],)
#     assert sum(y == y_pred) == 146


# def test_BoostAODE_wrong_num_features(data_disc, clf):
#     with pytest.raises(
#         ValueError,
#         match="Number of features does not match the number of columns in X",
#     ):
#         clf.fit(*data_disc, features=["feature_1", "feature_2"])


# def test_BoostAODE_wrong_hyperparam(data_disc, clf):
#     with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
#         clf.fit(*data_disc, wrong_param="wrong_param")


# def test_BoostAODE_error_size_predict(data_disc, clf):
#     X, y = data_disc
#     clf.fit(X, y)
#     with pytest.raises(ValueError):
#         X_diff_size = np.ones((10, X.shape[1] + 1))
#         clf.predict(X_diff_size)
@@ -1,28 +1,21 @@
|
|||||||
import pytest
|
import pytest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.datasets import load_iris
|
|
||||||
from sklearn.preprocessing import KBinsDiscretizer
|
from sklearn.preprocessing import KBinsDiscretizer
|
||||||
from matplotlib.testing.decorators import image_comparison
|
from matplotlib.testing.decorators import image_comparison
|
||||||
from matplotlib.testing.conftest import mpl_test_settings
|
from matplotlib.testing.conftest import mpl_test_settings
|
||||||
|
from pgmpy.models import BayesianNetwork
|
||||||
|
|
||||||
|
|
||||||
from bayesclass.clfs import KDB
|
from bayesclass.clfs import KDB
|
||||||
from .._version import __version__
|
from .._version import __version__
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def data():
|
|
||||||
X, y = load_iris(return_X_y=True)
|
|
||||||
enc = KBinsDiscretizer(encode="ordinal")
|
|
||||||
return enc.fit_transform(X), y
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def clf():
|
def clf():
|
||||||
return KDB(k=3)
|
return KDB(k=3, show_progress=False)
|
||||||
|
|
||||||
|
|
||||||
def test_KDB_default_hyperparameters(data, clf):
|
def test_KDB_default_hyperparameters(data_disc, clf):
|
||||||
# Test default values of hyperparameters
|
# Test default values of hyperparameters
|
||||||
assert not clf.show_progress
|
assert not clf.show_progress
|
||||||
assert clf.random_state is None
|
assert clf.random_state is None
|
||||||
@@ -31,9 +24,9 @@ def test_KDB_default_hyperparameters(data, clf):
|
|||||||
assert clf.show_progress
|
assert clf.show_progress
|
||||||
assert clf.random_state == 17
|
assert clf.random_state == 17
|
||||||
assert clf.k == 3
|
assert clf.k == 3
|
||||||
clf.fit(*data)
|
clf.fit(*data_disc)
|
||||||
assert clf.class_name_ == "class"
|
assert clf.class_name_ == "class"
|
||||||
assert clf.features_ == [
|
assert clf.feature_names_in_ == [
|
||||||
"feature_0",
|
"feature_0",
|
||||||
"feature_1",
|
"feature_1",
|
||||||
"feature_2",
|
"feature_2",
|
||||||
@@ -46,49 +39,85 @@ def test_KDB_version(clf):
     assert __version__ == clf.version()


-def test_KDB_nodes_leaves(clf):
-    assert clf.nodes_leaves() == (0, 0)
+def test_KDB_nodes_edges(clf, data_disc):
+    assert clf.nodes_edges() == (0, 0)
+    clf.fit(*data_disc)
+    assert clf.nodes_leaves() == (5, 9)


-def test_KDB_classifier(data, clf):
-    clf.fit(*data)
-    attribs = ["classes_", "X_", "y_", "features_", "class_name_"]
+def test_KDB_states(clf, data_disc):
+    assert clf.states_ == 0
+    clf.fit(*data_disc)
+    assert clf.states_ == 19
+    assert clf.depth_ == clf.states_
+
+
+def test_KDB_classifier(data_disc, clf):
+    clf.fit(*data_disc)
+    attribs = ["classes_", "X_", "y_", "feature_names_in_", "class_name_"]
     for attr in attribs:
         assert hasattr(clf, attr)
-    X = data[0]
-    y = data[1]
+    X = data_disc[0]
+    y = data_disc[1]
     y_pred = clf.predict(X)
     assert y_pred.shape == (X.shape[0],)
-    assert sum(y == y_pred) == 148
+    assert sum(y == y_pred) == 146
+
+
+def test_KDB_classifier_weighted(data_disc, clf):
+    sample_weight = [1] * data_disc[0].shape[0]
+    sample_weight[:50] = [0] * 50
+    clf.fit(*data_disc, sample_weight=sample_weight, weighted=True)
+    assert clf.score(*data_disc) == 0.64


 @image_comparison(
     baseline_images=["line_dashes_KDB"], remove_text=True, extensions=["png"]
 )
-def test_KDB_plot(data, clf):
+def test_KDB_plot(data_disc, features, clf):
     # mpl_test_settings will automatically clean these internal side effects
     mpl_test_settings
-    dataset = load_iris(as_frame=True)
-    clf.fit(*data, features=dataset["feature_names"])
+    clf.fit(*data_disc, features=features)
     clf.plot("KDB Iris")


-def test_KDB_wrong_num_features(data, clf):
+def test_KDB_wrong_num_features(data_disc, clf):
     with pytest.raises(
         ValueError,
         match="Number of features does not match the number of columns in X",
     ):
-        clf.fit(*data, features=["feature_1", "feature_2"])
+        clf.fit(*data_disc, features=["feature_1", "feature_2"])


-def test_KDB_wrong_hyperparam(data, clf):
+def test_KDB_wrong_hyperparam(data_disc, clf):
     with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
-        clf.fit(*data, wrong_param="wrong_param")
+        clf.fit(*data_disc, wrong_param="wrong_param")


-def test_KDB_error_size_predict(data, clf):
-    X, y = data
+def test_KDB_error_size_predict(data_disc, clf):
+    X, y = data_disc
     clf.fit(X, y)
     with pytest.raises(ValueError):
         X_diff_size = np.ones((10, X.shape[1] + 1))
         clf.predict(X_diff_size)
+
+
+def test_KDB_dont_do_cycles():
+    clf = KDB(k=4)
+    dag = BayesianNetwork(show_progress=False)
+    clf.feature_names_in_ = [
+        "feature_0",
+        "feature_1",
+        "feature_2",
+        "feature_3",
+    ]
+    nodes = list(range(4))
+    weights = np.ones((4, 4))
+    for idx in range(1, 4):
+        dag.add_edge(clf.feature_names_in_[0], clf.feature_names_in_[idx])
+    dag.add_edge(clf.feature_names_in_[1], clf.feature_names_in_[2])
+    dag.add_edge(clf.feature_names_in_[1], clf.feature_names_in_[3])
+    dag.add_edge(clf.feature_names_in_[2], clf.feature_names_in_[3])
+    for idx in range(4):
+        clf._add_m_edges(dag, idx, nodes, weights)
+    assert len(dag.edges()) == 6
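Two details worth noting in the new tests above. First, `test_KDB_classifier_weighted` zeroes the sample weights of the first 50 iris rows (all of class 0), and the expected score of 0.64 is 96/150: the classifier still resolves most of the two remaining classes. Second, `test_KDB_dont_do_cycles` saturates a 4-node DAG with all 6 forward edges before calling `_add_m_edges`; any further edge would close a cycle, so the edge count must stay at 6. A standalone illustration of that invariant with networkx (a sketch, not repository code):

import networkx as nx

# The test's DAG: every edge i -> j with i < j over 4 nodes (6 edges, acyclic)
dag = nx.DiGraph([(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)])
assert nx.is_directed_acyclic_graph(dag)

# Any additional edge must point "backwards" and therefore closes a cycle
dag.add_edge(3, 1)  # 1 -> 3 already exists, so 3 -> 1 creates a cycle
assert not nx.is_directed_acyclic_graph(dag)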
132
bayesclass/tests/test_KDBNew.py
Normal file
@@ -0,0 +1,132 @@
import pytest
import numpy as np
from matplotlib.testing.decorators import image_comparison
from matplotlib.testing.conftest import mpl_test_settings
from pgmpy.models import BayesianNetwork


from bayesclass.clfs import KDBNew
from .._version import __version__


@pytest.fixture
def clf():
    return KDBNew(k=3, show_progress=False)


def test_KDBNew_default_hyperparameters(data, clf):
    # Test default values of hyperparameters
    assert not clf.show_progress
    assert clf.random_state is None
    assert clf.theta == 0.03
    clf = KDBNew(show_progress=True, random_state=17, k=3)
    assert clf.show_progress
    assert clf.random_state == 17
    assert clf.k == 3
    clf.fit(*data)
    assert clf.class_name_ == "class"
    assert clf.feature_names_in_ == [
        "feature_0",
        "feature_1",
        "feature_2",
        "feature_3",
    ]


def test_KDBNew_version(clf):
    """Check KDBNew version."""
    assert __version__ == clf.version()


def test_KDBNew_nodes_edges(clf, data):
    assert clf.nodes_edges() == (0, 0)
    clf.fit(*data)
    assert clf.nodes_leaves() == (5, 9)


def test_KDBNew_states(clf, data):
    assert clf.states_ == 0
    clf.fit(*data)
    assert clf.states_ == 22
    assert clf.depth_ == clf.states_


def test_KDBNew_classifier(data, clf):
    clf.fit(*data)
    attribs = ["classes_", "X_", "y_", "feature_names_in_", "class_name_"]
    for attr in attribs:
        assert hasattr(clf, attr)
    X = data[0]
    y = data[1]
    y_pred = clf.predict(X)
    assert y_pred.shape == (X.shape[0],)
    assert sum(y == y_pred) == 145


def test_KDBNew_local_discretization(clf, data):
    expected = [[1, -1], -1, [0, 1, 3, -1], [1, -1]]
    clf.fit(*data)
    for feature in range(4):
        computed = clf.estimator_.discretizer_.target_[feature]
        if type(computed) == list:
            for j, k in zip(expected[feature], computed):
                assert j == k
        else:
            assert (
                expected[feature]
                == clf.estimator_.discretizer_.target_[feature]
            )


@image_comparison(
    baseline_images=["line_dashes_KDBNew"],
    remove_text=True,
    extensions=["png"],
)
def test_KDBNew_plot(data, features, class_name, clf):
    # mpl_test_settings will automatically clean these internal side effects
    mpl_test_settings
    clf.fit(*data, features=features, class_name=class_name)
    clf.plot("KDBNew Iris")


def test_KDBNew_wrong_num_features(data, clf):
    with pytest.raises(
        ValueError,
        match="Number of features does not match the number of columns in X",
    ):
        clf.fit(*data, features=["feature_1", "feature_2"])


def test_KDBNew_wrong_hyperparam(data, clf):
    with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
        clf.fit(*data, wrong_param="wrong_param")


def test_KDBNew_error_size_predict(data, clf):
    X, y = data
    clf.fit(X, y)
    with pytest.raises(ValueError):
        X_diff_size = np.ones((10, X.shape[1] + 1))
        clf.predict(X_diff_size)


def test_KDBNew_dont_do_cycles():
    clf = KDBNew(k=4)
    dag = BayesianNetwork(show_progress=False)
    clf.feature_names_in_ = [
        "feature_0",
        "feature_1",
        "feature_2",
        "feature_3",
    ]
    nodes = list(range(4))
    weights = np.ones((4, 4))
    for idx in range(1, 4):
        dag.add_edge(clf.feature_names_in_[0], clf.feature_names_in_[idx])
    dag.add_edge(clf.feature_names_in_[1], clf.feature_names_in_[2])
    dag.add_edge(clf.feature_names_in_[1], clf.feature_names_in_[3])
    dag.add_edge(clf.feature_names_in_[2], clf.feature_names_in_[3])
    for idx in range(4):
        clf._add_m_edges(dag, idx, nodes, weights)
    assert len(dag.edges()) == 6
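Note on `test_KDBNew_local_discretization`: the `expected` entries appear to encode, per feature, the variables against which fimdlp locally re-discretizes it, with `-1` presumably denoting the class variable and non-negative integers parent feature indices; this reading is inferred from the test data, not documented anywhere in this diff.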
bayesclass/tests/test_TAN.py
@@ -1,7 +1,5 @@
 import pytest
 import numpy as np
-from sklearn.datasets import load_iris
-from sklearn.preprocessing import KBinsDiscretizer
 from matplotlib.testing.decorators import image_comparison
 from matplotlib.testing.conftest import mpl_test_settings

@@ -10,29 +8,22 @@ from bayesclass.clfs import TAN
 from .._version import __version__


-@pytest.fixture
-def data():
-    X, y = load_iris(return_X_y=True)
-    enc = KBinsDiscretizer(encode="ordinal")
-    return enc.fit_transform(X), y
-
-
 @pytest.fixture
 def clf():
-    return TAN()
+    return TAN(random_state=17, show_progress=False)


-def test_TAN_default_hyperparameters(data, clf):
+def test_TAN_default_hyperparameters(data_disc, clf):
     # Test default values of hyperparameters
     assert not clf.show_progress
-    assert clf.random_state is None
-    clf = TAN(show_progress=True, random_state=17)
-    assert clf.show_progress
     assert clf.random_state == 17
-    clf.fit(*data)
+    clf = TAN(show_progress=True)
+    assert clf.show_progress
+    assert clf.random_state is None
+    clf.fit(*data_disc)
     assert clf.head_ == 0
     assert clf.class_name_ == "class"
-    assert clf.features_ == [
+    assert clf.feature_names_in_ == [
         "feature_0",
         "feature_1",
         "feature_2",
@@ -45,59 +36,73 @@ def test_TAN_version(clf):
     assert __version__ == clf.version()


-def test_TAN_nodes_leaves(clf):
-    assert clf.nodes_leaves() == (0, 0)
+def test_TAN_nodes_edges(clf, data_disc):
+    assert clf.nodes_edges() == (0, 0)
+    clf.fit(*data_disc, head="random")
+    assert clf.nodes_leaves() == (5, 7)


-def test_TAN_random_head(data):
-    clf = TAN(random_state=17)
-    clf.fit(*data, head="random")
+def test_TAN_states(clf, data_disc):
+    assert clf.states_ == 0
+    clf.fit(*data_disc)
+    assert clf.states_ == 19
+    assert clf.depth_ == clf.states_
+
+
+def test_TAN_random_head(clf, data_disc):
+    clf.fit(*data_disc, head="random")
     assert clf.head_ == 3


-def test_TAN_classifier(data, clf):
-    clf.fit(*data)
-    attribs = ["classes_", "X_", "y_", "head_", "features_", "class_name_"]
+def test_TAN_classifier(data_disc, clf):
+    clf.fit(*data_disc)
+    attribs = [
+        "classes_",
+        "X_",
+        "y_",
+        "head_",
+        "feature_names_in_",
+        "class_name_",
+    ]
     for attr in attribs:
         assert hasattr(clf, attr)
-    X = data[0]
-    y = data[1]
+    X = data_disc[0]
+    y = data_disc[1]
     y_pred = clf.predict(X)
     assert y_pred.shape == (X.shape[0],)
-    assert sum(y == y_pred) == 147
+    assert sum(y == y_pred) == 146


 @image_comparison(
     baseline_images=["line_dashes_TAN"], remove_text=True, extensions=["png"]
 )
-def test_TAN_plot(data, clf):
+def test_TAN_plot(data_disc, features, clf):
     # mpl_test_settings will automatically clean these internal side effects
     mpl_test_settings
-    dataset = load_iris(as_frame=True)
-    clf.fit(*data, features=dataset["feature_names"], head=0)
+    clf.fit(*data_disc, features=features, head=0)
     clf.plot("TAN Iris head=0")


-def test_KDB_wrong_num_features(data, clf):
+def test_TAN_wrong_num_features(data_disc, clf):
     with pytest.raises(
         ValueError,
         match="Number of features does not match the number of columns in X",
     ):
-        clf.fit(*data, features=["feature_1", "feature_2"])
+        clf.fit(*data_disc, features=["feature_1", "feature_2"])


-def test_TAN_wrong_hyperparam(data, clf):
+def test_TAN_wrong_hyperparam(data_disc, clf):
     with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
-        clf.fit(*data, wrong_param="wrong_param")
+        clf.fit(*data_disc, wrong_param="wrong_param")


-def test_TAN_head_out_of_range(data, clf):
+def test_TAN_head_out_of_range(data_disc, clf):
     with pytest.raises(ValueError, match="Head index out of range"):
-        clf.fit(*data, head=4)
+        clf.fit(*data_disc, head=4)


-def test_TAN_error_size_predict(data, clf):
-    X, y = data
+def test_TAN_error_size_predict(data_disc, clf):
+    X, y = data_disc
     clf.fit(X, y)
     with pytest.raises(ValueError):
         X_diff_size = np.ones((10, X.shape[1] + 1))
120
bayesclass/tests/test_TANNew.py
Normal file
@@ -0,0 +1,120 @@
import pytest
import numpy as np
from matplotlib.testing.decorators import image_comparison
from matplotlib.testing.conftest import mpl_test_settings


from bayesclass.clfs import TANNew
from .._version import __version__


@pytest.fixture
def clf():
    return TANNew(random_state=17)


def test_TANNew_default_hyperparameters(data, clf):
    # Test default values of hyperparameters
    assert not clf.show_progress
    assert clf.random_state == 17
    clf = TANNew(show_progress=True)
    assert clf.show_progress
    assert clf.random_state is None
    clf.fit(*data)
    assert clf.head_ == 0
    assert clf.class_name_ == "class"
    assert clf.feature_names_in_ == [
        "feature_0",
        "feature_1",
        "feature_2",
        "feature_3",
    ]


def test_TANNew_version(clf):
    """Check TANNew version."""
    assert __version__ == clf.version()


def test_TANNew_nodes_edges(clf, data):
    assert clf.nodes_edges() == (0, 0)
    clf.fit(*data, head="random")
    assert clf.nodes_leaves() == (5, 7)


def test_TANNew_states(clf, data):
    assert clf.states_ == 0
    clf.fit(*data)
    assert clf.states_ == 18
    assert clf.depth_ == clf.states_


def test_TANNew_random_head(clf, data):
    clf.fit(*data, head="random")
    assert clf.head_ == 3


def test_TANNew_local_discretization(clf, data):
    expected = [-1, [0, -1], [0, -1], [1, -1]]
    clf.fit(*data)
    for feature in range(4):
        assert (
            expected[feature] == clf.estimator_.discretizer_.target_[feature]
        )


def test_TANNew_classifier(data, clf):
    clf.fit(*data)
    attribs = [
        "classes_",
        "X_",
        "y_",
        "head_",
        "feature_names_in_",
        "class_name_",
    ]
    for attr in attribs:
        assert hasattr(clf, attr)
    X = data[0]
    y = data[1]
    y_pred = clf.predict(X)
    assert y_pred.shape == (X.shape[0],)
    assert sum(y == y_pred) == 146


@image_comparison(
    baseline_images=["line_dashes_TANNew"],
    remove_text=True,
    extensions=["png"],
)
def test_TANNew_plot(data, features, clf):
    # mpl_test_settings will automatically clean these internal side effects
    mpl_test_settings
    clf.fit(*data, features=features, head=0)
    clf.plot("TANNew Iris head=0")


def test_TANNew_wrong_num_features(data, clf):
    with pytest.raises(
        ValueError,
        match="Number of features does not match the number of columns in X",
    ):
        clf.fit(*data, features=["feature_1", "feature_2"])


def test_TANNew_wrong_hyperparam(data, clf):
    with pytest.raises(ValueError, match="Unexpected argument: wrong_param"):
        clf.fit(*data, wrong_param="wrong_param")


def test_TANNew_head_out_of_range(data, clf):
    with pytest.raises(ValueError, match="Head index out of range"):
        clf.fit(*data, head=4)


def test_TANNew_error_size_predict(data, clf):
    X, y = data
    clf.fit(X, y)
    with pytest.raises(ValueError):
        X_diff_size = np.ones((10, X.shape[1] + 1))
        clf.predict(X_diff_size)
@@ -1,14 +1,29 @@
 import pytest
+import numpy as np

 from sklearn.utils.estimator_checks import check_estimator

-from bayesclass.clfs import TAN, KDB, AODE
+from bayesclass.clfs import BayesBase, TAN, KDB, AODE


-@pytest.mark.parametrize("estimator", [TAN(), KDB(k=2), AODE()])
-# @pytest.mark.parametrize("estimator", [AODE()])
-def test_all_estimators(estimator):
+def test_more_tags():
+    expected = {
+        "requires_positive_X": True,
+        "requires_positive_y": True,
+        "preserve_dtype": [np.int32, np.int64],
+        "requires_y": True,
+    }
+    clf = BayesBase(None, True)
+    computed = clf._more_tags()
+    for key, value in expected.items():
+        assert key in computed
+        assert computed[key] == value
+
+
+# @pytest.mark.parametrize("estimators", [TAN(), KDB(k=2), AODE()])
+@pytest.mark.parametrize("estimators", [AODE()])
+def test_all_estimators(estimators):
     i = 0
-    for estimator, test in check_estimator(estimator, generate_only=True):
+    for estimator, test in check_estimator(estimators, generate_only=True):
         print(i := i + 1, test)
         # test(estimator)
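`_more_tags` is scikit-learn's estimator-tags hook: `BaseEstimator._get_tags()` merges each class's `_more_tags()` into the default tag set, which is exactly what the new `test_more_tags` asserts key by key. A quick interactive check (the `BayesBase(None, True)` construction is copied from the test; `_get_tags` is the private sklearn API of this sklearn generation):

from bayesclass.clfs import BayesBase

clf = BayesBase(None, True)
tags = clf._get_tags()  # sklearn defaults merged with BayesBase._more_tags()
assert tags["requires_y"] is True
assert tags["requires_positive_X"] is True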
32
patch_pgmpy_0.1.22.diff
Normal file
@@ -0,0 +1,32 @@
diff --git a/pgmpy/models/BayesianNetwork.py b/pgmpy/models/BayesianNetwork.py
index bd90122d..70ae38f7 100644
--- a/pgmpy/models/BayesianNetwork.py
+++ b/pgmpy/models/BayesianNetwork.py
@@ -27,7 +27,7 @@ class BayesianNetwork(DAG):
     Base class for Bayesian Models.
     """

-    def __init__(self, ebunch=None, latents=set()):
+    def __init__(self, ebunch=None, latents=set(), show_progress=False):
         """
         Initializes a Bayesian Model.
         A models stores nodes and edges with conditional probability
@@ -95,6 +95,7 @@ class BayesianNetwork(DAG):
         >>> len(G)  # number of nodes in graph
         3
         """
+        self.show_progress = show_progress
         super(BayesianNetwork, self).__init__(ebunch=ebunch, latents=latents)
         self.cpds = []
         self.cardinalities = defaultdict(int)
@@ -738,7 +739,9 @@ class BayesianNetwork(DAG):
                 show_progress=False,
             )
             for index, data_point in tqdm(
-                data_unique.iterrows(), total=data_unique.shape[0]
+                data_unique.iterrows(),
+                total=data_unique.shape[0],
+                disable=not self.show_progress,
             )
         )
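This vendored patch is what makes the `BayesianNetwork(show_progress=False)` calls in the test files above legal: it adds a `show_progress` flag to the pgmpy 0.1.22 constructor and uses it to disable the tqdm bar during prediction. The intended effect, once the patch has been applied to an installed pgmpy 0.1.22:

from pgmpy.models import BayesianNetwork

# Only valid with patch_pgmpy_0.1.22.diff applied to pgmpy 0.1.22
model = BayesianNetwork(show_progress=False)  # no progress bar in predict()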
pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools", "setuptools-scm", "wheel"]
+requires = ["setuptools", "setuptools-scm", "cython", "wheel", "torch"]
 build-backend = "setuptools.build_meta"

 [tool.setuptools]
@@ -25,6 +25,7 @@ dependencies = [
     "pgmpy",
     "networkx",
     "matplotlib",
+    "fimdlp",
 ]
 requires-python = ">=3.8"
 classifiers = [
@@ -38,9 +39,7 @@ classifiers = [
     "Operating System :: OS Independent",
     "Programming Language :: Python",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.8",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
 ]

 [project.optional-dependencies]
@@ -60,7 +59,7 @@ show_missing = true

 [tool.black]
 line-length = 79
-target_version = ['py38', 'py39', 'py310']
+target_version = ['py311']
 include = '\.pyi?$'
 exclude = '''
 /(
@@ -1,5 +1,6 @@
 numpy
 scipy
+pandas
 scikit-learn
 matplotlib
 networkx
41
setup.py
Normal file
@@ -0,0 +1,41 @@
"""
Calling
$python setup.py build_ext --inplace
will build the extension library in the current file.
"""

from setuptools import Extension, setup
from torch.utils.cpp_extension import (
    BuildExtension,
    CppExtension,
    include_paths,
)


setup(
    ext_modules=[
        Extension(
            name="bayesclass.cppSelectFeatures",
            sources=[
                "bayesclass/cSelectFeatures.pyx",
                "bayesclass/FeatureSelect.cpp",
            ],
            language="c++",
            include_dirs=["bayesclass"],
            extra_compile_args=[
                "-std=c++17",
            ],
        ),
        CppExtension(
            name="bayesclass.BayesNet",
            sources=[
                "bayesclass/BayesNetwork.pyx",
                "bayesclass/Network.cc",
                "bayesclass/Node.cc",
                "bayesclass/Metrics.cc",
            ],
            include_dirs=include_paths(),
        ),
    ],
    cmdclass={"build_ext": BuildExtension},
)
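Per the module docstring, the extensions are built in place with `python setup.py build_ext --inplace`; torch must therefore be importable at build time, which is why it now appears in the `[build-system]` requires of pyproject.toml above. After a successful build, the two modules should import like any other (module names taken from the `name=` arguments; usage beyond import is not shown in this diff):

# Built by: python setup.py build_ext --inplace
import bayesclass.cppSelectFeatures  # Cython + FeatureSelect.cpp extension
import bayesclass.BayesNet  # torch CppExtension wrapping Network/Node/Metrics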
|
Reference in New Issue
Block a user