Tests XSpode & XBAODE

2025-03-12 13:46:04 +01:00
parent 71b05cc1a7
commit 3bdb14bd65
12 changed files with 450 additions and 644 deletions
--- a/bayesnet/classifiers/XSPODE.cc
+++ b/bayesnet/classifiers/XSPODE.cc
@@ -3,14 +3,14 @@
 // SPDX-FileType: SOURCE
 // SPDX-License-Identifier: MIT
 // ***************************************************************
-#include "XSPODE.h"
-#include "bayesnet/utils/TensorUtils.h"
 #include <algorithm>
 #include <cmath>
 #include <limits>
 #include <numeric>
 #include <sstream>
 #include <stdexcept>
+#include "XSPODE.h"
+#include "bayesnet/utils/TensorUtils.h"

 namespace bayesnet {

@@ -35,7 +35,7 @@ namespace bayesnet {
    Classifier::setHyperparameters(hyperparameters);
  }

-  void XSpode::fit(torch::Tensor & X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
+  void XSpode::fitx(torch::Tensor & X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
  {
    m = X.size(1);
    n = X.size(0);
@@ -390,9 +390,8 @@ namespace bayesnet {
  }
  int XSpode::getNumberOfEdges() const
  {
-    return nFeatures_ * (2 * nFeatures_ - 1);
+    return 2 * nFeatures_ + 1; // NOTE(review): formula changed from nFeatures_ * (2 * nFeatures_ - 1); confirm 2n+1 is the intended edge count
  }
-  std::vector<int>& XSpode::getStates() { return states_; }

  // ------------------------------------------------------
  // Predict overrides (classifier interface)
--- a/bayesnet/classifiers/XSPODE.h
+++ b/bayesnet/classifiers/XSPODE.h
@@ -29,7 +29,7 @@ namespace bayesnet {
        int getClassNumStates() const override;
        std::vector<int>& getStates();
        std::vector<std::string> graph(const std::string& title) const override { return std::vector<std::string>({ title }); }
-        void fit(torch::Tensor& X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing);
+        void fitx(torch::Tensor& X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing);
        void setHyperparameters(const nlohmann::json& hyperparameters_) override;

        //
--- a/bayesnet/ensembles/Ensemble.cc
+++ b/bayesnet/ensembles/Ensemble.cc
@@ -85,6 +85,7 @@ namespace bayesnet {
        torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
        for (auto i = 0; i < n_models; ++i) {
            auto ypredict = models[i]->predict_proba(X);
+            /*std::cout << "model " << i << " prediction: " << ypredict << " significance " << significanceModels[i] << std::endl;*/
            y_pred += ypredict * significanceModels[i];
        }
        auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
@@ -193,4 +194,4 @@ namespace bayesnet {
        }
        return nstates;
    }
-}
+}
--- a/bayesnet/ensembles/WA2DE.cc
+++ b/bayesnet/ensembles/WA2DE.cc
@@ -1,267 +0,0 @@
-// ***************************************************************
-// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
-// SPDX-FileType: SOURCE
-// SPDX-License-Identifier: MIT
-// ***************************************************************
-#include "WA2DE.h"
-namespace bayesnet {
-    WA2DE::WA2DE(bool predict_voting)
-        : num_classes_(0), num_attributes_(0), total_count_(0.0), weighted_a2de_(false), smoothing_factor_(1.0)
-    {
-        validHyperparameters = { "predict_voting" };
-        std::cout << "WA2DE classifier created.\n";
-    }
-
-    void bayesnet::WA2DE::setHyperparameters(const nlohmann::json& hyperparameters_)
-    {
-        auto hyperparameters = hyperparameters_;
-        if (hyperparameters.contains("predict_voting")) {
-            predict_voting = hyperparameters["predict_voting"];
-            hyperparameters.erase("predict_voting");
-        }
-        Classifier::setHyperparameters(hyperparameters);
-    }
-
-
-    void WA2DE::buildModel(const torch::Tensor& weights)
-    {
-        for (int c = 0; c < num_classes_; ++c) {
-            class_counts_[c] += 1e-4; // Laplace smoothing
-        }
-        for (int a = 0; a < num_attributes_; ++a) {
-            for (int v = 0; v < attribute_cardinalities_[a]; ++v) {
-                for (int c = 0; c < num_classes_; ++c) {
-                    freq_attr_class_[a][v][c] =
-                        (freq_attr_class_[a][v][c] + 1.0) / (class_counts_[c] + attribute_cardinalities_[a]);
-                }
-            }
-        }
-
-        for (int sp = 0; sp < num_attributes_; ++sp) {
-            for (int spv = 0; spv < attribute_cardinalities_[sp]; ++spv) {
-                for (int ch = 0; ch < num_attributes_; ++ch) {
-                    if (sp != ch) {
-                        for (int chv = 0; chv < attribute_cardinalities_[ch]; ++chv) {
-                            for (int c = 0; c < num_classes_; ++c) {
-                                freq_pair_class_[sp][spv][ch][chv][c] =
-                                    (freq_pair_class_[sp][spv][ch][chv][c] + 1.0) /
-                                    (class_counts_[c] + attribute_cardinalities_[sp] * attribute_cardinalities_[ch]);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        std::cout << "Model probabilities computed.\n";
-    }
-    void WA2DE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
-    {
-        auto data = dataset.clone();
-        auto labels = data[-1];
-        // Remove class row from data
-        data = data.index({ at::indexing::Slice(0, -1) });
-        std::cout << "Training A2DE model...\n";
-        std::cout << "Data: " << data.sizes() << std::endl;
-        std::cout << "Labels: " << labels.sizes() << std::endl;
-        std::cout << std::string(80, '-') << std::endl;
-        if (data.dim() != 2 || labels.dim() != 1) {
-            throw std::invalid_argument("Invalid input dimensions.");
-        }
-        num_attributes_ = data.size(0);
-        num_classes_ = labels.max().item<int>() + 1;
-        total_count_ = data.size(1);
-        std::cout << "Number of attributes: " << num_attributes_ << std::endl;
-        std::cout << "Number of classes: " << num_classes_ << std::endl;
-        std::cout << "Total count: " << total_count_ << std::endl;
-
-        // Compute cardinalities
-        attribute_cardinalities_.clear();
-        for (int i = 0; i < num_attributes_; ++i) {
-            attribute_cardinalities_.push_back(data[i].max().item<int>() + 1);
-        }
-        std::cout << "Attribute cardinalities: ";
-        for (int i = 0; i < num_attributes_; ++i) {
-            std::cout << attribute_cardinalities_[i] << " ";
-        }
-        std::cout << std::endl;
-        // output the map of states
-        std::cout << "States: ";
-        for (int i = 0; i < states.size() - 1; i++) {
-            std::cout << features[i] << " " << states[features[i]].size() << std::endl;
-        }
-
-        // Resize storage
-        class_counts_.resize(num_classes_, 0.0);
-        freq_attr_class_.resize(num_attributes_);
-        freq_pair_class_.resize(num_attributes_);
-
-        for (int i = 0; i < num_attributes_; ++i) {
-            freq_attr_class_[i].resize(attribute_cardinalities_[i], std::vector<double>(num_classes_, 0.0));
-            freq_pair_class_[i].resize(attribute_cardinalities_[i]); // Ensure first level exists
-            for (int j = 0; j < attribute_cardinalities_[i]; ++j) {
-                freq_pair_class_[i][j].resize(num_attributes_); // Ensure second level exists
-                for (int k = 0; k < num_attributes_; ++k) {
-                    if (i != k) {
-                        freq_pair_class_[i][j][k].resize(attribute_cardinalities_[k]); // Ensure third level exists
-                        for (int l = 0; l < attribute_cardinalities_[k]; ++l) {
-                            freq_pair_class_[i][j][k][l].resize(num_classes_, 0.0); // Finally, initialize with 0.0
-                        }
-                    }
-                }
-            }
-        }
-        // Count frequencies
-        auto data_cpu = data.to(torch::kCPU);
-        auto labels_cpu = labels.to(torch::kCPU);
-        int32_t* data_ptr = data_cpu.data_ptr<int32_t>();
-        int32_t* labels_ptr = labels_cpu.data_ptr<int32_t>();
-
-        for (int i = 0; i < total_count_; ++i) {
-            int class_label = labels_ptr[i];
-            class_counts_[class_label] += 1.0;
-
-            std::vector<int> attr_values(num_attributes_);
-            for (int a = 0; a < num_attributes_; ++a) {
-                attr_values[a] = toIntValue(a, data_ptr[i * num_attributes_ + a]);
-                freq_attr_class_[a][attr_values[a]][class_label] += 1.0;
-            }
-
-            // Pairwise counts
-            for (int sp = 0; sp < num_attributes_; ++sp) {
-                for (int ch = 0; ch < num_attributes_; ++ch) {
-                    if (sp != ch) {
-                        freq_pair_class_[sp][attr_values[sp]][ch][attr_values[ch]][class_label] += 1.0;
-                    }
-                }
-            }
-        }
-        std::cout << "Verifying Frequency Counts:\n";
-        for (int c = 0; c < num_classes_; ++c) {
-            std::cout << "Class " << c << " Count: " << class_counts_[c] << std::endl;
-        }
-
-        for (int a = 0; a < num_attributes_; ++a) {
-            for (int v = 0; v < attribute_cardinalities_[a]; ++v) {
-                std::cout << "P(A[" << a << "]=" << v << "|C): ";
-                for (int c = 0; c < num_classes_; ++c) {
-                    std::cout << freq_attr_class_[a][v][c] << " ";
-                }
-                std::cout << std::endl;
-            }
-        }
-
-    }
-
-    torch::Tensor WA2DE::computeProbabilities(const torch::Tensor& data) const
-    {
-        int M = data.size(1);
-        auto output = torch::zeros({ M, num_classes_ }, torch::kF64);
-
-        auto data_cpu = data.to(torch::kCPU);
-        int32_t* data_ptr = data_cpu.data_ptr<int32_t>();
-
-        for (int i = 0; i < M; ++i) {
-            std::vector<int> attr_values(num_attributes_);
-            for (int a = 0; a < num_attributes_; ++a) {
-                attr_values[a] = toIntValue(a, data_ptr[i * num_attributes_ + a]);
-            }
-
-            std::vector<double> log_prob(num_classes_, 0.0);
-            for (int c = 0; c < num_classes_; ++c) {
-                log_prob[c] = std::log((class_counts_[c] + smoothing_factor_) / (total_count_ + num_classes_ * smoothing_factor_));
-
-                double sum_log = 0.0;
-                for (int sp = 0; sp < num_attributes_; ++sp) {
-                    double sp_log = log_prob[c];
-                    for (int ch = 0; ch < num_attributes_; ++ch) {
-                        if (sp == ch) continue;
-                        double num = freq_pair_class_[sp][attr_values[sp]][ch][attr_values[ch]][c] + smoothing_factor_;
-                        double denom = class_counts_[c] + attribute_cardinalities_[sp] * attribute_cardinalities_[ch] * smoothing_factor_;
-                        sp_log += std::log(num / denom);
-                    }
-                    sum_log += std::exp(sp_log);
-                }
-                log_prob[c] = std::log(sum_log / num_attributes_);
-            }
-
-            double max_log = *std::max_element(log_prob.begin(), log_prob.end());
-            double sum_exp = 0.0;
-            for (int c = 0; c < num_classes_; ++c) {
-                sum_exp += std::exp(log_prob[c] - max_log);
-            }
-            double log_sum_exp = max_log + std::log(sum_exp);
-
-            for (int c = 0; c < num_classes_; ++c) {
-                output[i][c] = std::exp(log_prob[c] - log_sum_exp);
-            }
-        }
-
-        return output.to(torch::kF32);
-    }
-    int WA2DE::toIntValue(int attributeIndex, float value) const
-    {
-        int v = static_cast<int>(value);
-        return std::max(0, std::min(v, attribute_cardinalities_[attributeIndex] - 1));
-    }
-    torch::Tensor WA2DE::AODEConditionalProb(const torch::Tensor& data)
-    {
-        int M = data.size(1);  // Number of test samples
-        torch::Tensor output = torch::zeros({ M, num_classes_ }, torch::kF32);
-
-        auto data_cpu = data.to(torch::kCPU);
-        int32_t* data_ptr = data_cpu.data_ptr<int32_t>();
-
-        for (int i = 0; i < M; ++i) {
-            std::vector<int> attr_values(num_attributes_);
-            for (int a = 0; a < num_attributes_; ++a) {
-                attr_values[a] = toIntValue(a, data_ptr[i * num_attributes_ + a]);
-            }
-
-            std::vector<double> log_prob(num_classes_, 0.0);
-            for (int c = 0; c < num_classes_; ++c) {
-                log_prob[c] = std::log(class_counts_[c] / total_count_);
-
-                double sum_log = 0.0;
-                for (int sp = 0; sp < num_attributes_; ++sp) {
-                    double sp_log = log_prob[c];
-                    for (int ch = 0; ch < num_attributes_; ++ch) {
-                        if (sp == ch) continue;
-                        double prob = freq_pair_class_[sp][attr_values[sp]][ch][attr_values[ch]][c];
-                        sp_log += std::log(prob);
-                    }
-                    sum_log += std::exp(sp_log);
-                }
-                log_prob[c] = std::log(sum_log / num_attributes_);
-            }
-
-            double max_log = *std::max_element(log_prob.begin(), log_prob.end());
-            double sum_exp = 0.0;
-            for (int c = 0; c < num_classes_; ++c) {
-                sum_exp += std::exp(log_prob[c] - max_log);
-            }
-            double log_sum_exp = max_log + std::log(sum_exp);
-
-            for (int c = 0; c < num_classes_; ++c) {
-                output[i][c] = std::exp(log_prob[c] - log_sum_exp);
-            }
-        }
-
-        return output;
-    }
-
-    double WA2DE::score(const torch::Tensor& X, const torch::Tensor& y)
-    {
-        torch::Tensor preds = AODEConditionalProb(X);
-        torch::Tensor pred_labels = preds.argmax(1);
-
-        auto correct = pred_labels.eq(y).sum().item<int>();
-        auto total = y.size(0);
-
-        return static_cast<double>(correct) / total;
-    }
-
-    std::vector<std::string> WA2DE::graph(const std::string& title) const
-    {
-        return { title, "Graph visualization not implemented." };
-    }
-}
--- a/bayesnet/ensembles/WA2DE.h
+++ b/bayesnet/ensembles/WA2DE.h
@@ -1,52 +0,0 @@
-// ***************************************************************
-// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
-// SPDX-FileType: SOURCE
-// SPDX-License-Identifier: MIT
-// ***************************************************************
-#ifndef WA2DE_H
-#define WA2DE_H
-#include "Ensemble.h"
-#include <torch/torch.h>
-#include <vector>
-#include <map>
-#include <nlohmann/json.hpp>
-namespace bayesnet {
-    /**
-     * Geoffrey I. Webb's A2DE (Averaged 2-Dependence Estimators) classifier
-     * Implements the A2DE algorithm as an ensemble of SPODE models.
-     */
-    class WA2DE : public Ensemble {
-    public:
-        explicit WA2DE(bool predict_voting = false);
-        virtual ~WA2DE() {};
-
-        // Override method to set hyperparameters
-        void setHyperparameters(const nlohmann::json& hyperparameters) override;
-
-        // Graph visualization function
-        std::vector<std::string> graph(const std::string& title = "A2DE") const override;
-        torch::Tensor computeProbabilities(const torch::Tensor& data) const;
-        double score(const torch::Tensor& X, const torch::Tensor& y);
-    protected:
-        // Model-building function
-        void buildModel(const torch::Tensor& weights) override;
-        void trainModel(const torch::Tensor& data, const Smoothing_t smoothing) override;
-    private:
-        int num_classes_;                // Number of classes
-        int num_attributes_;             // Number of attributes
-        std::vector<int> attribute_cardinalities_; // Cardinalities of attributes
-
-        // Frequency counts (similar to Java implementation)
-        std::vector<double> class_counts_;  // Class frequency
-        std::vector<std::vector<std::vector<double>>> freq_attr_class_; // P(A | C)
-        std::vector<std::vector<std::vector<std::vector<std::vector<double>>>>> freq_pair_class_; // P(A_i, A_j | C)
-
-        double total_count_; // Total instance count
-
-        bool weighted_a2de_; // Whether to use weighted A2DE
-        double smoothing_factor_; // Smoothing parameter (default: Laplace)
-        torch::Tensor AODEConditionalProb(const torch::Tensor& data);
-        int toIntValue(int attributeIndex, float value) const;
-    };
-}
-#endif
--- a/bayesnet/ensembles/XBAODE.cc
+++ b/bayesnet/ensembles/XBAODE.cc
@@ -3,183 +3,200 @@
 // SPDX-FileType: SOURCE
 // SPDX-License-Identifier: MIT
 // ***************************************************************
-#include <random> 
-#include <set>
-#include <functional>
-#include <limits.h>
-#include <tuple>
 #include "XBAODE.h"
 #include "bayesnet/classifiers/XSPODE.h"
 #include "bayesnet/utils/TensorUtils.h"
+#include <limits.h>
+#include <random>
+#include <tuple>

 namespace bayesnet {
-    XBAODE::XBAODE()
-    {
-        validHyperparameters = { "alpha_block", "order", "convergence", "convergence_best", "bisection", "threshold", "maxTolerance",
-            "predict_voting", "select_features" };
-    }
-    void XBAODE::add_model(std::unique_ptr<Classifier> model, double significance)
-    {
-        models.push_back(std::move(model));
-        n_models++;
-        significanceModels.push_back(significance);
-    }
-    void XBAODE::remove_last_model()
-    {
-        models.pop_back();
-        significanceModels.pop_back();
-        n_models--;
-    }
-    std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing)
-    {
-        torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
-        std::vector<int> featuresSelected = featureSelection(weights_);
-        for (const int& feature : featuresSelected) {
-            std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
-            // model->fit(dataset, features, className, states, weights_, smoothing);
-            dynamic_cast<XSpode*>(model.get())->fit(X_train, y_train, weights_, smoothing);
-            add_model(std::move(model), 1.0);
-        }
-        notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
-        return featuresSelected;
-    }
-    void XBAODE::trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
-    {
-        X_train_ = TensorUtils::to_matrix(X_train);
-        y_train_ = TensorUtils::to_vector<int>(y_train);
-        X_test_ = TensorUtils::to_matrix(X_test);
-        y_test_ = TensorUtils::to_vector<int>(y_test);
-        significanceModels.resize(n, 0.0); // n initialized in Classifier.cc
-        fitted = true;
-        double alpha_t;
-        torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
-        bool finished = false;
-        std::vector<int> featuresUsed;
-        n_models = 0;
-        if (selectFeatures) {
-            featuresUsed = initializeModels(smoothing);
-            std::cout << "features used: " << featuresUsed.size() << std::endl;
-            auto ypred = predict(X_train_);
-            auto ypred_t = torch::tensor(ypred);
-            std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
-            // Update significance of the models
-            for (const int& feature : featuresUsed) {
-                significanceModels.pop_back();
-            }
-            for (const int& feature : featuresUsed) {
-                significanceModels.push_back(alpha_t);
-            }
-            // VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models);
-            if (finished) {
-                return;
-            }
-        }
-        int numItemsPack = 0; // The counter of the models inserted in the current pack
-        // Variables to control the accuracy finish condition
-        double priorAccuracy = 0.0;
-        double improvement = 1.0;
-        double convergence_threshold = 1e-4;
-        int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
-        // Step 0: Set the finish condition
-        // epsilon sub t > 0.5 => inverse the weights_ policy
-        // validation error is not decreasing
-        // run out of features
-        bool ascending = order_algorithm == bayesnet::Orders.ASC;
-        std::mt19937 g{ 173 };
-        while (!finished) {
-            // Step 1: Build ranking with mutual information
-            auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
-            if (order_algorithm == bayesnet::Orders.RAND) {
-                std::shuffle(featureSelection.begin(), featureSelection.end(), g);
-            }
-            // Remove used features
-            featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(), [&](auto x)
-                { return std::find(featuresUsed.begin(), featuresUsed.end(), x) != featuresUsed.end();}),
-                featureSelection.end()
-            );
-            int k = bisection ? pow(2, tolerance) : 1;
-            int counter = 0; // The model counter of the current pack
-            // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
-            while (counter++ < k && featureSelection.size() > 0) {
-                auto feature = featureSelection[0];
-                featureSelection.erase(featureSelection.begin());
-                std::unique_ptr<Classifier> model;
-                model = std::make_unique<XSpode>(feature);
-                dynamic_cast<XSpode*>(model.get())->fit(X_train, y_train, weights_, smoothing); // using exclusive XSpode fit method
-                // DEBUG
-                std::cout << "Model fitted." << std::endl;
-                std::cout << dynamic_cast<XSpode*>(model.get())->to_string() << std::endl;
-                // DEBUG
-                std::vector<int> ypred;
-                if (alpha_block) {
-                    //
-                    // Compute the prediction with the current ensemble + model
-                    //
-                    // Add the model to the ensemble
-                    add_model(std::move(model), 1.0);
-                    // Compute the prediction
-                    ypred = predict(X_train_);
-                    // Remove the model from the ensemble
-                    significanceModels.pop_back();
-                    remove_last_model();
-                } else {
-                    ypred = model->predict(X_train_);
-                }
-                // Step 3.1: Compute the classifier amout of say
-                auto ypred_t = torch::tensor(ypred);
-                std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
-                // Step 3.4: Store classifier and its accuracy to weigh its future vote
-                numItemsPack++;
-                featuresUsed.push_back(feature);
-                add_model(std::move(model), alpha_t);
-                // VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size());
-            } // End of the pack
-            if (convergence && !finished) {
-                auto y_val_predict = predict(X_test);
-                double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
-                if (priorAccuracy == 0) {
-                    priorAccuracy = accuracy;
-                } else {
-                    improvement = accuracy - priorAccuracy;
-                }
-                if (improvement < convergence_threshold) {
-                    // VLOG_SCOPE_F(3, "  (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
-                    tolerance++;
-                } else {
-                    // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
-                    tolerance = 0; // Reset the counter if the model performs better
-                    numItemsPack = 0;
-                }
-                if (convergence_best) {
-                    // Keep the best accuracy until now as the prior accuracy
-                    priorAccuracy = std::max(accuracy, priorAccuracy);
-                } else {
-                    // Keep the last accuray obtained as the prior accuracy
-                    priorAccuracy = accuracy;
-                }
-            }
-            // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
-            finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
-        }
-        if (tolerance > maxTolerance) {
-            if (numItemsPack < n_models) {
-                notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
-                // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
-                for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i) {
-                    remove_last_model();
-                    significanceModels[featuresUsed[i]] = 0.0;
-                }
-                // VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features used.", n_models, featuresUsed.size());
-            } else {
-                notes.push_back("Convergence threshold reached & 0 models eliminated");
-                // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
-            }
-        }
-        if (featuresUsed.size() != features.size()) {
-            notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
-            status = bayesnet::WARNING;
-        }
-        notes.push_back("Number of models: " + std::to_string(n_models));
-        return;
-    }
+XBAODE::XBAODE() : Boost(false) { // NOTE(review): meaning of the `false` passed to Boost is not visible here — confirm
+  validHyperparameters = { // hyperparameter keys assigned as valid for this classifier
+      "alpha_block",      "order",          "convergence",
+      "convergence_best", "bisection",      "threshold",
+      "maxTolerance",     "predict_voting", "select_features"};
 }
+void XBAODE::add_model(std::unique_ptr<Classifier> model, double significance) {
+  models.emplace_back(std::move(model)); // the ensemble takes ownership of the classifier
+  n_models += 1;
+  significanceModels.emplace_back(significance); // stored alongside the model just appended
+}
+void XBAODE::remove_last_model() {
+  --n_models; // keep the counter in step with the two vectors popped below
+  significanceModels.pop_back();
+  models.pop_back(); // destroys the most recently appended classifier slot
+}
+// Fits one XSpode per feature returned by featureSelection(), adds each to the
+// ensemble with significance 1.0, and returns the selected feature indices.
+std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing) {
+  torch::Tensor uniform = torch::full({m}, 1.0 / m, torch::kFloat64); // 1/m per sample
+  std::vector<int> selected = featureSelection(uniform);
+  for (const int idx : selected) {
+    std::unique_ptr<Classifier> spode = std::make_unique<XSpode>(idx);
+    spode->fit(dataset, features, className, states, uniform, smoothing);
+    add_model(std::move(spode), 1.0);
+  }
+  notes.push_back("Used features in initialization: " + std::to_string(selected.size()) +
+                  " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
+  return selected;
+}
+// Boosting loop: fits one XSpode per unused feature (in packs of k models),
+// re-weights the samples after each model via update_weights() and stores the
+// model with the returned alpha_t as its significance. Ends when update_weights()
+// reports finished, tolerance exceeds maxTolerance, or all features are used.
+// NOTE(review): the weights parameter is never read; training always starts
+// from uniform 1/m sample weights — confirm that is intended.
+void XBAODE::trainModel(const torch::Tensor &weights,
+                        const bayesnet::Smoothing_t smoothing) {
+  X_train_ = TensorUtils::to_matrix(X_train);
+  y_train_ = TensorUtils::to_vector<int>(y_train);
+  X_test_ = TensorUtils::to_matrix(X_test);
+  y_test_ = TensorUtils::to_vector<int>(y_test);
+  // NOTE(review): set before training, presumably because predict() is called on the partial ensemble below — verify
+  fitted = true;
+  double alpha_t;
+  torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
+  bool finished = false;
+  std::vector<int> featuresUsed;
+  n_models = 0;
+  if (selectFeatures) {
+    featuresUsed = initializeModels(smoothing);
+    auto ypred = predict(X_train_);
+    auto ypred_t = torch::tensor(ypred);
+    std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
+    // Replace the provisional significance of every initial model with alpha_t
+    for (std::size_t i = 0; i < featuresUsed.size(); ++i) {
+      significanceModels.pop_back();
+    }
+    for (std::size_t i = 0; i < featuresUsed.size(); ++i) {
+      significanceModels.push_back(alpha_t);
+    }
+    // VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t,
+    // n_models);
+    if (finished) {
+      return;
+    }
+  }
+  int numItemsPack = 0; // The counter of the models inserted in the current pack
+  // Variables to control the accuracy finish condition
+  double priorAccuracy = 0.0;
+  double improvement = 1.0;
+  double convergence_threshold = 1e-4;
+  int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
+  // Step 0: Set the finish condition
+  // epsilon sub t > 0.5 => inverse the weights_ policy
+  // validation error is not decreasing
+  // run out of features
+  bool ascending = order_algorithm == bayesnet::Orders.ASC;
+  std::mt19937 g{173};
+  while (!finished) {
+    // Step 1: Build ranking with mutual information
+    auto featureSelection = metrics.SelectKBestWeighted(
+        weights_, ascending, n); // Get all the features sorted
+    if (order_algorithm == bayesnet::Orders.RAND) {
+      std::shuffle(featureSelection.begin(), featureSelection.end(), g);
+    }
+    // Remove used features
+    featureSelection.erase(
+        std::remove_if(featureSelection.begin(), featureSelection.end(), [&](auto x) {
+                    return std::find(featuresUsed.begin(), featuresUsed.end(), x) != featuresUsed.end();
+                  }),
+        featureSelection.end());
+    // Pack size: 2^tolerance models per round when bisection is enabled, else 1
+    int k = bisection ? pow(2, tolerance) : 1;
+    int counter = 0; // The model counter of the current pack
+    // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k,
+    // featureSelection.size());
+    while (counter++ < k && featureSelection.size() > 0) {
+      auto feature = featureSelection[0];
+      featureSelection.erase(featureSelection.begin());
+      std::unique_ptr<Classifier> model;
+      model = std::make_unique<XSpode>(feature);
+      model->fit(dataset, features, className, states, weights_, smoothing);
+      std::vector<int> ypred;
+      if (alpha_block) {
+        // Compute the prediction with the current ensemble + model:
+        // temporarily add the model to the ensemble
+        add_model(std::move(model), 1.0);
+        // Compute the prediction
+        ypred = predict(X_train_);
+        // Take ownership back before the slot is popped; otherwise the fitted
+        // model is destroyed and add_model() below would store a null pointer
+        model = std::move(models.back());
+        // Remove the (now empty) slot from the ensemble
+        remove_last_model();
+      } else {
+        ypred = model->predict(X_train_);
+      }
+      // Step 3.1: Compute the classifier amount of say
+      auto ypred_t = torch::tensor(ypred);
+      std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
+      // Step 3.4: Store classifier and its accuracy to weigh its future vote
+      numItemsPack++;
+      featuresUsed.push_back(feature);
+      add_model(std::move(model), alpha_t);
+      // VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d
+      // featuresUsed: %zu", finished, numItemsPack, n_models,
+      // featuresUsed.size());
+    } // End of the pack
+    if (convergence && !finished) {
+      auto y_val_predict = predict(X_test);
+      double accuracy = (y_val_predict == y_test).sum().item<double>() /
+                        static_cast<double>(y_test.size(0));
+      if (priorAccuracy == 0) {
+        priorAccuracy = accuracy;
+      } else {
+        improvement = accuracy - priorAccuracy;
+      }
+      if (improvement < convergence_threshold) {
+        // VLOG_SCOPE_F(3, "  (improvement<threshold) tolerance: %d
+        // numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
+        // numItemsPack, improvement, priorAccuracy, accuracy);
+        tolerance++;
+      } else {
+        // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d
+        // numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
+        // numItemsPack, improvement, priorAccuracy, accuracy);
+        tolerance = 0; // Reset the counter if the model performs better
+        numItemsPack = 0;
+      }
+      if (convergence_best) {
+        // Keep the best accuracy until now as the prior accuracy
+        priorAccuracy = std::max(accuracy, priorAccuracy);
+      } else {
+        // Keep the last accuracy obtained as the prior accuracy
+        priorAccuracy = accuracy;
+      }
+    }
+    // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size:
+    // %zu", tolerance, featuresUsed.size(), features.size());
+    finished = finished || tolerance > maxTolerance ||
+               featuresUsed.size() == features.size();
+  }
+  if (tolerance > maxTolerance) {
+    if (numItemsPack < n_models) {
+      notes.push_back("Convergence threshold reached & " +
+                      std::to_string(numItemsPack) + " models eliminated");
+      // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated
+      // of %d", numItemsPack, n_models);
+      // Drop the numItemsPack most recently added models
+      for (int i = 0; i < numItemsPack; ++i) {
+        remove_last_model();
+      }
+      // VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features
+      // used.", n_models, featuresUsed.size());
+    } else {
+      notes.push_back("Convergence threshold reached & 0 models eliminated");
+      // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated
+      // n_models=%d numItemsPack=%d", n_models, numItemsPack);
+    }
+  }
+  if (featuresUsed.size() != features.size()) {
+    notes.push_back( "Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
+    status = bayesnet::WARNING;
+  }
+  notes.push_back("Number of models: " + std::to_string(n_models));
+  return;
+}
+} // namespace bayesnet