Reformat source

2025-03-22 10:31:54 +01:00
parent bf08b0de89
commit 306d3a4b55
2 changed files with 39 additions and 69 deletions
--- a/bayesnet/ensembles/BoostAODE.cc
+++ b/bayesnet/ensembles/BoostAODE.cc
@@ -4,25 +4,26 @@
 // SPDX-License-Identifier: MIT
 // ***************************************************************

-#include <random> 
-#include <set>
-#include <limits.h>
-#include <tuple>
 #include "BoostAODE.h"
 #include "bayesnet/classifiers/SPODE.h"
-#include <loguru.hpp>
+#include <limits.h>
 #include <loguru.cpp>
+#include <loguru.hpp>
+#include <random>
+#include <set>
+#include <tuple>

-namespace bayesnet {
+namespace bayesnet
+{

    BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting)
    {
    }
    std::vector<int> BoostAODE::initializeModels(const Smoothing_t smoothing)
    {
-        torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
+        torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
        std::vector<int> featuresSelected = featureSelection(weights_);
-        for (const int& feature : featuresSelected) {
+        for (const int &feature : featuresSelected) {
            std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
            model->fit(dataset, features, className, states, weights_, smoothing);
            models.push_back(std::move(model));
@@ -32,7 +33,7 @@ namespace bayesnet {
        notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
        return featuresSelected;
    }
-    void BoostAODE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
+    void BoostAODE::trainModel(const torch::Tensor &weights, const Smoothing_t smoothing)
    {
        //
        // Logging setup
@@ -45,7 +46,7 @@ namespace bayesnet {
        // as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
        fitted = true;
        double alpha_t = 0;
-        torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
+        torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
        bool finished = false;
        std::vector<int> featuresUsed;
        n_models = 0;
@@ -73,7 +74,7 @@ namespace bayesnet {
        // validation error is not decreasing
        // run out of features
        bool ascending = order_algorithm == Orders.ASC;
-        std::mt19937 g{ 173 };
+        std::mt19937 g{173};
        while (!finished) {
            // Step 1: Build ranking with mutual information
            auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
@@ -81,10 +82,8 @@ namespace bayesnet {
                std::shuffle(featureSelection.begin(), featureSelection.end(), g);
            }
            // Remove used features
-            featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x)
-                { return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
-                end(featureSelection)
-            );
+            featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x) { return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed); }),
+                                   end(featureSelection));
            int k = bisection ? pow(2, tolerance) : 1;
            int counter = 0; // The model counter of the current pack
            // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
@@ -176,7 +175,7 @@ namespace bayesnet {
        }
        notes.push_back("Number of models: " + std::to_string(n_models));
    }
-    std::vector<std::string> BoostAODE::graph(const std::string& title) const
+    std::vector<std::string> BoostAODE::graph(const std::string &title) const
    {
        return Ensemble::graph(title);
    }
--- a/bayesnet/ensembles/XBAODE.cc
+++ b/bayesnet/ensembles/XBAODE.cc
@@ -17,8 +17,7 @@ namespace bayesnet
    {
        torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
        std::vector<int> featuresSelected = featureSelection(weights_);
-        for (const int &feature : featuresSelected)
-        {
+        for (const int &feature : featuresSelected) {
            std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
            model->fit(dataset, features, className, states, weights_, smoothing);
            add_model(std::move(model), 1.0);
@@ -31,8 +30,7 @@ namespace bayesnet
    {
        X_train_ = TensorUtils::to_matrix(X_train);
        y_train_ = TensorUtils::to_vector<int>(y_train);
-        if (convergence)
-        {
+        if (convergence) {
            X_test_ = TensorUtils::to_matrix(X_test);
            y_test_ = TensorUtils::to_vector<int>(y_test);
        }
@@ -42,25 +40,21 @@ namespace bayesnet
        bool finished = false;
        std::vector<int> featuresUsed;
        n_models = 0;
-        if (selectFeatures)
-        {
+        if (selectFeatures) {
            featuresUsed = initializeModels(smoothing);
            auto ypred = predict(X_train_);
            auto ypred_t = torch::tensor(ypred);
            std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
            // Update significance of the models
-            for (const int &feature : featuresUsed)
-            {
+            for (const int &feature : featuresUsed) {
                significanceModels.pop_back();
            }
-            for (const int &feature : featuresUsed)
-            {
+            for (const int &feature : featuresUsed) {
                significanceModels.push_back(alpha_t);
            }
            // VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t,
            // n_models);
-            if (finished)
-            {
+            if (finished) {
                return;
            }
        }
@@ -76,18 +70,15 @@ namespace bayesnet
        // run out of features
        bool ascending = order_algorithm == bayesnet::Orders.ASC;
        std::mt19937 g{173};
-        while (!finished)
-        {
+        while (!finished) {
            // Step 1: Build ranking with mutual information
            auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
-            if (order_algorithm == bayesnet::Orders.RAND)
-            {
+            if (order_algorithm == bayesnet::Orders.RAND) {
                std::shuffle(featureSelection.begin(), featureSelection.end(), g);
            }
            // Remove used features
            featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(),
-                                             [&](auto x)
-                                             {
+                                             [&](auto x) {
                                                 return std::find(featuresUsed.begin(), featuresUsed.end(), x) !=
                                                        featuresUsed.end();
                                             }),
@@ -96,8 +87,7 @@ namespace bayesnet
            int counter = 0; // The model counter of the current pack
            // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k,
            // featureSelection.size());
-            while (counter++ < k && featureSelection.size() > 0)
-            {
+            while (counter++ < k && featureSelection.size() > 0) {
                auto feature = featureSelection[0];
                featureSelection.erase(featureSelection.begin());
                std::unique_ptr<Classifier> model;
@@ -110,8 +100,7 @@ namespace bayesnet
                 * std::endl;*/
                // DEBUG
                std::vector<int> ypred;
-                if (alpha_block)
-                {
+                if (alpha_block) {
                    //
                    // Compute the prediction with the current ensemble + model
                    //
@@ -122,9 +111,7 @@ namespace bayesnet
                    model = std::move(models.back());
                    // Remove the model from the ensemble
                    remove_last_model();
-                }
-                else
-                {
+                } else {
                    ypred = model->predict(X_train_);
                }
                // Step 3.1: Compute the classifier amount of say
@@ -138,40 +125,30 @@ namespace bayesnet
                // featuresUsed: %zu", finished, numItemsPack, n_models,
                // featuresUsed.size());
            } // End of the pack
-            if (convergence && !finished)
-            {
+            if (convergence && !finished) {
                auto y_val_predict = predict(X_test);
                double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
-                if (priorAccuracy == 0)
-                {
+                if (priorAccuracy == 0) {
                    priorAccuracy = accuracy;
-                }
-                else
-                {
+                } else {
                    improvement = accuracy - priorAccuracy;
                }
-                if (improvement < convergence_threshold)
-                {
+                if (improvement < convergence_threshold) {
                    // VLOG_SCOPE_F(3, "  (improvement<threshold) tolerance: %d
                    // numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
                    // numItemsPack, improvement, priorAccuracy, accuracy);
                    tolerance++;
-                }
-                else
-                {
+                } else {
                    // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d
                    // numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
                    // numItemsPack, improvement, priorAccuracy, accuracy);
                    tolerance = 0; // Reset the counter if the model performs better
                    numItemsPack = 0;
                }
-                if (convergence_best)
-                {
+                if (convergence_best) {
                    // Keep the best accuracy until now as the prior accuracy
                    priorAccuracy = std::max(accuracy, priorAccuracy);
-                }
-                else
-                {
+                } else {
                    // Keep the last accuracy obtained as the prior accuracy
                    priorAccuracy = accuracy;
                }
@@ -180,29 +157,23 @@ namespace bayesnet
            // %zu", tolerance, featuresUsed.size(), features.size());
            finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
        }
-        if (tolerance > maxTolerance)
-        {
-            if (numItemsPack < n_models)
-            {
+        if (tolerance > maxTolerance) {
+            if (numItemsPack < n_models) {
                notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
                // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated
                // of %d", numItemsPack, n_models);
-                for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i)
-                {
+                for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i) {
                    remove_last_model();
                }
                // VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features
                // used.", n_models, featuresUsed.size());
-            }
-            else
-            {
+            } else {
                notes.push_back("Convergence threshold reached & 0 models eliminated");
                // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated
                // n_models=%d numItemsPack=%d", n_models, numItemsPack);
            }
        }
-        if (featuresUsed.size() != features.size())
-        {
+        if (featuresUsed.size() != features.size()) {
            notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " +
                            std::to_string(features.size()));
            status = bayesnet::WARNING;