From 6bf3b939bcc4d588ba8a251c1b085e30ad846a01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Mon, 17 Mar 2025 11:39:33 +0100 Subject: [PATCH] Add items to .clang-format --- CHANGELOG.md | 5 +- bayesnet/ensembles/XBAODE.cc | 351 +++++++++++++++++++---------------- tests/TestBoostA2DE.cc | 4 +- 3 files changed, 196 insertions(+), 164 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7ea351..a716115 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [1.0.7] 2025-03-16 +### Internal +- Add changes to .clang-format to adjust to vscode format style thanks to https://clang-format-configurator.site/ + +## [1.0.7] 2025-03-16 ### Added diff --git a/bayesnet/ensembles/XBAODE.cc b/bayesnet/ensembles/XBAODE.cc index ddf8115..567e3cf 100644 --- a/bayesnet/ensembles/XBAODE.cc +++ b/bayesnet/ensembles/XBAODE.cc @@ -10,175 +10,204 @@ #include #include -namespace bayesnet { -XBAODE::XBAODE() : Boost(false) { - validHyperparameters = {"alpha_block", "order", "convergence", "convergence_best", "bisection", - "threshold", "maxTolerance", "predict_voting", "select_features"}; -} -std::vector XBAODE::initializeModels(const Smoothing_t smoothing) { - torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64); - std::vector featuresSelected = featureSelection(weights_); - for (const int &feature : featuresSelected) { - std::unique_ptr model = std::make_unique(feature); - model->fit(dataset, features, className, states, weights_, smoothing); - add_model(std::move(model), 1.0); - } - notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + - std::to_string(features.size()) + " with " + select_features_algorithm); - return featuresSelected; -} -void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing) { - X_train_ = 
TensorUtils::to_matrix(X_train); - y_train_ = TensorUtils::to_vector(y_train); - if (convergence) { - X_test_ = TensorUtils::to_matrix(X_test); - y_test_ = TensorUtils::to_vector(y_test); - } - fitted = true; - double alpha_t; - torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64); - bool finished = false; - std::vector featuresUsed; - n_models = 0; - if (selectFeatures) { - featuresUsed = initializeModels(smoothing); - auto ypred = predict(X_train_); - auto ypred_t = torch::tensor(ypred); - std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_); - // Update significance of the models - for (const int &feature : featuresUsed) { - significanceModels.pop_back(); - } - for (const int &feature : featuresUsed) { - significanceModels.push_back(alpha_t); - } - // VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, - // n_models); - if (finished) { - return; - } - } - int numItemsPack = 0; // The counter of the models inserted in the current pack - // Variables to control the accuracy finish condition - double priorAccuracy = 0.0; - double improvement = 1.0; - double convergence_threshold = 1e-4; - int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold - // Step 0: Set the finish condition - // epsilon sub t > 0.5 => inverse the weights_ policy - // validation error is not decreasing - // run out of features - bool ascending = order_algorithm == bayesnet::Orders.ASC; - std::mt19937 g{173}; - while (!finished) { - // Step 1: Build ranking with mutual information - auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted - if (order_algorithm == bayesnet::Orders.RAND) { - std::shuffle(featureSelection.begin(), featureSelection.end(), g); - } - // Remove used features - featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(), - [&](auto x) { - return std::find(featuresUsed.begin(), featuresUsed.end(), x) 
!= - featuresUsed.end(); - }), - featureSelection.end()); - int k = bisection ? pow(2, tolerance) : 1; - int counter = 0; // The model counter of the current pack - // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, - // featureSelection.size()); - while (counter++ < k && featureSelection.size() > 0) { - auto feature = featureSelection[0]; - featureSelection.erase(featureSelection.begin()); - std::unique_ptr model; - model = std::make_unique(feature); +namespace bayesnet +{ + XBAODE::XBAODE() : Boost(false) {} + std::vector XBAODE::initializeModels(const Smoothing_t smoothing) + { + torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64); + std::vector featuresSelected = featureSelection(weights_); + for (const int &feature : featuresSelected) + { + std::unique_ptr model = std::make_unique(feature); model->fit(dataset, features, className, states, weights_, smoothing); - /*dynamic_cast(model.get())->fitx(X_train, y_train, weights_, - * smoothing); // using exclusive XSpode fit method*/ - // DEBUG - /*std::cout << dynamic_cast(model.get())->to_string() << - * std::endl;*/ - // DEBUG - std::vector ypred; - if (alpha_block) { - // - // Compute the prediction with the current ensemble + model - // - // Add the model to the ensemble - add_model(std::move(model), 1.0); - // Compute the prediction - ypred = predict(X_train_); - model = std::move(models.back()); - // Remove the model from the ensemble - remove_last_model(); - } else { - ypred = model->predict(X_train_); - } - // Step 3.1: Compute the classifier amout of say + add_model(std::move(model), 1.0); + } + notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + + std::to_string(features.size()) + " with " + select_features_algorithm); + return featuresSelected; + } + void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing) + { + X_train_ = TensorUtils::to_matrix(X_train); + y_train_ = 
TensorUtils::to_vector(y_train); + if (convergence) + { + X_test_ = TensorUtils::to_matrix(X_test); + y_test_ = TensorUtils::to_vector(y_test); + } + fitted = true; + double alpha_t; + torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64); + bool finished = false; + std::vector featuresUsed; + n_models = 0; + if (selectFeatures) + { + featuresUsed = initializeModels(smoothing); + auto ypred = predict(X_train_); auto ypred_t = torch::tensor(ypred); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_); - // Step 3.4: Store classifier and its accuracy to weigh its future vote - numItemsPack++; - featuresUsed.push_back(feature); - add_model(std::move(model), alpha_t); - // VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d - // featuresUsed: %zu", finished, numItemsPack, n_models, - // featuresUsed.size()); - } // End of the pack - if (convergence && !finished) { - auto y_val_predict = predict(X_test); - double accuracy = (y_val_predict == y_test).sum().item() / (double)y_test.size(0); - if (priorAccuracy == 0) { - priorAccuracy = accuracy; - } else { - improvement = accuracy - priorAccuracy; + // Update significance of the models + for (const int &feature : featuresUsed) + { + significanceModels.pop_back(); } - if (improvement < convergence_threshold) { - // VLOG_SCOPE_F(3, " (improvement=threshold) Reset. tolerance: %d - // numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, - // numItemsPack, improvement, priorAccuracy, accuracy); - tolerance = 0; // Reset the counter if the model performs better - numItemsPack = 0; + for (const int &feature : featuresUsed) + { + significanceModels.push_back(alpha_t); } - if (convergence_best) { - // Keep the best accuracy until now as the prior accuracy - priorAccuracy = std::max(accuracy, priorAccuracy); - } else { - // Keep the last accuray obtained as the prior accuracy - priorAccuracy = accuracy; + // VLOG_SCOPE_F(1, "SelectFeatures. 
alpha_t: %f n_models: %d", alpha_t, + // n_models); + if (finished) + { + return; } } - // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: - // %zu", tolerance, featuresUsed.size(), features.size()); - finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size(); - } - if (tolerance > maxTolerance) { - if (numItemsPack < n_models) { - notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated"); - // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated - // of %d", numItemsPack, n_models); - for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i) { - remove_last_model(); + int numItemsPack = 0; // The counter of the models inserted in the current pack + // Variables to control the accuracy finish condition + double priorAccuracy = 0.0; + double improvement = 1.0; + double convergence_threshold = 1e-4; + int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold + // Step 0: Set the finish condition + // epsilon sub t > 0.5 => inverse the weights_ policy + // validation error is not decreasing + // run out of features + bool ascending = order_algorithm == bayesnet::Orders.ASC; + std::mt19937 g{173}; + while (!finished) + { + // Step 1: Build ranking with mutual information + auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted + if (order_algorithm == bayesnet::Orders.RAND) + { + std::shuffle(featureSelection.begin(), featureSelection.end(), g); } - // VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features - // used.", n_models, featuresUsed.size()); - } else { - notes.push_back("Convergence threshold reached & 0 models eliminated"); - // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated - // n_models=%d numItemsPack=%d", n_models, numItemsPack); + // Remove used features + 
featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(), + [&](auto x) + { + return std::find(featuresUsed.begin(), featuresUsed.end(), x) != + featuresUsed.end(); + }), + featureSelection.end()); + int k = bisection ? pow(2, tolerance) : 1; + int counter = 0; // The model counter of the current pack + // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, + // featureSelection.size()); + while (counter++ < k && featureSelection.size() > 0) + { + auto feature = featureSelection[0]; + featureSelection.erase(featureSelection.begin()); + std::unique_ptr model; + model = std::make_unique(feature); + model->fit(dataset, features, className, states, weights_, smoothing); + /*dynamic_cast(model.get())->fitx(X_train, y_train, weights_, + * smoothing); // using exclusive XSpode fit method*/ + // DEBUG + /*std::cout << dynamic_cast(model.get())->to_string() << + * std::endl;*/ + // DEBUG + std::vector ypred; + if (alpha_block) + { + // + // Compute the prediction with the current ensemble + model + // + // Add the model to the ensemble + add_model(std::move(model), 1.0); + // Compute the prediction + ypred = predict(X_train_); + model = std::move(models.back()); + // Remove the model from the ensemble + remove_last_model(); + } + else + { + ypred = model->predict(X_train_); + } + // Step 3.1: Compute the classifier amout of say + auto ypred_t = torch::tensor(ypred); + std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_); + // Step 3.4: Store classifier and its accuracy to weigh its future vote + numItemsPack++; + featuresUsed.push_back(feature); + add_model(std::move(model), alpha_t); + // VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d + // featuresUsed: %zu", finished, numItemsPack, n_models, + // featuresUsed.size()); + } // End of the pack + if (convergence && !finished) + { + auto y_val_predict = predict(X_test); + double accuracy = (y_val_predict == y_test).sum().item() / 
(double)y_test.size(0); + if (priorAccuracy == 0) + { + priorAccuracy = accuracy; + } + else + { + improvement = accuracy - priorAccuracy; + } + if (improvement < convergence_threshold) + { + // VLOG_SCOPE_F(3, " (improvement=threshold) Reset. tolerance: %d + // numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, + // numItemsPack, improvement, priorAccuracy, accuracy); + tolerance = 0; // Reset the counter if the model performs better + numItemsPack = 0; + } + if (convergence_best) + { + // Keep the best accuracy until now as the prior accuracy + priorAccuracy = std::max(accuracy, priorAccuracy); + } + else + { + // Keep the last accuray obtained as the prior accuracy + priorAccuracy = accuracy; + } + } + // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: + // %zu", tolerance, featuresUsed.size(), features.size()); + finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size(); } + if (tolerance > maxTolerance) + { + if (numItemsPack < n_models) + { + notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated"); + // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated + // of %d", numItemsPack, n_models); + for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i) + { + remove_last_model(); + } + // VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features + // used.", n_models, featuresUsed.size()); + } + else + { + notes.push_back("Convergence threshold reached & 0 models eliminated"); + // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated + // n_models=%d numItemsPack=%d", n_models, numItemsPack); + } + } + if (featuresUsed.size() != features.size()) + { + notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + + std::to_string(features.size())); + status = bayesnet::WARNING; + } + notes.push_back("Number of models: " + 
std::to_string(n_models)); + return; } - if (featuresUsed.size() != features.size()) { - notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + - std::to_string(features.size())); - status = bayesnet::WARNING; - } - notes.push_back("Number of models: " + std::to_string(n_models)); - return; -} } // namespace bayesnet diff --git a/tests/TestBoostA2DE.cc b/tests/TestBoostA2DE.cc index 141983f..ed5159d 100644 --- a/tests/TestBoostA2DE.cc +++ b/tests/TestBoostA2DE.cc @@ -162,7 +162,7 @@ TEST_CASE("Bisection Best", "[BoostA2DE]") {"maxTolerance", 3}, {"convergence", true}, {"block_update", false}, - {"convergence_best", false}, + {"convergence_best", true}, }); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 480); @@ -215,4 +215,4 @@ TEST_CASE("Test graph b2a2de", "[BoostA2DE]") REQUIRE(graph.size() == 26); REQUIRE(graph[0] == "digraph BayesNet {\nlabel=\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n"); REQUIRE(graph[1] == "\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n"); -} \ No newline at end of file +}