Add items to .clang-format

2025-03-17 11:39:33 +01:00
parent 7076efc2a1
commit 6bf3b939bc
3 changed files with 196 additions and 164 deletions

View File

@@ -7,8 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+### Internal
+- Add changes to .clang-format to adjust to the vscode format style, thanks to https://clang-format-configurator.site/
+
 ## [1.0.7] 2025-03-16
 ### Added
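The .clang-format entries themselves are not part of this view, but the reformatted code below breaks before braces (Allman style) while keeping empty function bodies inline. A plausible sketch of the kind of options the linked configurator would emit follows; the exact values committed here are an assumption:

# Hypothetical excerpt of .clang-format; the actual options added in this
# commit are not visible in this view.
BreakBeforeBraces: Allman
AllowShortFunctionsOnASingleLine: Empty
IndentWidth: 4
ColumnLimit: 120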

View File

@@ -10,15 +10,15 @@
 #include <random>
 #include <tuple>
-namespace bayesnet {
-XBAODE::XBAODE() : Boost(false) {
-    validHyperparameters = {"alpha_block", "order", "convergence", "convergence_best", "bisection",
-                            "threshold", "maxTolerance", "predict_voting", "select_features"};
-}
-std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing) {
+namespace bayesnet
+{
+XBAODE::XBAODE() : Boost(false) {}
+std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing)
+{
     torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
     std::vector<int> featuresSelected = featureSelection(weights_);
-    for (const int &feature : featuresSelected) {
+    for (const int &feature : featuresSelected)
+    {
         std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
         model->fit(dataset, features, className, states, weights_, smoothing);
         add_model(std::move(model), 1.0);
@@ -26,11 +26,13 @@ std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing) {
     notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " +
                     std::to_string(features.size()) + " with " + select_features_algorithm);
     return featuresSelected;
 }
-void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing) {
+void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing)
+{
     X_train_ = TensorUtils::to_matrix(X_train);
     y_train_ = TensorUtils::to_vector<int>(y_train);
-    if (convergence) {
+    if (convergence)
+    {
         X_test_ = TensorUtils::to_matrix(X_test);
         y_test_ = TensorUtils::to_vector<int>(y_test);
     }
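TensorUtils::to_matrix and TensorUtils::to_vector are project helpers whose definitions are not part of this diff. A minimal sketch of what such a conversion typically looks like with the libtorch accessor API; the name, signature, and dtype here are assumptions:

#include <torch/torch.h>
#include <vector>

// Hypothetical stand-in for TensorUtils::to_matrix; the real helper is not
// shown in this diff and may handle dtypes differently.
std::vector<std::vector<int>> to_matrix(const torch::Tensor &t)
{
    auto acc = t.accessor<int, 2>(); // assumes a CPU-resident 2-D kInt32 tensor
    std::vector<std::vector<int>> m(t.size(0), std::vector<int>(t.size(1)));
    for (int64_t i = 0; i < t.size(0); ++i)
        for (int64_t j = 0; j < t.size(1); ++j)
            m[i][j] = acc[i][j];
    return m;
}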
@@ -40,21 +42,25 @@ void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_
     bool finished = false;
     std::vector<int> featuresUsed;
     n_models = 0;
-    if (selectFeatures) {
+    if (selectFeatures)
+    {
         featuresUsed = initializeModels(smoothing);
         auto ypred = predict(X_train_);
         auto ypred_t = torch::tensor(ypred);
         std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
         // Update significance of the models
-        for (const int &feature : featuresUsed) {
+        for (const int &feature : featuresUsed)
+        {
             significanceModels.pop_back();
         }
-        for (const int &feature : featuresUsed) {
+        for (const int &feature : featuresUsed)
+        {
             significanceModels.push_back(alpha_t);
         }
         // VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t,
         // n_models);
-        if (finished) {
+        if (finished)
+        {
             return;
         }
     }
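The call site above fixes the shape of update_weights: it takes the labels, the ensemble predictions, and the sample weights, and returns the updated weights, the model weight alpha_t, and a finished flag. Its body is not in this diff; what follows is a minimal sketch of the AdaBoost-style (SAMME) update that shape suggests, where every name and formula is an assumption:

#include <torch/torch.h>
#include <cmath>
#include <tuple>

// Hypothetical SAMME-style weight update; not the library's actual
// update_weights, whose implementation is not shown in this commit.
std::tuple<torch::Tensor, double, bool>
samme_update(const torch::Tensor &y, const torch::Tensor &ypred, torch::Tensor weights, int n_classes)
{
    auto miss = (ypred != y).to(torch::kFloat64); // 1.0 where misclassified
    double err = (weights * miss).sum().item<double>() / weights.sum().item<double>();
    // Stop if the learner is perfect or no better than chance
    bool finished = err <= 0.0 || err >= 1.0 - 1.0 / n_classes;
    double alpha = finished ? 1.0 : std::log((1.0 - err) / err) + std::log(n_classes - 1.0);
    weights = weights * torch::exp(alpha * miss); // boost misclassified samples
    weights = weights / weights.sum();            // renormalize to sum to 1
    return {weights, alpha, finished};
}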
@@ -70,15 +76,18 @@ void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_
     // run out of features
     bool ascending = order_algorithm == bayesnet::Orders.ASC;
     std::mt19937 g{173};
-    while (!finished) {
+    while (!finished)
+    {
         // Step 1: Build ranking with mutual information
         auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
-        if (order_algorithm == bayesnet::Orders.RAND) {
+        if (order_algorithm == bayesnet::Orders.RAND)
+        {
             std::shuffle(featureSelection.begin(), featureSelection.end(), g);
         }
         // Remove used features
         featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(),
-                                         [&](auto x) {
+                                         [&](auto x)
+                                         {
                                              return std::find(featuresUsed.begin(), featuresUsed.end(), x) !=
                                                     featuresUsed.end();
                                          }),
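The erase/remove_if pair above is the standard erase-remove idiom: std::remove_if compacts the elements to keep at the front of the vector and returns the new logical end, and erase() trims the leftover tail. A self-contained illustration with plain ints:

#include <algorithm>
#include <vector>

int main()
{
    std::vector<int> candidates = {3, 1, 4, 1, 5};
    std::vector<int> used = {1, 4};
    // Drop every candidate that already appears in `used`
    candidates.erase(std::remove_if(candidates.begin(), candidates.end(),
                                    [&](int x)
                                    {
                                        return std::find(used.begin(), used.end(), x) != used.end();
                                    }),
                     candidates.end());
    // candidates is now {3, 5}
    return 0;
}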
@@ -87,7 +96,8 @@ void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_
         int counter = 0; // The model counter of the current pack
         // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k,
         // featureSelection.size());
-        while (counter++ < k && featureSelection.size() > 0) {
+        while (counter++ < k && featureSelection.size() > 0)
+        {
             auto feature = featureSelection[0];
             featureSelection.erase(featureSelection.begin());
             std::unique_ptr<Classifier> model;
@@ -100,7 +110,8 @@ void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_
              * std::endl;*/
             // DEBUG
             std::vector<int> ypred;
-            if (alpha_block) {
+            if (alpha_block)
+            {
                 //
                 // Compute the prediction with the current ensemble + model
                 //
@@ -111,7 +122,9 @@ void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_
                 model = std::move(models.back());
                 // Remove the model from the ensemble
                 remove_last_model();
-            } else {
+            }
+            else
+            {
                 ypred = model->predict(X_train_);
             }
             // Step 3.1: Compute the classifier amount of say
@@ -125,30 +138,40 @@ void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_
             // featuresUsed: %zu", finished, numItemsPack, n_models,
             // featuresUsed.size());
         } // End of the pack
-        if (convergence && !finished) {
+        if (convergence && !finished)
+        {
             auto y_val_predict = predict(X_test);
             double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
-            if (priorAccuracy == 0) {
+            if (priorAccuracy == 0)
+            {
                 priorAccuracy = accuracy;
-            } else {
+            }
+            else
+            {
                 improvement = accuracy - priorAccuracy;
             }
-            if (improvement < convergence_threshold) {
+            if (improvement < convergence_threshold)
+            {
                 // VLOG_SCOPE_F(3, "  (improvement<threshold) tolerance: %d
                 // numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
                 // numItemsPack, improvement, priorAccuracy, accuracy);
                 tolerance++;
-            } else {
+            }
+            else
+            {
                 // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d
                 // numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
                 // numItemsPack, improvement, priorAccuracy, accuracy);
                 tolerance = 0; // Reset the counter if the model performs better
                 numItemsPack = 0;
             }
-            if (convergence_best) {
+            if (convergence_best)
+            {
                 // Keep the best accuracy until now as the prior accuracy
                 priorAccuracy = std::max(accuracy, priorAccuracy);
-            } else {
+            }
+            else
+            {
                 // Keep the last accuracy obtained as the prior accuracy
                 priorAccuracy = accuracy;
             }
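The convergence_best flag, which the test change at the end of this commit also toggles, only decides which baseline the next pack's accuracy is compared against. A small runnable illustration with hypothetical accuracy values:

#include <algorithm>
#include <cstdio>

int main()
{
    // Hypothetical values: accuracy dipped from 0.90 to 0.88 in this pack.
    double priorAccuracy = 0.90, accuracy = 0.88;
    for (bool convergence_best : {true, false})
    {
        // true:  keep the best accuracy seen so far, so a dip never lowers the bar
        // false: keep the last accuracy, so recovering from a dip counts as improvement
        double next = convergence_best ? std::max(accuracy, priorAccuracy) : accuracy;
        std::printf("convergence_best=%d -> next prior = %.2f\n", convergence_best, next);
    }
    return 0;
}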
@@ -157,28 +180,34 @@ void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_
         // %zu", tolerance, featuresUsed.size(), features.size());
         finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
     }
-    if (tolerance > maxTolerance) {
-        if (numItemsPack < n_models) {
+    if (tolerance > maxTolerance)
+    {
+        if (numItemsPack < n_models)
+        {
             notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
             // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated
             // of %d", numItemsPack, n_models);
-            for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i) {
+            for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i)
+            {
                 remove_last_model();
             }
             // VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features
             // used.", n_models, featuresUsed.size());
-        } else {
+        }
+        else
+        {
             notes.push_back("Convergence threshold reached & 0 models eliminated");
             // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated
             // n_models=%d numItemsPack=%d", n_models, numItemsPack);
         }
     }
-    if (featuresUsed.size() != features.size()) {
+    if (featuresUsed.size() != features.size())
+    {
         notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " +
                         std::to_string(features.size()));
         status = bayesnet::WARNING;
     }
     notes.push_back("Number of models: " + std::to_string(n_models));
     return;
 }
 } // namespace bayesnet

View File

@@ -162,7 +162,7 @@ TEST_CASE("Bisection Best", "[BoostA2DE]")
{"maxTolerance", 3}, {"maxTolerance", 3},
{"convergence", true}, {"convergence", true},
{"block_update", false}, {"block_update", false},
{"convergence_best", false}, {"convergence_best", true},
}); });
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing); clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
REQUIRE(clf.getNumberOfNodes() == 480); REQUIRE(clf.getNumberOfNodes() == 480);