Add hyperparameter convergence_best

move test libraries to test folder
This commit is contained in:
2024-04-30 00:52:09 +02:00
parent f014928411
commit ae469b8146
721 changed files with 206095 additions and 2496 deletions

View File

@@ -13,13 +13,14 @@
#include "bayesnet/feature_selection/FCBF.h"
#include "bayesnet/feature_selection/IWSS.h"
#include "BoostAODE.h"
#include "lib/log/loguru.cpp"
namespace bayesnet {
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
{
validHyperparameters = {
"maxModels", "bisection", "order", "convergence", "threshold",
"maxModels", "bisection", "order", "convergence", "convergence_best", "threshold",
"select_features", "maxTolerance", "predict_voting", "block_update"
};
@@ -70,6 +71,10 @@ namespace bayesnet {
convergence = hyperparameters["convergence"];
hyperparameters.erase("convergence");
}
if (hyperparameters.contains("convergence_best")) {
convergence_best = hyperparameters["convergence_best"];
hyperparameters.erase("convergence_best");
}
if (hyperparameters.contains("bisection")) {
bisection = hyperparameters["bisection"];
hyperparameters.erase("bisection");
@@ -262,6 +267,13 @@ namespace bayesnet {
}
void BoostAODE::trainModel(const torch::Tensor& weights)
{
//
// Logging setup
//
loguru::set_thread_name("BoostAODE");
loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX);
// Algorithm based on the adaboost algorithm for classification
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
fitted = true;
@@ -304,8 +316,9 @@ namespace bayesnet {
{ return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
end(featureSelection)
);
int k = pow(2, tolerance);
int k = bisection ? pow(2, tolerance) : 1;
int counter = 0; // The model counter of the current pack
VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
while (counter++ < k && featureSelection.size() > 0) {
auto feature = featureSelection[0];
featureSelection.erase(featureSelection.begin());
@@ -324,6 +337,7 @@ namespace bayesnet {
models.push_back(std::move(model));
significanceModels.push_back(alpha_t);
n_models++;
VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
}
if (block_update) {
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
@@ -337,20 +351,28 @@ namespace bayesnet {
improvement = accuracy - priorAccuracy;
}
if (improvement < convergence_threshold) {
VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
tolerance++;
} else {
VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
tolerance = 0; // Reset the counter if the model performs better
numItemsPack = 0;
}
// Keep the best accuracy until now as the prior accuracy
priorAccuracy = std::max(accuracy, priorAccuracy);
// priorAccuracy = accuracy;
if (convergence_best) {
// Keep the best accuracy until now as the prior accuracy
priorAccuracy = std::max(accuracy, priorAccuracy);
} else {
// Keep the last accuray obtained as the prior accuracy
priorAccuracy = accuracy;
}
}
VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
}
if (tolerance > maxTolerance) {
if (numItemsPack < n_models) {
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
for (int i = 0; i < numItemsPack; ++i) {
significanceModels.pop_back();
models.pop_back();
@@ -358,6 +380,7 @@ namespace bayesnet {
}
} else {
notes.push_back("Convergence threshold reached & 0 models eliminated");
VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
}
}
if (featuresUsed.size() != features.size()) {

View File

@@ -39,6 +39,7 @@ namespace bayesnet {
int maxTolerance = 3;
std::string order_algorithm; // order to process the KBest features asc, desc, rand
bool convergence = true; //if true, stop when the model does not improve
bool convergence_best = false; // wether to keep the best accuracy to the moment or the last accuracy as prior accuracy
bool selectFeatures = false; // if true, use feature selection
std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
FeatureSelect* featureSelector = nullptr;