Compare commits

...

6 Commits

7 changed files with 29 additions and 28 deletions

View File

@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed ### Changed
- Sample app now is a separate target in the Makefile and shows how to use the library with a sample dataset - Sample app now is a separate target in the Makefile and shows how to use the library with a sample dataset
- The worse-model counter in BoostAODE is now reset to 0 whenever a new model improves accuracy, so the *tolerance* hyperparameter is the number of **consecutive** models that produce worse accuracy.
## [1.0.4] 2024-03-06 ## [1.0.4] 2024-03-06

View File

@@ -1,6 +1,6 @@
MIT License MIT License
Copyright (c) <year> <copyright holders> Copyright (c) 2023 Ricardo Montañana Gómez
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

View File

@@ -1,6 +1,9 @@
# BayesNet # BayesNet
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) ![C++](https://img.shields.io/badge/c++-%2300599C.svg?style=flat&logo=c%2B%2B&logoColor=white)
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](<https://opensource.org/licenses/MIT>)
![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000)
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
Bayesian Network Classifiers using libtorch from scratch Bayesian Network Classifiers using libtorch from scratch

View File

@@ -5,7 +5,7 @@
namespace bayesnet { namespace bayesnet {
class AODE : public Ensemble { class AODE : public Ensemble {
public: public:
AODE(bool predict_voting = true); AODE(bool predict_voting = false);
virtual ~AODE() {}; virtual ~AODE() {};
void setHyperparameters(const nlohmann::json& hyperparameters) override; void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& title = "AODE") const override; std::vector<std::string> graph(const std::string& title = "AODE") const override;

View File

@@ -22,7 +22,7 @@ namespace bayesnet {
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting) BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
{ {
validHyperparameters = { validHyperparameters = {
"repeatSparent", "maxModels", "order", "convergence", "threshold", "maxModels", "order", "convergence", "threshold",
"select_features", "tolerance", "predict_voting", "predict_single" "select_features", "tolerance", "predict_voting", "predict_single"
}; };
@@ -63,10 +63,6 @@ namespace bayesnet {
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_) void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{ {
auto hyperparameters = hyperparameters_; auto hyperparameters = hyperparameters_;
if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"];
hyperparameters.erase("repeatSparent");
}
if (hyperparameters.contains("maxModels")) { if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"]; maxModels = hyperparameters["maxModels"];
hyperparameters.erase("maxModels"); hyperparameters.erase("maxModels");
@@ -216,7 +212,7 @@ namespace bayesnet {
double priorAccuracy = 0.0; double priorAccuracy = 0.0;
double delta = 1.0; double delta = 1.0;
double convergence_threshold = 1e-4; double convergence_threshold = 1e-4;
int count = 0; // number of times the accuracy is lower than the convergence_threshold int worse_model_count = 0; // number of times the accuracy is lower than the convergence_threshold
// Step 0: Set the finish condition // Step 0: Set the finish condition
// if not repeatSparent a finish condition is run out of features // if not repeatSparent a finish condition is run out of features
// n_models == maxModels // n_models == maxModels
@@ -230,22 +226,15 @@ namespace bayesnet {
if (order_algorithm == Orders.RAND) { if (order_algorithm == Orders.RAND) {
std::shuffle(featureSelection.begin(), featureSelection.end(), g); std::shuffle(featureSelection.begin(), featureSelection.end(), g);
} }
auto feature = featureSelection[0]; // Remove used features
if (!repeatSparent || featuresUsed.size() < featureSelection.size()) { featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x)
bool used = true; { return find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
for (const auto& feat : featureSelection) { end(featureSelection)
if (std::find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) { );
continue; if (featureSelection.empty()) {
}
used = false;
feature = feat;
break; break;
} }
if (used) { auto feature = featureSelection[0];
exitCondition = true;
continue;
}
}
std::unique_ptr<Classifier> model; std::unique_ptr<Classifier> model;
model = std::make_unique<SPODE>(feature); model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_);
@@ -274,11 +263,19 @@ namespace bayesnet {
delta = accuracy - priorAccuracy; delta = accuracy - priorAccuracy;
} }
if (delta < convergence_threshold) { if (delta < convergence_threshold) {
count++; worse_model_count++;
} else {
worse_model_count = 0; // Reset the counter if the model performs better
} }
priorAccuracy = accuracy; priorAccuracy = accuracy;
} }
exitCondition = n_models >= maxModels && repeatSparent || count > tolerance; exitCondition = n_models >= maxModels && repeatSparent || worse_model_count > tolerance;
}
if (worse_model_count > tolerance) {
notes.push_back("Convergence threshold reached & last model eliminated");
significanceModels.pop_back();
models.pop_back();
n_models--;
} }
if (featuresUsed.size() != features.size()) { if (featuresUsed.size() != features.size()) {
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size())); notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));

View File

@@ -7,7 +7,7 @@
namespace bayesnet { namespace bayesnet {
class BoostAODE : public Ensemble { class BoostAODE : public Ensemble {
public: public:
BoostAODE(bool predict_voting = true); BoostAODE(bool predict_voting = false);
virtual ~BoostAODE() = default; virtual ~BoostAODE() = default;
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override; std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
void setHyperparameters(const nlohmann::json& hyperparameters) override; void setHyperparameters(const nlohmann::json& hyperparameters) override;

View File

@@ -24,7 +24,7 @@ The hyperparameters defined in the algorithm are:
Default value is *-1* so every time any of those algorithms are called, the threshold has to be set to the desired value. Default value is *-1* so every time any of those algorithms are called, the threshold has to be set to the desired value.
- ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to false, the weighted average of the probabilities of each model's prediction will be used. Default value: *true*. - ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to false, the weighted average of the probabilities of each model's prediction will be used. Default value: *false*.
- ***predict_single*** (*boolean*): Sets whether the algorithm will use single-model prediction in the learning process. If set to *false*, all models trained up to that point will be used to calculate the prediction necessary to update the weights in the learning process. Default value: *true*. - ***predict_single*** (*boolean*): Sets whether the algorithm will use single-model prediction in the learning process. If set to *false*, all models trained up to that point will be used to calculate the prediction necessary to update the weights in the learning process. Default value: *true*.