Compare commits


6 Commits

7 changed files with 29 additions and 28 deletions

View File

@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- The sample app is now a separate target in the Makefile and shows how to use the library with a sample dataset
- The worse-model count in BoostAODE is reset to 0 every time a new model produces better accuracy, so the tolerance of the model is the number of **consecutive** models that produce worse accuracy (sketched below).
## [1.0.4] 2024-03-06
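
Below is a minimal, self-contained sketch of the convergence bookkeeping described in the entry above. The variable names mirror the BoostAODE changes shown further down in this compare; the accuracy values and the training stub are purely illustrative and are not part of the library.

```cpp
#include <cstddef>
#include <vector>

// Illustrative stand-in for "train one more model and score the ensemble".
double trainNextModelAndScore(std::size_t i)
{
    static const std::vector<double> accuracies = { 0.80, 0.85, 0.851, 0.851, 0.851, 0.851, 0.851 };
    return accuracies[i < accuracies.size() ? i : accuracies.size() - 1];
}

int main()
{
    const double convergence_threshold = 1e-4;
    const int tolerance = 3;          // hypothetical hyperparameter value
    int worse_model_count = 0;        // consecutive models without a real improvement
    double priorAccuracy = 0.0;
    bool exitCondition = false;
    for (std::size_t i = 0; !exitCondition; ++i) {
        double accuracy = trainNextModelAndScore(i);
        double delta = accuracy - priorAccuracy;
        if (delta < convergence_threshold) {
            worse_model_count++;      // one more consecutive "worse" model
        } else {
            worse_model_count = 0;    // reset: any improvement breaks the streak
        }
        priorAccuracy = accuracy;
        exitCondition = worse_model_count > tolerance;
    }
    return 0;                         // stops after tolerance + 1 consecutive non-improving models
}
```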

View File

@@ -1,6 +1,6 @@
MIT License
-Copyright (c) <year> <copyright holders>
+Copyright (c) 2023 Ricardo Montañana Gómez
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

View File

@@ -1,6 +1,9 @@
# BayesNet
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
![C++](https://img.shields.io/badge/c++-%2300599C.svg?style=flat&logo=c%2B%2B&logoColor=white)
+[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](<https://opensource.org/licenses/MIT>)
+![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000)
+![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
Bayesian Network Classifiers using libtorch from scratch

View File

@@ -5,7 +5,7 @@
namespace bayesnet {
class AODE : public Ensemble {
public:
-AODE(bool predict_voting = true);
+AODE(bool predict_voting = false);
virtual ~AODE() {};
void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& title = "AODE") const override;

View File

@@ -22,7 +22,7 @@ namespace bayesnet {
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
{
validHyperparameters = {
"repeatSparent", "maxModels", "order", "convergence", "threshold",
"maxModels", "order", "convergence", "threshold",
"select_features", "tolerance", "predict_voting", "predict_single"
};
@@ -63,10 +63,6 @@ namespace bayesnet {
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
auto hyperparameters = hyperparameters_;
if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"];
hyperparameters.erase("repeatSparent");
}
if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"];
hyperparameters.erase("maxModels");
@@ -216,7 +212,7 @@ namespace bayesnet {
double priorAccuracy = 0.0;
double delta = 1.0;
double convergence_threshold = 1e-4;
-int count = 0; // number of times the accuracy is lower than the convergence_threshold
+int worse_model_count = 0; // number of consecutive times the accuracy improvement (delta) stays below the convergence threshold
// Step 0: Set the finish condition
// if not repeatSparent, one finish condition is running out of features
// n_models == maxModels
@@ -230,22 +226,15 @@ namespace bayesnet {
if (order_algorithm == Orders.RAND) {
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
}
-auto feature = featureSelection[0];
-if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
-bool used = true;
-for (const auto& feat : featureSelection) {
-if (std::find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
-continue;
-}
-used = false;
-feature = feat;
-break;
+// Remove used features
+featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x)
+{ return find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
+end(featureSelection)
+);
+if (featureSelection.empty()) {
+break;
}
-if (used) {
-exitCondition = true;
-continue;
-}
-}
+auto feature = featureSelection[0];
std::unique_ptr<Classifier> model;
model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_);
@@ -274,11 +263,19 @@ namespace bayesnet {
delta = accuracy - priorAccuracy;
}
if (delta < convergence_threshold) {
-count++;
+worse_model_count++;
+} else {
+worse_model_count = 0; // Reset the counter if the model performs better
}
priorAccuracy = accuracy;
}
-exitCondition = n_models >= maxModels && repeatSparent || count > tolerance;
+exitCondition = n_models >= maxModels && repeatSparent || worse_model_count > tolerance;
}
+if (worse_model_count > tolerance) {
+notes.push_back("Convergence threshold reached & last model eliminated");
+significanceModels.pop_back();
+models.pop_back();
+n_models--;
+}
if (featuresUsed.size() != features.size()) {
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));

View File

@@ -7,7 +7,7 @@
namespace bayesnet {
class BoostAODE : public Ensemble {
public:
-BoostAODE(bool predict_voting = true);
+BoostAODE(bool predict_voting = false);
virtual ~BoostAODE() = default;
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;

View File

@@ -24,7 +24,7 @@ The hyperparameters defined in the algorithm are:
Default value is *-1*, so every time any of those algorithms is called, the threshold has to be set to the desired value.
-- ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to false, the weighted average of the probabilities of each model's prediction will be used. Default value: *true*.
+- ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to false, the weighted average of the probabilities of each model's prediction will be used. Default value: *false*.
- ***predict_single*** (*boolean*): Sets whether the algorithm will use single-model prediction in the learning process. If set to *false*, all models trained up to that point will be used to calculate the prediction necessary to update the weights in the learning process. Default value: *true*.
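
A minimal usage sketch for these prediction-related hyperparameters, assuming the `nlohmann::json`-based `setHyperparameters` interface shown in the diffs above; the include path and the chosen values are illustrative, not prescriptive.

```cpp
#include <nlohmann/json.hpp>

#include "BoostAODE.h" // illustrative include path

int main()
{
    bayesnet::BoostAODE clf;            // predict_voting now defaults to false
    nlohmann::json hyperparameters = {
        { "predict_voting", true },     // vote among the models instead of averaging probabilities
        { "predict_single", false },    // use the whole ensemble built so far when updating weights
        { "convergence", true },        // enable the convergence / tolerance check
        { "tolerance", 3 }              // illustrative number of consecutive worse models allowed
    };
    clf.setHyperparameters(hyperparameters);
    // ... fit the classifier and predict as usual ...
    return 0;
}
```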