First approach to bisection

This commit is contained in:
Ricardo Montañana Gómez 2024-03-19 14:13:40 +01:00
parent 422129802a
commit 882d905a28
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
4 changed files with 67 additions and 58 deletions

View File

@ -22,8 +22,8 @@ namespace bayesnet {
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting) BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
{ {
validHyperparameters = { validHyperparameters = {
"maxModels", "order", "convergence", "threshold", "maxModels", "bisection", "order", "convergence", "threshold",
"select_features", "tolerance", "predict_voting" "select_features", "maxTolerance", "predict_voting"
}; };
} }
@ -75,13 +75,19 @@ namespace bayesnet {
convergence = hyperparameters["convergence"]; convergence = hyperparameters["convergence"];
hyperparameters.erase("convergence"); hyperparameters.erase("convergence");
} }
if (hyperparameters.contains("bisection")) {
bisection = hyperparameters["bisection"];
hyperparameters.erase("bisection");
}
if (hyperparameters.contains("threshold")) { if (hyperparameters.contains("threshold")) {
threshold = hyperparameters["threshold"]; threshold = hyperparameters["threshold"];
hyperparameters.erase("threshold"); hyperparameters.erase("threshold");
} }
if (hyperparameters.contains("tolerance")) { if (hyperparameters.contains("maxTolerance")) {
tolerance = hyperparameters["tolerance"]; maxTolerance = hyperparameters["maxTolerance"];
hyperparameters.erase("tolerance"); if (maxTolerance < 1 || maxTolerance > 4)
throw std::invalid_argument("Invalid maxTolerance value, must be in [1, 4]");
hyperparameters.erase("maxTolerance");
} }
if (hyperparameters.contains("predict_voting")) { if (hyperparameters.contains("predict_voting")) {
predict_voting = hyperparameters["predict_voting"]; predict_voting = hyperparameters["predict_voting"];
@ -167,17 +173,17 @@ namespace bayesnet {
fitted = true; fitted = true;
double alpha_t = 0; double alpha_t = 0;
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
bool exitCondition = false; bool finished = false;
std::unordered_set<int> featuresUsed; std::unordered_set<int> featuresUsed;
if (selectFeatures) { if (selectFeatures) {
featuresUsed = initializeModels(); featuresUsed = initializeModels();
auto ypred = predict(X_train); auto ypred = predict(X_train);
std::tie(weights_, alpha_t, exitCondition) = update_weights(y_train, ypred, weights_); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
// Update significance of the models // Update significance of the models
for (int i = 0; i < n_models; ++i) { for (int i = 0; i < n_models; ++i) {
significanceModels[i] = alpha_t; significanceModels[i] = alpha_t;
} }
if (exitCondition) { if (finished) {
return; return;
} }
} }
@ -186,13 +192,14 @@ namespace bayesnet {
double priorAccuracy = 0.0; double priorAccuracy = 0.0;
double delta = 1.0; double delta = 1.0;
double convergence_threshold = 1e-4; double convergence_threshold = 1e-4;
int worse_model_count = 0; // number of times the accuracy is lower than the convergence_threshold int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
// Step 0: Set the finish condition // Step 0: Set the finish condition
// epsilon sub t > 0.5 => inverse the weights policy // epsilon sub t > 0.5 => inverse the weights policy
// validation error is not decreasing // validation error is not decreasing
bool ascending = order_algorithm == Orders.ASC; bool ascending = order_algorithm == Orders.ASC;
std::mt19937 g{ 173 }; std::mt19937 g{ 173 };
while (!exitCondition) { torch::Tensor weights_backup;
while (!finished) {
// Step 1: Build ranking with mutual information // Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
if (order_algorithm == Orders.RAND) { if (order_algorithm == Orders.RAND) {
@ -203,25 +210,33 @@ namespace bayesnet {
{ return find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}), { return find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
end(featureSelection) end(featureSelection)
); );
if (featureSelection.empty()) { int k = pow(2, tolerance);
break; if (tolerance == 0) {
} }
auto feature = featureSelection[0]; int i = 0;
std::unique_ptr<Classifier> model; while (i < k && featureSelection.size() > 0) {
model = std::make_unique<SPODE>(feature); auto feature = featureSelection[0];
model->fit(dataset, features, className, states, weights_); featureSelection.erase(featureSelection.begin());
torch::Tensor ypred; std::unique_ptr<Classifier> model;
ypred = model->predict(X_train); model = std::make_unique<SPODE>(feature);
// Step 3.1: Compute the classifier amount of say model->fit(dataset, features, className, states, weights_);
std::tie(weights_, alpha_t, exitCondition) = update_weights(y_train, ypred, weights_); torch::Tensor ypred;
if (exitCondition) { ypred = model->predict(X_train);
break; // Step 3.1: Compute the classifier amount of say
weights_backup = weights_.clone();
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
if (finished) {
finished = true;
weights_ = weights_backup.clone();
break;
}
// Step 3.4: Store classifier and its accuracy to weigh its future vote
featuresUsed.insert(feature);
models.push_back(std::move(model));
significanceModels.push_back(alpha_t);
n_models++;
} }
// Step 3.4: Store classifier and its accuracy to weigh its future vote
featuresUsed.insert(feature);
models.push_back(std::move(model));
significanceModels.push_back(alpha_t);
n_models++;
if (convergence) { if (convergence) {
auto y_val_predict = predict(X_test); auto y_val_predict = predict(X_test);
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0); double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
@ -231,19 +246,23 @@ namespace bayesnet {
delta = accuracy - priorAccuracy; delta = accuracy - priorAccuracy;
} }
if (delta < convergence_threshold) { if (delta < convergence_threshold) {
worse_model_count++; tolerance++;
} else { } else {
worse_model_count = 0; // Reset the counter if the model performs better tolerance = 0; // Reset the counter if the model performs better
} }
priorAccuracy = accuracy; // Keep the best accuracy until now as the prior accuracy
priorAccuracy = std::max(accuracy, priorAccuracy);
} }
exitCondition = worse_model_count > tolerance; finished = finished || tolerance == maxTolerance || featuresUsed.size() == features.size();
} }
if (worse_model_count > tolerance) { if (tolerance == maxTolerance) {
notes.push_back("Convergence threshold reached & last model eliminated"); notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
significanceModels.pop_back(); weights_ = weights_backup;
models.pop_back(); for (int i = 0; i < numItemsPack; ++i) {
n_models--; significanceModels.pop_back();
models.pop_back();
n_models--;
}
} }
if (featuresUsed.size() != features.size()) { if (featuresUsed.size() != features.size()) {
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size())); notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));

View File

@ -20,7 +20,7 @@ namespace bayesnet {
torch::Tensor X_train, y_train, X_test, y_test; torch::Tensor X_train, y_train, X_test, y_test;
// Hyperparameters // Hyperparameters
bool bisection = false; // if true, use bisection strategy to add k models at once to the ensemble bool bisection = false; // if true, use bisection strategy to add k models at once to the ensemble
int tolerance = 0; int maxTolerance = 1;
std::string order_algorithm; // order to process the KBest features asc, desc, rand std::string order_algorithm; // order to process the KBest features asc, desc, rand
bool convergence = false; //if true, stop when the model does not improve bool convergence = false; //if true, stop when the model does not improve
bool selectFeatures = false; // if true, use feature selection bool selectFeatures = false; // if true, use feature selection

View File

@ -8,13 +8,11 @@ The hyperparameters defined in the algorithm are:
- ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *false*. - ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *false*.
- ***maxModels*** (*int*): Maximum number of models (*SPODEs*) to build. This hyperparameter is only taken into account if ***repeatSparent*** is set to *true*. Default value: *0*.
- ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*. - ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*.
- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is made by taking the first partition generated by a process of generating a 5 fold partition with stratification using a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *false*. - ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is made by taking the first partition generated by a process of generating a 5 fold partition with stratification using a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *false*.
- ***tolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *0*. If ***bisection*** is set to *true*, the value of this hyperparameter will be the exponent of base 2 to compute the number of models to insert at once. - ***maxTolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *1*. If ***bisection*** is set to *true*, the value of this hyperparameter will be the exponent of base 2 to compute the number of models to insert at once.
- ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions. Once the models of the selected variables are built, the algorithm will update the weights using the ensemble and set the significance of all the models built with the same &alpha;<sub>t</sub>. Default value: *""*. - ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions. Once the models of the selected variables are built, the algorithm will update the weights using the ensemble and set the significance of all the models built with the same &alpha;<sub>t</sub>. Default value: *""*.

View File

@ -24,24 +24,19 @@
13. $numModelsInPack \leftarrow 0$ 13. $numModelsInPack \leftarrow 0$
14.
15. // main loop 15. // main loop
16. While (!finished) 16. While (!finished)
1. $\pi \leftarrow SortFeatures(Vars, criterio, D[W])$ 1. $\pi \leftarrow SortFeatures(Vars, criterio, D[W])$
2. if $(bisection) \; k \leftarrow 2^{tolerance} \;$ else 2. $k \leftarrow 2^{tolerance}$
$k \leftarrow 1$
3. if ($k tolerance == 0$) $W_B \leftarrow W$; 3. if ($tolerance == 0$)
$numItemsPack \leftarrow0$ $numItemsPack \leftarrow0$
4. $P \leftarrow Head(\pi,k)$ // first k features in order 4. $P \leftarrow Head(\pi,k)$ // first k features in order
5. $spodes \leftarrow \emptyset$
6. $i \leftarrow 0$ 6. $i \leftarrow 0$
7. While ($i < size(P)$) 7. While ($i < size(P)$)
@ -58,32 +53,29 @@
6. $\hat{y}[] \leftarrow spode.Predict(D[W])$ 6. $\hat{y}[] \leftarrow spode.Predict(D[W])$
7. $e \leftarrow error(\hat{y}[], y[])$ 7. $\epsilon \leftarrow error(\hat{y}[], y[])$
8. $\alpha \leftarrow \frac{1}{2} ln \left ( \frac{1-e}{e} \right )$ 8. $\alpha \leftarrow \frac{1}{2} ln \left ( \frac{1-\epsilon}{\epsilon} \right )$
9. if ($\alpha > 0.5$) 9. if ($\epsilon > 0.5$)
1. $finished \leftarrow True$ 1. $finished \leftarrow True$
2. break 2. break
10. $spodes.add( (spode,\alpha_t) )$ 10. $AODE.add( (spode,\alpha_t) )$
11. $W \leftarrow UpdateWeights(D[W],\alpha,y[],\hat{y}[])$ 11. $W \leftarrow UpdateWeights(D[W],\alpha,y[],\hat{y}[])$
8. $AODE.add( spodes )$ 8. if ($convergence$ $\And$ $! finished$)
9. if ($convergence \And ! finished$) 1. $\hat{y}[] \leftarrow AODE.Predict(D[W])$
1. $\hat{y}[] \leftarrow Predict(D,spodes)$
2. $e \leftarrow error(\hat{y}[], y[])$ 2. $e \leftarrow error(\hat{y}[], y[])$
3. if $(e > (error+\delta))$ // result doesn't improve 3. if $(e > (error+\delta))$ // result doesn't improve
1. if 1. if $(tolerance == maxTolerance)\; finished\leftarrow True$
$(tolerance == maxTolerance) \;\; finished\leftarrow True$
2. else $tolerance \leftarrow tolerance+1$ 2. else $tolerance \leftarrow tolerance+1$
@ -93,7 +85,7 @@
2. $error \leftarrow min(error,e)$ 2. $error \leftarrow min(error,e)$
10. If $(Vars == \emptyset) \; finished \leftarrow True$ 9. if $(Vars == \emptyset) \; finished \leftarrow True$
17. if ($tolerance == maxTolerance$) // algorithm finished because of 17. if ($tolerance == maxTolerance$) // algorithm finished because of
lack of convergence lack of convergence