Remove predict_single max_models

Ricardo Montañana Gómez 2024-03-19 11:35:43 +01:00
parent eb97a5a14b
commit 422129802a
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
5 changed files with 182 additions and 50 deletions

BoostAODE.cc

@@ -23,7 +23,7 @@ namespace bayesnet {
{
validHyperparameters = {
"maxModels", "order", "convergence", "threshold",
"select_features", "tolerance", "predict_voting", "predict_single"
"select_features", "tolerance", "predict_voting"
};
}
@@ -63,10 +63,6 @@ namespace bayesnet {
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
auto hyperparameters = hyperparameters_;
if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"];
hyperparameters.erase("maxModels");
}
if (hyperparameters.contains("order")) {
std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
order_algorithm = hyperparameters["order"];
@@ -79,10 +75,6 @@ namespace bayesnet {
convergence = hyperparameters["convergence"];
hyperparameters.erase("convergence");
}
if (hyperparameters.contains("predict_single")) {
predict_single = hyperparameters["predict_single"];
hyperparameters.erase("predict_single");
}
if (hyperparameters.contains("threshold")) {
threshold = hyperparameters["threshold"];
hyperparameters.erase("threshold");
@@ -168,24 +160,10 @@ namespace bayesnet {
delete featureSelector;
return featuresUsed;
}
torch::Tensor BoostAODE::ensemble_predict(torch::Tensor& X, SPODE* model)
{
if (initialize_prob_table) {
initialize_prob_table = false;
prob_table = model->predict_proba(X) * 1.0;
} else {
prob_table += model->predict_proba(X) * 1.0;
}
// prob_table doesn't store probabilities but the sum of them
// to have them we need to divide by the sum of the "weights" used to
// consider the results obtained in the model's predict_proba.
return prob_table.argmax(1);
}
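
For reference, a hedged sketch of what the removed ensemble prediction computed, assuming each model's predict_proba returns an (n_samples x n_classes) tensor: the probability tables of the models trained so far are accumulated and the per-row argmax of the running sum is returned, which equals the argmax of the (unnormalized) ensemble average:

#include <torch/torch.h>
#include <vector>

torch::Tensor accumulated_argmax(const std::vector<torch::Tensor>& probas) {
    // Sum the probability tables of all models seen so far, as the
    // removed prob_table did; dividing by the number of models (or by
    // the sum of the weights) would not change the argmax.
    torch::Tensor sum = probas.at(0).clone();
    for (size_t i = 1; i < probas.size(); ++i) {
        sum += probas[i];
    }
    return sum.argmax(1);
}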
void BoostAODE::trainModel(const torch::Tensor& weights)
{
// Algorithm based on the adaboost algorithm for classification
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
initialize_prob_table = true;
fitted = true;
double alpha_t = 0;
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
@@ -203,19 +181,13 @@ namespace bayesnet {
return;
}
}
bool resetMaxModels = false;
if (maxModels == 0) {
maxModels = .1 * n > 10 ? .1 * n : n;
resetMaxModels = true; // Flag to unset maxModels
}
int numItemsPack = 0;
// Variables to control the accuracy finish condition
double priorAccuracy = 0.0;
double delta = 1.0;
double convergence_threshold = 1e-4;
int worse_model_count = 0; // number of times the accuracy improvement is below the convergence_threshold
// Step 0: Set the finish condition
// if not repeatSparent, one finish condition is running out of features
// n_models == maxModels
// epsilon_t > 0.5 => invert the weights policy
// validation error is not decreasing
bool ascending = order_algorithm == Orders.ASC;
@@ -239,11 +211,7 @@ namespace bayesnet {
model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_);
torch::Tensor ypred;
if (predict_single) {
ypred = model->predict(X_train);
} else {
ypred = ensemble_predict(X_train, dynamic_cast<SPODE*>(model.get()));
}
ypred = model->predict(X_train);
// Step 3.1: Compute the classifier amount of say
std::tie(weights_, alpha_t, exitCondition) = update_weights(y_train, ypred, weights_);
if (exitCondition) {
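
update_weights itself is not shown in this diff, so here is a hedged AdaBoost-style sketch consistent with the formula in the new docs/algorithm.md (alpha = 1/2 ln((1-e)/e)); the library's actual implementation may differ in details:

#include <torch/torch.h>
#include <cmath>
#include <tuple>

// Hedged sketch of an AdaBoost weight update; names mirror the call
// site above but this is not the library's code.
std::tuple<torch::Tensor, double, bool> update_weights_sketch(
    const torch::Tensor& y, const torch::Tensor& ypred, torch::Tensor weights)
{
    auto wrong = (y != ypred).to(torch::kFloat64);
    double epsilon = (weights * wrong).sum().item<double>()
        / weights.sum().item<double>();
    if (epsilon >= 0.5) {
        // worse than chance: signal the exit condition (the real code
        // may instead invert the weighting policy, per the Step 0 comment)
        return { weights, 0.0, true };
    }
    double alpha_t = 0.5 * std::log((1.0 - epsilon) / epsilon);
    // up-weight misclassified samples, down-weight the rest, renormalize
    weights = weights * torch::exp(alpha_t * (2.0 * wrong - 1.0));
    weights = weights / weights.sum();
    return { weights, alpha_t, false };
}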
@@ -269,7 +237,7 @@
}
priorAccuracy = accuracy;
}
exitCondition = n_models >= maxModels && repeatSparent || worse_model_count > tolerance;
exitCondition = worse_model_count > tolerance;
}
if (worse_model_count > tolerance) {
notes.push_back("Convergence threshold reached & last model eliminated");
@@ -282,9 +250,6 @@
status = WARNING;
}
notes.push_back("Number of models: " + std::to_string(n_models));
if (resetMaxModels) {
maxModels = 0;
}
}
std::vector<std::string> BoostAODE::graph(const std::string& title) const
{

BoostAODE.h

@@ -16,20 +16,15 @@ namespace bayesnet {
void trainModel(const torch::Tensor& weights) override;
private:
std::unordered_set<int> initializeModels();
torch::Tensor ensemble_predict(torch::Tensor& X, SPODE* model);
torch::Tensor dataset_;
torch::Tensor X_train, y_train, X_test, y_test;
// Hyperparameters
bool repeatSparent = false; // if true, a feature can be selected more than once
int maxModels = 0;
bool bisection = false; // if true, use bisection strategy to add k models at once to the ensemble
int tolerance = 0;
bool predict_single = true; // whether the last model or the whole ensemble is used to predict during training
std::string order_algorithm; // order to process the KBest features: asc, desc, rand
bool convergence = false; // if true, stop when the model does not improve
bool selectFeatures = false; // if true, use feature selection
std::string select_features_algorithm = "desc"; // Selected feature selection algorithm
bool initialize_prob_table; // if true, initialize the prob_table with the first model (used in train)
torch::Tensor prob_table; // Table of probabilities for ensemble predicting if predict_single is false
FeatureSelect* featureSelector = nullptr;
double threshold = -1;
};

BoostAODE.md

@@ -1,12 +1,12 @@
# BoostAODE Algorithm Operation
The algorithm is based on AdaBoost with some new proposals that can be activated using the hyperparameters described below.
## Algorithm
## Hyperparameters
The hyperparameters defined in the algorithm are:
- ***repeatSparent*** (*boolean*): Allows dataset variables to be repeated as parents of an *SPODE*. Default value: *false*.
- ***bisection*** (*boolean*): If set to *true*, allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *false*.
- ***maxModels*** (*int*): Maximum number of models (*SPODEs*) to build. This hyperparameter is only taken into account if ***repeatSparent*** is set to *true*. Default value: *0*.
@@ -14,7 +14,7 @@ The hyperparameters defined in the algorithm are:
- ***convergence*** (*boolean*): Sets whether convergence of the result will be used as a termination condition. If set to *true*, the training dataset passed to the model is divided into two sets, one used for training and the other as a test set (so the original test partition becomes a validation partition in this case). The split used is the first partition produced by a stratified 5-fold process with a predetermined seed. The exit condition tied to this *convergence* is that the accuracy obtained by the current model must exceed that of the previous model by more than *1e-4*; otherwise, one is added to the number of models that worsen the result (see the next hyperparameter). Default value: *false*.
- ***tolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *0*.
- ***tolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *0*. If ***bisection*** is set to *true*, this value is used as the exponent of base 2 to compute the number of models to insert at once (see the example after this list).
- ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions. Once the models of the selected variables are built, the algorithm will update the weights using the ensemble and set the significance of all the models built with the same &alpha;<sub>t</sub>. Default value: *""*.
@@ -26,8 +26,6 @@ The hyperparameters defined in the algorithm are:
- ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to false, the weighted average of the probabilities of each model's prediction will be used. Default value: *false*.
- ***predict_single*** (*boolean*): Sets whether the algorithm will use single-model prediction in the learning process. If set to *false*, all models trained up to that point will be used to calculate the prediction necessary to update the weights in the learning process. Default value: *true*.
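
As an illustration of the ***bisection***/***tolerance*** interaction noted above (values chosen purely for the example): with ***bisection*** set to *true*, the number of models inserted at once is $k = 2^{tolerance}$, so after successive non-improving rounds the packs contain $2^0 = 1$, $2^1 = 2$, $2^2 = 4$, ... models.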
## Operation
The algorithm performs the following steps:

docs/algorithm.md (new file)

@@ -0,0 +1,105 @@
1. // initialization
2. $W_0 \leftarrow (w_1, \dots, w_m) \leftarrow 1/m$
3. $W \leftarrow W_0$
4. $Vars \leftarrow {\cal{X}}$
5. $\delta \leftarrow 10^{-4}$
6. $convergence \leftarrow True$
7. $maxTolerance \leftarrow 3$
8. $bisection \leftarrow False$
9. $error \leftarrow \infty$
10. $finished \leftarrow False$
11. $AODE \leftarrow \emptyset$ // the ensemble
12. $tolerance \leftarrow 0$
13. $numItemsPack \leftarrow 0$
14.
15. // main loop
16. While (!finished)
1. $\pi \leftarrow SortFeatures(Vars, criterion, D[W])$
2. if $(bisection) \; k \leftarrow 2^{tolerance} \;$ else $k \leftarrow 1$
3. if ($tolerance == 0$) $W_B \leftarrow W$; $numItemsPack \leftarrow 0$
4. $P \leftarrow Head(\pi,k)$ // first k features in order
5. $spodes \leftarrow \emptyset$
6. $i \leftarrow 0$
7. While ($i < size(P)$)
1. $X \leftarrow P[i]$
2. $i \leftarrow i + 1$
3. $numItemsPack \leftarrow numItemsPack + 1$
4. $Vars.remove(X)$
5. $spode \leftarrow BuildSpode(X, {\cal{X}}, D[W])$
6. $\hat{y}[] \leftarrow spode.Predict(D[W])$
7. $e \leftarrow error(\hat{y}[], y[])$
8. $\alpha \leftarrow \frac{1}{2} \ln \left ( \frac{1-e}{e} \right )$
9. if ($e > 0.5$)
1. $finished \leftarrow True$
2. break
10. $spodes.add( (spode, \alpha) )$
11. $W \leftarrow UpdateWeights(D[W],\alpha,y[],\hat{y}[])$
8. $AODE.add( spodes )$
9. if ($convergence \wedge \neg finished$)
1. $\hat{y}[] \leftarrow Predict(D,spodes)$
2. $e \leftarrow error(\hat{y}[], y[])$
3. if $(e > (error+\delta))$ // result doesn't improve
1. if $(tolerance == maxTolerance) \;\; finished \leftarrow True$
2. else $tolerance \leftarrow tolerance + 1$
4. else
1. $tolerance \leftarrow 0$
2. $error \leftarrow min(error,e)$
10. If $(Vars == \emptyset) \; finished \leftarrow True$
17. if ($tolerance == maxTolerance$) // algorithm finished because of lack of convergence
1. $removeModels(AODE, numItemsPack)$
2. $W \leftarrow W_B$
18. Return $AODE$
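
As a worked example of the amount-of-say computation in step 16.7.8 (the error value is chosen purely for illustration): a weighted training error of $e = 0.2$ gives

$\alpha = \frac{1}{2} \ln \left( \frac{1 - 0.2}{0.2} \right) = \frac{1}{2} \ln(4) \approx 0.693$

so this *SPODE* would enter the ensemble with positive significance, while any $e > 0.5$ would make $\alpha$ negative and trigger the finish condition in step 16.7.9.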

docs/algorithm.tex (new file)

@@ -0,0 +1,69 @@
\begin{enumerate}
\item[] // initialization
\item $W_0 \leftarrow (w_1, \dots, w_m) \leftarrow 1/m$
\item $W \leftarrow W_0$
\item $Vars \leftarrow {\cal{X}}$
\item $\delta \leftarrow 10^{-4}$
\item $convergence \leftarrow True$
\item $maxTolerance \leftarrow 3$
\item $bisection \leftarrow False$
\item $error \leftarrow \infty$
\item $finished \leftarrow False$
\item $AODE \leftarrow \emptyset$ \hspace*{2cm} // the ensemble
\item $tolerance \leftarrow 0$
\item $numItemsPack \leftarrow 0$
\item[]
\newpage
\item[] // main loop
\item While (!finished)
\begin{enumerate}
\item $\pi \leftarrow SortFeatures(Vars, criterion, D[W])$
\item if $(bisection) \; k \leftarrow 2^{tolerance} \;$ else $k \leftarrow 1$
\item if ($tolerance == 0$) $W_B \leftarrow W$; $numItemsPack \leftarrow 0$
\item $P \leftarrow Head(\pi,k)$ \hspace*{2cm} // first k features in order
\item $spodes \leftarrow \emptyset$
\item $i \leftarrow 0$
\item While ($ i < size(P)$)
\begin{enumerate}
\item $X \leftarrow P[i]$
\item $i \leftarrow i + 1$
\item $numItemsPack \leftarrow numItemsPack + 1$
\item $Vars.remove(X)$
\item $spode \leftarrow BuildSpode(X, {\cal{X}}, D[W])$
\item $\hat{y}[] \leftarrow spode.Predict(D[W])$
\item $e \leftarrow error(\hat{y}[], y[])$
\item $\alpha \leftarrow \frac{1}{2} \ln \left ( \frac{1-e}{e} \right )$
\item if ($e > 0.5$)
\begin{enumerate}
\item $finished \leftarrow True$
\item break
\end{enumerate}
\item $spodes.add( (spode, \alpha) )$
\item $W \leftarrow UpdateWeights(D[W],\alpha,y[],\hat{y}[])$
\end{enumerate}
\item $AODE.add( spodes )$
\item if ($convergence \wedge \neg finished$)
\begin{enumerate}
\item $\hat{y}[] \leftarrow Predict(D,spodes)$
\item $e \leftarrow error(\hat{y}[], y[])$
\item if $(e > (error+\delta))$ \hspace*{2cm} // result doesn't improve
\begin{enumerate}
\item if $(tolerance == maxTolerance) \;\; finished\leftarrow True$
\item else $tolerance \leftarrow tolerance+1$
\end{enumerate}
\item else
\begin{enumerate}
\item $tolerance \leftarrow 0$
\item $error \leftarrow min(error,e)$
\end{enumerate}
\end{enumerate}
\item If $(Vars == \emptyset) \; finished \leftarrow True$
\end{enumerate}
\item if ($tolerance == maxTolerance$) // algorithm finished because of lack of convergence
\begin{enumerate}
\item $removeModels(AODE, numItemsPack)$
\item $W \leftarrow W_B$
\end{enumerate}
\item Return $AODE$
\end{enumerate}