From 422129802ac0c00f7b39fc37f71ec043c65e4fa4 Mon Sep 17 00:00:00 2001
From: Ricardo Montañana
Date: Tue, 19 Mar 2024 11:35:43 +0100
Subject: [PATCH] Remove predict_single max_models

---
 bayesnet/ensembles/BoostAODE.cc |  43 ++-----------
 bayesnet/ensembles/BoostAODE.h  |   7 +--
 docs/BoostAODE.md               |   8 +--
 docs/algorithm.md               | 105 ++++++++++++++++++++++++++++++++
 docs/algorithm.tex              |  69 +++++++++++++++++++++
 5 files changed, 182 insertions(+), 50 deletions(-)
 create mode 100644 docs/algorithm.md
 create mode 100644 docs/algorithm.tex

diff --git a/bayesnet/ensembles/BoostAODE.cc b/bayesnet/ensembles/BoostAODE.cc
index f9c2205..ffcd432 100644
--- a/bayesnet/ensembles/BoostAODE.cc
+++ b/bayesnet/ensembles/BoostAODE.cc
@@ -23,7 +23,7 @@ namespace bayesnet {
     {
         validHyperparameters = {
             "maxModels", "order", "convergence", "threshold",
-            "select_features", "tolerance", "predict_voting", "predict_single"
+            "select_features", "tolerance", "predict_voting"
         };
     }
@@ -63,10 +63,6 @@ namespace bayesnet {
     void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
     {
         auto hyperparameters = hyperparameters_;
-        if (hyperparameters.contains("maxModels")) {
-            maxModels = hyperparameters["maxModels"];
-            hyperparameters.erase("maxModels");
-        }
         if (hyperparameters.contains("order")) {
             std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
             order_algorithm = hyperparameters["order"];
@@ -79,10 +75,6 @@ namespace bayesnet {
             convergence = hyperparameters["convergence"];
             hyperparameters.erase("convergence");
         }
-        if (hyperparameters.contains("predict_single")) {
-            predict_single = hyperparameters["predict_single"];
-            hyperparameters.erase("predict_single");
-        }
         if (hyperparameters.contains("threshold")) {
             threshold = hyperparameters["threshold"];
             hyperparameters.erase("threshold");
         }
@@ -168,24 +160,10 @@ namespace bayesnet {
         delete featureSelector;
         return featuresUsed;
     }
-    torch::Tensor BoostAODE::ensemble_predict(torch::Tensor& X, SPODE* model)
-    {
-        if (initialize_prob_table) {
-            initialize_prob_table = false;
-            prob_table = model->predict_proba(X) * 1.0;
-        } else {
-            prob_table += model->predict_proba(X) * 1.0;
-        }
-        // prob_table doesn't store probabilities but the sum of them
-        // to have them we need to divide by the sum of the "weights" used to
-        // consider the results obtanined in the model's predict_proba.
-        return prob_table.argmax(1);
-    }
     void BoostAODE::trainModel(const torch::Tensor& weights)
     {
         // Algorithm based on the adaboost algorithm for classification
         // as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
-        initialize_prob_table = true;
         fitted = true;
         double alpha_t = 0;
         torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
@@ -203,19 +181,13 @@ namespace bayesnet {
                 return;
             }
         }
-        bool resetMaxModels = false;
-        if (maxModels == 0) {
-            maxModels = .1 * n > 10 ? .1 * n : n;
-            resetMaxModels = true; // Flag to unset maxModels
-        }
+        int numItemsPack = 0;
         // Variables to control the accuracy finish condition
         double priorAccuracy = 0.0;
         double delta = 1.0;
        double convergence_threshold = 1e-4;
        int worse_model_count = 0; // number of times the accuracy is lower than the convergence_threshold
        // Step 0: Set the finish condition
-        // if not repeatSparent a finish condition is run out of features
-        // n_models == maxModels
        // epsilon sub t > 0.5 => inverse the weights policy
        // validation error is not decreasing
        bool ascending = order_algorithm == Orders.ASC;
@@ -239,11 +211,7 @@ namespace bayesnet {
            model = std::make_unique<SPODE>(feature);
            model->fit(dataset, features, className, states, weights_);
            torch::Tensor ypred;
-            if (predict_single) {
-                ypred = model->predict(X_train);
-            } else {
-                ypred = ensemble_predict(X_train, dynamic_cast<SPODE*>(model.get()));
-            }
+            ypred = model->predict(X_train);
            // Step 3.1: Compute the classifier amount of say
            std::tie(weights_, alpha_t, exitCondition) = update_weights(y_train, ypred, weights_);
            if (exitCondition) {
@@ -269,7 +237,7 @@ namespace bayesnet {
            }
            priorAccuracy = accuracy;
        }
-        exitCondition = n_models >= maxModels && repeatSparent || worse_model_count > tolerance;
+        exitCondition = worse_model_count > tolerance;
    }
    if (worse_model_count > tolerance) {
        notes.push_back("Convergence threshold reached & last model eliminated");
@@ -282,9 +250,6 @@ namespace bayesnet {
        status = WARNING;
    }
    notes.push_back("Number of models: " + std::to_string(n_models));
-    if (resetMaxModels) {
-        maxModels = 0;
-    }
 }
 std::vector<std::string> BoostAODE::graph(const std::string& title) const
 {
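The `update_weights` call in `trainModel` computes each new model's amount of say and reweights the samples AdaBoost-style. The sketch below illustrates that rule under the usual AdaBoost assumptions; `boost_update` and its signature are invented for the example, it is not the library's implementation.

```cpp
#include <torch/torch.h>
#include <cmath>
#include <tuple>

// Illustrative AdaBoost update: eps is the weighted error of the new model,
// alpha_t = 0.5 * ln((1 - eps) / eps) is its "amount of say", and sample
// weights grow on mistakes, shrink on hits, then are renormalized.
std::tuple<torch::Tensor, double> boost_update(const torch::Tensor& y_true,
                                               const torch::Tensor& y_pred,
                                               const torch::Tensor& w)
{
    auto wrong = y_true != y_pred;  // boolean mask of misclassified samples
    double eps = (w * wrong.to(torch::kFloat64)).sum().item<double>()
                 / w.sum().item<double>();
    double alpha_t = 0.5 * std::log((1.0 - eps) / eps);  // amount of say
    auto w_new = w * torch::where(wrong,
                                  torch::full_like(w, std::exp(alpha_t)),
                                  torch::full_like(w, std::exp(-alpha_t)));
    w_new = w_new / w_new.sum();  // keep the weights a probability distribution
    return { w_new, alpha_t };
}
```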
diff --git a/bayesnet/ensembles/BoostAODE.h b/bayesnet/ensembles/BoostAODE.h
index 6e7b546..a0332db 100644
--- a/bayesnet/ensembles/BoostAODE.h
+++ b/bayesnet/ensembles/BoostAODE.h
@@ -16,20 +16,15 @@ namespace bayesnet {
        void trainModel(const torch::Tensor& weights) override;
    private:
        std::unordered_set<std::string> initializeModels();
-        torch::Tensor ensemble_predict(torch::Tensor& X, SPODE* model);
        torch::Tensor dataset_;
        torch::Tensor X_train, y_train, X_test, y_test;
        // Hyperparameters
-        bool repeatSparent = false; // if true, a feature can be selected more than once
-        int maxModels = 0;
+        bool bisection = false; // if true, use the bisection strategy to add k models at once to the ensemble
        int tolerance = 0;
-        bool predict_single = true; // wether the last model is used to predict in training or the whole ensemble
        std::string order_algorithm; // order to process the KBest features asc, desc, rand
        bool convergence = false; // if true, stop when the model does not improve
        bool selectFeatures = false; // if true, use feature selection
        std::string select_features_algorithm = "desc"; // Selected feature selection algorithm
-        bool initialize_prob_table; // if true, initialize the prob_table with the first model (used in train)
-        torch::Tensor prob_table; // Table of probabilities for ensemble predicting if predict_single is false
        FeatureSelect* featureSelector = nullptr;
        double threshold = -1;
    };
diff --git a/docs/BoostAODE.md b/docs/BoostAODE.md
index 1279407..2b45e91 100644
--- a/docs/BoostAODE.md
+++ b/docs/BoostAODE.md
@@ -1,12 +1,12 @@
 # BoostAODE Algorithm Operation
 
-The algorithm is based on the AdaBoost algorithm with some new proposals that can be activated using the following hyperparameters.
+## Algorithm
 
 ## Hyperparameters
 
 The hyperparameters defined in the algorithm are:
 
-- ***repeatSparent*** (*boolean*): Allows dataset variables to be repeated as parents of an *SPODE*. Default value: *false*.
+- ***bisection*** (*boolean*): If set to *true*, allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *false*.
 
 - ***maxModels*** (*int*): Maximum number of models (*SPODEs*) to build. This hyperparameter is only taken into account if ***repeatSparent*** is set to *true*. Default value: *0*.
@@ -14,7 +14,7 @@ The hyperparameters defined in the algorithm are:
 
 - ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to *true*, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition becomes a validation partition in this case). The partition is made by taking the first partition generated by a stratified 5-fold partition process with a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model must be greater than *1e-4*; otherwise, one is added to the number of models that worsen the result (see the next hyperparameter). Default value: *false*.
 
-- ***tolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *0*.
+- ***tolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *0*. If ***bisection*** is set to *true*, this hyperparameter is used as the exponent of base 2 to compute the number of models to insert at once.
 
 - ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions. Once the models of the selected variables are built, the algorithm updates the weights using the ensemble and sets the significance of all the models built with the same α_t. Default value: *""*.
@@ -26,8 +26,6 @@ The hyperparameters defined in the algorithm are:
 
 - ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to *false*, the weighted average of the probabilities of each model's prediction is used instead. Default value: *false*.
 
-- ***predict_single*** (*boolean*): Sets whether the algorithm will use single-model prediction in the learning process. If set to *false*, all models trained up to that point will be used to calculate the prediction necessary to update the weights in the learning process. Default value: *true*.
-
 ## Operation
 
 The algorithm performs the following steps:
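To make the ***bisection*** and ***tolerance*** entries above concrete: with bisection enabled, each outer iteration of the main loop adds a pack of k = 2^tolerance models. A minimal sketch (illustrative code, not part of the library):

```cpp
#include <iostream>

// Pack size per outer iteration: 1 without bisection, 2^tolerance with it.
int pack_size(bool bisection, int tolerance)
{
    return bisection ? (1 << tolerance) : 1;
}

int main()
{
    for (int tolerance = 0; tolerance <= 3; ++tolerance) {
        std::cout << "tolerance=" << tolerance
                  << " -> pack of " << pack_size(true, tolerance)
                  << " SPODEs\n";  // prints packs of 1, 2, 4, 8 models
    }
}
```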
diff --git a/docs/algorithm.md b/docs/algorithm.md
new file mode 100644
index 0000000..2adfc5b
--- /dev/null
+++ b/docs/algorithm.md
@@ -0,0 +1,105 @@
+1. // initialization
+
+2. $W_0 \leftarrow (w_1, \dots, w_m) \leftarrow 1/m$
+
+3. $W \leftarrow W_0$
+
+4. $Vars \leftarrow \mathcal{X}$
+
+5. $\delta \leftarrow 10^{-4}$
+
+6. $convergence \leftarrow True$
+
+7. $maxTolerance \leftarrow 3$
+
+8. $bisection \leftarrow False$
+
+9. $error \leftarrow \infty$
+
+10. $finished \leftarrow False$
+
+11. $AODE \leftarrow \emptyset$ // the ensemble
+
+12. $tolerance \leftarrow 0$
+
+13. $numItemsPack \leftarrow 0$
+
+14.
+
+15. // main loop
+
+16. While (!finished)
+
+    1. $\pi \leftarrow SortFeatures(Vars, criterion, D[W])$
+
+    2. if ($bisection$) $k \leftarrow 2^{tolerance}$ else $k \leftarrow 1$
+
+    3. if ($tolerance == 0$) $W_B \leftarrow W$; $numItemsPack \leftarrow 0$
+
+    4. $P \leftarrow Head(\pi, k)$ // first k features in order
+
+    5. $spodes \leftarrow \emptyset$
+
+    6. $i \leftarrow 0$
+
+    7. While ($i < size(P)$)
+
+        1. $X \leftarrow P[i]$
+
+        2. $i \leftarrow i + 1$
+
+        3. $numItemsPack \leftarrow numItemsPack + 1$
+
+        4. $Vars.remove(X)$
+
+        5. $spode \leftarrow BuildSpode(X, \mathcal{X}, D[W])$
+
+        6. $\hat{y}[] \leftarrow spode.Predict(D[W])$
+
+        7. $e \leftarrow error(\hat{y}[], y[])$
+
+        8. $\alpha \leftarrow \frac{1}{2} \ln \left( \frac{1-e}{e} \right)$
+
+        9. if ($\alpha > 0.5$)
+
+            1. $finished \leftarrow True$
+
+            2. break
+
+        10. $spodes.add((spode, \alpha))$
+
+        11. $W \leftarrow UpdateWeights(D[W], \alpha, y[], \hat{y}[])$
+
+    8. $AODE.add(spodes)$
+
+    9. if ($convergence \wedge \neg finished$)
+
+        1. $\hat{y}[] \leftarrow Predict(D, spodes)$
+
+        2. $e \leftarrow error(\hat{y}[], y[])$
+
+        3. if ($e > (error + \delta)$) // result doesn't improve
+
+            1. if ($tolerance == maxTolerance$) $finished \leftarrow True$
+
+            2. else $tolerance \leftarrow tolerance + 1$
+
+        4. else
+
+            1. $tolerance \leftarrow 0$
+
+            2. $error \leftarrow \min(error, e)$
+
+    10. if ($Vars == \emptyset$) $finished \leftarrow True$
+
+17. if ($tolerance == maxTolerance$) // algorithm finished because of lack of convergence
+
+    1. $removeModels(AODE, numItemsPack)$
+
+    2. $W \leftarrow W_B$
+
+18. Return $AODE$
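The convergence bookkeeping of steps 16.9 and 17 above can be sketched as follows (a reading of the pseudocode with illustrative names, not bayesnet types): the best validation error seen so far is tracked, non-improving packs raise the tolerance counter, and an improvement resets it.

```cpp
#include <algorithm>
#include <limits>

// Illustrative mirror of steps 16.9.3-16.9.4: compare the ensemble's current
// validation error e against the best error so far (plus the slack delta).
struct Convergence {
    double best_error = std::numeric_limits<double>::infinity();
    double delta = 1e-4;
    int tolerance = 0;
    int max_tolerance = 3;
    bool finished = false;

    void observe(double e)
    {
        if (e > best_error + delta) {                 // result doesn't improve
            if (tolerance == max_tolerance) finished = true;
            else ++tolerance;
        } else {
            tolerance = 0;                            // improvement: reset
            best_error = std::min(best_error, e);
        }
    }
};
```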
diff --git a/docs/algorithm.tex b/docs/algorithm.tex
new file mode 100644
index 0000000..15ab8b0
--- /dev/null
+++ b/docs/algorithm.tex
@@ -0,0 +1,69 @@
+\begin{enumerate}
+\item[] // initialization
+\item $W_0 \leftarrow (w_1, \dots, w_m) \leftarrow 1/m$
+\item $W \leftarrow W_0$
+\item $Vars \leftarrow \mathcal{X}$
+\item $\delta \leftarrow 10^{-4}$
+\item $convergence \leftarrow True$
+\item $maxTolerance \leftarrow 3$
+\item $bisection \leftarrow False$
+\item $error \leftarrow \infty$
+\item $finished \leftarrow False$
+\item $AODE \leftarrow \emptyset$ \hspace*{2cm} // the ensemble
+\item $tolerance \leftarrow 0$
+\item $numItemsPack \leftarrow 0$
+\item[]
+\newpage
+\item[] // main loop
+\item While (!finished)
+\begin{enumerate}
+    \item $\pi \leftarrow SortFeatures(Vars, criterion, D[W])$
+    \item if ($bisection$) $k \leftarrow 2^{tolerance}$ else $k \leftarrow 1$
+    \item if ($tolerance == 0$) $W_B \leftarrow W$; $numItemsPack \leftarrow 0$
+    \item $P \leftarrow Head(\pi, k)$ \hspace*{2cm} // first k features in order
+    \item $spodes \leftarrow \emptyset$
+    \item $i \leftarrow 0$
+    \item While ($i < size(P)$)
+    \begin{enumerate}
+        \item $X \leftarrow P[i]$
+        \item $i \leftarrow i + 1$
+        \item $numItemsPack \leftarrow numItemsPack + 1$
+        \item $Vars.remove(X)$
+        \item $spode \leftarrow BuildSpode(X, \mathcal{X}, D[W])$
+        \item $\hat{y}[] \leftarrow spode.Predict(D[W])$
+        \item $e \leftarrow error(\hat{y}[], y[])$
+        \item $\alpha \leftarrow \frac{1}{2} \ln \left( \frac{1-e}{e} \right)$
+        \item if ($\alpha > 0.5$)
+        \begin{enumerate}
+            \item $finished \leftarrow True$
+            \item break
+        \end{enumerate}
+        \item $spodes.add((spode, \alpha))$
+        \item $W \leftarrow UpdateWeights(D[W], \alpha, y[], \hat{y}[])$
+    \end{enumerate}
+    \item $AODE.add(spodes)$
+    \item if ($convergence \wedge \neg finished$)
+    \begin{enumerate}
+        \item $\hat{y}[] \leftarrow Predict(D, spodes)$
+        \item $e \leftarrow error(\hat{y}[], y[])$
+        \item if ($e > (error + \delta)$) \hspace*{2cm} // result doesn't improve
+        \begin{enumerate}
+            \item if ($tolerance == maxTolerance$) $finished \leftarrow True$
+            \item else $tolerance \leftarrow tolerance + 1$
+        \end{enumerate}
+        \item else
+        \begin{enumerate}
+            \item $tolerance \leftarrow 0$
+            \item $error \leftarrow \min(error, e)$
+        \end{enumerate}
+    \end{enumerate}
+    \item if ($Vars == \emptyset$) $finished \leftarrow True$
+\end{enumerate}
+\item if ($tolerance == maxTolerance$) // algorithm finished because of lack of convergence
+\begin{enumerate}
+    \item $removeModels(AODE, numItemsPack)$
+    \item $W \leftarrow W_B$
+\end{enumerate}
+\item Return $AODE$
+\end{enumerate}
\ No newline at end of file
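Step 17 of the listing rolls back the models added since the last improvement and restores the backed-up weights. A hypothetical shape of that rollback, with stand-in types (`Model`, `Weights`) rather than bayesnet classes, assuming `removeModels` drops the trailing `numItemsPack` entries:

```cpp
#include <vector>

// Mirror of step 17: removeModels(AODE, numItemsPack); W <- W_B.
template <typename Model, typename Weights>
void rollback(std::vector<Model>& aode, int numItemsPack,
              Weights& W, const Weights& W_B)
{
    // Drop the last numItemsPack models added since the last improvement.
    aode.erase(aode.end() - numItemsPack, aode.end());
    W = W_B;  // discard weight updates contributed by the removed models
}
```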