From 6e854dfda385363748d1611f59b54971b5f61658 Mon Sep 17 00:00:00 2001
From: Ricardo Montañana Gómez
Date: Wed, 20 Mar 2024 23:33:02 +0100
Subject: [PATCH] Fix metrics error in BoostAODE Convergence Update algorithm

---
 bayesnet/classifiers/Classifier.cc |  2 +-
 bayesnet/ensembles/BoostAODE.cc    | 35 ++++++++--------
 bayesnet/ensembles/BoostAODE.h     |  4 ++--
 bayesnet/utils/BayesMetrics.cc     |  2 +-
 docs/algorithm.md                  | 67 ++++++++++++++++++++----------
 docs/algorithm.tex                 | 53 +++++++++++++----------
 6 files changed, 98 insertions(+), 65 deletions(-)

diff --git a/bayesnet/classifiers/Classifier.cc b/bayesnet/classifiers/Classifier.cc
index 03c8b57..eed8d91 100644
--- a/bayesnet/classifiers/Classifier.cc
+++ b/bayesnet/classifiers/Classifier.cc
@@ -10,7 +10,7 @@ namespace bayesnet {
         this->className = className;
         this->states = states;
         m = dataset.size(1);
-        n = dataset.size(0) - 1;
+        n = features.size();
         checkFitParameters();
         auto n_classes = states.at(className).size();
         metrics = Metrics(dataset, features, className, n_classes);
diff --git a/bayesnet/ensembles/BoostAODE.cc b/bayesnet/ensembles/BoostAODE.cc
index c449b73..1bb0ad1 100644
--- a/bayesnet/ensembles/BoostAODE.cc
+++ b/bayesnet/ensembles/BoostAODE.cc
@@ -8,7 +8,6 @@
 #include "bayesnet/feature_selection/IWSS.h"
 #include "BoostAODE.h"
-#define LOGURU_WITH_STREAMS 1
 #include "bayesnet/utils/loguru.cpp"
 
 namespace bayesnet {
@@ -41,8 +40,8 @@ namespace bayesnet {
         if (convergence) {
             // Prepare train & validation sets from train data
             auto fold = folding::StratifiedKFold(5, y_, 271);
-            dataset_ = torch::clone(dataset); // save input dataset
+            dataset_ = torch::clone(dataset);
             auto [train, test] = fold.getFold(0);
             auto train_t = torch::tensor(train);
             auto test_t = torch::tensor(test);
@@ -54,9 +53,9 @@ namespace bayesnet {
             dataset = X_train;
             m = X_train.size(1);
             auto n_classes = states.at(className).size();
-            metrics = Metrics(dataset, features, className, n_classes);
             // Build dataset with train data
             buildDataset(y_train);
+            metrics = Metrics(dataset, features, className, n_classes);
         } else {
             // Use all data to train
             X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
@@ -137,9 +136,9 @@ namespace bayesnet {
         }
         return { weights, alpha_t, terminate };
     }
-    std::unordered_set<int> BoostAODE::initializeModels()
+    std::vector<int> BoostAODE::initializeModels()
     {
-        std::unordered_set<int> featuresUsed;
+        std::vector<int> featuresUsed;
         torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
         int maxFeatures = 0;
         if (select_features_algorithm == SelectFeatures.CFS) {
@@ -158,7 +157,7 @@ namespace bayesnet {
         featureSelector->fit();
         auto cfsFeatures = featureSelector->getFeatures();
         for (const int& feature : cfsFeatures) {
-            featuresUsed.insert(feature);
+            featuresUsed.push_back(feature);
             std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
             model->fit(dataset, features, className, states, weights_);
             models.push_back(std::move(model));
@@ -183,7 +182,7 @@ namespace bayesnet {
         double alpha_t = 0;
         torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
         bool finished = false;
-        std::unordered_set<int> featuresUsed;
+        std::vector<int> featuresUsed;
         if (selectFeatures) {
             featuresUsed = initializeModels();
             auto ypred = predict(X_train);
@@ -199,7 +198,7 @@ namespace bayesnet {
         int numItemsPack = 0; // The counter of the models inserted in the current pack
         // Variables to control the accuracy finish condition
         double priorAccuracy = 0.0;
-        double delta = 1.0;
+        double improvement = 1.0;
         double convergence_threshold = 1e-4;
         int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
         // Step 0: Set the finish condition
@@ -222,9 +221,9 @@ namespace bayesnet {
             );
             int k = pow(2, tolerance);
             int counter = 0; // The model counter of the current pack
-            VLOG_SCOPE_F(1, "k=%d", k);
+            VLOG_SCOPE_F(1, "k=%d featureSelection.size: %d", k, featureSelection.size());
             while (counter++ < k && featureSelection.size() > 0) {
-                VLOG_SCOPE_F(2, "counter: %d numItemsPack: %d featureSelection.size: %d", counter, numItemsPack, featureSelection.size());
+                VLOG_SCOPE_F(2, "counter: %d numItemsPack: %d", counter, numItemsPack);
                 auto feature = featureSelection[0];
                 featureSelection.erase(featureSelection.begin());
                 std::unique_ptr<Classifier> model;
@@ -240,10 +239,11 @@ namespace bayesnet {
             }
             // Step 3.4: Store classifier and its accuracy to weigh its future vote
             numItemsPack++;
-            featuresUsed.insert(feature);
+            featuresUsed.push_back(feature);
             models.push_back(std::move(model));
             significanceModels.push_back(alpha_t);
             n_models++;
+            VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %d", numItemsPack, n_models, featuresUsed.size());
         }
         if (convergence && !finished) {
             auto y_val_predict = predict(X_test);
@@ -252,20 +252,21 @@ namespace bayesnet {
                     priorAccuracy = accuracy;
                     VLOG_SCOPE_F(3, "First accuracy: %f", priorAccuracy);
                 } else {
-                    delta = accuracy - priorAccuracy;
+                    improvement = accuracy - priorAccuracy;
                 }
-                if (delta < convergence_threshold) {
+                if (improvement < convergence_threshold) {
                     tolerance++;
                 } else {
-                    VLOG_SCOPE_F(3, "(delta>=threshold) Reset. tolerance: %d numItemsPack: %d delta: %f prior: %f current: %f", tolerance, numItemsPack, delta, priorAccuracy, accuracy);
+                    VLOG_SCOPE_F(3, "*(improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                     tolerance = 0; // Reset the counter if the model performs better
                     numItemsPack = 0;
                 }
                 // Keep the best accuracy until now as the prior accuracy
-                // priorAccuracy = std::max(accuracy, priorAccuracy);
-                priorAccuracy = accuracy;
+                priorAccuracy = std::max(accuracy, priorAccuracy);
+                // priorAccuracy = accuracy;
             }
+            VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %d features.size: %d", tolerance, featuresUsed.size(), features.size());
             finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
         }
         if (tolerance > maxTolerance) {
diff --git a/bayesnet/ensembles/BoostAODE.h b/bayesnet/ensembles/BoostAODE.h
index 2378a0c..5849b2a 100644
--- a/bayesnet/ensembles/BoostAODE.h
+++ b/bayesnet/ensembles/BoostAODE.h
@@ -15,8 +15,8 @@ namespace bayesnet {
         void buildModel(const torch::Tensor& weights) override;
         void trainModel(const torch::Tensor& weights) override;
     private:
-        std::unordered_set<int> initializeModels();
-        torch::Tensor dataset_;
+        std::vector<int> initializeModels();
+        torch::Tensor dataset_; // Backup the original dataset
         torch::Tensor X_train, y_train, X_test, y_test;
         // Hyperparameters
         bool bisection = false; // if true, use bisection strategy to add k models at once to the ensemble
diff --git a/bayesnet/utils/BayesMetrics.cc b/bayesnet/utils/BayesMetrics.cc
index 4617581..5f6aa77 100644
--- a/bayesnet/utils/BayesMetrics.cc
+++ b/bayesnet/utils/BayesMetrics.cc
@@ -24,7 +24,7 @@ namespace bayesnet {
     std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
     {
         // Return the K Best features
-        auto n = samples.size(0) - 1;
+        auto n = features.size();
         if (k == 0) {
             k = n;
         }
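
Note on the convergence hunk above: the bookkeeping is easier to follow outside
the ensemble code. Below is a minimal standalone sketch of the fixed update
(the Convergence struct and update() helper are illustrative names, not part of
the library); the key change in this patch is that priorAccuracy now keeps the
best accuracy seen so far instead of the last one.

    #include <algorithm>
    #include <cstdio>

    struct Convergence {
        double priorAccuracy = 0.0;   // best validation accuracy seen so far
        double improvement = 1.0;     // as initialized in trainModel
        double threshold = 1e-4;      // convergence_threshold
        int tolerance = 0;            // consecutive packs without enough gain
        int numItemsPack = 0;         // models added since the last reset

        void update(double accuracy) {
            if (priorAccuracy == 0.0) {
                priorAccuracy = accuracy;             // first pack: just record it
            } else {
                improvement = accuracy - priorAccuracy;
            }
            if (improvement < threshold) {
                tolerance++;                          // no real gain
            } else {
                tolerance = 0;                        // model performs better: reset
                numItemsPack = 0;                     // start a fresh pack
            }
            priorAccuracy = std::max(accuracy, priorAccuracy);
        }
    };

    int main() {
        Convergence c;
        for (double acc : { 0.80, 0.82, 0.8201, 0.8201, 0.8201 }) {
            c.update(acc);
            std::printf("tolerance=%d prior=%.4f\n", c.tolerance, c.priorAccuracy);
        }
    }

The trace prints tolerance 0, 0, 0, 1, 2: with the documented maxTolerance of 3,
two more flat packs would push tolerance past it, stop training, and trigger the
removal of the unfinished pack.
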
diff --git a/docs/algorithm.md b/docs/algorithm.md
index 1dbed69..b1fed1a 100644
--- a/docs/algorithm.md
+++ b/docs/algorithm.md
@@ -1,3 +1,17 @@
+# Algorithm
+
+- // notation
+
+- $n$ features ${\cal{X}} = \{X_1, \dots, X_n\}$ and the class $Y$
+
+- $m$ instances.
+
+- $D = \{ (x_1^i, \dots, x_n^i, y^i) \}_{i=1}^{m}$
+
+- $W$ a weights vector. $W_0$ are the initial weights.
+
+- $D[W]$ dataset with weights $W$ for the instances.
+
 1. // initialization
 
 2. $W_0 \leftarrow (w_1, \dots, w_m) \leftarrow 1/m$
@@ -8,35 +22,38 @@
 
 5. $\delta \leftarrow 10^{-4}$
 
-6. $convergence \leftarrow True$
+6. $convergence \leftarrow True$ // hyperparameter
 
-7. $maxTolerancia \leftarrow 3$
+7. $maxTolerance \leftarrow 3$ // hyperparameter
 
-8. $bisection \leftarrow False$
+8. $bisection \leftarrow False$ // hyperparameter
 
-9. $error \leftarrow \inf$
+9. $finished \leftarrow False$
 
-10. $finished \leftarrow False$
+10. $AODE \leftarrow \emptyset$ // the ensemble
 
-11. $AODE \leftarrow \emptyset$ // the ensemble
+11. $tolerance \leftarrow 0$
 
-12. $tolerance \leftarrow 0$
+12. $numModelsInPack \leftarrow 0$
 
-13. $numModelsInPack \leftarrow 0$
+13. $maxAccuracy \leftarrow -1$
+
 14. 
 
 15. // main loop
 
-16. While (!finished)
+16. While $(\lnot finished)$
 
     1. $\pi \leftarrow SortFeatures(Vars, criterio, D[W])$
 
    2. $k \leftarrow 2^{tolerance}$
 
-    3. if ($tolerance == 0$)
-       $numItemsPack \leftarrow 0$
+    3. if ($tolerance == 0$) $numItemsPack \leftarrow 0$
 
     4. $P \leftarrow Head(\pi,k)$ // first k features in order
 
+    5. $spodes \leftarrow \emptyset$
+
     6. $i \leftarrow 0$
 
     7. While ($i < size(P)$)
 
             2. break
 
-        10. $AODE.add( (spode,\alpha_t) )$
+        10. $spodes.add( (spode,\alpha_t) )$
 
         11. $W \leftarrow UpdateWeights(D[W],\alpha,y[],\hat{y}[])$
 
-    8. if ($convergence$ $\And$ $! finished$)
+    8. $AODE.add( spodes )$
+
+    9. if ($convergence \land \lnot finished$)
 
        1. $\hat{y}[] \leftarrow AODE.Predict(D[W])$
 
-       2. $e \leftarrow error(\hat{y}[], y[])$
+       2. $actualAccuracy \leftarrow accuracy(\hat{y}[], y[])$
 
-       3. if $(e > (error+\delta))$ // result doesn't improve
+       3. if $(maxAccuracy == -1)\; maxAccuracy \leftarrow actualAccuracy$
 
-          1. if $(tolerance == maxTolerance)\; finished\leftarrow True$
+       4. if $((actualAccuracy - maxAccuracy) < \delta)$ // result doesn't improve enough
 
-          2. else $tolerance \leftarrow tolerance+1$
+          1. $tolerance \leftarrow tolerance + 1$
 
-       4. else
+       5. else
 
           1. $tolerance \leftarrow 0$
 
-          2. $error \leftarrow min(error,e)$
+          2. $numItemsPack \leftarrow 0$
 
-    9. if $(Vars == \emptyset) \; finished \leftarrow True$
+    10. If $(Vars == \emptyset \lor tolerance > maxTolerance) \; finished \leftarrow True$
+
+    11. $maxAccuracy \leftarrow max(maxAccuracy, actualAccuracy)$
 
-17. if ($tolerance == maxTolerance$) // algorithm finished because of
+17. if ($tolerance > maxTolerance$) // algorithm finished because of
     lack of convergence
 
     1. $removeModels(AODE, numItemsPack)$
 
-    2. $W \leftarrow W_B$
-
 18. Return $AODE$
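
The pseudocode above leaves $UpdateWeights(D[W],\alpha,y[],\hat{y}[])$
abstract. Below is a sketch of the AdaBoost-style reweighting it implies
(an illustration with hypothetical names, not necessarily the exact library
routine): misclassified instances are boosted by $e^{\alpha}$, correctly
classified ones shrunk by $e^{-\alpha}$, and $W$ is renormalized so it
remains a distribution.

    #include <cmath>
    #include <cstddef>
    #include <vector>

    std::vector<double> updateWeights(std::vector<double> w,
                                      const std::vector<int>& y,
                                      const std::vector<int>& yhat,
                                      double alpha)
    {
        double total = 0.0;
        for (std::size_t i = 0; i < w.size(); ++i) {
            // wrong predictions gain weight, right ones lose it
            w[i] *= std::exp(yhat[i] == y[i] ? -alpha : alpha);
            total += w[i];
        }
        for (auto& wi : w) wi /= total;   // renormalize to sum to 1
        return w;
    }
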
diff --git a/docs/algorithm.tex b/docs/algorithm.tex
index 15ab8b0..3de221c 100644
--- a/docs/algorithm.tex
+++ b/docs/algorithm.tex
@@ -1,25 +1,37 @@
+\section{Algorithm}
+\begin{itemize}
+\item[] // notation
+\item $n$ features ${\cal{X}} = \{X_1, \dots, X_n\}$ and the class $Y$
+\item $m$ instances.
+\item $D = \{ (x_1^i, \dots, x_n^i, y^i) \}_{i=1}^{m}$
+\item $W$ a weights vector. $W_0$ are the initial weights.
+\item $D[W]$ dataset with weights $W$ for the instances.
+\end{itemize}
+\bigskip
+
+
 \begin{enumerate}
 \item[] // initialization
 \item $W_0 \leftarrow (w_1, \dots, w_m) \leftarrow 1/m$
 \item $W \leftarrow W_0$
 \item $Vars \leftarrow {\cal{X}}$
 \item $\delta \leftarrow 10^{-4}$
-\item $convergence \leftarrow True$
-\item $maxTolerancia \leftarrow 3$
-\item $bisection \leftarrow False$
-\item $error \leftarrow \inf$
+\item $convergence \leftarrow True$ // hyperparameter
+\item $maxTolerance \leftarrow 3$ // hyperparameter
+\item $bisection \leftarrow False$ // hyperparameter
 \item $finished \leftarrow False$
 \item $AODE \leftarrow \emptyset$ \hspace*{2cm} // the ensemble
 \item $tolerance \leftarrow 0$
 \item $numModelsInPack \leftarrow 0$
+\item $maxAccuracy \leftarrow -1$
 \item[]
 \newpage
 \item[] // main loop
-\item While (!finished)
+\item While $(\lnot finished)$
   \begin{enumerate}
   \item $\pi \leftarrow SortFeatures(Vars, criterio, D[W])$
-  \item if $(bisection) \; k \leftarrow 2^{tolerance} \;$ else $k \leftarrow 1$
-  \item if ($k tolerance == 0$) $W_B \leftarrow W$; $numItemsPack \leftarrow0$
+  \item $k \leftarrow 2^{tolerance}$
+  \item if ($tolerance == 0$) $numItemsPack \leftarrow 0$
   \item $P \leftarrow Head(\pi,k)$ \hspace*{2cm} // first k features in order
   \item $spodes \leftarrow \emptyset$
   \item $i \leftarrow 0$
@@ -31,9 +43,9 @@
     \item $Vars.remove(X)$
     \item $spode \leftarrow BuildSpode(X, {\cal{X}}, D[W])$
     \item $\hat{y}[] \leftarrow spode.Predict(D[W])$
-    \item $e \leftarrow error(\hat{y}[], y[])$
-    \item $\alpha \leftarrow \frac{1}{2} ln \left ( \frac{1-e}{e} \right )$
-    \item if ($\alpha > 0.5$)
+    \item $\epsilon \leftarrow error(\hat{y}[], y[])$
+    \item $\alpha \leftarrow \frac{1}{2} \ln \left ( \frac{1-\epsilon}{\epsilon} \right )$
+    \item if ($\epsilon > 0.5$)
       \begin{enumerate}
       \item $finished \leftarrow True$
       \item break
@@ -42,28 +54,27 @@
     \item $W \leftarrow UpdateWeights(D[W],\alpha,y[],\hat{y}[])$
   \end{enumerate}
 \item $AODE.add( spodes )$
-\item if ($convergence \And ! finished$)
+\item if ($convergence \land \lnot finished$)
   \begin{enumerate}
-  \item $\hat{y}[] \leftarrow Predict(D,spodes)$
-  \item $e \leftarrow error(\hat{y}[], y[])$
-  \item if $(e > (error+\delta))$ \hspace*{2cm} // result doesn't improve
+  \item $\hat{y}[] \leftarrow AODE.Predict(D[W])$
+  \item $actualAccuracy \leftarrow accuracy(\hat{y}[], y[])$
+  \item if $(maxAccuracy == -1)\; maxAccuracy \leftarrow actualAccuracy$
+  \item if $((actualAccuracy - maxAccuracy) < \delta)$ \hspace*{2cm} // result doesn't improve enough
     \begin{enumerate}
-    \item if $(tolerance == maxTolerance) \;\; finished\leftarrow True$
-    \item else $tolerance \leftarrow tolerance+1$
+    \item $tolerance \leftarrow tolerance + 1$
     \end{enumerate}
   \item else
     \begin{enumerate}
     \item $tolerance \leftarrow 0$
-    \item $error \leftarrow min(error,e)$
+    \item $numItemsPack \leftarrow 0$
    \end{enumerate}
   \end{enumerate}
-\item If $(Vars == \emptyset) \; finished \leftarrow True$
-
+\item If $(Vars == \emptyset \lor tolerance > maxTolerance) \; finished \leftarrow True$
+\item $maxAccuracy \leftarrow max(maxAccuracy, actualAccuracy)$
 \end{enumerate}
-\item if ($tolerance == maxTolerance$) // algorithm finished because of lack of convergence
+\item if ($tolerance > maxTolerance$) \hspace*{1cm} // algorithm finished because of lack of convergence
   \begin{enumerate}
   \item $removeModels(AODE, numItemsPack)$
-  \item $W \leftarrow W_B$
   \end{enumerate}
 \item Return $AODE$
 \end{enumerate}
\ No newline at end of file
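
As a worked instance of the vote-weight formula that both documents use
(numbers chosen only for illustration):

    \epsilon = 0.2 \implies
    \alpha = \frac{1}{2}\ln\frac{1-\epsilon}{\epsilon}
           = \frac{1}{2}\ln 4 \approx 0.693

so a SPODE that errs on 20% of the weighted instances casts a vote of about
0.693, while $\epsilon = 0.5$ gives $\alpha = 0$ (no information). For
$\epsilon > 0.5$ the logarithm goes negative, which is why the inner loop
finishes as soon as that threshold is crossed.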