LCOV - code coverage report

// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <set>
#include <functional>
#include <limits.h>
#include <tuple>
#include <algorithm>
#include <random>
#include <cmath>
#include <cassert>
#include <folding.hpp>
#include "bayesnet/feature_selection/CFS.h"
#include "bayesnet/feature_selection/FCBF.h"
#include "bayesnet/feature_selection/IWSS.h"
#include "BoostAODE.h"

namespace bayesnet {

    BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
    {
        validHyperparameters = {
            "maxModels", "bisection", "order", "convergence", "threshold",
            "select_features", "maxTolerance", "predict_voting", "block_update"
        };
    }
    void BoostAODE::buildModel(const torch::Tensor& weights)
    {
        // Models shall be built in trainModel
        models.clear();
        significanceModels.clear();
        n_models = 0;
        // Prepare the validation dataset
        auto y_ = dataset.index({ -1, "..." });
        if (convergence) {
            // Prepare train & validation sets from train data
            auto fold = folding::StratifiedKFold(5, y_, 271);
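            // Only fold 0 of the stratified 5-fold split is used, i.e. a fixed
            // 80/20 train/validation partition (seed 271 kept for reproducibility).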
            auto [train, test] = fold.getFold(0);
            auto train_t = torch::tensor(train);
            auto test_t = torch::tensor(test);
            // Get train and validation sets
            X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
            y_train = dataset.index({ -1, train_t });
            X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
            y_test = dataset.index({ -1, test_t });
            dataset = X_train;
            m = X_train.size(1);
            auto n_classes = states.at(className).size();
            // Build dataset with train data
            buildDataset(y_train);
            metrics = Metrics(dataset, features, className, n_classes);
        } else {
            // Use all data to train
            X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
            y_train = y_;
        }
    }
    void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
    {
        auto hyperparameters = hyperparameters_;
        if (hyperparameters.contains("order")) {
            std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
            order_algorithm = hyperparameters["order"];
            if (std::find(algos.begin(), algos.end(), order_algorithm) == algos.end()) {
                throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC + ", " + Orders.RAND + "]");
            }
            hyperparameters.erase("order");
        }
        if (hyperparameters.contains("convergence")) {
            convergence = hyperparameters["convergence"];
            hyperparameters.erase("convergence");
        }
        if (hyperparameters.contains("bisection")) {
            bisection = hyperparameters["bisection"];
            hyperparameters.erase("bisection");
        }
        if (hyperparameters.contains("threshold")) {
            threshold = hyperparameters["threshold"];
            hyperparameters.erase("threshold");
        }
        if (hyperparameters.contains("maxTolerance")) {
            maxTolerance = hyperparameters["maxTolerance"];
            if (maxTolerance < 1 || maxTolerance > 4)
                throw std::invalid_argument("Invalid maxTolerance value, must be in [1, 4]");
            hyperparameters.erase("maxTolerance");
        }
        if (hyperparameters.contains("predict_voting")) {
            predict_voting = hyperparameters["predict_voting"];
            hyperparameters.erase("predict_voting");
        }
        if (hyperparameters.contains("select_features")) {
            auto selectedAlgorithm = hyperparameters["select_features"];
            std::vector<std::string> algos = { SelectFeatures.IWSS, SelectFeatures.CFS, SelectFeatures.FCBF };
            selectFeatures = true;
            select_features_algorithm = selectedAlgorithm;
            if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
                throw std::invalid_argument("Invalid select_features value, valid values [" + SelectFeatures.IWSS + ", " + SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
            }
            hyperparameters.erase("select_features");
        }
        if (hyperparameters.contains("block_update")) {
            block_update = hyperparameters["block_update"];
            hyperparameters.erase("block_update");
        }
        Classifier::setHyperparameters(hyperparameters);
    }
    std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
    {
        bool terminate = false;
        double alpha_t = 0;
        auto mask_wrong = ypred != ytrain;
        auto mask_right = ypred == ytrain;
        auto masked_weights = weights * mask_wrong.to(weights.dtype());
        double epsilon_t = masked_weights.sum().item<double>();
        if (epsilon_t > 0.5) {
            // Invert the weights policy (plot ln(wt))
            // "In each round of AdaBoost, there is a sanity check to ensure that the current base
            // learner is better than random guess" (Zhi-Hua Zhou, 2012)
            terminate = true;
        } else {
            double wt = (1 - epsilon_t) / epsilon_t;
            alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
            // Step 3.2: Update weights for next classifier
            // Step 3.2.1: Update weights of wrong samples
            weights += mask_wrong.to(weights.dtype()) * exp(alpha_t) * weights;
            // Step 3.2.2: Update weights of right samples
            weights += mask_right.to(weights.dtype()) * exp(-alpha_t) * weights;
            // Step 3.3: Normalise the weights
            double totalWeights = torch::sum(weights).item<double>();
            weights = weights / totalWeights;
        }
        return { weights, alpha_t, terminate };
    }
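    // For reference, each round above computes the weighted error and the amount of say
    //     epsilon_t = sum_i w_i * [h_t(x_i) != y_i]
    //     alpha_t   = 0.5 * ln((1 - epsilon_t) / epsilon_t)   (alpha_t = 1 when epsilon_t == 0)
    // and then rescales every weight by (1 + e^{+alpha_t}) for misclassified samples and
    // by (1 + e^{-alpha_t}) for correct ones before renormalising to sum 1 -- a variant of
    // the textbook AdaBoost rule w_i <- w_i * e^{+/-alpha_t} / Z_t (Zhi-Hua Zhou, 2012).
    // A round with epsilon_t > 0.5 terminates boosting, since the base learner would then
    // be worse than random guessing.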
    std::tuple<torch::Tensor&, double, bool> BoostAODE::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
    {
        /* Update Block algorithm
            k = # of models in block
            n_models = # of models in ensemble to make predictions
            n_models_bak = # models saved
            models = vector of models to make predictions
            models_bak = models not used to make predictions
            significances_bak = backup of significances vector

            Case list
            A) k = 1, n_models = 1     => n = 0, n_models = n + k
            B) k = 1, n_models = n + 1 => n_models = n + k
            C) k > 1, n_models = k + 1 => n = 1, n_models = n + k
            D) k > 1, n_models = k     => n = 0, n_models = n + k
            E) k > 1, n_models = k + n => n_models = n + k

            A, D) n = 0, k > 0, n_models == k
            1. n_models_bak <- n_models
            2. significances_bak <- significances
            3. significances = vector(k, 1)
            4. Don't move any classifiers out of models
            5. n_models <- k
            6. Make prediction, compute alpha, update weights
            7. Don't restore any classifiers to models
            8. significances <- significances_bak
            9. Update last k significances
            10. n_models <- n_models_bak

            B, C, E) n > 0, k > 0, n_models == n + k
            1. n_models_bak <- n_models
            2. significances_bak <- significances
            3. significances = vector(k, 1)
            4. Move first n classifiers to models_bak
            5. n_models <- k
            6. Make prediction, compute alpha, update weights
            7. Insert classifiers in models_bak to be the first n models
            8. significances <- significances_bak
            9. Update last k significances
            10. n_models <- n_models_bak
        */
        //
        // Make the prediction with only the last k models
        //
        std::unique_ptr<Classifier> model;
        std::vector<std::unique_ptr<Classifier>> models_bak;
        // 1. n_models_bak <- n_models 2. significances_bak <- significances
        auto significance_bak = significanceModels;
        auto n_models_bak = n_models;
        // 3. significances = vector(k, 1)
        significanceModels = std::vector<double>(k, 1.0);
        // 4. Move first n classifiers to models_bak
        // backup the first n_models - k models (if n_models == k, don't backup any)
        for (int i = 0; i < n_models - k; ++i) {
            model = std::move(models[0]);
            models.erase(models.begin());
            models_bak.push_back(std::move(model));
        }
        assert(models.size() == k);
        // 5. n_models <- k
        n_models = k;
        // 6. Make prediction, compute alpha, update weights
        auto ypred = predict(X_train);
        //
        // Update weights
        //
        double alpha_t;
        bool terminate;
        std::tie(weights, alpha_t, terminate) = update_weights(ytrain, ypred, weights);
        //
        // Restore the models if needed
        //
        // 7. Insert classifiers in models_bak to be the first n models
        // if n_models_bak == k, don't restore any, because none of them were moved
        if (k != n_models_bak) {
            // Insert in the same order as they were extracted
            int bak_size = models_bak.size();
            for (int i = 0; i < bak_size; ++i) {
                model = std::move(models_bak[bak_size - 1 - i]);
                models_bak.erase(models_bak.end() - 1);
                models.insert(models.begin(), std::move(model));
            }
        }
        // 8. significances <- significances_bak
        significanceModels = significance_bak;
        //
        // Update the significance of the last k models
        //
        // 9. Update last k significances
        for (int i = 0; i < k; ++i) {
            significanceModels[n_models_bak - k + i] = alpha_t;
        }
        // 10. n_models <- n_models_bak
        n_models = n_models_bak;
        return { weights, alpha_t, terminate };
    }
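    // Worked example of the bookkeeping above (case E): with n_models == 5 and k == 2,
    // the first n == 3 classifiers are moved to models_bak, the prediction is made with
    // the remaining 2, update_weights() yields a single shared alpha_t, the 3 saved
    // classifiers are re-inserted in front in their original order, and
    // significanceModels[3] and significanceModels[4] are both set to alpha_t.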
    std::vector<int> BoostAODE::initializeModels()
    {
        std::vector<int> featuresUsed;
        torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
        int maxFeatures = 0;
        if (select_features_algorithm == SelectFeatures.CFS) {
            featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
        } else if (select_features_algorithm == SelectFeatures.IWSS) {
            if (threshold < 0 || threshold > 0.5) {
                throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.IWSS + ", must be in [0, 0.5]");
            }
            featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
        } else if (select_features_algorithm == SelectFeatures.FCBF) {
            if (threshold < 1e-7 || threshold > 1) {
                throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.FCBF + ", must be in [1e-7, 1]");
            }
            featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
        }
        featureSelector->fit();
        auto cfsFeatures = featureSelector->getFeatures();
        auto scores = featureSelector->getScores();
        for (const int& feature : cfsFeatures) {
            featuresUsed.push_back(feature);
            std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
            model->fit(dataset, features, className, states, weights_);
            models.push_back(std::move(model));
            significanceModels.push_back(1.0); // They will be updated later in trainModel
            n_models++;
        }
        notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
        delete featureSelector;
        return featuresUsed;
    }
    void BoostAODE::trainModel(const torch::Tensor& weights)
    {
        // Algorithm based on the AdaBoost algorithm for classification,
        // as explained in Ensemble Methods (Zhi-Hua Zhou, 2012)
        fitted = true;
        double alpha_t = 0;
        torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
        bool finished = false;
        std::vector<int> featuresUsed;
        if (selectFeatures) {
            featuresUsed = initializeModels();
            auto ypred = predict(X_train);
            std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
            // Update significance of the models
            for (int i = 0; i < n_models; ++i) {
                significanceModels[i] = alpha_t;
            }
            if (finished) {
                return;
            }
        }
        int numItemsPack = 0; // The counter of the models inserted in the current pack
        // Variables to control the accuracy finish condition
        double priorAccuracy = 0.0;
        double improvement = 1.0;
        double convergence_threshold = 1e-4;
        int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
        // Step 0: Set the finish conditions
        //   - epsilon_t > 0.5 => invert the weights policy
        //   - validation error is not decreasing
        //   - run out of features
        bool ascending = order_algorithm == Orders.ASC;
        std::mt19937 g{ 173 };
        while (!finished) {
            // Step 1: Build ranking with mutual information
            auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
            if (order_algorithm == Orders.RAND) {
                std::shuffle(featureSelection.begin(), featureSelection.end(), g);
            }
            // Remove used features
            featureSelection.erase(std::remove_if(begin(featureSelection), end(featureSelection), [&](auto x)
                { return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed); }),
                end(featureSelection)
            );
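            // The pack size grows geometrically: k = 2^tolerance models are added per
            // round, so every round without validation improvement doubles the next pack.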
            int k = pow(2, tolerance);
            int counter = 0; // The model counter of the current pack
            while (counter++ < k && featureSelection.size() > 0) {
                auto feature = featureSelection[0];
                featureSelection.erase(featureSelection.begin());
                std::unique_ptr<Classifier> model;
                model = std::make_unique<SPODE>(feature);
                model->fit(dataset, features, className, states, weights_);
                alpha_t = 0.0;
                if (!block_update) {
                    auto ypred = model->predict(X_train);
                    // Step 3.1: Compute the classifier amount of say
                    std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
                }
                // Step 3.4: Store classifier and its accuracy to weigh its future vote
                numItemsPack++;
                featuresUsed.push_back(feature);
                models.push_back(std::move(model));
                significanceModels.push_back(alpha_t);
                n_models++;
            }
            if (block_update) {
                std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
            }
            if (convergence && !finished) {
                auto y_val_predict = predict(X_test);
                double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
                if (priorAccuracy == 0) {
                    priorAccuracy = accuracy;
                } else {
                    improvement = accuracy - priorAccuracy;
                }
                if (improvement < convergence_threshold) {
                    tolerance++;
                } else {
                    tolerance = 0; // Reset the counter if the model performs better
                    numItemsPack = 0;
                }
                // Keep the best accuracy achieved so far as the prior accuracy
                priorAccuracy = std::max(accuracy, priorAccuracy);
            }
            finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
        }
        if (tolerance > maxTolerance) {
            if (numItemsPack < n_models) {
                notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
                for (int i = 0; i < numItemsPack; ++i) {
                    significanceModels.pop_back();
                    models.pop_back();
                    n_models--;
                }
            } else {
                notes.push_back("Convergence threshold reached & 0 models eliminated");
            }
        }
        if (featuresUsed.size() != features.size()) {
            notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
            status = WARNING;
        }
        notes.push_back("Number of models: " + std::to_string(n_models));
    }
    std::vector<std::string> BoostAODE::graph(const std::string& title) const
    {
        return Ensemble::graph(title);
    }
}
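
As a quick orientation for readers of this report, the snippet below sketches how the hyperparameters parsed in setHyperparameters() might be supplied by a caller. It is an illustrative assumption, not part of the covered file: the constructor flag and the JSON keys come from the code above, while the concrete values (in particular the "select_features" string, whose accepted spellings are defined by SelectFeatures elsewhere in the library) are hypothetical.

#include <nlohmann/json.hpp>
#include "BoostAODE.h"

int main()
{
    // predict_voting flag as declared in the constructor above
    bayesnet::BoostAODE clf(false);
    // Keys match validHyperparameters; the values shown are illustrative only
    clf.setHyperparameters(nlohmann::json{
        { "select_features", "CFS" },  // hypothetical value; must match SelectFeatures.CFS
        { "convergence", true },       // hold out a validation fold for early stopping
        { "maxTolerance", 3 },         // accepted range is [1, 4]
        { "block_update", false }      // per-model instead of per-pack weight updates
    });
    // fit()/predict() are inherited from Ensemble/Classifier and not shown in this file.
    return 0;
}
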
Generated by: LCOV version 2.0-1