// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <set>
#include <functional>
#include <limits.h>
#include <tuple>
#include <folding.hpp>
#include "bayesnet/feature_selection/CFS.h"
#include "bayesnet/feature_selection/FCBF.h"
#include "bayesnet/feature_selection/IWSS.h"
#include "BoostAODE.h"

namespace bayesnet {

    BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
    {
        validHyperparameters = {
            "maxModels", "bisection", "order", "convergence", "threshold",
            "select_features", "maxTolerance", "predict_voting", "block_update"
        };
    }
    void BoostAODE::buildModel(const torch::Tensor& weights)
    {
        // Models shall be built in trainModel
        models.clear();
        significanceModels.clear();
        n_models = 0;
        // Prepare the validation dataset
        auto y_ = dataset.index({ -1, "..." });
        if (convergence) {
            // Prepare train & validation sets from train data
            auto fold = folding::StratifiedKFold(5, y_, 271);
            auto [train, test] = fold.getFold(0);
            auto train_t = torch::tensor(train);
            auto test_t = torch::tensor(test);
            // Get train and validation sets
            X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
            y_train = dataset.index({ -1, train_t });
            X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
            y_test = dataset.index({ -1, test_t });
            dataset = X_train;
            m = X_train.size(1);
            auto n_classes = states.at(className).size();
            // Build dataset with train data
            buildDataset(y_train);
            metrics = Metrics(dataset, features, className, n_classes);
        } else {
            // Use all data to train
            X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
            y_train = y_;
        }
    }
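    // The keys handled below mirror validHyperparameters; an illustrative
    // (not exhaustive) JSON payload could look like:
    //   { "order": "rand", "convergence": true, "maxTolerance": 3,
    //     "select_features": "CFS", "block_update": false }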
    void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
    {
        auto hyperparameters = hyperparameters_;
        if (hyperparameters.contains("order")) {
            std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
            order_algorithm = hyperparameters["order"];
            if (std::find(algos.begin(), algos.end(), order_algorithm) == algos.end()) {
                throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC + ", " + Orders.RAND + "]");
            }
            hyperparameters.erase("order");
        }
        if (hyperparameters.contains("convergence")) {
            convergence = hyperparameters["convergence"];
            hyperparameters.erase("convergence");
        }
        if (hyperparameters.contains("bisection")) {
            bisection = hyperparameters["bisection"];
            hyperparameters.erase("bisection");
        }
        if (hyperparameters.contains("threshold")) {
            threshold = hyperparameters["threshold"];
            hyperparameters.erase("threshold");
        }
        if (hyperparameters.contains("maxTolerance")) {
            maxTolerance = hyperparameters["maxTolerance"];
            if (maxTolerance < 1 || maxTolerance > 4)
                throw std::invalid_argument("Invalid maxTolerance value, must be in [1, 4]");
            hyperparameters.erase("maxTolerance");
        }
        if (hyperparameters.contains("predict_voting")) {
            predict_voting = hyperparameters["predict_voting"];
            hyperparameters.erase("predict_voting");
        }
        if (hyperparameters.contains("select_features")) {
            auto selectedAlgorithm = hyperparameters["select_features"];
            std::vector<std::string> algos = { SelectFeatures.IWSS, SelectFeatures.CFS, SelectFeatures.FCBF };
            selectFeatures = true;
            select_features_algorithm = selectedAlgorithm;
            if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
                throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " + SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
            }
            hyperparameters.erase("select_features");
        }
        if (hyperparameters.contains("block_update")) {
            block_update = hyperparameters["block_update"];
            hyperparameters.erase("block_update");
        }
        Classifier::setHyperparameters(hyperparameters);
    }
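    // AdaBoost-style weight update (see Zhi-Hua Zhou, "Ensemble Methods", 2012):
    // with weighted error epsilon_t = sum_i w_i * [y_i != ypred_i], the model's
    // amount of say is alpha_t = 0.5 * ln((1 - epsilon_t) / epsilon_t). As
    // implemented below, weights of misclassified samples grow by a factor of
    // (1 + e^alpha_t), weights of correct ones by (1 + e^-alpha_t), and the
    // vector is then renormalised to sum to 1. If epsilon_t > 0.5 the base
    // learner is worse than random guessing and boosting is terminated.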
    std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
    {
        bool terminate = false;
        double alpha_t = 0;
        auto mask_wrong = ypred != ytrain;
        auto mask_right = ypred == ytrain;
        auto masked_weights = weights * mask_wrong.to(weights.dtype());
        double epsilon_t = masked_weights.sum().item<double>();
        if (epsilon_t > 0.5) {
            // Invert the weights policy (plot ln(wt))
            // "In each round of AdaBoost, there is a sanity check to ensure that the current base
            // learner is better than random guess" (Zhi-Hua Zhou, 2012)
            terminate = true;
        } else {
            double wt = (1 - epsilon_t) / epsilon_t;
            alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
            // Step 3.2: Update weights for next classifier
            // Step 3.2.1: Update weights of wrong samples
            weights += mask_wrong.to(weights.dtype()) * exp(alpha_t) * weights;
            // Step 3.2.2: Update weights of right samples
            weights += mask_right.to(weights.dtype()) * exp(-alpha_t) * weights;
            // Step 3.3: Normalise the weights
            double totalWeights = torch::sum(weights).item<double>();
            weights = weights / totalWeights;
        }
        return { weights, alpha_t, terminate };
    }
    std::tuple<torch::Tensor&, double, bool> BoostAODE::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
    {
        /* Update Block algorithm
            k = # of models in block
            n_models = # of models in ensemble to make predictions
            n_models_bak = # models saved
            models = vector of models to make predictions
            models_bak = models not used to make predictions
            significances_bak = backup of significances vector

            Case list
            A) k = 1, n_models = 1     => n = 0, n_models = n + k
            B) k = 1, n_models = n + 1 => n_models = n + k
            C) k > 1, n_models = k + 1 => n = 1, n_models = n + k
            D) k > 1, n_models = k     => n = 0, n_models = n + k
            E) k > 1, n_models = k + n => n_models = n + k

            A, D) n = 0, k > 0, n_models == k
            1. n_models_bak <- n_models
            2. significances_bak <- significances
            3. significances = vector(k, 1)
            4. Don't move any classifiers out of models
            5. n_models <- k
            6. Make prediction, compute alpha, update weights
            7. Don't restore any classifiers to models
            8. significances <- significances_bak
            9. Update last k significances
            10. n_models <- n_models_bak

            B, C, E) n > 0, k > 0, n_models == n + k
            1. n_models_bak <- n_models
            2. significances_bak <- significances
            3. significances = vector(k, 1)
            4. Move first n classifiers to models_bak
            5. n_models <- k
            6. Make prediction, compute alpha, update weights
            7. Insert classifiers in models_bak to be the first n models
            8. significances <- significances_bak
            9. Update last k significances
            10. n_models <- n_models_bak
        */
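        // Worked example (case E, values chosen for illustration): with
        // n_models = 5 and k = 2, the first n = 3 classifiers are parked in
        // models_bak, the prediction and alpha_t are computed from the last
        // 2 models only, the 3 parked classifiers are put back in their
        // original order, and significances[3] and significances[4] get alpha_t.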
        //
        // Make predict with only the last k models
        //
        std::unique_ptr<Classifier> model;
        std::vector<std::unique_ptr<Classifier>> models_bak;
        // 1. n_models_bak <- n_models 2. significances_bak <- significances
        auto significance_bak = significanceModels;
        auto n_models_bak = n_models;
        // 3. significances = vector(k, 1)
        significanceModels = std::vector<double>(k, 1.0);
        // 4. Move first n classifiers to models_bak
        // backup the first n_models - k models (if n_models == k, don't backup any)
        for (int i = 0; i < n_models - k; ++i) {
            model = std::move(models[0]);
            models.erase(models.begin());
            models_bak.push_back(std::move(model));
        }
        assert(models.size() == static_cast<size_t>(k));
        // 5. n_models <- k
        n_models = k;
        // 6. Make prediction, compute alpha, update weights
        auto ypred = predict(X_train);
        //
        // Update weights
        //
        double alpha_t;
        bool terminate;
        std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
        //
        // Restore the models if needed
        //
        // 7. Insert classifiers in models_bak to be the first n models
        // if n_models_bak == k, don't restore any, because none of them were moved
        if (k != n_models_bak) {
            // Insert in the same order as they were extracted
            int bak_size = models_bak.size();
            for (int i = 0; i < bak_size; ++i) {
                model = std::move(models_bak[bak_size - 1 - i]);
                models_bak.erase(models_bak.end() - 1);
                models.insert(models.begin(), std::move(model));
            }
        }
        // 8. significances <- significances_bak
        significanceModels = significance_bak;
        //
        // Update the significance of the last k models
        //
        // 9. Update last k significances
        for (int i = 0; i < k; ++i) {
            significanceModels[n_models_bak - k + i] = alpha_t;
        }
        // 10. n_models <- n_models_bak
        n_models = n_models_bak;
        return { weights, alpha_t, terminate };
    }
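    // Note that update_weights_block assigns one shared alpha_t to every model
    // in the pack, computed from the pack's joint prediction, instead of one
    // alpha per model as in the non-block path of trainModel.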
    std::vector<int> BoostAODE::initializeModels()
    {
        std::vector<int> featuresUsed;
        torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
        int maxFeatures = 0;
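        // maxFeatures = 0 is passed through to the feature selectors;
        // presumably it means "no cap on the number of selected features"
        // (assumption based on usage here, not verified against the selectors).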
        if (select_features_algorithm == SelectFeatures.CFS) {
            featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
        } else if (select_features_algorithm == SelectFeatures.IWSS) {
            if (threshold < 0 || threshold > 0.5) {
                throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.IWSS + ", must be in [0, 0.5]");
            }
            featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
        } else if (select_features_algorithm == SelectFeatures.FCBF) {
            if (threshold < 1e-7 || threshold > 1) {
                throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.FCBF + ", must be in [1e-7, 1]");
            }
            featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
        }
        featureSelector->fit();
        auto selectedFeatures = featureSelector->getFeatures();
        auto scores = featureSelector->getScores();
        for (const int& feature : selectedFeatures) {
            featuresUsed.push_back(feature);
            std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
            model->fit(dataset, features, className, states, weights_);
            models.push_back(std::move(model));
            significanceModels.push_back(1.0); // They will be updated later in trainModel
            n_models++;
        }
        notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
        delete featureSelector;
        return featuresUsed;
    }
    void BoostAODE::trainModel(const torch::Tensor& weights)
    {
        // Algorithm based on the AdaBoost algorithm for classification
        // as explained in Ensemble Methods (Zhi-Hua Zhou, 2012)
        fitted = true;
        double alpha_t = 0;
        torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
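        // weights_ above is the AdaBoost sample-weight vector, initialised
        // uniformly to 1/m; update_weights() reshapes it after each model.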
        bool finished = false;
        std::vector<int> featuresUsed;
        if (selectFeatures) {
            featuresUsed = initializeModels();
            auto ypred = predict(X_train);
            std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
            // Update significance of the models
            for (int i = 0; i < n_models; ++i) {
                significanceModels[i] = alpha_t;
            }
            if (finished) {
                return;
            }
        }
        int numItemsPack = 0; // The counter of the models inserted in the current pack
        // Variables to control the accuracy finish condition
        double priorAccuracy = 0.0;
        double improvement = 1.0;
        double convergence_threshold = 1e-4;
        int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
        // Step 0: Set the finish conditions
        //   epsilon_t > 0.5 => invert the weights policy
        //   validation error is not decreasing
        //   run out of features
        bool ascending = order_algorithm == Orders.ASC;
        std::mt19937 g{ 173 };
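        // Main boosting loop: each pass trains a pack of up to k = 2^tolerance
        // SPODE models (one per still-unused feature, in weighted mutual
        // information or random order) and then checks the finish conditions
        // listed above.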
        while (!finished) {
            // Step 1: Build ranking with mutual information
            auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
            if (order_algorithm == Orders.RAND) {
                std::shuffle(featureSelection.begin(), featureSelection.end(), g);
            }
            // Remove used features
            featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x)
                { return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed); }),
                end(featureSelection)
            );
            int k = pow(2, tolerance);
            int counter = 0; // The model counter of the current pack
            while (counter++ < k && featureSelection.size() > 0) {
                auto feature = featureSelection[0];
                featureSelection.erase(featureSelection.begin());
                std::unique_ptr<Classifier> model;
                model = std::make_unique<SPODE>(feature);
                model->fit(dataset, features, className, states, weights_);
                alpha_t = 0.0;
                if (!block_update) {
                    auto ypred = model->predict(X_train);
                    // Step 3.1: Compute the classifier amount of say
                    std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
                }
                // Step 3.4: Store classifier and its accuracy to weigh its future vote
                numItemsPack++;
                featuresUsed.push_back(feature);
                models.push_back(std::move(model));
                significanceModels.push_back(alpha_t);
                n_models++;
            }
            if (block_update) {
                std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
            }
            if (convergence && !finished) {
                auto y_val_predict = predict(X_test);
                double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
                if (priorAccuracy == 0) {
                    priorAccuracy = accuracy;
                } else {
                    improvement = accuracy - priorAccuracy;
                }
                if (improvement < convergence_threshold) {
                    tolerance++;
                } else {
                    tolerance = 0; // Reset the counter if the model performs better
                    numItemsPack = 0;
                }
                // Keep the best accuracy until now as the prior accuracy
                priorAccuracy = std::max(accuracy, priorAccuracy);
            }
            finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
        }
        if (tolerance > maxTolerance) {
            if (numItemsPack < n_models) {
                notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
                for (int i = 0; i < numItemsPack; ++i) {
                    significanceModels.pop_back();
                    models.pop_back();
                    n_models--;
                }
            } else {
                notes.push_back("Convergence threshold reached & 0 models eliminated");
            }
        }
        if (featuresUsed.size() != features.size()) {
            notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
            status = WARNING;
        }
        notes.push_back("Number of models: " + std::to_string(n_models));
    }
    std::vector<std::string> BoostAODE::graph(const std::string& title) const
    {
        return Ensemble::graph(title);
    }
}
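// Usage sketch (illustrative; only the constructor and setHyperparameters
// shown here are defined in this translation unit, fit/predict come from
// the Ensemble/Classifier base interface):
//   auto clf = bayesnet::BoostAODE(false);
//   clf.setHyperparameters({ {"select_features", "CFS"},
//                            {"convergence", true}, {"maxTolerance", 3} });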