// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <algorithm>
#include <memory>
#include "bayesnet/feature_selection/CFS.h"
#include "bayesnet/feature_selection/FCBF.h"
#include "bayesnet/feature_selection/IWSS.h"
13 Boost::Boost(
bool predict_voting) : Ensemble(predict_voting)
15 validHyperparameters = {
"order",
"convergence",
"convergence_best",
"bisection",
"threshold",
"maxTolerance",
16 "predict_voting",
"select_features",
"block_update" };
18 void Boost::setHyperparameters(
const nlohmann::json& hyperparameters_)
20 auto hyperparameters = hyperparameters_;
21 if (hyperparameters.contains(
"order")) {
22 std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
23 order_algorithm = hyperparameters[
"order"];
24 if (std::find(algos.begin(), algos.end(), order_algorithm) == algos.end()) {
25 throw std::invalid_argument(
"Invalid order algorithm, valid values [" + Orders.ASC +
", " + Orders.DESC +
", " + Orders.RAND +
"]");
27 hyperparameters.erase(
"order");
29 if (hyperparameters.contains(
"convergence")) {
30 convergence = hyperparameters[
"convergence"];
31 hyperparameters.erase(
"convergence");
33 if (hyperparameters.contains(
"convergence_best")) {
34 convergence_best = hyperparameters[
"convergence_best"];
35 hyperparameters.erase(
"convergence_best");
37 if (hyperparameters.contains(
"bisection")) {
38 bisection = hyperparameters[
"bisection"];
39 hyperparameters.erase(
"bisection");
41 if (hyperparameters.contains(
"threshold")) {
42 threshold = hyperparameters[
"threshold"];
43 hyperparameters.erase(
"threshold");
45 if (hyperparameters.contains(
"maxTolerance")) {
46 maxTolerance = hyperparameters[
"maxTolerance"];
47 if (maxTolerance < 1 || maxTolerance > 4)
48 throw std::invalid_argument(
"Invalid maxTolerance value, must be greater in [1, 4]");
49 hyperparameters.erase(
"maxTolerance");
51 if (hyperparameters.contains(
"predict_voting")) {
52 predict_voting = hyperparameters[
"predict_voting"];
53 hyperparameters.erase(
"predict_voting");
55 if (hyperparameters.contains(
"select_features")) {
56 auto selectedAlgorithm = hyperparameters[
"select_features"];
57 std::vector<std::string> algos = { SelectFeatures.IWSS, SelectFeatures.CFS, SelectFeatures.FCBF };
58 selectFeatures =
true;
59 select_features_algorithm = selectedAlgorithm;
60 if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
61 throw std::invalid_argument(
"Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS +
", " + SelectFeatures.CFS +
", " + SelectFeatures.FCBF +
"]");
63 hyperparameters.erase(
"select_features");
65 if (hyperparameters.contains(
"block_update")) {
66 block_update = hyperparameters[
"block_update"];
67 hyperparameters.erase(
"block_update");
69 Classifier::setHyperparameters(hyperparameters);
71 void Boost::buildModel(
const torch::Tensor& weights)
73 // Models shall be built in trainModel
75 significanceModels.clear();
77 // Prepare the validation dataset
78 auto y_ = dataset.index({ -1,
"..." });
80 // Prepare train & validation sets from train data
81 auto fold = folding::StratifiedKFold(5, y_, 271);
82 auto [train, test] = fold.getFold(0);
83 auto train_t = torch::tensor(train);
84 auto test_t = torch::tensor(test);
85 // Get train and validation sets
86 X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
87 y_train = dataset.index({ -1, train_t });
88 X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
89 y_test = dataset.index({ -1, test_t });
92 auto n_classes = states.at(className).size();
93 // Build dataset with train data
94 buildDataset(y_train);
95 metrics = Metrics(dataset, features, className, n_classes);
97 // Use all data to train
98 X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1),
"..." });
102 std::vector<int> Boost::featureSelection(torch::Tensor& weights_)
105 if (select_features_algorithm == SelectFeatures.CFS) {
106 featureSelector =
new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
107 }
else if (select_features_algorithm == SelectFeatures.IWSS) {
108 if (threshold < 0 || threshold >0.5) {
109 throw std::invalid_argument(
"Invalid threshold value for " + SelectFeatures.IWSS +
" [0, 0.5]");
111 featureSelector =
new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
112 }
else if (select_features_algorithm == SelectFeatures.FCBF) {
113 if (threshold < 1e-7 || threshold > 1) {
114 throw std::invalid_argument(
"Invalid threshold value for " + SelectFeatures.FCBF +
" [1e-7, 1]");
116 featureSelector =
new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
118 featureSelector->fit();
119 auto featuresUsed = featureSelector->getFeatures();
120 delete featureSelector;
123 std::tuple<torch::Tensor&, double, bool> Boost::update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
125 bool terminate =
false;
127 auto mask_wrong = ypred != ytrain;
128 auto mask_right = ypred == ytrain;
129 auto masked_weights = weights * mask_wrong.to(weights.dtype());
130 double epsilon_t = masked_weights.sum().item<
double>();
131 if (epsilon_t > 0.5) {
132 // Inverse the weights policy (plot ln(wt))
133 // "In each round of AdaBoost, there is a sanity check to ensure that the current base
134 // learner is better than random guess" (Zhi-Hua Zhou, 2012)
137 double wt = (1 - epsilon_t) / epsilon_t;
138 alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
139 // Step 3.2: Update weights for next classifier
140 // Step 3.2.1: Update weights of wrong samples
141 weights += mask_wrong.to(weights.dtype()) * exp(alpha_t) * weights;
142 // Step 3.2.2: Update weights of right samples
143 weights += mask_right.to(weights.dtype()) * exp(-alpha_t) * weights;
144 // Step 3.3: Normalise the weights
145 double totalWeights = torch::sum(weights).item<
double>();
146 weights = weights / totalWeights;
148 return { weights, alpha_t, terminate };
150 std::tuple<torch::Tensor&, double, bool> Boost::update_weights_block(
int k, torch::Tensor& ytrain, torch::Tensor& weights)
152 /* Update Block algorithm
153 k = # of models in block
154 n_models = # of models in ensemble to make predictions
155 n_models_bak = # models saved
156 models = vector of models to make predictions
157 models_bak = models not used to make predictions
158 significances_bak = backup of significances vector
161 A) k = 1, n_models = 1 => n = 0 , n_models = n + k
162 B) k = 1, n_models = n + 1 => n_models = n + k
163 C) k > 1, n_models = k + 1 => n= 1, n_models = n + k
164 D) k > 1, n_models = k => n = 0, n_models = n + k
165 E) k > 1, n_models = k + n => n_models = n + k
167 A, D) n=0, k > 0, n_models == k
168 1. n_models_bak <- n_models
169 2. significances_bak <- significances
170 3. significances = vector(k, 1)
171 4. Don’t move any classifiers out of models
173 6. Make prediction, compute alpha, update weights
174 7. Don’t restore any classifiers to models
175 8. significances <- significances_bak
176 9. Update last k significances
177 10. n_models <- n_models_bak
179 B, C, E) n > 0, k > 0, n_models == n + k
180 1. n_models_bak <- n_models
181 2. significances_bak <- significances
182 3. significances = vector(k, 1)
183 4. Move first n classifiers to models_bak
185 6. Make prediction, compute alpha, update weights
186 7. Insert classifiers in models_bak to be the first n models
187 8. significances <- significances_bak
188 9. Update last k significances
189 10. n_models <- n_models_bak
192 // Make predict with only the last k models
194 std::unique_ptr<Classifier> model;
195 std::vector<std::unique_ptr<Classifier>> models_bak;
196 // 1. n_models_bak <- n_models 2. significances_bak <- significances
197 auto significance_bak = significanceModels;
198 auto n_models_bak = n_models;
199 // 3. significances = vector(k, 1)
200 significanceModels = std::vector<double>(k, 1.0);
201 // 4. Move first n classifiers to models_bak
202 // backup the first n_models - k models (if n_models == k, don't backup any)
203 for (
int i = 0; i < n_models - k; ++i) {
204 model = std::move(models[0]);
205 models.erase(models.begin());
206 models_bak.push_back(std::move(model));
208 assert(models.size() == k);
211 // 6. Make prediction, compute alpha, update weights
212 auto ypred = predict(X_train);
218 std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
220 // Restore the models if needed
222 // 7. Insert classifiers in models_bak to be the first n models
223 // if n_models_bak == k, don't restore any, because none of them were moved
224 if (k != n_models_bak) {
225 // Insert in the same order as they were extracted
226 int bak_size = models_bak.size();
227 for (
int i = 0; i < bak_size; ++i) {
228 model = std::move(models_bak[bak_size - 1 - i]);
229 models_bak.erase(models_bak.end() - 1);
230 models.insert(models.begin(), std::move(model));
233 // 8. significances <- significances_bak
234 significanceModels = significance_bak;
236 // Update the significance of the last k models
238 // 9. Update last k significances
239 for (
int i = 0; i < k; ++i) {
240 significanceModels[n_models_bak - k + i] = alpha_t;
242 // 10. n_models <- n_models_bak
243 n_models = n_models_bak;
244 return { weights, alpha_t, terminate };