2024-04-11 16:02:49 +00:00
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
2023-08-20 18:31:23 +00:00
# include <set>
2023-10-10 16:16:43 +00:00
# include <functional>
# include <limits.h>
2024-03-05 11:10:58 +00:00
# include <tuple>
2024-03-07 23:37:36 +00:00
# include <folding.hpp>
2024-03-08 21:20:54 +00:00
# include "bayesnet/feature_selection/CFS.h"
# include "bayesnet/feature_selection/FCBF.h"
# include "bayesnet/feature_selection/IWSS.h"
2023-10-10 16:16:43 +00:00
# include "BoostAODE.h"
2023-08-15 14:16:04 +00:00
2024-03-20 10:30:02 +00:00
# include "bayesnet/utils/loguru.cpp"
2023-08-15 14:16:04 +00:00
namespace bayesnet {
2024-04-02 07:52:40 +00:00
2024-02-22 17:44:40 +00:00
// Builds a BoostAODE ensemble.
//   predict_voting: forwarded unchanged to the Ensemble base-class constructor.
// The only work done here is registering the hyperparameter names this
// classifier accepts.
// NOTE(review): the names below are kept exactly as they are spelled in this
// file, including the blank after the opening quote and before the closing
// quote - confirm that padding is intended.
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
{
    validHyperparameters = {
        " maxModels ",
        " bisection ",
        " order ",
        " convergence ",
        " threshold ",
        " select_features ",
        " maxTolerance ",
        " predict_voting ",
        " block_update "
    };
}
2023-08-15 14:16:04 +00:00
void BoostAODE : : buildModel ( const torch : : Tensor & weights )
{
2023-08-18 11:43:13 +00:00
// Models shall be built in trainModel
2023-10-10 21:00:38 +00:00
models . clear ( ) ;
2024-02-24 17:36:09 +00:00
significanceModels . clear ( ) ;
2023-10-10 21:00:38 +00:00
n_models = 0 ;
2023-10-10 11:39:11 +00:00
// Prepare the validation dataset
2023-09-06 08:51:07 +00:00
auto y_ = dataset . index ( { - 1 , " ... " } ) ;
2023-09-10 17:50:36 +00:00
if ( convergence ) {
// Prepare train & validation sets from train data
2024-01-07 18:58:22 +00:00
auto fold = folding : : StratifiedKFold ( 5 , y_ , 271 ) ;
2023-09-10 17:50:36 +00:00
auto [ train , test ] = fold . getFold ( 0 ) ;
auto train_t = torch : : tensor ( train ) ;
auto test_t = torch : : tensor ( test ) ;
// Get train and validation sets
X_train = dataset . index ( { torch : : indexing : : Slice ( 0 , dataset . size ( 0 ) - 1 ) , train_t } ) ;
y_train = dataset . index ( { - 1 , train_t } ) ;
X_test = dataset . index ( { torch : : indexing : : Slice ( 0 , dataset . size ( 0 ) - 1 ) , test_t } ) ;
y_test = dataset . index ( { - 1 , test_t } ) ;
dataset = X_train ;
m = X_train . size ( 1 ) ;
auto n_classes = states . at ( className ) . size ( ) ;
// Build dataset with train data
buildDataset ( y_train ) ;
2024-03-20 22:33:02 +00:00
metrics = Metrics ( dataset , features , className , n_classes ) ;
2023-09-10 17:50:36 +00:00
} else {
// Use all data to train
X_train = dataset . index ( { torch : : indexing : : Slice ( 0 , dataset . size ( 0 ) - 1 ) , " ... " } ) ;
y_train = y_ ;
}
2023-10-10 11:39:11 +00:00
}
2023-11-29 11:33:50 +00:00
void BoostAODE : : setHyperparameters ( const nlohmann : : json & hyperparameters_ )
2023-10-10 11:39:11 +00:00
{
2023-11-29 11:33:50 +00:00
auto hyperparameters = hyperparameters_ ;
2024-02-26 16:07:57 +00:00
if ( hyperparameters . contains ( " order " ) ) {
2024-03-05 10:05:11 +00:00
std : : vector < std : : string > algos = { Orders . ASC , Orders . DESC , Orders . RAND } ;
2024-02-26 16:07:57 +00:00
order_algorithm = hyperparameters [ " order " ] ;
if ( std : : find ( algos . begin ( ) , algos . end ( ) , order_algorithm ) = = algos . end ( ) ) {
2024-03-05 10:05:11 +00:00
throw std : : invalid_argument ( " Invalid order algorithm, valid values [ " + Orders . ASC + " , " + Orders . DESC + " , " + Orders . RAND + " ] " ) ;
2024-02-26 16:07:57 +00:00
}
hyperparameters . erase ( " order " ) ;
2023-10-10 11:39:11 +00:00
}
if ( hyperparameters . contains ( " convergence " ) ) {
convergence = hyperparameters [ " convergence " ] ;
2023-11-29 11:33:50 +00:00
hyperparameters . erase ( " convergence " ) ;
2023-10-10 11:39:11 +00:00
}
2024-03-19 13:13:40 +00:00
if ( hyperparameters . contains ( " bisection " ) ) {
bisection = hyperparameters [ " bisection " ] ;
hyperparameters . erase ( " bisection " ) ;
}
2023-10-14 11:12:04 +00:00
if ( hyperparameters . contains ( " threshold " ) ) {
threshold = hyperparameters [ " threshold " ] ;
2023-11-29 11:33:50 +00:00
hyperparameters . erase ( " threshold " ) ;
}
2024-03-19 13:13:40 +00:00
if ( hyperparameters . contains ( " maxTolerance " ) ) {
maxTolerance = hyperparameters [ " maxTolerance " ] ;
if ( maxTolerance < 1 | | maxTolerance > 4 )
throw std : : invalid_argument ( " Invalid maxTolerance value, must be greater in [1, 4] " ) ;
hyperparameters . erase ( " maxTolerance " ) ;
2023-10-14 11:12:04 +00:00
}
2024-02-24 17:36:09 +00:00
if ( hyperparameters . contains ( " predict_voting " ) ) {
predict_voting = hyperparameters [ " predict_voting " ] ;
hyperparameters . erase ( " predict_voting " ) ;
}
2023-10-14 11:12:04 +00:00
if ( hyperparameters . contains ( " select_features " ) ) {
auto selectedAlgorithm = hyperparameters [ " select_features " ] ;
2024-03-06 17:24:27 +00:00
std : : vector < std : : string > algos = { SelectFeatures . IWSS , SelectFeatures . CFS , SelectFeatures . FCBF } ;
2023-10-14 11:12:04 +00:00
selectFeatures = true ;
2024-02-26 16:07:57 +00:00
select_features_algorithm = selectedAlgorithm ;
2023-11-08 17:45:35 +00:00
if ( std : : find ( algos . begin ( ) , algos . end ( ) , selectedAlgorithm ) = = algos . end ( ) ) {
2024-03-05 10:05:11 +00:00
throw std : : invalid_argument ( " Invalid selectFeatures value, valid values [ " + SelectFeatures . IWSS + " , " + SelectFeatures . CFS + " , " + SelectFeatures . FCBF + " ] " ) ;
2023-10-14 11:12:04 +00:00
}
2023-11-29 11:33:50 +00:00
hyperparameters . erase ( " select_features " ) ;
}
2024-04-08 21:36:05 +00:00
if ( hyperparameters . contains ( " block_update " ) ) {
block_update = hyperparameters [ " block_update " ] ;
hyperparameters . erase ( " block_update " ) ;
}
2024-04-07 22:55:30 +00:00
Classifier : : setHyperparameters ( hyperparameters ) ;
2023-09-06 08:51:07 +00:00
}
2024-03-05 11:10:58 +00:00
std : : tuple < torch : : Tensor & , double , bool > update_weights ( torch : : Tensor & ytrain , torch : : Tensor & ypred , torch : : Tensor & weights )
{
bool terminate = false ;
double alpha_t = 0 ;
auto mask_wrong = ypred ! = ytrain ;
auto mask_right = ypred = = ytrain ;
auto masked_weights = weights * mask_wrong . to ( weights . dtype ( ) ) ;
double epsilon_t = masked_weights . sum ( ) . item < double > ( ) ;
if ( epsilon_t > 0.5 ) {
// Inverse the weights policy (plot ln(wt))
// "In each round of AdaBoost, there is a sanity check to ensure that the current base
// learner is better than random guess" (Zhi-Hua Zhou, 2012)
terminate = true ;
} else {
double wt = ( 1 - epsilon_t ) / epsilon_t ;
alpha_t = epsilon_t = = 0 ? 1 : 0.5 * log ( wt ) ;
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
weights + = mask_wrong . to ( weights . dtype ( ) ) * exp ( alpha_t ) * weights ;
// Step 3.2.2: Update weights of right samples
weights + = mask_right . to ( weights . dtype ( ) ) * exp ( - alpha_t ) * weights ;
// Step 3.3: Normalise the weights
double totalWeights = torch : : sum ( weights ) . item < double > ( ) ;
weights = weights / totalWeights ;
}
return { weights , alpha_t , terminate } ;
}
2024-04-09 22:55:36 +00:00
std : : tuple < torch : : Tensor & , double , bool > BoostAODE : : update_weights_block ( int k , torch : : Tensor & ytrain , torch : : Tensor & weights )
{
2024-04-10 22:02:43 +00:00
/* Update Block algorithm
k = # of models in block
n_models = # of models in ensemble to make predictions
n_models_bak = # models saved
models = vector of models to make predictions
models_bak = models not used to make predictions
significances_bak = backup of significances vector
Case list
A ) k = 1 , n_models = 1 = > n = 0 , n_models = n + k
B ) k = 1 , n_models = n + 1 = > n_models = n + k
C ) k > 1 , n_models = k + 1 = > n = 1 , n_models = n + k
D ) k > 1 , n_models = k = > n = 0 , n_models = n + k
E ) k > 1 , n_models = k + n = > n_models = n + k
A , D ) n = 0 , k > 0 , n_models = = k
1. n_models_bak < - n_models
2. significances_bak < - significances
3. significances = vector ( k , 1 )
4. Don ’ t move any classifiers out of models
5. n_models < - k
6. Make prediction , compute alpha , update weights
7. Don ’ t restore any classifiers to models
8. significances < - significances_bak
9. Update last k significances
10. n_models < - n_models_bak
B , C , E ) n > 0 , k > 0 , n_models = = n + k
1. n_models_bak < - n_models
2. significances_bak < - significances
3. significances = vector ( k , 1 )
4. Move first n classifiers to models_bak
5. n_models < - k
6. Make prediction , compute alpha , update weights
7. Insert classifiers in models_bak to be the first n models
8. significances < - significances_bak
9. Update last k significances
10. n_models < - n_models_bak
*/
2024-04-09 22:55:36 +00:00
//
// Make predict with only the last k models
//
std : : unique_ptr < Classifier > model ;
std : : vector < std : : unique_ptr < Classifier > > models_bak ;
2024-04-10 22:02:43 +00:00
// 1. n_models_bak <- n_models 2. significances_bak <- significances
2024-04-09 22:55:36 +00:00
auto significance_bak = significanceModels ;
auto n_models_bak = n_models ;
2024-04-10 22:02:43 +00:00
// 3. significances = vector(k, 1)
significanceModels = std : : vector < double > ( k , 1.0 ) ;
// 4. Move first n classifiers to models_bak
// backup the first n_models - k models (if n_models == k, don't backup any)
VLOG_SCOPE_F ( 1 , " upd_weights_block n_models=%d k=%d " , n_models , k ) ;
2024-04-09 22:55:36 +00:00
for ( int i = 0 ; i < n_models - k ; + + i ) {
model = std : : move ( models [ 0 ] ) ;
models . erase ( models . begin ( ) ) ;
models_bak . push_back ( std : : move ( model ) ) ;
}
assert ( models . size ( ) = = k ) ;
2024-04-10 22:02:43 +00:00
// 5. n_models <- k
2024-04-09 22:55:36 +00:00
n_models = k ;
2024-04-10 22:02:43 +00:00
// 6. Make prediction, compute alpha, update weights
2024-04-09 22:55:36 +00:00
auto ypred = predict ( X_train ) ;
//
// Update weights
//
double alpha_t ;
bool terminate ;
std : : tie ( weights , alpha_t , terminate ) = update_weights ( y_train , ypred , weights ) ;
//
// Restore the models if needed
//
2024-04-10 22:02:43 +00:00
// 7. Insert classifiers in models_bak to be the first n models
// if n_models_bak == k, don't restore any, because none of them were moved
2024-04-09 22:55:36 +00:00
if ( k ! = n_models_bak ) {
2024-04-10 22:02:43 +00:00
// Insert in the same order as they were extracted
int bak_size = models_bak . size ( ) ;
for ( int i = 0 ; i < bak_size ; + + i ) {
model = std : : move ( models_bak [ bak_size - 1 - i ] ) ;
models_bak . erase ( models_bak . end ( ) - 1 ) ;
2024-04-09 22:55:36 +00:00
models . insert ( models . begin ( ) , std : : move ( model ) ) ;
}
}
2024-04-10 22:02:43 +00:00
// 8. significances <- significances_bak
2024-04-09 22:55:36 +00:00
significanceModels = significance_bak ;
//
// Update the significance of the last k models
//
2024-04-10 22:02:43 +00:00
// 9. Update last k significances
2024-04-09 22:55:36 +00:00
for ( int i = 0 ; i < k ; + + i ) {
2024-04-10 22:02:43 +00:00
significanceModels [ n_models_bak - k + i ] = alpha_t ;
2024-04-09 22:55:36 +00:00
}
2024-04-10 22:02:43 +00:00
// 10. n_models <- n_models_bak
n_models = n_models_bak ;
2024-04-09 22:55:36 +00:00
return { weights , alpha_t , terminate } ;
}
2024-03-20 22:33:02 +00:00
std : : vector < int > BoostAODE : : initializeModels ( )
2023-10-10 09:52:39 +00:00
{
2024-03-20 22:33:02 +00:00
std : : vector < int > featuresUsed ;
2023-11-08 17:45:35 +00:00
torch : : Tensor weights_ = torch : : full ( { m } , 1.0 / m , torch : : kFloat64 ) ;
2023-10-13 10:29:25 +00:00
int maxFeatures = 0 ;
2024-03-05 10:05:11 +00:00
if ( select_features_algorithm = = SelectFeatures . CFS ) {
2023-10-14 11:12:04 +00:00
featureSelector = new CFS ( dataset , features , className , maxFeatures , states . at ( className ) . size ( ) , weights_ ) ;
2024-03-05 10:05:11 +00:00
} else if ( select_features_algorithm = = SelectFeatures . IWSS ) {
2023-10-14 11:12:04 +00:00
if ( threshold < 0 | | threshold > 0.5 ) {
2024-03-05 10:05:11 +00:00
throw std : : invalid_argument ( " Invalid threshold value for " + SelectFeatures . IWSS + " [0, 0.5] " ) ;
2023-10-14 11:12:04 +00:00
}
featureSelector = new IWSS ( dataset , features , className , maxFeatures , states . at ( className ) . size ( ) , weights_ , threshold ) ;
2024-03-05 10:05:11 +00:00
} else if ( select_features_algorithm = = SelectFeatures . FCBF ) {
2023-10-14 11:12:04 +00:00
if ( threshold < 1e-7 | | threshold > 1 ) {
2024-03-05 10:05:11 +00:00
throw std : : invalid_argument ( " Invalid threshold value for " + SelectFeatures . FCBF + " [1e-7, 1] " ) ;
2023-10-14 11:12:04 +00:00
}
featureSelector = new FCBF ( dataset , features , className , maxFeatures , states . at ( className ) . size ( ) , weights_ , threshold ) ;
}
featureSelector - > fit ( ) ;
auto cfsFeatures = featureSelector - > getFeatures ( ) ;
2024-04-02 20:53:00 +00:00
auto scores = featureSelector - > getScores ( ) ;
for ( int i = 0 ; i < cfsFeatures . size ( ) ; + + i ) {
LOG_F ( INFO , " Feature: %d Score: %f " , cfsFeatures [ i ] , scores [ i ] ) ;
}
2023-10-13 11:46:22 +00:00
for ( const int & feature : cfsFeatures ) {
2024-03-20 22:33:02 +00:00
featuresUsed . push_back ( feature ) ;
2023-11-08 17:45:35 +00:00
std : : unique_ptr < Classifier > model = std : : make_unique < SPODE > ( feature ) ;
2023-10-13 11:46:22 +00:00
model - > fit ( dataset , features , className , states , weights_ ) ;
models . push_back ( std : : move ( model ) ) ;
2024-04-09 22:55:36 +00:00
significanceModels . push_back ( 1.0 ) ; // They will be updated later in trainModel
2023-10-13 11:46:22 +00:00
n_models + + ;
2023-10-10 09:52:39 +00:00
}
2024-02-26 16:07:57 +00:00
notes . push_back ( " Used features in initialization: " + std : : to_string ( featuresUsed . size ( ) ) + " of " + std : : to_string ( features . size ( ) ) + " with " + select_features_algorithm ) ;
2023-10-14 11:12:04 +00:00
delete featureSelector ;
2023-10-11 09:33:29 +00:00
return featuresUsed ;
2023-10-10 09:52:39 +00:00
}
2023-08-16 17:05:18 +00:00
void BoostAODE : : trainModel ( const torch : : Tensor & weights )
{
2024-03-20 10:30:02 +00:00
//
// Logging setup
//
loguru : : set_thread_name ( " BoostAODE " ) ;
loguru : : g_stderr_verbosity = loguru : : Verbosity_OFF ; ;
loguru : : add_file ( " boostAODE.log " , loguru : : Truncate , loguru : : Verbosity_MAX ) ;
2024-03-06 16:04:16 +00:00
// Algorithm based on the adaboost algorithm for classification
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
2024-02-20 09:11:22 +00:00
fitted = true ;
2024-03-05 11:10:58 +00:00
double alpha_t = 0 ;
torch : : Tensor weights_ = torch : : full ( { m } , 1.0 / m , torch : : kFloat64 ) ;
2024-03-19 13:13:40 +00:00
bool finished = false ;
2024-03-20 22:33:02 +00:00
std : : vector < int > featuresUsed ;
2023-10-14 11:12:04 +00:00
if ( selectFeatures ) {
2023-10-11 09:33:29 +00:00
featuresUsed = initializeModels ( ) ;
2024-03-05 11:10:58 +00:00
auto ypred = predict ( X_train ) ;
2024-03-19 13:13:40 +00:00
std : : tie ( weights_ , alpha_t , finished ) = update_weights ( y_train , ypred , weights_ ) ;
2024-03-05 11:10:58 +00:00
// Update significance of the models
for ( int i = 0 ; i < n_models ; + + i ) {
significanceModels [ i ] = alpha_t ;
}
2024-03-19 13:13:40 +00:00
if ( finished ) {
2024-03-05 11:10:58 +00:00
return ;
}
2024-03-21 10:23:41 +00:00
LOG_F ( INFO , " Initial models: %d " , n_models ) ;
LOG_F ( INFO , " Significances: " ) ;
for ( int i = 0 ; i < n_models ; + + i ) {
LOG_F ( INFO , " i=%d significance=%f " , i , significanceModels [ i ] ) ;
}
2023-10-11 09:33:29 +00:00
}
2024-03-20 10:30:02 +00:00
int numItemsPack = 0 ; // The counter of the models inserted in the current pack
2023-09-07 09:27:35 +00:00
// Variables to control the accuracy finish condition
double priorAccuracy = 0.0 ;
2024-03-20 22:33:02 +00:00
double improvement = 1.0 ;
2024-02-19 21:58:15 +00:00
double convergence_threshold = 1e-4 ;
2024-03-19 13:13:40 +00:00
int tolerance = 0 ; // number of times the accuracy is lower than the convergence_threshold
2023-08-18 09:50:34 +00:00
// Step 0: Set the finish condition
2023-10-25 08:23:42 +00:00
// epsilon sub t > 0.5 => inverse the weights policy
2023-09-07 09:27:35 +00:00
// validation error is not decreasing
2024-03-20 10:30:02 +00:00
// run out of features
2024-03-05 10:05:11 +00:00
bool ascending = order_algorithm = = Orders . ASC ;
2024-02-26 16:07:57 +00:00
std : : mt19937 g { 173 } ;
2024-03-19 13:13:40 +00:00
while ( ! finished ) {
2023-08-18 09:50:34 +00:00
// Step 1: Build ranking with mutual information
2023-08-20 18:31:23 +00:00
auto featureSelection = metrics . SelectKBestWeighted ( weights_ , ascending , n ) ; // Get all the features sorted
2024-03-21 18:24:51 +00:00
VLOG_SCOPE_F ( 1 , " featureSelection.size: %zu featuresUsed.size: %zu " , featureSelection . size ( ) , featuresUsed . size ( ) ) ;
2024-03-05 10:05:11 +00:00
if ( order_algorithm = = Orders . RAND ) {
2024-02-26 16:07:57 +00:00
std : : shuffle ( featureSelection . begin ( ) , featureSelection . end ( ) , g ) ;
}
2024-03-19 08:42:03 +00:00
// Remove used features
featureSelection . erase ( remove_if ( begin ( featureSelection ) , end ( featureSelection ) , [ & ] ( auto x )
2024-03-20 10:30:02 +00:00
{ return std : : find ( begin ( featuresUsed ) , end ( featuresUsed ) , x ) ! = end ( featuresUsed ) ; } ) ,
2024-03-19 08:42:03 +00:00
end ( featureSelection )
) ;
2024-03-19 13:13:40 +00:00
int k = pow ( 2 , tolerance ) ;
2024-03-20 10:30:02 +00:00
int counter = 0 ; // The model counter of the current pack
2024-04-10 22:02:43 +00:00
VLOG_SCOPE_F ( 1 , " counter=%d k=%d featureSelection.size: %zu " , counter , k , featureSelection . size ( ) ) ;
2024-03-20 10:30:02 +00:00
while ( counter + + < k & & featureSelection . size ( ) > 0 ) {
2024-03-19 13:13:40 +00:00
auto feature = featureSelection [ 0 ] ;
featureSelection . erase ( featureSelection . begin ( ) ) ;
std : : unique_ptr < Classifier > model ;
model = std : : make_unique < SPODE > ( feature ) ;
model - > fit ( dataset , features , className , states , weights_ ) ;
2024-04-09 22:55:36 +00:00
alpha_t = 0.0 ;
if ( ! block_update ) {
auto ypred = model - > predict ( X_train ) ;
// Step 3.1: Compute the classifier amout of say
std : : tie ( weights_ , alpha_t , finished ) = update_weights ( y_train , ypred , weights_ ) ;
if ( finished ) {
VLOG_SCOPE_F ( 2 , " ** epsilon_t > 0.5 ** " ) ;
break ;
}
2024-03-19 13:13:40 +00:00
}
// Step 3.4: Store classifier and its accuracy to weigh its future vote
2024-03-20 10:30:02 +00:00
numItemsPack + + ;
2024-03-20 22:33:02 +00:00
featuresUsed . push_back ( feature ) ;
2024-03-19 13:13:40 +00:00
models . push_back ( std : : move ( model ) ) ;
significanceModels . push_back ( alpha_t ) ;
n_models + + ;
2024-03-21 18:24:51 +00:00
VLOG_SCOPE_F ( 2 , " numItemsPack: %d n_models: %d featuresUsed: %zu " , numItemsPack , n_models , featuresUsed . size ( ) ) ;
2024-02-20 09:11:22 +00:00
}
2024-04-09 22:55:36 +00:00
if ( block_update ) {
std : : tie ( weights_ , alpha_t , finished ) = update_weights_block ( k , y_train , weights_ ) ;
}
2024-03-20 10:30:02 +00:00
if ( convergence & & ! finished ) {
2023-09-10 17:50:36 +00:00
auto y_val_predict = predict ( X_test ) ;
double accuracy = ( y_val_predict = = y_test ) . sum ( ) . item < double > ( ) / ( double ) y_test . size ( 0 ) ;
if ( priorAccuracy = = 0 ) {
priorAccuracy = accuracy ;
2024-03-20 10:30:02 +00:00
VLOG_SCOPE_F ( 3 , " First accuracy: %f " , priorAccuracy ) ;
2023-09-10 17:50:36 +00:00
} else {
2024-03-20 22:33:02 +00:00
improvement = accuracy - priorAccuracy ;
2023-09-10 17:50:36 +00:00
}
2024-03-20 22:33:02 +00:00
if ( improvement < convergence_threshold ) {
VLOG_SCOPE_F ( 3 , " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f " , tolerance , numItemsPack , improvement , priorAccuracy , accuracy ) ;
2024-03-19 13:13:40 +00:00
tolerance + + ;
2024-03-11 20:30:01 +00:00
} else {
2024-03-20 22:33:02 +00:00
VLOG_SCOPE_F ( 3 , " *(improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f " , tolerance , numItemsPack , improvement , priorAccuracy , accuracy ) ;
2024-03-19 13:13:40 +00:00
tolerance = 0 ; // Reset the counter if the model performs better
2024-03-20 10:30:02 +00:00
numItemsPack = 0 ;
2023-09-10 17:50:36 +00:00
}
2024-03-19 13:13:40 +00:00
// Keep the best accuracy until now as the prior accuracy
2024-03-20 22:33:02 +00:00
priorAccuracy = std : : max ( accuracy , priorAccuracy ) ;
// priorAccuracy = accuracy;
2023-09-07 09:27:35 +00:00
}
2024-03-21 18:24:51 +00:00
VLOG_SCOPE_F ( 1 , " tolerance: %d featuresUsed.size: %zu features.size: %zu " , tolerance , featuresUsed . size ( ) , features . size ( ) ) ;
2024-03-20 10:30:02 +00:00
finished = finished | | tolerance > maxTolerance | | featuresUsed . size ( ) = = features . size ( ) ;
2023-08-20 18:31:23 +00:00
}
2024-03-20 10:30:02 +00:00
if ( tolerance > maxTolerance ) {
if ( numItemsPack < n_models ) {
notes . push_back ( " Convergence threshold reached & " + std : : to_string ( numItemsPack ) + " models eliminated " ) ;
VLOG_SCOPE_F ( 4 , " Convergence threshold reached & %d models eliminated of %d " , numItemsPack , n_models ) ;
for ( int i = 0 ; i < numItemsPack ; + + i ) {
significanceModels . pop_back ( ) ;
models . pop_back ( ) ;
n_models - - ;
}
} else {
VLOG_SCOPE_F ( 4 , " Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d " , n_models , numItemsPack ) ;
notes . push_back ( " Convergence threshold reached & 0 models eliminated " ) ;
2024-03-19 13:13:40 +00:00
}
2024-03-11 21:33:50 +00:00
}
2023-08-20 18:31:23 +00:00
if ( featuresUsed . size ( ) ! = features . size ( ) ) {
2024-02-08 17:01:09 +00:00
notes . push_back ( " Used features in train: " + std : : to_string ( featuresUsed . size ( ) ) + " of " + std : : to_string ( features . size ( ) ) ) ;
2023-09-05 11:39:43 +00:00
status = WARNING ;
2023-08-16 17:05:18 +00:00
}
2024-02-12 09:58:20 +00:00
notes . push_back ( " Number of models: " + std : : to_string ( n_models ) ) ;
2023-08-16 17:05:18 +00:00
}
2023-11-08 17:45:35 +00:00
std : : vector < std : : string > BoostAODE : : graph ( const std : : string & title ) const
2023-08-15 14:16:04 +00:00
{
return Ensemble : : graph ( title ) ;
}
}