diff --git a/.vscode/launch.json b/.vscode/launch.json index c14865d..300bfba 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,7 +8,7 @@ "program": "${workspaceFolder}/build_release/sample/bayesnet_sample", "args": [ "${workspaceFolder}/tests/data/glass.arff" - ], + ] }, { "type": "lldb", @@ -16,11 +16,33 @@ "name": "test", "program": "${workspaceFolder}/build_debug/tests/TestBayesNet", "args": [ - "[Network]" - //"-c=\"Metrics Test\"", - // "-s", + "Block Update" ], - "cwd": "${workspaceFolder}/build_debug/tests", + "cwd": "${workspaceFolder}/build_debug/tests" + }, + { + "name": "(gdb) Launch", + "type": "cppdbg", + "request": "launch", + "program": "enter program name, for example ${workspaceFolder}/a.out", + "args": [], + "stopAtEntry": false, + "cwd": "${fileDirname}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + }, + { + "description": "Set Disassembly Flavor to Intel", + "text": "-gdb-set disassembly-flavor intel", + "ignoreFailures": true + } + ] } ] } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index e258e89..13e831e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,18 +10,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Install command and instructions in README.md +- Prefix to install command to install the package in any location. +- The 'block_update' hyperparameter to the BoostAODE class, to control the way weights/significances are updated. Default value is false. +- HTML report of coverage in the coverage folder. It is created with *make viewcoverage* +- Badges of coverage and code quality (codacy) in README.md. Coverage badge is updated with *make viewcoverage* +- Tests to reach 97% of coverage. +- Copyright header to source files. 
### Changed - Sample app now is a separate target in the Makefile and shows how to use the library with a sample dataset - The worse model count in BoostAODE is reset to 0 every time a new model produces better accuracy, so the tolerance of the model is meant to be the number of **consecutive** models that produce worse accuracy. +- Default hyperparameter values in BoostAODE: bisection is true, maxTolerance is 3, convergence is true ## [1.0.4] 2024-03-06 ### Added -- Change _ascending_ hyperparameter to _order_ with these possible values _{"asc", "desc", "rand"}_, Default is _"desc"_. -- Add the _predict_single_ hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true. +- Change *ascending* hyperparameter to *order* with these possible values *{"asc", "desc", "rand"}*, Default is *"desc"*. +- Add the *predict_single* hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true. - sample app to show how to use the library (make sample) ### Changed diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b9d120..45c0972 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.20) project(BayesNet - VERSION 1.0.4 + VERSION 1.0.4.1 DESCRIPTION "Bayesian Network and basic classifiers Library." 
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet" LANGUAGES CXX diff --git a/Makefile b/Makefile index c43c01e..58707a5 100644 --- a/Makefile +++ b/Makefile @@ -53,9 +53,10 @@ uninstall: ## Uninstall library xargs rm < $(f_release)/install_manifest.txt @echo ">>> Done"; +prefix = "/usr/local" install: ## Install library @echo ">>> Installing BayesNet..."; - @cmake --install $(f_release) + @cmake --install $(f_release) --prefix $(prefix) @echo ">>> Done"; debug: ## Build a debug version of the project diff --git a/README.md b/README.md index a33a6e6..3db71e2 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea) -![Static Badge](https://img.shields.io/badge/Coverage-95,8%25-green) +![Static Badge](https://img.shields.io/badge/Coverage-97,2%25-green) Bayesian Network Classifiers using libtorch from scratch diff --git a/bayesnet/BaseClassifier.h b/bayesnet/BaseClassifier.h index c237349..f8b4c84 100644 --- a/bayesnet/BaseClassifier.h +++ b/bayesnet/BaseClassifier.h @@ -1,5 +1,10 @@ -#ifndef BASE_H -#define BASE_H +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + +#pragma once #include #include #include @@ -37,5 +42,4 @@ namespace bayesnet { virtual void trainModel(const torch::Tensor& weights) = 0; std::vector validHyperparameters; }; -} -#endif \ No newline at end of file +} \ No 
newline at end of file diff --git a/bayesnet/classifiers/Classifier.cc b/bayesnet/classifiers/Classifier.cc index a8bf6ef..8d7ba15 100644 --- a/bayesnet/classifiers/Classifier.cc +++ b/bayesnet/classifiers/Classifier.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include "bayesnet/utils/bayesnetUtils.h" #include "Classifier.h" diff --git a/bayesnet/classifiers/Classifier.h b/bayesnet/classifiers/Classifier.h index 2511c4d..0349bcd 100644 --- a/bayesnet/classifiers/Classifier.h +++ b/bayesnet/classifiers/Classifier.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef CLASSIFIER_H #define CLASSIFIER_H #include diff --git a/bayesnet/classifiers/KDB.cc b/bayesnet/classifiers/KDB.cc index 6c4bb99..e9582c8 100644 --- a/bayesnet/classifiers/KDB.cc +++ b/bayesnet/classifiers/KDB.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "KDB.h" namespace bayesnet { diff --git a/bayesnet/classifiers/KDB.h b/bayesnet/classifiers/KDB.h index 17c2a1f..a8d6cab 100644 --- a/bayesnet/classifiers/KDB.h +++ b/bayesnet/classifiers/KDB.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// 
*************************************************************** + #ifndef KDB_H #define KDB_H #include diff --git a/bayesnet/classifiers/KDBLd.cc b/bayesnet/classifiers/KDBLd.cc index 8f17901..9f1647c 100644 --- a/bayesnet/classifiers/KDBLd.cc +++ b/bayesnet/classifiers/KDBLd.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "KDBLd.h" namespace bayesnet { diff --git a/bayesnet/classifiers/KDBLd.h b/bayesnet/classifiers/KDBLd.h index 3597576..9150bba 100644 --- a/bayesnet/classifiers/KDBLd.h +++ b/bayesnet/classifiers/KDBLd.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef KDBLD_H #define KDBLD_H #include "Proposal.h" diff --git a/bayesnet/classifiers/Proposal.cc b/bayesnet/classifiers/Proposal.cc index aef27bf..3ee9cda 100644 --- a/bayesnet/classifiers/Proposal.cc +++ b/bayesnet/classifiers/Proposal.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include "Proposal.h" diff --git a/bayesnet/classifiers/Proposal.h b/bayesnet/classifiers/Proposal.h index 59e8451..6e7c351 100644 --- a/bayesnet/classifiers/Proposal.h +++ b/bayesnet/classifiers/Proposal.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: 
MIT +// *************************************************************** + #ifndef PROPOSAL_H #define PROPOSAL_H #include diff --git a/bayesnet/classifiers/SPODE.cc b/bayesnet/classifiers/SPODE.cc index 038c87e..7736e7e 100644 --- a/bayesnet/classifiers/SPODE.cc +++ b/bayesnet/classifiers/SPODE.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "SPODE.h" namespace bayesnet { diff --git a/bayesnet/classifiers/SPODE.h b/bayesnet/classifiers/SPODE.h index 96b7834..7ecff63 100644 --- a/bayesnet/classifiers/SPODE.h +++ b/bayesnet/classifiers/SPODE.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef SPODE_H #define SPODE_H #include "Classifier.h" diff --git a/bayesnet/classifiers/SPODELd.cc b/bayesnet/classifiers/SPODELd.cc index d41471d..98c41ff 100644 --- a/bayesnet/classifiers/SPODELd.cc +++ b/bayesnet/classifiers/SPODELd.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "SPODELd.h" namespace bayesnet { diff --git a/bayesnet/classifiers/SPODELd.h b/bayesnet/classifiers/SPODELd.h index 001dc73..f24a030 100644 --- a/bayesnet/classifiers/SPODELd.h +++ b/bayesnet/classifiers/SPODELd.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// 
SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef SPODELD_H #define SPODELD_H #include "SPODE.h" diff --git a/bayesnet/classifiers/TAN.cc b/bayesnet/classifiers/TAN.cc index 39f071e..d2be0c7 100644 --- a/bayesnet/classifiers/TAN.cc +++ b/bayesnet/classifiers/TAN.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "TAN.h" namespace bayesnet { diff --git a/bayesnet/classifiers/TAN.h b/bayesnet/classifiers/TAN.h index ecb803d..00d50f9 100644 --- a/bayesnet/classifiers/TAN.h +++ b/bayesnet/classifiers/TAN.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef TAN_H #define TAN_H #include "Classifier.h" diff --git a/bayesnet/classifiers/TANLd.cc b/bayesnet/classifiers/TANLd.cc index b7f563c..ab86dc4 100644 --- a/bayesnet/classifiers/TANLd.cc +++ b/bayesnet/classifiers/TANLd.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "TANLd.h" namespace bayesnet { diff --git a/bayesnet/classifiers/TANLd.h b/bayesnet/classifiers/TANLd.h index 88b1d65..e6c3c75 100644 --- a/bayesnet/classifiers/TANLd.h +++ b/bayesnet/classifiers/TANLd.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// 
SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef TANLD_H #define TANLD_H #include "TAN.h" diff --git a/bayesnet/ensembles/AODE.cc b/bayesnet/ensembles/AODE.cc index 22b17b8..3a9ed61 100644 --- a/bayesnet/ensembles/AODE.cc +++ b/bayesnet/ensembles/AODE.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "AODE.h" namespace bayesnet { diff --git a/bayesnet/ensembles/AODE.h b/bayesnet/ensembles/AODE.h index ba7aa99..e74bddb 100644 --- a/bayesnet/ensembles/AODE.h +++ b/bayesnet/ensembles/AODE.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef AODE_H #define AODE_H #include "bayesnet/classifiers/SPODE.h" diff --git a/bayesnet/ensembles/AODELd.cc b/bayesnet/ensembles/AODELd.cc index 8fdc033..28fc793 100644 --- a/bayesnet/ensembles/AODELd.cc +++ b/bayesnet/ensembles/AODELd.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "AODELd.h" namespace bayesnet { diff --git a/bayesnet/ensembles/AODELd.h b/bayesnet/ensembles/AODELd.h index f257945..9c87090 100644 --- a/bayesnet/ensembles/AODELd.h +++ b/bayesnet/ensembles/AODELd.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// 
SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef AODELD_H #define AODELD_H #include "bayesnet/classifiers/Proposal.h" diff --git a/bayesnet/ensembles/BoostAODE.cc b/bayesnet/ensembles/BoostAODE.cc index 9e4a856..f714985 100644 --- a/bayesnet/ensembles/BoostAODE.cc +++ b/bayesnet/ensembles/BoostAODE.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include #include @@ -16,7 +22,7 @@ namespace bayesnet { { validHyperparameters = { "maxModels", "bisection", "order", "convergence", "threshold", - "select_features", "maxTolerance", "predict_voting" + "select_features", "maxTolerance", "predict_voting", "block_update" }; } @@ -94,6 +100,10 @@ namespace bayesnet { } hyperparameters.erase("select_features"); } + if (hyperparameters.contains("block_update")) { + block_update = hyperparameters["block_update"]; + hyperparameters.erase("block_update"); + } Classifier::setHyperparameters(hyperparameters); } std::tuple update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights) @@ -123,6 +133,103 @@ namespace bayesnet { } return { weights, alpha_t, terminate }; } + std::tuple BoostAODE::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights) + { + /* Update Block algorithm + k = # of models in block + n_models = # of models in ensemble to make predictions + n_models_bak = # models saved + models = vector of models to make predictions + models_bak = models not used to make predictions + significances_bak = backup of significances vector + + Case list + A) k = 1, n_models = 1 => n = 0 , n_models = n + k + B) k = 1, n_models = n + 1 => n_models = n + k + C) k > 1, n_models = k + 1 => n= 1, n_models = n + k + D) k > 1, n_models = k => n = 0, 
n_models = n + k + E) k > 1, n_models = k + n => n_models = n + k + + A, D) n=0, k > 0, n_models == k + 1. n_models_bak <- n_models + 2. significances_bak <- significances + 3. significances = vector(k, 1) + 4. Don’t move any classifiers out of models + 5. n_models <- k + 6. Make prediction, compute alpha, update weights + 7. Don’t restore any classifiers to models + 8. significances <- significances_bak + 9. Update last k significances + 10. n_models <- n_models_bak + + B, C, E) n > 0, k > 0, n_models == n + k + 1. n_models_bak <- n_models + 2. significances_bak <- significances + 3. significances = vector(k, 1) + 4. Move first n classifiers to models_bak + 5. n_models <- k + 6. Make prediction, compute alpha, update weights + 7. Insert classifiers in models_bak to be the first n models + 8. significances <- significances_bak + 9. Update last k significances + 10. n_models <- n_models_bak + */ + // + // Make predict with only the last k models + // + std::unique_ptr model; + std::vector> models_bak; + // 1. n_models_bak <- n_models 2. significances_bak <- significances + auto significance_bak = significanceModels; + auto n_models_bak = n_models; + // 3. significances = vector(k, 1) + significanceModels = std::vector(k, 1.0); + // 4. Move first n classifiers to models_bak + // backup the first n_models - k models (if n_models == k, don't backup any) + VLOG_SCOPE_F(1, "upd_weights_block n_models=%d k=%d", n_models, k); + for (int i = 0; i < n_models - k; ++i) { + model = std::move(models[0]); + models.erase(models.begin()); + models_bak.push_back(std::move(model)); + } + assert(models.size() == k); + // 5. n_models <- k + n_models = k; + // 6. Make prediction, compute alpha, update weights + auto ypred = predict(X_train); + // + // Update weights + // + double alpha_t; + bool terminate; + std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights); + // + // Restore the models if needed + // + // 7. 
Insert classifiers in models_bak to be the first n models + // if n_models_bak == k, don't restore any, because none of them were moved + if (k != n_models_bak) { + // Insert in the same order as they were extracted + int bak_size = models_bak.size(); + for (int i = 0; i < bak_size; ++i) { + model = std::move(models_bak[bak_size - 1 - i]); + models_bak.erase(models_bak.end() - 1); + models.insert(models.begin(), std::move(model)); + } + } + // 8. significances <- significances_bak + significanceModels = significance_bak; + // + // Update the significance of the last k models + // + // 9. Update last k significances + for (int i = 0; i < k; ++i) { + significanceModels[n_models_bak - k + i] = alpha_t; + } + // 10. n_models <- n_models_bak + n_models = n_models_bak; + return { weights, alpha_t, terminate }; + } std::vector BoostAODE::initializeModels() { std::vector featuresUsed; @@ -152,7 +259,7 @@ namespace bayesnet { std::unique_ptr model = std::make_unique(feature); model->fit(dataset, features, className, states, weights_); models.push_back(std::move(model)); - significanceModels.push_back(1.0); + significanceModels.push_back(1.0); // They will be updated later in trainModel n_models++; } notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm); @@ -217,21 +324,22 @@ namespace bayesnet { ); int k = pow(2, tolerance); int counter = 0; // The model counter of the current pack - VLOG_SCOPE_F(1, "k=%d featureSelection.size: %zu", k, featureSelection.size()); + VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size()); while (counter++ < k && featureSelection.size() > 0) { - VLOG_SCOPE_F(2, "counter: %d numItemsPack: %d", counter, numItemsPack); auto feature = featureSelection[0]; featureSelection.erase(featureSelection.begin()); std::unique_ptr model; model = std::make_unique(feature); model->fit(dataset, 
features, className, states, weights_); - torch::Tensor ypred; - ypred = model->predict(X_train); - // Step 3.1: Compute the classifier amout of say - std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); - if (finished) { - VLOG_SCOPE_F(2, "** epsilon_t > 0.5 **"); - break; + alpha_t = 0.0; + if (!block_update) { + auto ypred = model->predict(X_train); + // Step 3.1: Compute the classifier amount of say + std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); + if (finished) { + VLOG_SCOPE_F(2, "** epsilon_t > 0.5 **"); + break; + } } // Step 3.4: Store classifier and its accuracy to weigh its future vote numItemsPack++; @@ -241,6 +349,9 @@ namespace bayesnet { n_models++; VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size()); } + if (block_update) { + std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_); + } if (convergence && !finished) { auto y_val_predict = predict(X_test); double accuracy = (y_val_predict == y_test).sum().item() / (double)y_test.size(0); diff --git a/bayesnet/ensembles/BoostAODE.h b/bayesnet/ensembles/BoostAODE.h index f4091df..7150db9 100644 --- a/bayesnet/ensembles/BoostAODE.h +++ b/bayesnet/ensembles/BoostAODE.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef BOOSTAODE_H #define BOOSTAODE_H #include @@ -25,17 +31,19 @@ namespace bayesnet { void buildModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override; private: + std::tuple update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights); std::vector initializeModels(); torch::Tensor X_train, y_train, X_test, y_test; // Hyperparameters - bool bisection = 
false; // if true, use bisection stratety to add k models at once to the ensemble - int maxTolerance = 1; + bool bisection = true; // if true, use bisection strategy to add k models at once to the ensemble + int maxTolerance = 3; std::string order_algorithm; // order to process the KBest features asc, desc, rand - bool convergence = false; //if true, stop when the model does not improve + bool convergence = true; //if true, stop when the model does not improve bool selectFeatures = false; // if true, use feature selection std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm FeatureSelect* featureSelector = nullptr; double threshold = -1; + bool block_update = false; }; } #endif \ No newline at end of file diff --git a/bayesnet/ensembles/Ensemble.cc b/bayesnet/ensembles/Ensemble.cc index b0b8d9c..fa2c271 100644 --- a/bayesnet/ensembles/Ensemble.cc +++ b/bayesnet/ensembles/Ensemble.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "Ensemble.h" namespace bayesnet { diff --git a/bayesnet/ensembles/Ensemble.h b/bayesnet/ensembles/Ensemble.h index bab4d25..2c072a8 100644 --- a/bayesnet/ensembles/Ensemble.h +++ b/bayesnet/ensembles/Ensemble.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef ENSEMBLE_H #define ENSEMBLE_H #include diff --git a/bayesnet/feature_selection/CFS.cc b/bayesnet/feature_selection/CFS.cc index d7b55ca..ea83f1b 100644 --- a/bayesnet/feature_selection/CFS.cc +++ b/bayesnet/feature_selection/CFS.cc @@ -1,3 +1,9 @@ +// 
*************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include "bayesnet/utils/bayesnetUtils.h" #include "CFS.h" diff --git a/bayesnet/feature_selection/CFS.h b/bayesnet/feature_selection/CFS.h index 0d44e2b..fe94162 100644 --- a/bayesnet/feature_selection/CFS.h +++ b/bayesnet/feature_selection/CFS.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef CFS_H #define CFS_H #include diff --git a/bayesnet/feature_selection/FCBF.cc b/bayesnet/feature_selection/FCBF.cc index ebf6d7a..e79664f 100644 --- a/bayesnet/feature_selection/FCBF.cc +++ b/bayesnet/feature_selection/FCBF.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "bayesnet/utils/bayesnetUtils.h" #include "FCBF.h" namespace bayesnet { diff --git a/bayesnet/feature_selection/FCBF.h b/bayesnet/feature_selection/FCBF.h index 8ec3a8f..a71cc8c 100644 --- a/bayesnet/feature_selection/FCBF.h +++ b/bayesnet/feature_selection/FCBF.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef FCBF_H #define FCBF_H #include diff --git a/bayesnet/feature_selection/FeatureSelect.cc 
b/bayesnet/feature_selection/FeatureSelect.cc index 54dcfb0..8e70591 100644 --- a/bayesnet/feature_selection/FeatureSelect.cc +++ b/bayesnet/feature_selection/FeatureSelect.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include "bayesnet/utils/bayesnetUtils.h" #include "FeatureSelect.h" diff --git a/bayesnet/feature_selection/FeatureSelect.h b/bayesnet/feature_selection/FeatureSelect.h index fbd3abc..170cb4e 100644 --- a/bayesnet/feature_selection/FeatureSelect.h +++ b/bayesnet/feature_selection/FeatureSelect.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef FEATURE_SELECT_H #define FEATURE_SELECT_H #include diff --git a/bayesnet/feature_selection/IWSS.cc b/bayesnet/feature_selection/IWSS.cc index 8ed3d76..8b09166 100644 --- a/bayesnet/feature_selection/IWSS.cc +++ b/bayesnet/feature_selection/IWSS.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include "bayesnet/utils/bayesnetUtils.h" #include "IWSS.h" diff --git a/bayesnet/feature_selection/IWSS.h b/bayesnet/feature_selection/IWSS.h index a990e60..268b353 100644 --- a/bayesnet/feature_selection/IWSS.h +++ b/bayesnet/feature_selection/IWSS.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// 
SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef IWSS_H #define IWSS_H #include diff --git a/bayesnet/network/Network.cc b/bayesnet/network/Network.cc index 77d4d49..04fa71b 100644 --- a/bayesnet/network/Network.cc +++ b/bayesnet/network/Network.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include #include diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index 79187ab..a87d5e1 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef NETWORK_H #define NETWORK_H #include diff --git a/bayesnet/network/Node.cc b/bayesnet/network/Node.cc index 28408d4..69fda6a 100644 --- a/bayesnet/network/Node.cc +++ b/bayesnet/network/Node.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "Node.h" namespace bayesnet { diff --git a/bayesnet/network/Node.h b/bayesnet/network/Node.h index 81f6f73..7b883d4 100644 --- a/bayesnet/network/Node.h +++ b/bayesnet/network/Node.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// 
*************************************************************** + #ifndef NODE_H #define NODE_H #include diff --git a/bayesnet/utils/BayesMetrics.cc b/bayesnet/utils/BayesMetrics.cc index 6501e4f..5e041a7 100644 --- a/bayesnet/utils/BayesMetrics.cc +++ b/bayesnet/utils/BayesMetrics.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "Mst.h" #include "BayesMetrics.h" namespace bayesnet { diff --git a/bayesnet/utils/BayesMetrics.h b/bayesnet/utils/BayesMetrics.h index d6abf47..aa8b0d5 100644 --- a/bayesnet/utils/BayesMetrics.h +++ b/bayesnet/utils/BayesMetrics.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef BAYESNET_METRICS_H #define BAYESNET_METRICS_H #include diff --git a/bayesnet/utils/Mst.cc b/bayesnet/utils/Mst.cc index b0897b2..d1efd1b 100644 --- a/bayesnet/utils/Mst.cc +++ b/bayesnet/utils/Mst.cc @@ -1,3 +1,10 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + +#include #include #include #include "Mst.h" @@ -45,15 +52,6 @@ namespace bayesnet { } } } - void Graph::display_mst() - { - std::cout << "Edge :" << " Weight" << std::endl; - for (int i = 0; i < T.size(); i++) { - std::cout << T[i].second.first << " - " << T[i].second.second << " : " - << T[i].first; - std::cout << std::endl; - } - } void insertElement(std::list& variables, int variable) { diff --git 
a/bayesnet/utils/Mst.h b/bayesnet/utils/Mst.h index 9fa951b..5a2b8f7 100644 --- a/bayesnet/utils/Mst.h +++ b/bayesnet/utils/Mst.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef MST_H #define MST_H #include @@ -5,29 +11,28 @@ #include namespace bayesnet { class MST { - private: - torch::Tensor weights; - std::vector features; - int root = 0; public: MST() = default; MST(const std::vector& features, const torch::Tensor& weights, const int root); std::vector> maximumSpanningTree(); + private: + torch::Tensor weights; + std::vector features; + int root = 0; }; class Graph { - private: - int V; // number of nodes in graph - std::vector >> G; // std::vector for graph - std::vector >> T; // std::vector for mst - std::vector parent; public: explicit Graph(int V); void addEdge(int u, int v, float wt); int find_set(int i); void union_set(int u, int v); void kruskal_algorithm(); - void display_mst(); std::vector >> get_mst() { return T; } + private: + int V; // number of nodes in graph + std::vector >> G; // std::vector for graph + std::vector >> T; // std::vector for mst + std::vector parent; }; } #endif \ No newline at end of file diff --git a/bayesnet/utils/bayesnetUtils.cc b/bayesnet/utils/bayesnetUtils.cc index 5082a5f..bf60416 100644 --- a/bayesnet/utils/bayesnetUtils.cc +++ b/bayesnet/utils/bayesnetUtils.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "bayesnetUtils.h" namespace bayesnet { diff --git a/bayesnet/utils/bayesnetUtils.h b/bayesnet/utils/bayesnetUtils.h index b75e3f4..1049968 
100644 --- a/bayesnet/utils/bayesnetUtils.h +++ b/bayesnet/utils/bayesnetUtils.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef BAYESNET_UTILS_H #define BAYESNET_UTILS_H #include diff --git a/docs/BoostAODE.md b/docs/BoostAODE.md index 2ed66bf..57916c4 100644 --- a/docs/BoostAODE.md +++ b/docs/BoostAODE.md @@ -4,13 +4,15 @@ The hyperparameters defined in the algorithm are: -- ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *false*. +- ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *true*. - ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*. -- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is made by taking the first partition generated by a process of generating a 5 fold partition with stratification using a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *false*. 
+- ***block_update*** (*boolean*): Sets whether the algorithm will update the weights of the models in blocks. If set to false, the algorithm will update the weights of the models one by one. Default value: *false*. -- ***maxTolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *1*. if ***bisection*** is set to *true*, the value of this hyperparameter will be exponent of base 2 to compute the number of models to insert at once. +- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is made by taking the first partition generated by a process of generating a 5 fold partition with stratification using a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *true*. + +- ***maxTolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. If ***bisection*** is set to *true*, the value of this hyperparameter is used as the exponent of base 2 to compute the number of models to insert at once. Default value: *3*. - ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions.
Once the models of the selected variables are built, the algorithm will update the weights using the ensemble and set the significance of all the models built with the same αt. Default value: *""*. diff --git a/sample/lib/Files/ArffFiles.cc b/sample/lib/Files/ArffFiles.cc index 99f29bd..d333d1e 100644 --- a/sample/lib/Files/ArffFiles.cc +++ b/sample/lib/Files/ArffFiles.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "ArffFiles.h" #include #include diff --git a/sample/lib/Files/ArffFiles.h b/sample/lib/Files/ArffFiles.h index 25e5a8c..12206c5 100644 --- a/sample/lib/Files/ArffFiles.h +++ b/sample/lib/Files/ArffFiles.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef ARFFFILES_H #define ARFFFILES_H diff --git a/sample/lib/mdlp/CPPFImdlp.h b/sample/lib/mdlp/CPPFImdlp.h index 1fb0cab..4e69fe1 100644 --- a/sample/lib/mdlp/CPPFImdlp.h +++ b/sample/lib/mdlp/CPPFImdlp.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef CPPFIMDLP_H #define CPPFIMDLP_H diff --git a/sample/lib/mdlp/Metrics.h b/sample/lib/mdlp/Metrics.h index 4f8151a..d97a77f 100644 --- a/sample/lib/mdlp/Metrics.h +++ b/sample/lib/mdlp/Metrics.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// 
SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef CCMETRICS_H #define CCMETRICS_H diff --git a/sample/lib/mdlp/typesFImdlp.h b/sample/lib/mdlp/typesFImdlp.h index b28b2ca..b577bca 100644 --- a/sample/lib/mdlp/typesFImdlp.h +++ b/sample/lib/mdlp/typesFImdlp.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef TYPES_H #define TYPES_H diff --git a/sample/sample.cc b/sample/sample.cc index a6352e5..e58dab7 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include #include diff --git a/tests/TestBayesClassifier.cc b/tests/TestBayesClassifier.cc index 31fa660..9b2bb85 100644 --- a/tests/TestBayesClassifier.cc +++ b/tests/TestBayesClassifier.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include #include @@ -54,6 +60,13 @@ TEST_CASE("Invalid feature name", "[Classifier]") REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), std::invalid_argument); REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), "feature [petallength] not found in states"); } +TEST_CASE("Invalid hyperparameter", "[Classifier]") +{ + auto model = bayesnet::KDB(2); + auto raw = 
RawDatasets("iris", true); + REQUIRE_THROWS_AS(model.setHyperparameters({ { "alpha", "0.0" } }), std::invalid_argument); + REQUIRE_THROWS_WITH(model.setHyperparameters({ { "alpha", "0.0" } }), "Invalid hyperparameters{\"alpha\":\"0.0\"}"); +} TEST_CASE("Topological order", "[Classifier]") { auto model = bayesnet::TAN(); @@ -66,6 +79,14 @@ TEST_CASE("Topological order", "[Classifier]") REQUIRE(order[2] == "sepalwidth"); REQUIRE(order[3] == "petalwidth"); } +TEST_CASE("Dump_cpt", "[Classifier]") +{ + auto model = bayesnet::TAN(); + auto raw = RawDatasets("iris", true); + model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + auto cpt = model.dump_cpt(); + REQUIRE(cpt.size() == 1713); +} TEST_CASE("Not fitted model", "[Classifier]") { auto model = bayesnet::TAN(); diff --git a/tests/TestBayesEnsemble.cc b/tests/TestBayesEnsemble.cc index d009d11..9957ec3 100644 --- a/tests/TestBayesEnsemble.cc +++ b/tests/TestBayesEnsemble.cc @@ -1,8 +1,16 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include #include #include #include "bayesnet/ensembles/BoostAODE.h" +#include "bayesnet/ensembles/AODE.h" +#include "bayesnet/ensembles/AODELd.h" #include "TestUtils.h" @@ -33,6 +41,11 @@ TEST_CASE("Show", "[Ensemble]") { auto clf = bayesnet::BoostAODE(); auto raw = RawDatasets("iris", true); + clf.setHyperparameters({ + {"bisection", false}, + {"maxTolerance", 1}, + {"convergence", false}, + }); clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); std::vector expected = { "class -> sepallength, sepalwidth, petallength, petalwidth, ", @@ -68,6 +81,15 @@ TEST_CASE("Graph", "[Ensemble]") clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); auto graph = clf.graph(); REQUIRE(graph.size() == 56); + auto clf2 = 
bayesnet::AODE(); + clf2.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + graph = clf2.graph(); + REQUIRE(graph.size() == 56); + raw = RawDatasets("glass", false); + auto clf3 = bayesnet::AODELd(); + clf3.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + graph = clf3.graph(); + REQUIRE(graph.size() == 261); } TEST_CASE("Compute ArgMax", "[Ensemble]") { diff --git a/tests/TestBayesMetrics.cc b/tests/TestBayesMetrics.cc index 6d3f5ed..c06ed03 100644 --- a/tests/TestBayesMetrics.cc +++ b/tests/TestBayesMetrics.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include #include diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 55a932f..54a9ab3 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include #include @@ -14,7 +20,7 @@ #include "bayesnet/ensembles/BoostAODE.h" #include "TestUtils.h" -const std::string ACTUAL_VERSION = "1.0.4"; +const std::string ACTUAL_VERSION = "1.0.4.1"; TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") { @@ -52,6 +58,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") auto score = clf->score(raw.Xt, raw.yt); INFO("Classifier: " + name + " File: " + file_name); REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon)); + REQUIRE(clf->getStatus() == bayesnet::NORMAL); } } SECTION("Library check version") @@ -61,7 +68,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]") } 
delete clf; } -TEST_CASE("Models features", "[Models]") +TEST_CASE("Models features & Graph", "[Models]") { auto graph = std::vector({ "digraph BayesNet {\nlabel=\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n", "class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n", @@ -70,15 +77,30 @@ TEST_CASE("Models features", "[Models]") "sepallength -> sepalwidth", "sepalwidth [shape=circle] \n", "sepalwidth -> petalwidth", "}\n" } ); - auto raw = RawDatasets("iris", true); - auto clf = bayesnet::TAN(); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - REQUIRE(clf.getNumberOfNodes() == 5); - REQUIRE(clf.getNumberOfEdges() == 7); - REQUIRE(clf.getNumberOfStates() == 19); - REQUIRE(clf.getClassNumStates() == 3); - REQUIRE(clf.show() == std::vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); - REQUIRE(clf.graph("Test") == graph); + SECTION("Test TAN") + { + auto raw = RawDatasets("iris", true); + auto clf = bayesnet::TAN(); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + REQUIRE(clf.getNumberOfNodes() == 5); + REQUIRE(clf.getNumberOfEdges() == 7); + REQUIRE(clf.getNumberOfStates() == 19); + REQUIRE(clf.getClassNumStates() == 3); + REQUIRE(clf.show() == std::vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); + REQUIRE(clf.graph("Test") == graph); + } + SECTION("Test TANLd") + { + auto clf = bayesnet::TANLd(); + auto raw = RawDatasets("iris", false); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + REQUIRE(clf.getNumberOfNodes() == 5); + REQUIRE(clf.getNumberOfEdges() == 7); + REQUIRE(clf.getNumberOfStates() == 19); + REQUIRE(clf.getClassNumStates() == 3); + REQUIRE(clf.show() == std::vector{"class -> sepallength, 
sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); + REQUIRE(clf.graph("Test") == graph); + } } TEST_CASE("Get num features & num edges", "[Models]") { @@ -115,15 +137,15 @@ TEST_CASE("Model predict_proba", "[Models]") {0.003135, 0.991799, 0.0050661} }); auto res_prob_baode = std::vector>({ - {0.00803291, 0.9676, 0.0243672}, - {0.00398714, 0.945126, 0.050887}, - {0.00398714, 0.945126, 0.050887}, - {0.00398714, 0.945126, 0.050887}, - {0.00189227, 0.859575, 0.138533}, - {0.0118341, 0.442149, 0.546017}, - {0.0216135, 0.785781, 0.192605}, - {0.0204803, 0.844276, 0.135244}, - {0.00576313, 0.961665, 0.0325716}, + {0.0112349, 0.962274, 0.0264907}, + {0.00371025, 0.950592, 0.0456973}, + {0.00371025, 0.950592, 0.0456973}, + {0.00371025, 0.950592, 0.0456973}, + {0.00369275, 0.84967, 0.146637}, + {0.0252205, 0.113564, 0.861215}, + {0.0284828, 0.770524, 0.200993}, + {0.0213182, 0.857189, 0.121493}, + {0.00868436, 0.949494, 0.0418215} }); auto res_prob_voting = std::vector>({ {0, 1, 0}, @@ -131,8 +153,8 @@ TEST_CASE("Model predict_proba", "[Models]") {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, - {0, 0.447909, 0.552091}, - {0, 0.811482, 0.188517}, + {0, 0, 1}, + {0, 1, 0}, {0, 1, 0}, {0, 1, 0} }); @@ -155,7 +177,7 @@ TEST_CASE("Model predict_proba", "[Models]") REQUIRE(y_pred.size() == raw.yv.size()); REQUIRE(y_pred_proba[0].size() == 3); REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size()); - for (int i = 0; i < y_pred_proba.size(); ++i) { + for (int i = 0; i < 9; ++i) { auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); int predictedClass = distance(y_pred_proba[i].begin(), maxElem); REQUIRE(predictedClass == y_pred[i]); @@ -166,7 +188,7 @@ TEST_CASE("Model predict_proba", "[Models]") } } // Check predict_proba values for vectors and tensors - for (int i = 0; i < res_prob.size(); i++) { + for (int i = 0; i < 9; i++) { REQUIRE(y_pred[i] == yt_pred[i].item()); 
for (int j = 0; j < 3; j++) { REQUIRE(res_prob[model][i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); @@ -222,6 +244,12 @@ TEST_CASE("KDB with hyperparameters", "[Models]") REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon)); REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon)); } +TEST_CASE("Incorrect type of data for SPODELd", "[Models]") +{ + auto raw = RawDatasets("iris", true); + auto clf = bayesnet::SPODELd(0); + REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest), std::runtime_error); +} TEST_CASE("Predict, predict_proba & score without fitting", "[Models]") { auto clf = bayesnet::AODE(); diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index b6e8c19..f829af9 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do #include #include diff --git a/tests/TestBayesNode.cc b/tests/TestBayesNode.cc index bc2b977..4ace8fc 100644 --- a/tests/TestBayesNode.cc +++ b/tests/TestBayesNode.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include #include diff --git a/tests/TestBoostAODE.cc b/tests/TestBoostAODE.cc index 58c32cd..19dee45 100644 --- a/tests/TestBoostAODE.cc +++ b/tests/TestBoostAODE.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: 
SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include #include @@ -27,7 +33,7 @@ TEST_CASE("Feature_select IWSS", "[BoostAODE]") REQUIRE(clf.getNumberOfNodes() == 90); REQUIRE(clf.getNumberOfEdges() == 153); REQUIRE(clf.getNotes().size() == 2); - REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with IWSS"); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS"); REQUIRE(clf.getNotes()[1] == "Number of models: 9"); } TEST_CASE("Feature_select FCBF", "[BoostAODE]") @@ -76,8 +82,8 @@ TEST_CASE("Voting vs proba", "[BoostAODE]") auto pred_voting = clf.predict_proba(raw.Xv); REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon)); REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon)); - REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon)); - REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon)); + REQUIRE(pred_voting[83][2] == Catch::Approx(1.0).epsilon(raw.epsilon)); + REQUIRE(pred_proba[83][2] == Catch::Approx(0.86121525).epsilon(raw.epsilon)); REQUIRE(clf.dump_cpt() == ""); REQUIRE(clf.topological_order() == std::vector()); } @@ -91,6 +97,9 @@ TEST_CASE("Order asc, desc & random", "[BoostAODE]") auto clf = bayesnet::BoostAODE(); clf.setHyperparameters({ {"order", order}, + {"bisection", false}, + {"maxTolerance", 1}, + {"convergence", false}, }); clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); auto score = clf.score(raw.Xv, raw.yv); @@ -136,6 +145,30 @@ TEST_CASE("Bisection", "[BoostAODE]") {"bisection", true}, {"maxTolerance", 3}, {"convergence", true}, + {"block_update", false}, + }); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + REQUIRE(clf.getNumberOfNodes() == 217); + REQUIRE(clf.getNumberOfEdges() == 431); + REQUIRE(clf.getNotes().size() == 3); + REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models 
eliminated"); + REQUIRE(clf.getNotes()[1] == "Used features in train: 16 of 216"); + REQUIRE(clf.getNotes()[2] == "Number of models: 1"); + auto score = clf.score(raw.Xv, raw.yv); + auto scoret = clf.score(raw.Xt, raw.yt); + REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon)); + REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon)); +} + +TEST_CASE("Block Update", "[BoostAODE]") +{ + auto clf = bayesnet::BoostAODE(); + auto raw = RawDatasets("mfeat-factors", true); + clf.setHyperparameters({ + {"bisection", true}, + {"block_update", true}, + {"maxTolerance", 3}, + {"convergence", true}, }); clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); REQUIRE(clf.getNumberOfNodes() == 217); diff --git a/tests/TestFeatureSelection.cc b/tests/TestFeatureSelection.cc index 2feb723..e20299b 100644 --- a/tests/TestFeatureSelection.cc +++ b/tests/TestFeatureSelection.cc @@ -1,6 +1,13 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include #include #include +#include #include "bayesnet/utils/BayesMetrics.h" #include "bayesnet/feature_selection/CFS.h" #include "bayesnet/feature_selection/FCBF.h" @@ -68,4 +75,15 @@ TEST_CASE("Features Selected", "[FeatureSelection]") delete featureSelector; } } +} +TEST_CASE("Oddities", "[FeatureSelection]") +{ + auto raw = RawDatasets("iris", true); + // FCBF Limits + REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument); + REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7"); + REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, 
raw.featuresv.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument); + REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]"); + REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument); + REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]"); } \ No newline at end of file diff --git a/tests/TestUtils.cc b/tests/TestUtils.cc index 64f5769..82fb073 100644 --- a/tests/TestUtils.cc +++ b/tests/TestUtils.cc @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #include "TestUtils.h" #include "bayesnet/config.h" diff --git a/tests/TestUtils.h b/tests/TestUtils.h index 474b3cf..f77684f 100644 --- a/tests/TestUtils.h +++ b/tests/TestUtils.h @@ -1,3 +1,9 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + #ifndef TEST_UTILS_H #define TEST_UTILS_H #include diff --git a/update_coverage.py b/update_coverage.py index dd426c1..da99463 100644 --- a/update_coverage.py +++ b/update_coverage.py @@ -1,5 +1,10 @@ +# *************************************************************** +# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +# SPDX-FileType: SOURCE +# SPDX-License-Identifier: MIT +# *************************************************************** + import subprocess 
-import os import sys readme_file = "README.md"