diff --git a/bayesnet/BaseClassifier.h b/bayesnet/BaseClassifier.h index 60a0c8e..5b063ed 100644 --- a/bayesnet/BaseClassifier.h +++ b/bayesnet/BaseClassifier.h @@ -14,13 +14,13 @@ namespace bayesnet { enum status_t { NORMAL, WARNING, ERROR }; class BaseClassifier { public: + virtual ~BaseClassifier() = default; // X is nxm std::vector, y is nx1 std::vector virtual BaseClassifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) = 0; // X is nxm tensor, y is nx1 tensor virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) = 0; virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const Smoothing_t smoothing) = 0; virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0; - virtual ~BaseClassifier() = default; torch::Tensor virtual predict(torch::Tensor& X) = 0; std::vector virtual predict(std::vector>& X) = 0; torch::Tensor virtual predict_proba(torch::Tensor& X) = 0; @@ -43,5 +43,7 @@ namespace bayesnet { protected: virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0; std::vector validHyperparameters; + std::vector notes; // Used to store messages that occurred during the fit process + status_t status = NORMAL; }; } \ No newline at end of file diff --git a/bayesnet/CMakeLists.txt b/bayesnet/CMakeLists.txt index 73d0ef7..6815b70 100644 --- a/bayesnet/CMakeLists.txt +++ b/bayesnet/CMakeLists.txt @@ -1,4 +1,5 @@ include_directories( + ${BayesNet_SOURCE_DIR}/lib/log ${BayesNet_SOURCE_DIR}/lib/mdlp/src ${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/json/include diff --git a/bayesnet/classifiers/Classifier.h 
b/bayesnet/classifiers/Classifier.h index 4d3ea83..d475363 100644 --- a/bayesnet/classifiers/Classifier.h +++ b/bayesnet/classifiers/Classifier.h @@ -46,8 +46,6 @@ namespace bayesnet { std::string className; std::map> states; torch::Tensor dataset; // (n+1)xm tensor - status_t status = NORMAL; - std::vector notes; // Used to store messages occurred during the fit process void checkFitParameters(); virtual void buildModel(const torch::Tensor& weights) = 0; void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override; diff --git a/bayesnet/ensembles/Boost.cc b/bayesnet/ensembles/Boost.cc index 7a3381e..114db0c 100644 --- a/bayesnet/ensembles/Boost.cc +++ b/bayesnet/ensembles/Boost.cc @@ -138,6 +138,7 @@ namespace bayesnet { auto mask_right = ypred == ytrain; auto masked_weights = weights * mask_wrong.to(weights.dtype()); double epsilon_t = masked_weights.sum().item(); + // std::cout << "epsilon_t: " << epsilon_t << " count wrong: " << mask_wrong.sum().item() << " count right: " << mask_right.sum().item() << std::endl; if (epsilon_t > 0.5) { // Inverse the weights policy (plot ln(wt)) // "In each round of AdaBoost, there is a sanity check to ensure that the current base diff --git a/bayesnet/ensembles/Boost.h b/bayesnet/ensembles/Boost.h index 82433e0..abb5ac2 100644 --- a/bayesnet/ensembles/Boost.h +++ b/bayesnet/ensembles/Boost.h @@ -27,7 +27,7 @@ namespace bayesnet { class Boost : public Ensemble { public: explicit Boost(bool predict_voting = false); - virtual ~Boost() = default; + ~Boost() override = default; void setHyperparameters(const nlohmann::json& hyperparameters_) override; protected: std::vector featureSelection(torch::Tensor& weights_); @@ -38,11 +38,11 @@ namespace bayesnet { // Hyperparameters bool bisection = true; // if true, use bisection stratety to add k models at once to the ensemble int maxTolerance = 3; - std::string order_algorithm; // order to process the KBest features asc, desc, rand + std::string 
order_algorithm = Orders.DESC; // order to process the KBest features asc, desc, rand bool convergence = true; //if true, stop when the model does not improve bool convergence_best = false; // wether to keep the best accuracy to the moment or the last accuracy as prior accuracy bool selectFeatures = false; // if true, use feature selection - std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm + std::string select_features_algorithm; // Selected feature selection algorithm FeatureSelect* featureSelector = nullptr; double threshold = -1; bool block_update = false; // if true, use block update algorithm, only meaningful if bisection is true diff --git a/bayesnet/ensembles/BoostAODE.cc b/bayesnet/ensembles/BoostAODE.cc index b2ba9b6..2de0986 100644 --- a/bayesnet/ensembles/BoostAODE.cc +++ b/bayesnet/ensembles/BoostAODE.cc @@ -10,6 +10,8 @@ #include #include #include "BoostAODE.h" +#include +#include namespace bayesnet { @@ -35,9 +37,9 @@ namespace bayesnet { // // Logging setup // - // loguru::set_thread_name("BoostAODE"); - // loguru::g_stderr_verbosity = loguru::Verbosity_OFF; - // loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX); + loguru::set_thread_name("BoostAODE"); + loguru::g_stderr_verbosity = loguru::Verbosity_OFF; + loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX); // Algorithm based on the adaboost algorithm for classification // as explained in Ensemble methods (Zhi-Hua Zhou, 2012) @@ -46,14 +48,16 @@ namespace bayesnet { torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); bool finished = false; std::vector featuresUsed; + n_models = 0; if (selectFeatures) { featuresUsed = initializeModels(smoothing); auto ypred = predict(X_train); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); // Update significance of the models for (int i = 0; i < n_models; ++i) { - significanceModels[i] = alpha_t; + 
significanceModels.push_back(alpha_t); } + VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models); if (finished) { return; } @@ -83,7 +87,7 @@ namespace bayesnet { ); int k = bisection ? pow(2, tolerance) : 1; int counter = 0; // The model counter of the current pack - // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size()); + VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size()); while (counter++ < k && featureSelection.size() > 0) { auto feature = featureSelection[0]; featureSelection.erase(featureSelection.begin()); @@ -120,7 +124,7 @@ namespace bayesnet { models.push_back(std::move(model)); significanceModels.push_back(alpha_t); n_models++; - // VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size()); + VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size()); } if (block_update) { std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_); @@ -134,10 +138,10 @@ namespace bayesnet { improvement = accuracy - priorAccuracy; } if (improvement < convergence_threshold) { - // VLOG_SCOPE_F(3, " (improvement=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy); + VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. 
tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy); tolerance = 0; // Reset the counter if the model performs better numItemsPack = 0; } @@ -149,13 +153,13 @@ namespace bayesnet { priorAccuracy = accuracy; } } - // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size()); + VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size()); finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size(); } if (tolerance > maxTolerance) { if (numItemsPack < n_models) { notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated"); - // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models); + VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models); for (int i = 0; i < numItemsPack; ++i) { significanceModels.pop_back(); models.pop_back(); @@ -163,7 +167,7 @@ namespace bayesnet { } } else { notes.push_back("Convergence threshold reached & 0 models eliminated"); - // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack); + VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack); } } if (featuresUsed.size() != features.size()) { diff --git a/bayesnet/network/Network.h b/bayesnet/network/Network.h index 0210877..efee01e 100644 --- a/bayesnet/network/Network.h +++ b/bayesnet/network/Network.h @@ -10,14 +10,10 @@ #include #include "bayesnet/config.h" #include "Node.h" +#include "Smoothing.h" namespace bayesnet { - enum class Smoothing_t { - NONE = -1, - ORIGINAL = 0, - LAPLACE, - CESTNIK - }; + class Network { public: Network(); diff --git a/bayesnet/network/Smoothing.h 
b/bayesnet/network/Smoothing.h new file mode 100644 index 0000000..021f298 --- /dev/null +++ b/bayesnet/network/Smoothing.h @@ -0,0 +1,15 @@ +// *************************************************************** +// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez +// SPDX-FileType: SOURCE +// SPDX-License-Identifier: MIT +// *************************************************************** + +#ifndef SMOOTHING_H +#define SMOOTHING_H +enum class Smoothing_t { + NONE = -1, + ORIGINAL = 0, + LAPLACE, + CESTNIK +}; +#endif // SMOOTHING_H \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7868f5f..11f11f0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,6 +3,7 @@ if(ENABLE_TESTING) ${BayesNet_SOURCE_DIR}/tests/lib/Files ${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/mdlp/src + ${BayesNet_SOURCE_DIR}/lib/log ${BayesNet_SOURCE_DIR}/lib/json/include ${BayesNet_SOURCE_DIR} ${CMAKE_BINARY_DIR}/configured_files/include