Refactor Smoothing type to its own file
Add log to boost
This commit is contained in:
@@ -14,13 +14,13 @@ namespace bayesnet {
|
|||||||
enum status_t { NORMAL, WARNING, ERROR };
|
enum status_t { NORMAL, WARNING, ERROR };
|
||||||
class BaseClassifier {
|
class BaseClassifier {
|
||||||
public:
|
public:
|
||||||
|
virtual ~BaseClassifier() = default;
|
||||||
// X is nxm std::vector, y is nx1 std::vector
|
// X is nxm std::vector, y is nx1 std::vector
|
||||||
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||||
// X is nxm tensor, y is nx1 tensor
|
// X is nxm tensor, y is nx1 tensor
|
||||||
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
|
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
|
||||||
virtual ~BaseClassifier() = default;
|
|
||||||
torch::Tensor virtual predict(torch::Tensor& X) = 0;
|
torch::Tensor virtual predict(torch::Tensor& X) = 0;
|
||||||
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
|
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
|
||||||
torch::Tensor virtual predict_proba(torch::Tensor& X) = 0;
|
torch::Tensor virtual predict_proba(torch::Tensor& X) = 0;
|
||||||
@@ -43,5 +43,7 @@ namespace bayesnet {
|
|||||||
protected:
|
protected:
|
||||||
virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
|
virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
|
||||||
std::vector<std::string> validHyperparameters;
|
std::vector<std::string> validHyperparameters;
|
||||||
|
std::vector<std::string> notes; // Used to store messages occurred during the fit process
|
||||||
|
status_t status = NORMAL;
|
||||||
};
|
};
|
||||||
}
|
}
|
@@ -1,4 +1,5 @@
|
|||||||
include_directories(
|
include_directories(
|
||||||
|
${BayesNet_SOURCE_DIR}/lib/log
|
||||||
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
||||||
${BayesNet_SOURCE_DIR}/lib/folding
|
${BayesNet_SOURCE_DIR}/lib/folding
|
||||||
${BayesNet_SOURCE_DIR}/lib/json/include
|
${BayesNet_SOURCE_DIR}/lib/json/include
|
||||||
|
@@ -46,8 +46,6 @@ namespace bayesnet {
|
|||||||
std::string className;
|
std::string className;
|
||||||
std::map<std::string, std::vector<int>> states;
|
std::map<std::string, std::vector<int>> states;
|
||||||
torch::Tensor dataset; // (n+1)xm tensor
|
torch::Tensor dataset; // (n+1)xm tensor
|
||||||
status_t status = NORMAL;
|
|
||||||
std::vector<std::string> notes; // Used to store messages occurred during the fit process
|
|
||||||
void checkFitParameters();
|
void checkFitParameters();
|
||||||
virtual void buildModel(const torch::Tensor& weights) = 0;
|
virtual void buildModel(const torch::Tensor& weights) = 0;
|
||||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||||
|
@@ -138,6 +138,7 @@ namespace bayesnet {
|
|||||||
auto mask_right = ypred == ytrain;
|
auto mask_right = ypred == ytrain;
|
||||||
auto masked_weights = weights * mask_wrong.to(weights.dtype());
|
auto masked_weights = weights * mask_wrong.to(weights.dtype());
|
||||||
double epsilon_t = masked_weights.sum().item<double>();
|
double epsilon_t = masked_weights.sum().item<double>();
|
||||||
|
// std::cout << "epsilon_t: " << epsilon_t << " count wrong: " << mask_wrong.sum().item<int>() << " count right: " << mask_right.sum().item<int>() << std::endl;
|
||||||
if (epsilon_t > 0.5) {
|
if (epsilon_t > 0.5) {
|
||||||
// Inverse the weights policy (plot ln(wt))
|
// Inverse the weights policy (plot ln(wt))
|
||||||
// "In each round of AdaBoost, there is a sanity check to ensure that the current base
|
// "In each round of AdaBoost, there is a sanity check to ensure that the current base
|
||||||
|
@@ -27,7 +27,7 @@ namespace bayesnet {
|
|||||||
class Boost : public Ensemble {
|
class Boost : public Ensemble {
|
||||||
public:
|
public:
|
||||||
explicit Boost(bool predict_voting = false);
|
explicit Boost(bool predict_voting = false);
|
||||||
virtual ~Boost() = default;
|
virtual ~Boost() override = default;
|
||||||
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
||||||
protected:
|
protected:
|
||||||
std::vector<int> featureSelection(torch::Tensor& weights_);
|
std::vector<int> featureSelection(torch::Tensor& weights_);
|
||||||
@@ -38,11 +38,11 @@ namespace bayesnet {
|
|||||||
// Hyperparameters
|
// Hyperparameters
|
||||||
bool bisection = true; // if true, use bisection stratety to add k models at once to the ensemble
|
bool bisection = true; // if true, use bisection stratety to add k models at once to the ensemble
|
||||||
int maxTolerance = 3;
|
int maxTolerance = 3;
|
||||||
std::string order_algorithm; // order to process the KBest features asc, desc, rand
|
std::string order_algorithm = Orders.DESC; // order to process the KBest features asc, desc, rand
|
||||||
bool convergence = true; //if true, stop when the model does not improve
|
bool convergence = true; //if true, stop when the model does not improve
|
||||||
bool convergence_best = false; // wether to keep the best accuracy to the moment or the last accuracy as prior accuracy
|
bool convergence_best = false; // wether to keep the best accuracy to the moment or the last accuracy as prior accuracy
|
||||||
bool selectFeatures = false; // if true, use feature selection
|
bool selectFeatures = false; // if true, use feature selection
|
||||||
std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
|
std::string select_features_algorithm; // Selected feature selection algorithm
|
||||||
FeatureSelect* featureSelector = nullptr;
|
FeatureSelect* featureSelector = nullptr;
|
||||||
double threshold = -1;
|
double threshold = -1;
|
||||||
bool block_update = false; // if true, use block update algorithm, only meaningful if bisection is true
|
bool block_update = false; // if true, use block update algorithm, only meaningful if bisection is true
|
||||||
|
@@ -10,6 +10,8 @@
|
|||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include "BoostAODE.h"
|
#include "BoostAODE.h"
|
||||||
|
#include <loguru.hpp>
|
||||||
|
#include <loguru.cpp>
|
||||||
|
|
||||||
namespace bayesnet {
|
namespace bayesnet {
|
||||||
|
|
||||||
@@ -35,9 +37,9 @@ namespace bayesnet {
|
|||||||
//
|
//
|
||||||
// Logging setup
|
// Logging setup
|
||||||
//
|
//
|
||||||
// loguru::set_thread_name("BoostAODE");
|
loguru::set_thread_name("BoostAODE");
|
||||||
// loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
|
loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
|
||||||
// loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX);
|
loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX);
|
||||||
|
|
||||||
// Algorithm based on the adaboost algorithm for classification
|
// Algorithm based on the adaboost algorithm for classification
|
||||||
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
|
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
|
||||||
@@ -46,14 +48,16 @@ namespace bayesnet {
|
|||||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||||
bool finished = false;
|
bool finished = false;
|
||||||
std::vector<int> featuresUsed;
|
std::vector<int> featuresUsed;
|
||||||
|
n_models = 0;
|
||||||
if (selectFeatures) {
|
if (selectFeatures) {
|
||||||
featuresUsed = initializeModels(smoothing);
|
featuresUsed = initializeModels(smoothing);
|
||||||
auto ypred = predict(X_train);
|
auto ypred = predict(X_train);
|
||||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||||
// Update significance of the models
|
// Update significance of the models
|
||||||
for (int i = 0; i < n_models; ++i) {
|
for (int i = 0; i < n_models; ++i) {
|
||||||
significanceModels[i] = alpha_t;
|
significanceModels.push_back(alpha_t);
|
||||||
}
|
}
|
||||||
|
VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models);
|
||||||
if (finished) {
|
if (finished) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -83,7 +87,7 @@ namespace bayesnet {
|
|||||||
);
|
);
|
||||||
int k = bisection ? pow(2, tolerance) : 1;
|
int k = bisection ? pow(2, tolerance) : 1;
|
||||||
int counter = 0; // The model counter of the current pack
|
int counter = 0; // The model counter of the current pack
|
||||||
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
|
VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
|
||||||
while (counter++ < k && featureSelection.size() > 0) {
|
while (counter++ < k && featureSelection.size() > 0) {
|
||||||
auto feature = featureSelection[0];
|
auto feature = featureSelection[0];
|
||||||
featureSelection.erase(featureSelection.begin());
|
featureSelection.erase(featureSelection.begin());
|
||||||
@@ -120,7 +124,7 @@ namespace bayesnet {
|
|||||||
models.push_back(std::move(model));
|
models.push_back(std::move(model));
|
||||||
significanceModels.push_back(alpha_t);
|
significanceModels.push_back(alpha_t);
|
||||||
n_models++;
|
n_models++;
|
||||||
// VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
|
VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size());
|
||||||
}
|
}
|
||||||
if (block_update) {
|
if (block_update) {
|
||||||
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
|
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
|
||||||
@@ -134,10 +138,10 @@ namespace bayesnet {
|
|||||||
improvement = accuracy - priorAccuracy;
|
improvement = accuracy - priorAccuracy;
|
||||||
}
|
}
|
||||||
if (improvement < convergence_threshold) {
|
if (improvement < convergence_threshold) {
|
||||||
// VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||||
tolerance++;
|
tolerance++;
|
||||||
} else {
|
} else {
|
||||||
// VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||||
tolerance = 0; // Reset the counter if the model performs better
|
tolerance = 0; // Reset the counter if the model performs better
|
||||||
numItemsPack = 0;
|
numItemsPack = 0;
|
||||||
}
|
}
|
||||||
@@ -149,13 +153,13 @@ namespace bayesnet {
|
|||||||
priorAccuracy = accuracy;
|
priorAccuracy = accuracy;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
|
VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
|
||||||
finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
|
finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
|
||||||
}
|
}
|
||||||
if (tolerance > maxTolerance) {
|
if (tolerance > maxTolerance) {
|
||||||
if (numItemsPack < n_models) {
|
if (numItemsPack < n_models) {
|
||||||
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
|
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
|
||||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
|
VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
|
||||||
for (int i = 0; i < numItemsPack; ++i) {
|
for (int i = 0; i < numItemsPack; ++i) {
|
||||||
significanceModels.pop_back();
|
significanceModels.pop_back();
|
||||||
models.pop_back();
|
models.pop_back();
|
||||||
@@ -163,7 +167,7 @@ namespace bayesnet {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
||||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (featuresUsed.size() != features.size()) {
|
if (featuresUsed.size() != features.size()) {
|
||||||
|
@@ -10,14 +10,10 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include "bayesnet/config.h"
|
#include "bayesnet/config.h"
|
||||||
#include "Node.h"
|
#include "Node.h"
|
||||||
|
#include "Smoothing.h"
|
||||||
|
|
||||||
namespace bayesnet {
|
namespace bayesnet {
|
||||||
enum class Smoothing_t {
|
|
||||||
NONE = -1,
|
|
||||||
ORIGINAL = 0,
|
|
||||||
LAPLACE,
|
|
||||||
CESTNIK
|
|
||||||
};
|
|
||||||
class Network {
|
class Network {
|
||||||
public:
|
public:
|
||||||
Network();
|
Network();
|
||||||
|
15
bayesnet/network/Smoothing.h
Normal file
15
bayesnet/network/Smoothing.h
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
// ***************************************************************
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX-FileType: SOURCE
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
// ***************************************************************
|
||||||
|
|
||||||
|
#ifndef SMOOTHING_H
|
||||||
|
#define SMOOTHING_H
|
||||||
|
enum class Smoothing_t {
|
||||||
|
NONE = -1,
|
||||||
|
ORIGINAL = 0,
|
||||||
|
LAPLACE,
|
||||||
|
CESTNIK
|
||||||
|
};
|
||||||
|
#endif // SMOOTHING_H
|
@@ -3,6 +3,7 @@ if(ENABLE_TESTING)
|
|||||||
${BayesNet_SOURCE_DIR}/tests/lib/Files
|
${BayesNet_SOURCE_DIR}/tests/lib/Files
|
||||||
${BayesNet_SOURCE_DIR}/lib/folding
|
${BayesNet_SOURCE_DIR}/lib/folding
|
||||||
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
||||||
|
${BayesNet_SOURCE_DIR}/lib/log
|
||||||
${BayesNet_SOURCE_DIR}/lib/json/include
|
${BayesNet_SOURCE_DIR}/lib/json/include
|
||||||
${BayesNet_SOURCE_DIR}
|
${BayesNet_SOURCE_DIR}
|
||||||
${CMAKE_BINARY_DIR}/configured_files/include
|
${CMAKE_BINARY_DIR}/configured_files/include
|
||||||
|
Reference in New Issue
Block a user