Reformat source
This commit is contained in:
@@ -4,25 +4,26 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <random>
|
||||
#include <set>
|
||||
#include <limits.h>
|
||||
#include <tuple>
|
||||
#include "BoostAODE.h"
|
||||
#include "bayesnet/classifiers/SPODE.h"
|
||||
#include <loguru.hpp>
|
||||
#include <limits.h>
|
||||
#include <loguru.cpp>
|
||||
#include <loguru.hpp>
|
||||
#include <random>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
|
||||
namespace bayesnet {
|
||||
namespace bayesnet
|
||||
{
|
||||
|
||||
BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting)
|
||||
{
|
||||
}
|
||||
std::vector<int> BoostAODE::initializeModels(const Smoothing_t smoothing)
|
||||
{
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
|
||||
std::vector<int> featuresSelected = featureSelection(weights_);
|
||||
for (const int& feature : featuresSelected) {
|
||||
for (const int &feature : featuresSelected) {
|
||||
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
models.push_back(std::move(model));
|
||||
@@ -32,7 +33,7 @@ namespace bayesnet {
|
||||
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||
return featuresSelected;
|
||||
}
|
||||
void BoostAODE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
void BoostAODE::trainModel(const torch::Tensor &weights, const Smoothing_t smoothing)
|
||||
{
|
||||
//
|
||||
// Logging setup
|
||||
@@ -45,7 +46,7 @@ namespace bayesnet {
|
||||
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
|
||||
fitted = true;
|
||||
double alpha_t = 0;
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
n_models = 0;
|
||||
@@ -73,7 +74,7 @@ namespace bayesnet {
|
||||
// validation error is not decreasing
|
||||
// run out of features
|
||||
bool ascending = order_algorithm == Orders.ASC;
|
||||
std::mt19937 g{ 173 };
|
||||
std::mt19937 g{173};
|
||||
while (!finished) {
|
||||
// Step 1: Build ranking with mutual information
|
||||
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
|
||||
@@ -81,10 +82,8 @@ namespace bayesnet {
|
||||
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
|
||||
}
|
||||
// Remove used features
|
||||
featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x)
|
||||
{ return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
|
||||
end(featureSelection)
|
||||
);
|
||||
featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x) { return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed); }),
|
||||
end(featureSelection));
|
||||
int k = bisection ? pow(2, tolerance) : 1;
|
||||
int counter = 0; // The model counter of the current pack
|
||||
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
|
||||
@@ -176,7 +175,7 @@ namespace bayesnet {
|
||||
}
|
||||
notes.push_back("Number of models: " + std::to_string(n_models));
|
||||
}
|
||||
std::vector<std::string> BoostAODE::graph(const std::string& title) const
|
||||
std::vector<std::string> BoostAODE::graph(const std::string &title) const
|
||||
{
|
||||
return Ensemble::graph(title);
|
||||
}
|
||||
|
@@ -17,8 +17,7 @@ namespace bayesnet
|
||||
{
|
||||
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
|
||||
std::vector<int> featuresSelected = featureSelection(weights_);
|
||||
for (const int &feature : featuresSelected)
|
||||
{
|
||||
for (const int &feature : featuresSelected) {
|
||||
std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
add_model(std::move(model), 1.0);
|
||||
@@ -31,8 +30,7 @@ namespace bayesnet
|
||||
{
|
||||
X_train_ = TensorUtils::to_matrix(X_train);
|
||||
y_train_ = TensorUtils::to_vector<int>(y_train);
|
||||
if (convergence)
|
||||
{
|
||||
if (convergence) {
|
||||
X_test_ = TensorUtils::to_matrix(X_test);
|
||||
y_test_ = TensorUtils::to_vector<int>(y_test);
|
||||
}
|
||||
@@ -42,25 +40,21 @@ namespace bayesnet
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
n_models = 0;
|
||||
if (selectFeatures)
|
||||
{
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels(smoothing);
|
||||
auto ypred = predict(X_train_);
|
||||
auto ypred_t = torch::tensor(ypred);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
|
||||
// Update significance of the models
|
||||
for (const int &feature : featuresUsed)
|
||||
{
|
||||
for (const int &feature : featuresUsed) {
|
||||
significanceModels.pop_back();
|
||||
}
|
||||
for (const int &feature : featuresUsed)
|
||||
{
|
||||
for (const int &feature : featuresUsed) {
|
||||
significanceModels.push_back(alpha_t);
|
||||
}
|
||||
// VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t,
|
||||
// n_models);
|
||||
if (finished)
|
||||
{
|
||||
if (finished) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -76,18 +70,15 @@ namespace bayesnet
|
||||
// run out of features
|
||||
bool ascending = order_algorithm == bayesnet::Orders.ASC;
|
||||
std::mt19937 g{173};
|
||||
while (!finished)
|
||||
{
|
||||
while (!finished) {
|
||||
// Step 1: Build ranking with mutual information
|
||||
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
|
||||
if (order_algorithm == bayesnet::Orders.RAND)
|
||||
{
|
||||
if (order_algorithm == bayesnet::Orders.RAND) {
|
||||
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
|
||||
}
|
||||
// Remove used features
|
||||
featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(),
|
||||
[&](auto x)
|
||||
{
|
||||
[&](auto x) {
|
||||
return std::find(featuresUsed.begin(), featuresUsed.end(), x) !=
|
||||
featuresUsed.end();
|
||||
}),
|
||||
@@ -96,8 +87,7 @@ namespace bayesnet
|
||||
int counter = 0; // The model counter of the current pack
|
||||
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k,
|
||||
// featureSelection.size());
|
||||
while (counter++ < k && featureSelection.size() > 0)
|
||||
{
|
||||
while (counter++ < k && featureSelection.size() > 0) {
|
||||
auto feature = featureSelection[0];
|
||||
featureSelection.erase(featureSelection.begin());
|
||||
std::unique_ptr<Classifier> model;
|
||||
@@ -110,8 +100,7 @@ namespace bayesnet
|
||||
* std::endl;*/
|
||||
// DEBUG
|
||||
std::vector<int> ypred;
|
||||
if (alpha_block)
|
||||
{
|
||||
if (alpha_block) {
|
||||
//
|
||||
// Compute the prediction with the current ensemble + model
|
||||
//
|
||||
@@ -122,9 +111,7 @@ namespace bayesnet
|
||||
model = std::move(models.back());
|
||||
// Remove the model from the ensemble
|
||||
remove_last_model();
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
ypred = model->predict(X_train_);
|
||||
}
|
||||
// Step 3.1: Compute the classifier amout of say
|
||||
@@ -138,40 +125,30 @@ namespace bayesnet
|
||||
// featuresUsed: %zu", finished, numItemsPack, n_models,
|
||||
// featuresUsed.size());
|
||||
} // End of the pack
|
||||
if (convergence && !finished)
|
||||
{
|
||||
if (convergence && !finished) {
|
||||
auto y_val_predict = predict(X_test);
|
||||
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
|
||||
if (priorAccuracy == 0)
|
||||
{
|
||||
if (priorAccuracy == 0) {
|
||||
priorAccuracy = accuracy;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
improvement = accuracy - priorAccuracy;
|
||||
}
|
||||
if (improvement < convergence_threshold)
|
||||
{
|
||||
if (improvement < convergence_threshold) {
|
||||
// VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d
|
||||
// numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
|
||||
// numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance++;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
// VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d
|
||||
// numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
|
||||
// numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance = 0; // Reset the counter if the model performs better
|
||||
numItemsPack = 0;
|
||||
}
|
||||
if (convergence_best)
|
||||
{
|
||||
if (convergence_best) {
|
||||
// Keep the best accuracy until now as the prior accuracy
|
||||
priorAccuracy = std::max(accuracy, priorAccuracy);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
// Keep the last accuray obtained as the prior accuracy
|
||||
priorAccuracy = accuracy;
|
||||
}
|
||||
@@ -180,29 +157,23 @@ namespace bayesnet
|
||||
// %zu", tolerance, featuresUsed.size(), features.size());
|
||||
finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
|
||||
}
|
||||
if (tolerance > maxTolerance)
|
||||
{
|
||||
if (numItemsPack < n_models)
|
||||
{
|
||||
if (tolerance > maxTolerance) {
|
||||
if (numItemsPack < n_models) {
|
||||
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated
|
||||
// of %d", numItemsPack, n_models);
|
||||
for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i)
|
||||
{
|
||||
for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i) {
|
||||
remove_last_model();
|
||||
}
|
||||
// VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features
|
||||
// used.", n_models, featuresUsed.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated
|
||||
// n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
||||
}
|
||||
}
|
||||
if (featuresUsed.size() != features.size())
|
||||
{
|
||||
if (featuresUsed.size() != features.size()) {
|
||||
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " +
|
||||
std::to_string(features.size()));
|
||||
status = bayesnet::WARNING;
|
||||
|
Reference in New Issue
Block a user