Merge pull request 'block_update' (#26) from block_update into main
Reviewed-on: #26
This commit is contained in:
commit
7d906b24d1
32
.vscode/launch.json
vendored
32
.vscode/launch.json
vendored
@ -8,7 +8,7 @@
|
||||
"program": "${workspaceFolder}/build_release/sample/bayesnet_sample",
|
||||
"args": [
|
||||
"${workspaceFolder}/tests/data/glass.arff"
|
||||
],
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
@ -16,11 +16,33 @@
|
||||
"name": "test",
|
||||
"program": "${workspaceFolder}/build_debug/tests/TestBayesNet",
|
||||
"args": [
|
||||
"[Network]"
|
||||
//"-c=\"Metrics Test\"",
|
||||
// "-s",
|
||||
"Block Update"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/build_debug/tests",
|
||||
"cwd": "${workspaceFolder}/build_debug/tests"
|
||||
},
|
||||
{
|
||||
"name": "(gdb) Launch",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "enter program name, for example ${workspaceFolder}/a.out",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${fileDirname}",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"MIMode": "gdb",
|
||||
"setupCommands": [
|
||||
{
|
||||
"description": "Enable pretty-printing for gdb",
|
||||
"text": "-enable-pretty-printing",
|
||||
"ignoreFailures": true
|
||||
},
|
||||
{
|
||||
"description": "Set Disassembly Flavor to Intel",
|
||||
"text": "-gdb-set disassembly-flavor intel",
|
||||
"ignoreFailures": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
11
CHANGELOG.md
11
CHANGELOG.md
@ -10,18 +10,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
### Added
|
||||
|
||||
- Install command and instructions in README.md
|
||||
- Prefix to install command to install the package in the any location.
|
||||
- The 'block_update' hyperparameter to the BoostAODE class, to control the way weights/significances are updated. Default value is false.
|
||||
- Html report of coverage in the coverage folder. It is created with *make viewcoverage*
|
||||
- Badges of coverage and code quality (codacy) in README.md. Coverage badge is updated with *make viewcoverage*
|
||||
- Tests to reach 97% of coverage.
|
||||
- Copyright header to source files.
|
||||
|
||||
### Changed
|
||||
|
||||
- Sample app now is a separate target in the Makefile and shows how to use the library with a sample dataset
|
||||
- The worse model count in BoostAODE is reset to 0 every time a new model produces better accuracy, so the tolerance of the model is meant to be the number of **consecutive** models that produce worse accuracy.
|
||||
- Default hyperparameter values in BoostAODE: bisection is true, maxTolerance is 3, convergence is true
|
||||
|
||||
## [1.0.4] 2024-03-06
|
||||
|
||||
### Added
|
||||
|
||||
- Change _ascending_ hyperparameter to _order_ with these possible values _{"asc", "desc", "rand"}_, Default is _"desc"_.
|
||||
- Add the _predict_single_ hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true.
|
||||
- Change *ascending* hyperparameter to *order* with these possible values *{"asc", "desc", "rand"}*, Default is *"desc"*.
|
||||
- Add the *predict_single* hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true.
|
||||
- sample app to show how to use the library (make sample)
|
||||
|
||||
### Changed
|
||||
|
@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(BayesNet
|
||||
VERSION 1.0.4
|
||||
VERSION 1.0.4.1
|
||||
DESCRIPTION "Bayesian Network and basic classifiers Library."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
|
||||
LANGUAGES CXX
|
||||
|
3
Makefile
3
Makefile
@ -53,9 +53,10 @@ uninstall: ## Uninstall library
|
||||
xargs rm < $(f_release)/install_manifest.txt
|
||||
@echo ">>> Done";
|
||||
|
||||
prefix = "/usr/local"
|
||||
install: ## Install library
|
||||
@echo ">>> Installing BayesNet...";
|
||||
@cmake --install $(f_release)
|
||||
@cmake --install $(f_release) --prefix $(prefix)
|
||||
@echo ">>> Done";
|
||||
|
||||
debug: ## Build a debug version of the project
|
||||
|
@ -5,7 +5,7 @@
|
||||
![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000)
|
||||
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
||||
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
|
||||
![Static Badge](https://img.shields.io/badge/Coverage-95,8%25-green)
|
||||
![Static Badge](https://img.shields.io/badge/Coverage-97,2%25-green)
|
||||
|
||||
Bayesian Network Classifiers using libtorch from scratch
|
||||
|
||||
|
@ -1,5 +1,10 @@
|
||||
#ifndef BASE_H
|
||||
#define BASE_H
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <torch/torch.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
@ -37,5 +42,4 @@ namespace bayesnet {
|
||||
virtual void trainModel(const torch::Tensor& weights) = 0;
|
||||
std::vector<std::string> validHyperparameters;
|
||||
};
|
||||
}
|
||||
#endif
|
||||
}
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <sstream>
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "Classifier.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CLASSIFIER_H
|
||||
#define CLASSIFIER_H
|
||||
#include <torch/torch.h>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "KDB.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef KDB_H
|
||||
#define KDB_H
|
||||
#include <torch/torch.h>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "KDBLd.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef KDBLD_H
|
||||
#define KDBLD_H
|
||||
#include "Proposal.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <ArffFiles.h>
|
||||
#include "Proposal.h"
|
||||
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef PROPOSAL_H
|
||||
#define PROPOSAL_H
|
||||
#include <string>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "SPODE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef SPODE_H
|
||||
#define SPODE_H
|
||||
#include "Classifier.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "SPODELd.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef SPODELD_H
|
||||
#define SPODELD_H
|
||||
#include "SPODE.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "TAN.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef TAN_H
|
||||
#define TAN_H
|
||||
#include "Classifier.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "TANLd.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef TANLD_H
|
||||
#define TANLD_H
|
||||
#include "TAN.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "AODE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef AODE_H
|
||||
#define AODE_H
|
||||
#include "bayesnet/classifiers/SPODE.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "AODELd.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef AODELD_H
|
||||
#define AODELD_H
|
||||
#include "bayesnet/classifiers/Proposal.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <set>
|
||||
#include <functional>
|
||||
#include <limits.h>
|
||||
@ -16,7 +22,7 @@ namespace bayesnet {
|
||||
{
|
||||
validHyperparameters = {
|
||||
"maxModels", "bisection", "order", "convergence", "threshold",
|
||||
"select_features", "maxTolerance", "predict_voting"
|
||||
"select_features", "maxTolerance", "predict_voting", "block_update"
|
||||
};
|
||||
|
||||
}
|
||||
@ -94,6 +100,10 @@ namespace bayesnet {
|
||||
}
|
||||
hyperparameters.erase("select_features");
|
||||
}
|
||||
if (hyperparameters.contains("block_update")) {
|
||||
block_update = hyperparameters["block_update"];
|
||||
hyperparameters.erase("block_update");
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
|
||||
@ -123,6 +133,103 @@ namespace bayesnet {
|
||||
}
|
||||
return { weights, alpha_t, terminate };
|
||||
}
|
||||
std::tuple<torch::Tensor&, double, bool> BoostAODE::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
|
||||
{
|
||||
/* Update Block algorithm
|
||||
k = # of models in block
|
||||
n_models = # of models in ensemble to make predictions
|
||||
n_models_bak = # models saved
|
||||
models = vector of models to make predictions
|
||||
models_bak = models not used to make predictions
|
||||
significances_bak = backup of significances vector
|
||||
|
||||
Case list
|
||||
A) k = 1, n_models = 1 => n = 0 , n_models = n + k
|
||||
B) k = 1, n_models = n + 1 => n_models = n + k
|
||||
C) k > 1, n_models = k + 1 => n= 1, n_models = n + k
|
||||
D) k > 1, n_models = k => n = 0, n_models = n + k
|
||||
E) k > 1, n_models = k + n => n_models = n + k
|
||||
|
||||
A, D) n=0, k > 0, n_models == k
|
||||
1. n_models_bak <- n_models
|
||||
2. significances_bak <- significances
|
||||
3. significances = vector(k, 1)
|
||||
4. Don’t move any classifiers out of models
|
||||
5. n_models <- k
|
||||
6. Make prediction, compute alpha, update weights
|
||||
7. Don’t restore any classifiers to models
|
||||
8. significances <- significances_bak
|
||||
9. Update last k significances
|
||||
10. n_models <- n_models_bak
|
||||
|
||||
B, C, E) n > 0, k > 0, n_models == n + k
|
||||
1. n_models_bak <- n_models
|
||||
2. significances_bak <- significances
|
||||
3. significances = vector(k, 1)
|
||||
4. Move first n classifiers to models_bak
|
||||
5. n_models <- k
|
||||
6. Make prediction, compute alpha, update weights
|
||||
7. Insert classifiers in models_bak to be the first n models
|
||||
8. significances <- significances_bak
|
||||
9. Update last k significances
|
||||
10. n_models <- n_models_bak
|
||||
*/
|
||||
//
|
||||
// Make predict with only the last k models
|
||||
//
|
||||
std::unique_ptr<Classifier> model;
|
||||
std::vector<std::unique_ptr<Classifier>> models_bak;
|
||||
// 1. n_models_bak <- n_models 2. significances_bak <- significances
|
||||
auto significance_bak = significanceModels;
|
||||
auto n_models_bak = n_models;
|
||||
// 3. significances = vector(k, 1)
|
||||
significanceModels = std::vector<double>(k, 1.0);
|
||||
// 4. Move first n classifiers to models_bak
|
||||
// backup the first n_models - k models (if n_models == k, don't backup any)
|
||||
VLOG_SCOPE_F(1, "upd_weights_block n_models=%d k=%d", n_models, k);
|
||||
for (int i = 0; i < n_models - k; ++i) {
|
||||
model = std::move(models[0]);
|
||||
models.erase(models.begin());
|
||||
models_bak.push_back(std::move(model));
|
||||
}
|
||||
assert(models.size() == k);
|
||||
// 5. n_models <- k
|
||||
n_models = k;
|
||||
// 6. Make prediction, compute alpha, update weights
|
||||
auto ypred = predict(X_train);
|
||||
//
|
||||
// Update weights
|
||||
//
|
||||
double alpha_t;
|
||||
bool terminate;
|
||||
std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
|
||||
//
|
||||
// Restore the models if needed
|
||||
//
|
||||
// 7. Insert classifiers in models_bak to be the first n models
|
||||
// if n_models_bak == k, don't restore any, because none of them were moved
|
||||
if (k != n_models_bak) {
|
||||
// Insert in the same order as they were extracted
|
||||
int bak_size = models_bak.size();
|
||||
for (int i = 0; i < bak_size; ++i) {
|
||||
model = std::move(models_bak[bak_size - 1 - i]);
|
||||
models_bak.erase(models_bak.end() - 1);
|
||||
models.insert(models.begin(), std::move(model));
|
||||
}
|
||||
}
|
||||
// 8. significances <- significances_bak
|
||||
significanceModels = significance_bak;
|
||||
//
|
||||
// Update the significance of the last k models
|
||||
//
|
||||
// 9. Update last k significances
|
||||
for (int i = 0; i < k; ++i) {
|
||||
significanceModels[n_models_bak - k + i] = alpha_t;
|
||||
}
|
||||
// 10. n_models <- n_models_bak
|
||||
n_models = n_models_bak;
|
||||
return { weights, alpha_t, terminate };
|
||||
}
|
||||
std::vector<int> BoostAODE::initializeModels()
|
||||
{
|
||||
std::vector<int> featuresUsed;
|
||||
@ -152,7 +259,7 @@ namespace bayesnet {
|
||||
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(1.0);
|
||||
significanceModels.push_back(1.0); // They will be updated later in trainModel
|
||||
n_models++;
|
||||
}
|
||||
notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||
@ -217,21 +324,22 @@ namespace bayesnet {
|
||||
);
|
||||
int k = pow(2, tolerance);
|
||||
int counter = 0; // The model counter of the current pack
|
||||
VLOG_SCOPE_F(1, "k=%d featureSelection.size: %zu", k, featureSelection.size());
|
||||
VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
|
||||
while (counter++ < k && featureSelection.size() > 0) {
|
||||
VLOG_SCOPE_F(2, "counter: %d numItemsPack: %d", counter, numItemsPack);
|
||||
auto feature = featureSelection[0];
|
||||
featureSelection.erase(featureSelection.begin());
|
||||
std::unique_ptr<Classifier> model;
|
||||
model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
torch::Tensor ypred;
|
||||
ypred = model->predict(X_train);
|
||||
// Step 3.1: Compute the classifier amout of say
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
if (finished) {
|
||||
VLOG_SCOPE_F(2, "** epsilon_t > 0.5 **");
|
||||
break;
|
||||
alpha_t = 0.0;
|
||||
if (!block_update) {
|
||||
auto ypred = model->predict(X_train);
|
||||
// Step 3.1: Compute the classifier amout of say
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
if (finished) {
|
||||
VLOG_SCOPE_F(2, "** epsilon_t > 0.5 **");
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Step 3.4: Store classifier and its accuracy to weigh its future vote
|
||||
numItemsPack++;
|
||||
@ -241,6 +349,9 @@ namespace bayesnet {
|
||||
n_models++;
|
||||
VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
|
||||
}
|
||||
if (block_update) {
|
||||
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
|
||||
}
|
||||
if (convergence && !finished) {
|
||||
auto y_val_predict = predict(X_test);
|
||||
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef BOOSTAODE_H
|
||||
#define BOOSTAODE_H
|
||||
#include <map>
|
||||
@ -25,17 +31,19 @@ namespace bayesnet {
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
private:
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights);
|
||||
std::vector<int> initializeModels();
|
||||
torch::Tensor X_train, y_train, X_test, y_test;
|
||||
// Hyperparameters
|
||||
bool bisection = false; // if true, use bisection stratety to add k models at once to the ensemble
|
||||
int maxTolerance = 1;
|
||||
bool bisection = true; // if true, use bisection stratety to add k models at once to the ensemble
|
||||
int maxTolerance = 3;
|
||||
std::string order_algorithm; // order to process the KBest features asc, desc, rand
|
||||
bool convergence = false; //if true, stop when the model does not improve
|
||||
bool convergence = true; //if true, stop when the model does not improve
|
||||
bool selectFeatures = false; // if true, use feature selection
|
||||
std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
|
||||
FeatureSelect* featureSelector = nullptr;
|
||||
double threshold = -1;
|
||||
bool block_update = false;
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "Ensemble.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef ENSEMBLE_H
|
||||
#define ENSEMBLE_H
|
||||
#include <torch/torch.h>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <limits>
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "CFS.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CFS_H
|
||||
#define CFS_H
|
||||
#include <torch/torch.h>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "FCBF.h"
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef FCBF_H
|
||||
#define FCBF_H
|
||||
#include <torch/torch.h>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <limits>
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "FeatureSelect.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef FEATURE_SELECT_H
|
||||
#define FEATURE_SELECT_H
|
||||
#include <torch/torch.h>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <limits>
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "IWSS.h"
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef IWSS_H
|
||||
#define IWSS_H
|
||||
#include <vector>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef NETWORK_H
|
||||
#define NETWORK_H
|
||||
#include <map>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "Node.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef NODE_H
|
||||
#define NODE_H
|
||||
#include <unordered_set>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "Mst.h"
|
||||
#include "BayesMetrics.h"
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef BAYESNET_METRICS_H
|
||||
#define BAYESNET_METRICS_H
|
||||
#include <vector>
|
||||
|
@ -1,3 +1,10 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include "Mst.h"
|
||||
@ -45,15 +52,6 @@ namespace bayesnet {
|
||||
}
|
||||
}
|
||||
}
|
||||
void Graph::display_mst()
|
||||
{
|
||||
std::cout << "Edge :" << " Weight" << std::endl;
|
||||
for (int i = 0; i < T.size(); i++) {
|
||||
std::cout << T[i].second.first << " - " << T[i].second.second << " : "
|
||||
<< T[i].first;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void insertElement(std::list<int>& variables, int variable)
|
||||
{
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef MST_H
|
||||
#define MST_H
|
||||
#include <vector>
|
||||
@ -5,29 +11,28 @@
|
||||
#include <torch/torch.h>
|
||||
namespace bayesnet {
|
||||
class MST {
|
||||
private:
|
||||
torch::Tensor weights;
|
||||
std::vector<std::string> features;
|
||||
int root = 0;
|
||||
public:
|
||||
MST() = default;
|
||||
MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
|
||||
std::vector<std::pair<int, int>> maximumSpanningTree();
|
||||
private:
|
||||
torch::Tensor weights;
|
||||
std::vector<std::string> features;
|
||||
int root = 0;
|
||||
};
|
||||
class Graph {
|
||||
private:
|
||||
int V; // number of nodes in graph
|
||||
std::vector <std::pair<float, std::pair<int, int>>> G; // std::vector for graph
|
||||
std::vector <std::pair<float, std::pair<int, int>>> T; // std::vector for mst
|
||||
std::vector<int> parent;
|
||||
public:
|
||||
explicit Graph(int V);
|
||||
void addEdge(int u, int v, float wt);
|
||||
int find_set(int i);
|
||||
void union_set(int u, int v);
|
||||
void kruskal_algorithm();
|
||||
void display_mst();
|
||||
std::vector <std::pair<float, std::pair<int, int>>> get_mst() { return T; }
|
||||
private:
|
||||
int V; // number of nodes in graph
|
||||
std::vector <std::pair<float, std::pair<int, int>>> G; // std::vector for graph
|
||||
std::vector <std::pair<float, std::pair<int, int>>> T; // std::vector for mst
|
||||
std::vector<int> parent;
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
|
||||
#include "bayesnetUtils.h"
|
||||
namespace bayesnet {
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef BAYESNET_UTILS_H
|
||||
#define BAYESNET_UTILS_H
|
||||
#include <vector>
|
||||
|
@ -4,13 +4,15 @@
|
||||
|
||||
The hyperparameters defined in the algorithm are:
|
||||
|
||||
- ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *false*.
|
||||
- ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *true*.
|
||||
|
||||
- ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*.
|
||||
|
||||
- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is made by taking the first partition generated by a process of generating a 5 fold partition with stratification using a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *false*.
|
||||
- ***block_update*** (*boolean*): Sets whether the algorithm will update the weights of the models in blocks. If set to false, the algorithm will update the weights of the models one by one. Default value: *false*.
|
||||
|
||||
- ***maxTolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *1*. if ***bisection*** is set to *true*, the value of this hyperparameter will be exponent of base 2 to compute the number of models to insert at once.
|
||||
- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is made by taking the first partition generated by a process of generating a 5 fold partition with stratification using a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *true*.
|
||||
|
||||
- ***maxTolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. if ***bisection*** is set to *true*, the value of this hyperparameter will be exponent of base 2 to compute the number of models to insert at once. Default value: *3*
|
||||
|
||||
- ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions. Once the models of the selected variables are built, the algorithm will update the weights using the ensemble and set the significance of all the models built with the same α<sub>t</sub>. Default value: *""*.
|
||||
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "ArffFiles.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CCMETRICS_H
|
||||
#define CCMETRICS_H
|
||||
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <ArffFiles.h>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <bayesnet/ensembles/BoostAODE.h>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include <string>
|
||||
@ -54,6 +60,13 @@ TEST_CASE("Invalid feature name", "[Classifier]")
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), "feature [petallength] not found in states");
|
||||
}
|
||||
TEST_CASE("Invalid hyperparameter", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::KDB(2);
|
||||
auto raw = RawDatasets("iris", true);
|
||||
REQUIRE_THROWS_AS(model.setHyperparameters({ { "alpha", "0.0" } }), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.setHyperparameters({ { "alpha", "0.0" } }), "Invalid hyperparameters{\"alpha\":\"0.0\"}");
|
||||
}
|
||||
TEST_CASE("Topological order", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
@ -66,6 +79,14 @@ TEST_CASE("Topological order", "[Classifier]")
|
||||
REQUIRE(order[2] == "sepalwidth");
|
||||
REQUIRE(order[3] == "petalwidth");
|
||||
}
|
||||
TEST_CASE("Dump_cpt", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
auto cpt = model.dump_cpt();
|
||||
REQUIRE(cpt.size() == 1713);
|
||||
}
|
||||
TEST_CASE("Not fitted model", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
|
@ -1,8 +1,16 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
#include "bayesnet/ensembles/AODE.h"
|
||||
#include "bayesnet/ensembles/AODELd.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
|
||||
@ -33,6 +41,11 @@ TEST_CASE("Show", "[Ensemble]")
|
||||
{
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", false},
|
||||
{"maxTolerance", 1},
|
||||
{"convergence", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
std::vector<std::string> expected = {
|
||||
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
|
||||
@ -68,6 +81,15 @@ TEST_CASE("Graph", "[Ensemble]")
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto graph = clf.graph();
|
||||
REQUIRE(graph.size() == 56);
|
||||
auto clf2 = bayesnet::AODE();
|
||||
clf2.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
graph = clf2.graph();
|
||||
REQUIRE(graph.size() == 56);
|
||||
raw = RawDatasets("glass", false);
|
||||
auto clf3 = bayesnet::AODELd();
|
||||
clf3.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
graph = clf3.graph();
|
||||
REQUIRE(graph.size() == 261);
|
||||
}
|
||||
TEST_CASE("Compute ArgMax", "[Ensemble]")
|
||||
{
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
@ -14,7 +20,7 @@
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
const std::string ACTUAL_VERSION = "1.0.4";
|
||||
const std::string ACTUAL_VERSION = "1.0.4.1";
|
||||
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
{
|
||||
@ -52,6 +58,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
auto score = clf->score(raw.Xt, raw.yt);
|
||||
INFO("Classifier: " + name + " File: " + file_name);
|
||||
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
|
||||
REQUIRE(clf->getStatus() == bayesnet::NORMAL);
|
||||
}
|
||||
}
|
||||
SECTION("Library check version")
|
||||
@ -61,7 +68,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
}
|
||||
delete clf;
|
||||
}
|
||||
TEST_CASE("Models features", "[Models]")
|
||||
TEST_CASE("Models features & Graph", "[Models]")
|
||||
{
|
||||
auto graph = std::vector<std::string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
|
||||
"class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
|
||||
@ -70,15 +77,30 @@ TEST_CASE("Models features", "[Models]")
|
||||
"sepallength -> sepalwidth", "sepalwidth [shape=circle] \n", "sepalwidth -> petalwidth", "}\n"
|
||||
}
|
||||
);
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::TAN();
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||
REQUIRE(clf.getNumberOfStates() == 19);
|
||||
REQUIRE(clf.getClassNumStates() == 3);
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.graph("Test") == graph);
|
||||
SECTION("Test TAN")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::TAN();
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||
REQUIRE(clf.getNumberOfStates() == 19);
|
||||
REQUIRE(clf.getClassNumStates() == 3);
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.graph("Test") == graph);
|
||||
}
|
||||
SECTION("Test TANLd")
|
||||
{
|
||||
auto clf = bayesnet::TANLd();
|
||||
auto raw = RawDatasets("iris", false);
|
||||
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||
REQUIRE(clf.getNumberOfStates() == 19);
|
||||
REQUIRE(clf.getClassNumStates() == 3);
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.graph("Test") == graph);
|
||||
}
|
||||
}
|
||||
TEST_CASE("Get num features & num edges", "[Models]")
|
||||
{
|
||||
@ -115,15 +137,15 @@ TEST_CASE("Model predict_proba", "[Models]")
|
||||
{0.003135, 0.991799, 0.0050661}
|
||||
});
|
||||
auto res_prob_baode = std::vector<std::vector<double>>({
|
||||
{0.00803291, 0.9676, 0.0243672},
|
||||
{0.00398714, 0.945126, 0.050887},
|
||||
{0.00398714, 0.945126, 0.050887},
|
||||
{0.00398714, 0.945126, 0.050887},
|
||||
{0.00189227, 0.859575, 0.138533},
|
||||
{0.0118341, 0.442149, 0.546017},
|
||||
{0.0216135, 0.785781, 0.192605},
|
||||
{0.0204803, 0.844276, 0.135244},
|
||||
{0.00576313, 0.961665, 0.0325716},
|
||||
{0.0112349, 0.962274, 0.0264907},
|
||||
{0.00371025, 0.950592, 0.0456973},
|
||||
{0.00371025, 0.950592, 0.0456973},
|
||||
{0.00371025, 0.950592, 0.0456973},
|
||||
{0.00369275, 0.84967, 0.146637},
|
||||
{0.0252205, 0.113564, 0.861215},
|
||||
{0.0284828, 0.770524, 0.200993},
|
||||
{0.0213182, 0.857189, 0.121493},
|
||||
{0.00868436, 0.949494, 0.0418215}
|
||||
});
|
||||
auto res_prob_voting = std::vector<std::vector<double>>({
|
||||
{0, 1, 0},
|
||||
@ -131,8 +153,8 @@ TEST_CASE("Model predict_proba", "[Models]")
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 0.447909, 0.552091},
|
||||
{0, 0.811482, 0.188517},
|
||||
{0, 0, 1},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0}
|
||||
});
|
||||
@ -155,7 +177,7 @@ TEST_CASE("Model predict_proba", "[Models]")
|
||||
REQUIRE(y_pred.size() == raw.yv.size());
|
||||
REQUIRE(y_pred_proba[0].size() == 3);
|
||||
REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size());
|
||||
for (int i = 0; i < y_pred_proba.size(); ++i) {
|
||||
for (int i = 0; i < 9; ++i) {
|
||||
auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end());
|
||||
int predictedClass = distance(y_pred_proba[i].begin(), maxElem);
|
||||
REQUIRE(predictedClass == y_pred[i]);
|
||||
@ -166,7 +188,7 @@ TEST_CASE("Model predict_proba", "[Models]")
|
||||
}
|
||||
}
|
||||
// Check predict_proba values for vectors and tensors
|
||||
for (int i = 0; i < res_prob.size(); i++) {
|
||||
for (int i = 0; i < 9; i++) {
|
||||
REQUIRE(y_pred[i] == yt_pred[i].item<int>());
|
||||
for (int j = 0; j < 3; j++) {
|
||||
REQUIRE(res_prob[model][i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon));
|
||||
@ -222,6 +244,12 @@ TEST_CASE("KDB with hyperparameters", "[Models]")
|
||||
REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Incorrect type of data for SPODELd", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::SPODELd(0);
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest), std::runtime_error);
|
||||
}
|
||||
TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
|
||||
{
|
||||
auto clf = bayesnet::AODE();
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
@ -27,7 +33,7 @@ TEST_CASE("Feature_select IWSS", "[BoostAODE]")
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 153);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with IWSS");
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
}
|
||||
TEST_CASE("Feature_select FCBF", "[BoostAODE]")
|
||||
@ -76,8 +82,8 @@ TEST_CASE("Voting vs proba", "[BoostAODE]")
|
||||
auto pred_voting = clf.predict_proba(raw.Xv);
|
||||
REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
|
||||
REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_voting[83][2] == Catch::Approx(1.0).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_proba[83][2] == Catch::Approx(0.86121525).epsilon(raw.epsilon));
|
||||
REQUIRE(clf.dump_cpt() == "");
|
||||
REQUIRE(clf.topological_order() == std::vector<std::string>());
|
||||
}
|
||||
@ -91,6 +97,9 @@ TEST_CASE("Order asc, desc & random", "[BoostAODE]")
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({
|
||||
{"order", order},
|
||||
{"bisection", false},
|
||||
{"maxTolerance", 1},
|
||||
{"convergence", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
@ -136,6 +145,30 @@ TEST_CASE("Bisection", "[BoostAODE]")
|
||||
{"bisection", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
{"block_update", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
REQUIRE(clf.getNumberOfNodes() == 217);
|
||||
REQUIRE(clf.getNumberOfEdges() == 431);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[1] == "Used features in train: 16 of 216");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 1");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
}
|
||||
|
||||
TEST_CASE("Block Update", "[BoostAODE]")
|
||||
{
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("mfeat-factors", true);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"block_update", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
REQUIRE(clf.getNumberOfNodes() == 217);
|
||||
|
@ -1,6 +1,13 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include "bayesnet/utils/BayesMetrics.h"
|
||||
#include "bayesnet/feature_selection/CFS.h"
|
||||
#include "bayesnet/feature_selection/FCBF.h"
|
||||
@ -68,4 +75,15 @@ TEST_CASE("Features Selected", "[FeatureSelection]")
|
||||
delete featureSelector;
|
||||
}
|
||||
}
|
||||
}
|
||||
TEST_CASE("Oddities", "[FeatureSelection]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
// FCBF Limits
|
||||
REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7");
|
||||
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]");
|
||||
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]");
|
||||
}
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/config.h"
|
||||
|
||||
|
@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef TEST_UTILS_H
|
||||
#define TEST_UTILS_H
|
||||
#include <torch/torch.h>
|
||||
|
@ -1,5 +1,10 @@
|
||||
# ***************************************************************
|
||||
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
# SPDX-FileType: SOURCE
|
||||
# SPDX-License-Identifier: MIT
|
||||
# ***************************************************************
|
||||
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
|
||||
readme_file = "README.md"
|
||||
|
Loading…
Reference in New Issue
Block a user