bisection proposal #24
38
.vscode/c_cpp_properties.json
vendored
38
.vscode/c_cpp_properties.json
vendored
@ -3,15 +3,47 @@
|
||||
{
|
||||
"name": "Mac",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**"
|
||||
"/Users/rmontanana/Code/BayesNet/**"
|
||||
],
|
||||
"defines": [],
|
||||
"macFrameworkPath": [
|
||||
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks"
|
||||
"/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include"
|
||||
],
|
||||
"cStandard": "c17",
|
||||
"cppStandard": "c++17",
|
||||
"compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json"
|
||||
"compileCommands": "",
|
||||
"intelliSenseMode": "macos-clang-arm64",
|
||||
"mergeConfigurations": false,
|
||||
"browse": {
|
||||
"path": [
|
||||
"/Users/rmontanana/Code/BayesNet/**",
|
||||
"${workspaceFolder}"
|
||||
],
|
||||
"limitSymbolsToIncludedHeaders": true
|
||||
},
|
||||
"configurationProvider": "ms-vscode.cmake-tools"
|
||||
},
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"/home/rmontanana/Code/BayesNet/**",
|
||||
"/home/rmontanana/Code/libtorch/include/torch/csrc/api/include/",
|
||||
"/home/rmontanana/Code/BayesNet/lib/"
|
||||
],
|
||||
"defines": [],
|
||||
"cStandard": "c17",
|
||||
"cppStandard": "c++17",
|
||||
"intelliSenseMode": "linux-gcc-x64",
|
||||
"mergeConfigurations": false,
|
||||
"compilerPath": "/usr/bin/g++",
|
||||
"browse": {
|
||||
"path": [
|
||||
"/home/rmontanana/Code/BayesNet/**",
|
||||
"${workspaceFolder}"
|
||||
],
|
||||
"limitSymbolsToIncludedHeaders": true
|
||||
},
|
||||
"configurationProvider": "ms-vscode.cmake-tools"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
|
3
.vscode/launch.json
vendored
3
.vscode/launch.json
vendored
@ -14,8 +14,9 @@
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "test",
|
||||
"program": "${workspaceFolder}/build_debug/tests/unit_tests_bayesnet",
|
||||
"program": "${workspaceFolder}/build_debug/tests/TestBayesNet",
|
||||
"args": [
|
||||
"[Network]"
|
||||
//"-c=\"Metrics Test\"",
|
||||
// "-s",
|
||||
],
|
||||
|
@ -25,6 +25,8 @@ set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -O0 -g")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||
# Options
|
||||
# -------
|
||||
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
|
||||
@ -48,7 +50,6 @@ if (CODE_COVERAGE)
|
||||
enable_testing()
|
||||
include(CodeCoverage)
|
||||
MESSAGE("Code coverage enabled")
|
||||
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
|
||||
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
endif (CODE_COVERAGE)
|
||||
|
||||
|
28
Makefile
28
Makefile
@ -1,11 +1,11 @@
|
||||
SHELL := /bin/bash
|
||||
.DEFAULT_GOAL := help
|
||||
.PHONY: coverage setup help buildr buildd test clean debug release sample
|
||||
.PHONY: viewcoverage coverage setup help install uninstall buildr buildd test clean debug release sample updatebadge
|
||||
|
||||
f_release = build_release
|
||||
f_debug = build_debug
|
||||
app_targets = BayesNet
|
||||
test_targets = unit_tests_bayesnet
|
||||
test_targets = TestBayesNet
|
||||
n_procs = -j 16
|
||||
|
||||
define ClearTests
|
||||
@ -29,6 +29,7 @@ setup: ## Install dependencies for tests and coverage
|
||||
fi
|
||||
@if [ "$(shell uname)" = "Linux" ]; then \
|
||||
pip install gcovr; \
|
||||
sudo dnf install lcov;\
|
||||
fi
|
||||
|
||||
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
|
||||
@ -85,9 +86,11 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
|
||||
@$(MAKE) clean
|
||||
@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
|
||||
@for t in $(test_targets); do \
|
||||
echo ">>> Running $$t...";\
|
||||
if [ -f $(f_debug)/tests/$$t ]; then \
|
||||
cd $(f_debug)/tests ; \
|
||||
./$$t $(opt) ; \
|
||||
cd ../.. ; \
|
||||
fi ; \
|
||||
done
|
||||
@echo ">>> Done";
|
||||
@ -98,6 +101,27 @@ coverage: ## Run tests and generate coverage report (build/index.html)
|
||||
@gcovr $(f_debug)/tests
|
||||
@echo ">>> Done";
|
||||
|
||||
viewcoverage: ## Run tests, generate the HTML coverage report, update the README badge and open the report (build_debug/tests/coverage/index.html)
|
||||
@echo ">>> Building tests with coverage..."
|
||||
@folder=`pwd` ;
|
||||
@$(MAKE) coverage
|
||||
@echo ">>> Building report..."
|
||||
@cd $(f_debug)/tests; \
|
||||
lcov --directory . --capture --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'bayesnet/utils/loguru.*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
genhtml coverage.info --output-directory $(f_debug)/tests/coverage >/dev/null 2>&1;
|
||||
@$(MAKE) updatebadge
|
||||
@xdg-open $(f_debug)/tests/coverage/index.html || open $(f_debug)/tests/coverage/index.html 2>/dev/null
|
||||
@echo ">>> Done";
|
||||
|
||||
updatebadge: ## Update the coverage badge in README.md
|
||||
@echo ">>> Updating coverage badge..."
|
||||
@env python update_coverage.py $(f_debug)/tests
|
||||
@echo ">>> Done";
|
||||
|
||||
help: ## Show help message
|
||||
@IFS=$$'\n' ; \
|
||||
|
@ -3,7 +3,9 @@
|
||||
![C++](https://img.shields.io/badge/c++-%2300599C.svg?style=flat&logo=c%2B%2B&logoColor=white)
|
||||
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](<https://opensource.org/licenses/MIT>)
|
||||
![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000)
|
||||
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
||||
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
|
||||
![Static Badge](https://img.shields.io/badge/Coverage-92,4%25-green)
|
||||
|
||||
Bayesian Network Classifiers using libtorch from scratch
|
||||
|
||||
|
@ -30,7 +30,7 @@ namespace bayesnet {
|
||||
virtual std::string getVersion() = 0;
|
||||
std::vector<std::string> virtual topological_order() = 0;
|
||||
std::vector<std::string> virtual getNotes() const = 0;
|
||||
void virtual dump_cpt()const = 0;
|
||||
std::string virtual dump_cpt()const = 0;
|
||||
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
|
||||
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
|
||||
protected:
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <sstream>
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "Classifier.h"
|
||||
|
||||
@ -10,7 +11,7 @@ namespace bayesnet {
|
||||
this->className = className;
|
||||
this->states = states;
|
||||
m = dataset.size(1);
|
||||
n = dataset.size(0) - 1;
|
||||
n = features.size();
|
||||
checkFitParameters();
|
||||
auto n_classes = states.at(className).size();
|
||||
metrics = Metrics(dataset, features, className, n_classes);
|
||||
@ -27,10 +28,11 @@ namespace bayesnet {
|
||||
dataset = torch::cat({ dataset, yresized }, 0);
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
std::cout << "X dimensions: " << dataset.sizes() << "\n";
|
||||
std::cout << "y dimensions: " << ytmp.sizes() << "\n";
|
||||
exit(1);
|
||||
std::stringstream oss;
|
||||
oss << "* Error in X and y dimensions *\n";
|
||||
oss << "X dimensions: " << dataset.sizes() << "\n";
|
||||
oss << "y dimensions: " << ytmp.sizes();
|
||||
throw std::runtime_error(oss.str());
|
||||
}
|
||||
}
|
||||
void Classifier::trainModel(const torch::Tensor& weights)
|
||||
@ -73,11 +75,11 @@ namespace bayesnet {
|
||||
if (torch::is_floating_point(dataset)) {
|
||||
throw std::invalid_argument("dataset (X, y) must be of type Integer");
|
||||
}
|
||||
if (n != features.size()) {
|
||||
throw std::invalid_argument("Classifier: X " + std::to_string(n) + " and features " + std::to_string(features.size()) + " must have the same number of features");
|
||||
if (dataset.size(0) - 1 != features.size()) {
|
||||
throw std::invalid_argument("Classifier: X " + std::to_string(dataset.size(0) - 1) + " and features " + std::to_string(features.size()) + " must have the same number of features");
|
||||
}
|
||||
if (states.find(className) == states.end()) {
|
||||
throw std::invalid_argument("className not found in states");
|
||||
throw std::invalid_argument("class name not found in states");
|
||||
}
|
||||
for (auto feature : features) {
|
||||
if (states.find(feature) == states.end()) {
|
||||
@ -173,12 +175,14 @@ namespace bayesnet {
|
||||
{
|
||||
return model.topological_sort();
|
||||
}
|
||||
void Classifier::dump_cpt() const
|
||||
std::string Classifier::dump_cpt() const
|
||||
{
|
||||
model.dump_cpt();
|
||||
return model.dump_cpt();
|
||||
}
|
||||
void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
|
||||
{
|
||||
//For classifiers that don't have hyperparameters
|
||||
if (!hyperparameters.empty()) {
|
||||
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
|
||||
}
|
||||
}
|
||||
}
|
@ -30,7 +30,7 @@ namespace bayesnet {
|
||||
std::vector<std::string> show() const override;
|
||||
std::vector<std::string> topological_order() override;
|
||||
std::vector<std::string> getNotes() const override { return notes; }
|
||||
void dump_cpt() const override;
|
||||
std::string dump_cpt() const override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
|
||||
protected:
|
||||
bool fitted;
|
||||
|
@ -6,14 +6,18 @@ namespace bayesnet {
|
||||
validHyperparameters = { "k", "theta" };
|
||||
|
||||
}
|
||||
void KDB::setHyperparameters(const nlohmann::json& hyperparameters)
|
||||
void KDB::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("k")) {
|
||||
k = hyperparameters["k"];
|
||||
hyperparameters.erase("k");
|
||||
}
|
||||
if (hyperparameters.contains("theta")) {
|
||||
theta = hyperparameters["theta"];
|
||||
hyperparameters.erase("theta");
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
void KDB::buildModel(const torch::Tensor& weights)
|
||||
{
|
||||
|
@ -14,7 +14,7 @@ namespace bayesnet {
|
||||
public:
|
||||
explicit KDB(int k, float theta = 0.03);
|
||||
virtual ~KDB() = default;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
||||
std::vector<std::string> graph(const std::string& name = "KDB") const override;
|
||||
};
|
||||
}
|
||||
|
@ -5,25 +5,23 @@ namespace bayesnet {
|
||||
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
{
|
||||
checkInput(X_, y_);
|
||||
features = features_;
|
||||
className = className_;
|
||||
Xf = X_;
|
||||
y = y_;
|
||||
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
|
||||
states = fit_local_discretization(y);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
|
||||
SPODE::fit(dataset, features, className, states);
|
||||
states = localDiscretizationProposal(states, model);
|
||||
return *this;
|
||||
return commonFit(features_, className_, states_);
|
||||
}
|
||||
|
||||
SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
{
|
||||
if (!torch::is_floating_point(dataset)) {
|
||||
throw std::runtime_error("Dataset must be a floating point tensor");
|
||||
}
|
||||
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
|
||||
y = dataset.index({ -1, "..." }).clone();
|
||||
y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
|
||||
return commonFit(features_, className_, states_);
|
||||
}
|
||||
|
||||
SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
{
|
||||
features = features_;
|
||||
className = className_;
|
||||
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
|
||||
@ -34,7 +32,6 @@ namespace bayesnet {
|
||||
states = localDiscretizationProposal(states, model);
|
||||
return *this;
|
||||
}
|
||||
|
||||
torch::Tensor SPODELd::predict(torch::Tensor& X)
|
||||
{
|
||||
auto Xt = prepareX(X);
|
||||
|
@ -10,6 +10,7 @@ namespace bayesnet {
|
||||
virtual ~SPODELd() = default;
|
||||
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states);
|
||||
std::vector<std::string> graph(const std::string& name = "SPODE") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
static inline std::string version() { return "0.0.1"; };
|
||||
|
@ -13,9 +13,7 @@ namespace bayesnet {
|
||||
predict_voting = hyperparameters["predict_voting"];
|
||||
hyperparameters.erase("predict_voting");
|
||||
}
|
||||
if (!hyperparameters.empty()) {
|
||||
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
void AODE::buildModel(const torch::Tensor& weights)
|
||||
{
|
||||
|
@ -3,19 +3,6 @@
|
||||
namespace bayesnet {
|
||||
AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
|
||||
{
|
||||
validHyperparameters = { "predict_voting" };
|
||||
|
||||
}
|
||||
void AODELd::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("predict_voting")) {
|
||||
predict_voting = hyperparameters["predict_voting"];
|
||||
hyperparameters.erase("predict_voting");
|
||||
}
|
||||
if (!hyperparameters.empty()) {
|
||||
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
|
||||
}
|
||||
}
|
||||
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
{
|
||||
|
@ -10,7 +10,6 @@ namespace bayesnet {
|
||||
AODELd(bool predict_voting = true);
|
||||
virtual ~AODELd() = default;
|
||||
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
|
||||
protected:
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
|
@ -8,22 +8,15 @@
|
||||
#include "bayesnet/feature_selection/IWSS.h"
|
||||
#include "BoostAODE.h"
|
||||
|
||||
#include "bayesnet/utils/loguru.cpp"
|
||||
|
||||
namespace bayesnet {
|
||||
struct {
|
||||
std::string CFS = "CFS";
|
||||
std::string FCBF = "FCBF";
|
||||
std::string IWSS = "IWSS";
|
||||
}SelectFeatures;
|
||||
struct {
|
||||
std::string ASC = "asc";
|
||||
std::string DESC = "desc";
|
||||
std::string RAND = "rand";
|
||||
}Orders;
|
||||
|
||||
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
|
||||
{
|
||||
validHyperparameters = {
|
||||
"maxModels", "order", "convergence", "threshold",
|
||||
"select_features", "tolerance", "predict_voting", "predict_single"
|
||||
"maxModels", "bisection", "order", "convergence", "threshold",
|
||||
"select_features", "maxTolerance", "predict_voting"
|
||||
};
|
||||
|
||||
}
|
||||
@ -38,8 +31,6 @@ namespace bayesnet {
|
||||
if (convergence) {
|
||||
// Prepare train & validation sets from train data
|
||||
auto fold = folding::StratifiedKFold(5, y_, 271);
|
||||
dataset_ = torch::clone(dataset);
|
||||
// save input dataset
|
||||
auto [train, test] = fold.getFold(0);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
@ -51,9 +42,9 @@ namespace bayesnet {
|
||||
dataset = X_train;
|
||||
m = X_train.size(1);
|
||||
auto n_classes = states.at(className).size();
|
||||
metrics = Metrics(dataset, features, className, n_classes);
|
||||
// Build dataset with train data
|
||||
buildDataset(y_train);
|
||||
metrics = Metrics(dataset, features, className, n_classes);
|
||||
} else {
|
||||
// Use all data to train
|
||||
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
|
||||
@ -63,10 +54,6 @@ namespace bayesnet {
|
||||
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("maxModels")) {
|
||||
maxModels = hyperparameters["maxModels"];
|
||||
hyperparameters.erase("maxModels");
|
||||
}
|
||||
if (hyperparameters.contains("order")) {
|
||||
std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
|
||||
order_algorithm = hyperparameters["order"];
|
||||
@ -79,17 +66,19 @@ namespace bayesnet {
|
||||
convergence = hyperparameters["convergence"];
|
||||
hyperparameters.erase("convergence");
|
||||
}
|
||||
if (hyperparameters.contains("predict_single")) {
|
||||
predict_single = hyperparameters["predict_single"];
|
||||
hyperparameters.erase("predict_single");
|
||||
if (hyperparameters.contains("bisection")) {
|
||||
bisection = hyperparameters["bisection"];
|
||||
hyperparameters.erase("bisection");
|
||||
}
|
||||
if (hyperparameters.contains("threshold")) {
|
||||
threshold = hyperparameters["threshold"];
|
||||
hyperparameters.erase("threshold");
|
||||
}
|
||||
if (hyperparameters.contains("tolerance")) {
|
||||
tolerance = hyperparameters["tolerance"];
|
||||
hyperparameters.erase("tolerance");
|
||||
if (hyperparameters.contains("maxTolerance")) {
|
||||
maxTolerance = hyperparameters["maxTolerance"];
|
||||
if (maxTolerance < 1 || maxTolerance > 4)
|
||||
throw std::invalid_argument("Invalid maxTolerance value, must be greater in [1, 4]");
|
||||
hyperparameters.erase("maxTolerance");
|
||||
}
|
||||
if (hyperparameters.contains("predict_voting")) {
|
||||
predict_voting = hyperparameters["predict_voting"];
|
||||
@ -105,9 +94,7 @@ namespace bayesnet {
|
||||
}
|
||||
hyperparameters.erase("select_features");
|
||||
}
|
||||
if (!hyperparameters.empty()) {
|
||||
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
|
||||
{
|
||||
@ -136,9 +123,9 @@ namespace bayesnet {
|
||||
}
|
||||
return { weights, alpha_t, terminate };
|
||||
}
|
||||
std::unordered_set<int> BoostAODE::initializeModels()
|
||||
std::vector<int> BoostAODE::initializeModels()
|
||||
{
|
||||
std::unordered_set<int> featuresUsed;
|
||||
std::vector<int> featuresUsed;
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
int maxFeatures = 0;
|
||||
if (select_features_algorithm == SelectFeatures.CFS) {
|
||||
@ -156,8 +143,12 @@ namespace bayesnet {
|
||||
}
|
||||
featureSelector->fit();
|
||||
auto cfsFeatures = featureSelector->getFeatures();
|
||||
auto scores = featureSelector->getScores();
|
||||
for (int i = 0; i < cfsFeatures.size(); ++i) {
|
||||
LOG_F(INFO, "Feature: %d Score: %f", cfsFeatures[i], scores[i]);
|
||||
}
|
||||
for (const int& feature : cfsFeatures) {
|
||||
featuresUsed.insert(feature);
|
||||
featuresUsed.push_back(feature);
|
||||
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
models.push_back(std::move(model));
|
||||
@ -168,123 +159,131 @@ namespace bayesnet {
|
||||
delete featureSelector;
|
||||
return featuresUsed;
|
||||
}
|
||||
torch::Tensor BoostAODE::ensemble_predict(torch::Tensor& X, SPODE* model)
|
||||
{
|
||||
if (initialize_prob_table) {
|
||||
initialize_prob_table = false;
|
||||
prob_table = model->predict_proba(X) * 1.0;
|
||||
} else {
|
||||
prob_table += model->predict_proba(X) * 1.0;
|
||||
}
|
||||
// prob_table doesn't store probabilities but the sum of them
|
||||
// to have them we need to divide by the sum of the "weights" used to
|
||||
// consider the results obtained in the model's predict_proba.
|
||||
return prob_table.argmax(1);
|
||||
}
|
||||
void BoostAODE::trainModel(const torch::Tensor& weights)
|
||||
{
|
||||
//
|
||||
// Logging setup
|
||||
//
|
||||
loguru::set_thread_name("BoostAODE");
|
||||
loguru::g_stderr_verbosity = loguru::Verbosity_OFF;;
|
||||
loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX);
|
||||
// Algorithm based on the adaboost algorithm for classification
|
||||
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
|
||||
initialize_prob_table = true;
|
||||
fitted = true;
|
||||
double alpha_t = 0;
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
bool exitCondition = false;
|
||||
std::unordered_set<int> featuresUsed;
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels();
|
||||
auto ypred = predict(X_train);
|
||||
std::tie(weights_, alpha_t, exitCondition) = update_weights(y_train, ypred, weights_);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
// Update significance of the models
|
||||
for (int i = 0; i < n_models; ++i) {
|
||||
significanceModels[i] = alpha_t;
|
||||
}
|
||||
if (exitCondition) {
|
||||
if (finished) {
|
||||
return;
|
||||
}
|
||||
LOG_F(INFO, "Initial models: %d", n_models);
|
||||
LOG_F(INFO, "Significances: ");
|
||||
for (int i = 0; i < n_models; ++i) {
|
||||
LOG_F(INFO, "i=%d significance=%f", i, significanceModels[i]);
|
||||
}
|
||||
}
|
||||
bool resetMaxModels = false;
|
||||
if (maxModels == 0) {
|
||||
maxModels = .1 * n > 10 ? .1 * n : n;
|
||||
resetMaxModels = true; // Flag to unset maxModels
|
||||
}
|
||||
int numItemsPack = 0; // The counter of the models inserted in the current pack
|
||||
// Variables to control the accuracy finish condition
|
||||
double priorAccuracy = 0.0;
|
||||
double delta = 1.0;
|
||||
double improvement = 1.0;
|
||||
double convergence_threshold = 1e-4;
|
||||
int worse_model_count = 0; // number of times the accuracy is lower than the convergence_threshold
|
||||
int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
|
||||
// Step 0: Set the finish condition
|
||||
// if not repeatSparent a finish condition is run out of features
|
||||
// n_models == maxModels
|
||||
// epsilon sub t > 0.5 => inverse the weights policy
|
||||
// validation error is not decreasing
|
||||
// run out of features
|
||||
bool ascending = order_algorithm == Orders.ASC;
|
||||
std::mt19937 g{ 173 };
|
||||
while (!exitCondition) {
|
||||
while (!finished) {
|
||||
// Step 1: Build ranking with mutual information
|
||||
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
|
||||
VLOG_SCOPE_F(1, "featureSelection.size: %zu featuresUsed.size: %zu", featureSelection.size(), featuresUsed.size());
|
||||
if (order_algorithm == Orders.RAND) {
|
||||
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
|
||||
}
|
||||
// Remove used features
|
||||
featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x)
|
||||
{ return find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
|
||||
{ return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
|
||||
end(featureSelection)
|
||||
);
|
||||
if (featureSelection.empty()) {
|
||||
break;
|
||||
}
|
||||
auto feature = featureSelection[0];
|
||||
std::unique_ptr<Classifier> model;
|
||||
model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
torch::Tensor ypred;
|
||||
if (predict_single) {
|
||||
int k = pow(2, tolerance);
|
||||
int counter = 0; // The model counter of the current pack
|
||||
VLOG_SCOPE_F(1, "k=%d featureSelection.size: %zu", k, featureSelection.size());
|
||||
while (counter++ < k && featureSelection.size() > 0) {
|
||||
VLOG_SCOPE_F(2, "counter: %d numItemsPack: %d", counter, numItemsPack);
|
||||
auto feature = featureSelection[0];
|
||||
featureSelection.erase(featureSelection.begin());
|
||||
std::unique_ptr<Classifier> model;
|
||||
model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
torch::Tensor ypred;
|
||||
ypred = model->predict(X_train);
|
||||
} else {
|
||||
ypred = ensemble_predict(X_train, dynamic_cast<SPODE*>(model.get()));
|
||||
// Step 3.1: Compute the classifier amount of say
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
if (finished) {
|
||||
VLOG_SCOPE_F(2, "** epsilon_t > 0.5 **");
|
||||
break;
|
||||
}
|
||||
// Step 3.4: Store classifier and its accuracy to weigh its future vote
|
||||
numItemsPack++;
|
||||
featuresUsed.push_back(feature);
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(alpha_t);
|
||||
n_models++;
|
||||
VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
|
||||
}
|
||||
// Step 3.1: Compute the classifier amount of say
|
||||
std::tie(weights_, alpha_t, exitCondition) = update_weights(y_train, ypred, weights_);
|
||||
if (exitCondition) {
|
||||
break;
|
||||
}
|
||||
// Step 3.4: Store classifier and its accuracy to weigh its future vote
|
||||
featuresUsed.insert(feature);
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(alpha_t);
|
||||
n_models++;
|
||||
if (convergence) {
|
||||
if (convergence && !finished) {
|
||||
auto y_val_predict = predict(X_test);
|
||||
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
|
||||
if (priorAccuracy == 0) {
|
||||
priorAccuracy = accuracy;
|
||||
VLOG_SCOPE_F(3, "First accuracy: %f", priorAccuracy);
|
||||
} else {
|
||||
delta = accuracy - priorAccuracy;
|
||||
improvement = accuracy - priorAccuracy;
|
||||
}
|
||||
if (delta < convergence_threshold) {
|
||||
worse_model_count++;
|
||||
if (improvement < convergence_threshold) {
|
||||
VLOG_SCOPE_F(3, "(improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance++;
|
||||
} else {
|
||||
worse_model_count = 0; // Reset the counter if the model performs better
|
||||
VLOG_SCOPE_F(3, "*(improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance = 0; // Reset the counter if the model performs better
|
||||
numItemsPack = 0;
|
||||
}
|
||||
priorAccuracy = accuracy;
|
||||
// Keep the best accuracy until now as the prior accuracy
|
||||
priorAccuracy = std::max(accuracy, priorAccuracy);
|
||||
// priorAccuracy = accuracy;
|
||||
}
|
||||
exitCondition = n_models >= maxModels && repeatSparent || worse_model_count > tolerance;
|
||||
VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
|
||||
finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
|
||||
}
|
||||
if (worse_model_count > tolerance) {
|
||||
notes.push_back("Convergence threshold reached & last model eliminated");
|
||||
significanceModels.pop_back();
|
||||
models.pop_back();
|
||||
n_models--;
|
||||
if (tolerance > maxTolerance) {
|
||||
if (numItemsPack < n_models) {
|
||||
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
|
||||
VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
|
||||
for (int i = 0; i < numItemsPack; ++i) {
|
||||
significanceModels.pop_back();
|
||||
models.pop_back();
|
||||
n_models--;
|
||||
}
|
||||
} else {
|
||||
VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
||||
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
||||
}
|
||||
}
|
||||
if (featuresUsed.size() != features.size()) {
|
||||
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
|
||||
status = WARNING;
|
||||
}
|
||||
notes.push_back("Number of models: " + std::to_string(n_models));
|
||||
if (resetMaxModels) {
|
||||
maxModels = 0;
|
||||
}
|
||||
}
|
||||
std::vector<std::string> BoostAODE::graph(const std::string& title) const
|
||||
{
|
||||
|
@ -5,31 +5,35 @@
|
||||
#include "bayesnet/feature_selection/FeatureSelect.h"
|
||||
#include "Ensemble.h"
|
||||
namespace bayesnet {
|
||||
struct {
|
||||
std::string CFS = "CFS";
|
||||
std::string FCBF = "FCBF";
|
||||
std::string IWSS = "IWSS";
|
||||
}SelectFeatures;
|
||||
struct {
|
||||
std::string ASC = "asc";
|
||||
std::string DESC = "desc";
|
||||
std::string RAND = "rand";
|
||||
}Orders;
|
||||
class BoostAODE : public Ensemble {
|
||||
public:
|
||||
BoostAODE(bool predict_voting = false);
|
||||
virtual ~BoostAODE() = default;
|
||||
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
||||
protected:
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
private:
|
||||
std::unordered_set<int> initializeModels();
|
||||
torch::Tensor ensemble_predict(torch::Tensor& X, SPODE* model);
|
||||
torch::Tensor dataset_;
|
||||
std::vector<int> initializeModels();
|
||||
torch::Tensor X_train, y_train, X_test, y_test;
|
||||
// Hyperparameters
|
||||
bool repeatSparent = false; // if true, a feature can be selected more than once
|
||||
int maxModels = 0;
|
||||
int tolerance = 0;
|
||||
bool predict_single = true; // whether the last model is used to predict in training or the whole ensemble
|
||||
bool bisection = false; // if true, use bisection strategy to add k models at once to the ensemble
|
||||
int maxTolerance = 1;
|
||||
std::string order_algorithm; // order to process the KBest features asc, desc, rand
|
||||
bool convergence = false; //if true, stop when the model does not improve
|
||||
bool selectFeatures = false; // if true, use feature selection
|
||||
std::string select_features_algorithm = "desc"; // Selected feature selection algorithm
|
||||
bool initialize_prob_table; // if true, initialize the prob_table with the first model (used in train)
|
||||
torch::Tensor prob_table; // Table of probabilities for ensemble predicting if predict_single is false
|
||||
std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
|
||||
FeatureSelect* featureSelector = nullptr;
|
||||
double threshold = -1;
|
||||
};
|
||||
|
@ -25,8 +25,9 @@ namespace bayesnet {
|
||||
{
|
||||
return std::vector<std::string>();
|
||||
}
|
||||
void dump_cpt() const override
|
||||
std::string dump_cpt() const override
|
||||
{
|
||||
return "";
|
||||
}
|
||||
protected:
|
||||
torch::Tensor predict_average_voting(torch::Tensor& X);
|
||||
|
@ -11,7 +11,7 @@ namespace bayesnet {
|
||||
auto feature = featureOrder[0];
|
||||
selectedFeatures.push_back(feature);
|
||||
selectedScores.push_back(suLabels[feature]);
|
||||
selectedFeatures.erase(selectedFeatures.begin());
|
||||
featureOrder.erase(featureOrder.begin());
|
||||
while (continueCondition) {
|
||||
double merit = std::numeric_limits<double>::lowest();
|
||||
int bestFeature = -1;
|
||||
|
@ -1,27 +1,35 @@
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include "Network.h"
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
namespace bayesnet {
|
||||
Network::Network() : features(std::vector<std::string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
|
||||
Network::Network(float maxT) : features(std::vector<std::string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
|
||||
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.
|
||||
getmaxThreads()), fitted(other.fitted)
|
||||
Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
|
||||
{
|
||||
}
|
||||
Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
|
||||
{
|
||||
|
||||
}
|
||||
Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
|
||||
maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples)
|
||||
{
|
||||
if (samples.defined())
|
||||
samples = samples.clone();
|
||||
for (const auto& node : other.nodes) {
|
||||
nodes[node.first] = std::make_unique<Node>(*node.second);
|
||||
}
|
||||
}
|
||||
void Network::initialize()
|
||||
{
|
||||
features = std::vector<std::string>();
|
||||
features.clear();
|
||||
className = "";
|
||||
classNumStates = 0;
|
||||
fitted = false;
|
||||
nodes.clear();
|
||||
samples = torch::Tensor();
|
||||
}
|
||||
float Network::getmaxThreads()
|
||||
float Network::getMaxThreads() const
|
||||
{
|
||||
return maxThreads;
|
||||
}
|
||||
@ -114,11 +122,14 @@ namespace bayesnet {
|
||||
if (n_features != featureNames.size()) {
|
||||
throw std::invalid_argument("X and features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(featureNames.size()) + ")");
|
||||
}
|
||||
if (features.size() == 0) {
|
||||
throw std::invalid_argument("The network has not been initialized. You must call addNode() before calling fit()");
|
||||
}
|
||||
if (n_features != features.size() - 1) {
|
||||
throw std::invalid_argument("X and local features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
if (find(features.begin(), features.end(), className) == features.end()) {
|
||||
throw std::invalid_argument("className not found in Network::features");
|
||||
throw std::invalid_argument("Class Name not found in Network::features");
|
||||
}
|
||||
for (auto& feature : featureNames) {
|
||||
if (find(features.begin(), features.end(), feature) == features.end()) {
|
||||
@ -404,11 +415,13 @@ namespace bayesnet {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
void Network::dump_cpt() const
|
||||
std::string Network::dump_cpt() const
|
||||
{
|
||||
std::stringstream oss;
|
||||
for (auto& node : nodes) {
|
||||
std::cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
|
||||
std::cout << node.second->getCPT() << std::endl;
|
||||
oss << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
|
||||
oss << node.second->getCPT() << std::endl;
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
}
|
||||
|
@ -10,10 +10,10 @@ namespace bayesnet {
|
||||
public:
|
||||
Network();
|
||||
explicit Network(float);
|
||||
explicit Network(Network&);
|
||||
explicit Network(const Network&);
|
||||
~Network() = default;
|
||||
torch::Tensor& getSamples();
|
||||
float getmaxThreads();
|
||||
float getMaxThreads() const;
|
||||
void addNode(const std::string&);
|
||||
void addEdge(const std::string&, const std::string&);
|
||||
std::map<std::string, std::unique_ptr<Node>>& getNodes();
|
||||
@ -39,7 +39,7 @@ namespace bayesnet {
|
||||
std::vector<std::string> show() const;
|
||||
std::vector<std::string> graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format
|
||||
void initialize();
|
||||
void dump_cpt() const;
|
||||
std::string dump_cpt() const;
|
||||
inline std::string version() { return { project_version.begin(), project_version.end() }; }
|
||||
private:
|
||||
std::map<std::string, std::unique_ptr<Node>> nodes;
|
||||
@ -49,7 +49,7 @@ namespace bayesnet {
|
||||
std::vector<std::string> features; // Including classname
|
||||
std::string className;
|
||||
double laplaceSmoothing;
|
||||
torch::Tensor samples; // nxm tensor used to fit the model
|
||||
torch::Tensor samples; // n+1xm tensor used to fit the model
|
||||
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
|
||||
std::vector<double> predict_sample(const std::vector<int>&);
|
||||
std::vector<double> predict_sample(const torch::Tensor&);
|
||||
|
@ -9,12 +9,12 @@ namespace bayesnet {
|
||||
, classNumStates(classNumStates)
|
||||
{
|
||||
}
|
||||
//samples is nxm std::vector used to fit the model
|
||||
//samples is n+1xm std::vector used to fit the model
|
||||
Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
|
||||
: features(features)
|
||||
, className(className)
|
||||
, classNumStates(classNumStates)
|
||||
, samples(torch::zeros({ static_cast<int>(vsamples[0].size()), static_cast<int>(vsamples.size() + 1) }, torch::kInt32))
|
||||
, samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
|
||||
{
|
||||
for (int i = 0; i < vsamples.size(); ++i) {
|
||||
samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32));
|
||||
@ -24,7 +24,7 @@ namespace bayesnet {
|
||||
std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
|
||||
{
|
||||
// Return the K Best features
|
||||
auto n = samples.size(0) - 1;
|
||||
auto n = features.size();
|
||||
if (k == 0) {
|
||||
k = n;
|
||||
}
|
||||
|
@ -5,11 +5,16 @@
|
||||
#include <torch/torch.h>
|
||||
namespace bayesnet {
|
||||
class Metrics {
|
||||
private:
|
||||
int classNumStates = 0;
|
||||
std::vector<double> scoresKBest;
|
||||
std::vector<int> featuresKBest; // sorted indices of the features
|
||||
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
|
||||
public:
|
||||
Metrics() = default;
|
||||
Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
|
||||
Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
|
||||
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
|
||||
std::vector<double> getScoresKBest() const;
|
||||
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
|
||||
std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
|
||||
torch::Tensor conditionalEdge(const torch::Tensor& weights);
|
||||
std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
|
||||
protected:
|
||||
torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
|
||||
std::string className;
|
||||
@ -34,16 +39,11 @@ namespace bayesnet {
|
||||
v.erase(v.begin());
|
||||
return temp;
|
||||
}
|
||||
public:
|
||||
Metrics() = default;
|
||||
Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
|
||||
Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
|
||||
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
|
||||
std::vector<double> getScoresKBest() const;
|
||||
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
|
||||
std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
|
||||
torch::Tensor conditionalEdge(const torch::Tensor& weights);
|
||||
std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
|
||||
private:
|
||||
int classNumStates = 0;
|
||||
std::vector<double> scoresKBest;
|
||||
std::vector<int> featuresKBest; // sorted indices of the features
|
||||
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
|
||||
};
|
||||
}
|
||||
#endif
|
@ -10,18 +10,6 @@ namespace bayesnet {
|
||||
sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
|
||||
return indices;
|
||||
}
|
||||
std::vector<std::vector<int>> tensorToVector(torch::Tensor& dtensor)
|
||||
{
|
||||
// convert mxn tensor to nxm std::vector
|
||||
std::vector<std::vector<int>> result;
|
||||
// Iterate over cols
|
||||
for (int i = 0; i < dtensor.size(1); ++i) {
|
||||
auto col_tensor = dtensor.index({ "...", i });
|
||||
auto col = std::vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + dtensor.size(0));
|
||||
result.push_back(col);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor)
|
||||
{
|
||||
// convert mxn tensor to mxn std::vector
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <torch/torch.h>
|
||||
namespace bayesnet {
|
||||
std::vector<int> argsort(std::vector<double>& nums);
|
||||
std::vector<std::vector<int>> tensorToVector(torch::Tensor& dtensor);
|
||||
std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor);
|
||||
torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector, bool transpose = true);
|
||||
}
|
||||
|
2037
bayesnet/utils/loguru.cpp
Normal file
2037
bayesnet/utils/loguru.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1482
bayesnet/utils/loguru.hpp
Normal file
1482
bayesnet/utils/loguru.hpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,20 +1,18 @@
|
||||
# BoostAODE Algorithm Operation
|
||||
|
||||
The algorithm is based on the AdaBoost algorithm with some new proposals that can be activated using the following hyperparameters.
|
||||
## Algorithm
|
||||
|
||||
## Hyperparameters
|
||||
|
||||
The hyperparameters defined in the algorithm are:
|
||||
|
||||
- ***repeatSparent*** (*boolean*): Allows dataset variables to be repeated as parents of an *SPODE*. Default value: *false*.
|
||||
|
||||
- ***maxModels*** (*int*): Maximum number of models (*SPODEs*) to build. This hyperparameter is only taken into account if ***repeatSparent*** is set to *true*. Default value: *0*.
|
||||
- ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *false*.
|
||||
|
||||
- ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*.
|
||||
|
||||
- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is the first fold of a stratified 5-fold split generated with a predetermined seed. A model is considered to improve the result when the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *false*.
|
||||
|
||||
- ***tolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *0*.
|
||||
- ***maxTolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *1*. If ***bisection*** is set to *true*, the value of this hyperparameter will be used as the exponent of base 2 to compute the number of models to insert at once.
|
||||
|
||||
- ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions. Once the models of the selected variables are built, the algorithm will update the weights using the ensemble and set the significance of all the models built with the same α<sub>t</sub>. Default value: *""*.
|
||||
|
||||
@ -26,8 +24,6 @@ The hyperparameters defined in the algorithm are:
|
||||
|
||||
- ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to false, the weighted average of the probabilities of each model's prediction will be used. Default value: *false*.
|
||||
|
||||
- ***predict_single*** (*boolean*): Sets whether the algorithm will use single-model prediction in the learning process. If set to *false*, all models trained up to that point will be used to calculate the prediction necessary to update the weights in the learning process. Default value: *true*.
|
||||
|
||||
## Operation
|
||||
|
||||
The algorithm performs the following steps:
|
||||
|
118
docs/algorithm.md
Normal file
118
docs/algorithm.md
Normal file
@ -0,0 +1,118 @@
|
||||
# Algorithm
|
||||
|
||||
- // notation
|
||||
|
||||
- $n$ features ${\cal{X}} = \{X_1, \dots, X_n\}$ and the class $Y$
|
||||
|
||||
- $m$ instances.
|
||||
|
||||
- $D = \{ (x_1^i, \dots, x_n^i, y^i) \}_{i=1}^{m}$
|
||||
|
||||
- $W$ a weights vector. $W_0$ are the initial weights.
|
||||
|
||||
- $D[W]$ dataset with weights $W$ for the instances.
|
||||
|
||||
1. // initialization
|
||||
|
||||
2. $W_0 \leftarrow (w_1, \dots, w_m) \leftarrow 1/m$
|
||||
|
||||
3. $W \leftarrow W_0$
|
||||
|
||||
4. $Vars \leftarrow {\cal{X}}$
|
||||
|
||||
5. $\delta \leftarrow 10^{-4}$
|
||||
|
||||
6. $convergence \leftarrow True$ // hyperparameter
|
||||
|
||||
7. $maxTolerance \leftarrow 3$ // hyperparameter
|
||||
|
||||
8. $bisection \leftarrow False$ // hyperparameter
|
||||
|
||||
9. $finished \leftarrow False$
|
||||
|
||||
10. $AODE \leftarrow \emptyset$ // the ensemble
|
||||
|
||||
11. $tolerance \leftarrow 0$
|
||||
|
||||
12. $numItemsPack \leftarrow 0$
|
||||
|
||||
13. $maxAccuracy \leftarrow -1$
|
||||
|
||||
14.
|
||||
|
||||
15. // main loop
|
||||
|
||||
16. While $(\lnot finished)$
|
||||
|
||||
1. $\pi \leftarrow SortFeatures(Vars, criterion, D[W])$
|
||||
|
||||
2. $k \leftarrow 2^{tolerance}$
|
||||
|
||||
3. if ($tolerance == 0$) $numItemsPack \leftarrow0$
|
||||
|
||||
4. $P \leftarrow Head(\pi,k)$ // first k features in order
|
||||
|
||||
5. $spodes \leftarrow \emptyset$
|
||||
|
||||
6. $i \leftarrow 0$
|
||||
|
||||
7. While ($i < size(P)$)
|
||||
|
||||
1. $X \leftarrow P[i]$
|
||||
|
||||
2. $i \leftarrow i + 1$
|
||||
|
||||
3. $numItemsPack \leftarrow numItemsPack + 1$
|
||||
|
||||
4. $Vars.remove(X)$
|
||||
|
||||
5. $spode \leftarrow BuildSpode(X, {\cal{X}}, D[W])$
|
||||
|
||||
6. $\hat{y}[] \leftarrow spode.Predict(D)$
|
||||
|
||||
7. $\epsilon \leftarrow error(\hat{y}[], y[])$
|
||||
|
||||
8. $\alpha \leftarrow \frac{1}{2} ln \left ( \frac{1-\epsilon}{\epsilon} \right )$
|
||||
|
||||
9. if ($\epsilon > 0.5$)
|
||||
|
||||
1. $finished \leftarrow True$
|
||||
|
||||
2. break
|
||||
|
||||
10. $spodes.add( (spode,\alpha) )$
|
||||
|
||||
11. $W \leftarrow UpdateWeights(W,\alpha,y[],\hat{y}[])$
|
||||
|
||||
8. $AODE.add( spodes )$
|
||||
|
||||
9. if ($convergence \land \lnot finished$)
|
||||
|
||||
1. $\hat{y}[] \leftarrow AODE.Predict(D)$
|
||||
|
||||
2. $actualAccuracy \leftarrow accuracy(\hat{y}[], y[])$
|
||||
|
||||
3. $if (maxAccuracy == -1)\; maxAccuracy \leftarrow actualAccuracy$
|
||||
|
||||
4. if $((actualAccuracy - maxAccuracy) < \delta)$ // result doesn't
|
||||
improve enough
|
||||
|
||||
1. $tolerance \leftarrow tolerance + 1$
|
||||
|
||||
5. else
|
||||
|
||||
1. $tolerance \leftarrow 0$
|
||||
|
||||
2. $numItemsPack \leftarrow 0$
|
||||
|
||||
10. If
|
||||
$(Vars == \emptyset \lor tolerance>maxTolerance) \; finished \leftarrow True$
|
||||
|
||||
11. $maxAccuracy \leftarrow max(maxAccuracy, actualAccuracy)$
|
||||
|
||||
17. if ($tolerance > maxTolerance$) // algorithm finished because of
|
||||
lack of convergence
|
||||
|
||||
1. $removeModels(AODE, numItemsPack)$
|
||||
|
||||
18. Return $AODE$
|
80
docs/algorithm.tex
Normal file
80
docs/algorithm.tex
Normal file
@ -0,0 +1,80 @@
|
||||
\section{Algorithm}
|
||||
\begin{itemize}
|
||||
\item[] // notation
|
||||
\item $n$ features ${\cal{X}} = \{X_1, \dots, X_n\}$ and the class $Y$
|
||||
\item $m$ instances.
|
||||
\item $D = \{ (x_1^i, \dots, x_n^i, y^i) \}_{i=1}^{m}$
|
||||
\item $W$ a weights vector. $W_0$ are the initial weights.
|
||||
\item $D[W]$ dataset with weights $W$ for the instances.
|
||||
\end{itemize}
|
||||
\bigskip
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item[] // initialization
|
||||
\item $W_0 \leftarrow (w_1, \dots, w_m) \leftarrow 1/m$
|
||||
\item $W \leftarrow W_0$
|
||||
\item $Vars \leftarrow {\cal{X}}$
|
||||
\item $\delta \leftarrow 10^{-4}$
|
||||
\item $convergence \leftarrow True$ // hyperparameter
|
||||
\item $maxTolerance \leftarrow 3$ // hyperparameter
|
||||
\item $bisection \leftarrow False$ // hyperparameter
|
||||
\item $finished \leftarrow False$
|
||||
\item $AODE \leftarrow \emptyset$ \hspace*{2cm} // the ensemble
|
||||
\item $tolerance \leftarrow 0$
|
||||
\item $numItemsPack \leftarrow 0$
|
||||
\item $maxAccuracy \leftarrow -1$
|
||||
\item[]
|
||||
\newpage
|
||||
\item[] // main loop
|
||||
\item While $(\lnot finished)$
|
||||
\begin{enumerate}
|
||||
\item $\pi \leftarrow SortFeatures(Vars, criterion, D[W])$
|
||||
\item $k \leftarrow 2^{tolerance}$
|
||||
\item if ($tolerance == 0$) $numItemsPack \leftarrow0$
|
||||
\item $P \leftarrow Head(\pi,k)$ \hspace*{2cm} // first k features in order
|
||||
\item $spodes \leftarrow \emptyset$
|
||||
\item $i \leftarrow 0$
|
||||
\item While ($ i < size(P)$)
|
||||
\begin{enumerate}
|
||||
\item $X \leftarrow P[i]$
|
||||
\item $i \leftarrow i + 1$
|
||||
\item $numItemsPack \leftarrow numItemsPack + 1$
|
||||
\item $Vars.remove(X)$
|
||||
\item $spode \leftarrow BuildSpode(X, {\cal{X}}, D[W])$
|
||||
\item $\hat{y}[] \leftarrow spode.Predict(D)$
|
||||
\item $\epsilon \leftarrow error(\hat{y}[], y[])$
|
||||
\item $\alpha \leftarrow \frac{1}{2} ln \left ( \frac{1-\epsilon}{\epsilon} \right )$
|
||||
\item if ($\epsilon > 0.5$)
|
||||
\begin{enumerate}
|
||||
\item $finished \leftarrow True$
|
||||
\item break
|
||||
\end{enumerate}
|
||||
\item $spodes.add( (spode,\alpha) )$
|
||||
\item $W \leftarrow UpdateWeights(W,\alpha,y[],\hat{y}[])$
|
||||
\end{enumerate}
|
||||
\item $AODE.add( spodes )$
|
||||
\item if ($convergence \land \lnot finished$)
|
||||
\begin{enumerate}
|
||||
\item $\hat{y}[] \leftarrow AODE.Predict(D)$
|
||||
\item $actualAccuracy \leftarrow accuracy(\hat{y}[], y[])$
|
||||
\item $if (maxAccuracy == -1)\; maxAccuracy \leftarrow actualAccuracy$
|
||||
\item if $((actualAccuracy - maxAccuracy) < \delta)$\hspace*{2cm} // result doesn't improve enough
|
||||
\begin{enumerate}
|
||||
\item $tolerance \leftarrow tolerance + 1$
|
||||
\end{enumerate}
|
||||
\item else
|
||||
\begin{enumerate}
|
||||
\item $tolerance \leftarrow 0$
|
||||
\item $numItemsPack \leftarrow 0$
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
\item If $(Vars == \emptyset \lor tolerance>maxTolerance) \; finished \leftarrow True$
|
||||
\item $maxAccuracy \leftarrow max(maxAccuracy, actualAccuracy)$
|
||||
\end{enumerate}
|
||||
\item if ($tolerance > maxTolerance$) \hspace*{1cm} // algorithm finished because of lack of convergence
|
||||
\begin{enumerate}
|
||||
\item $removeModels(AODE, numItemsPack)$
|
||||
\end{enumerate}
|
||||
\item Return $AODE$
|
||||
\end{enumerate}
|
@ -1,4 +1,5 @@
|
||||
filter = bayesnet/
|
||||
exclude-directories = build_debug/lib/
|
||||
exclude = bayesnet/utils/loguru.*
|
||||
print-summary = yes
|
||||
sort = uncovered-percent
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 8ac8190e494a381072c89f5e161b92a08d98b37b
|
||||
Subproject commit bff6e35e2b239217f3940ed52429f94b745adc50
|
@ -1 +1 @@
|
||||
Subproject commit 37316a54e0d558555ae02ae95c8bb083ec063874
|
||||
Subproject commit 71d6055be4488cf2e6443123ae8fc4a63ae289dc
|
2
lib/json
2
lib/json
@ -1 +1 @@
|
||||
Subproject commit 0457de21cffb298c22b629e538036bfeb96130b7
|
||||
Subproject commit 199dea11b17c533721b26249e2dcaee6ca1d51d3
|
@ -1,5 +1,4 @@
|
||||
if(ENABLE_TESTING)
|
||||
set(TEST_BAYESNET "unit_tests_bayesnet")
|
||||
include_directories(
|
||||
${BayesNet_SOURCE_DIR}/lib/Files
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp
|
||||
@ -9,8 +8,13 @@ if(ENABLE_TESTING)
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
|
||||
set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCES})
|
||||
add_executable(${TEST_BAYESNET} ${TEST_SOURCES_BAYESNET})
|
||||
target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
|
||||
add_test(NAME ${TEST_BAYESNET} COMMAND ${TEST_BAYESNET})
|
||||
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestUtils.cc ${BayesNet_SOURCES})
|
||||
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
|
||||
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
|
||||
add_test(NAME Network COMMAND TestBayesNet "[Network]")
|
||||
add_test(NAME Node COMMAND TestBayesNet "[Node]")
|
||||
add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
|
||||
add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
|
||||
add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
|
||||
add_test(NAME Models COMMAND TestBayesNet "[Models]")
|
||||
endif(ENABLE_TESTING)
|
||||
|
86
tests/TestBayesClassifier.cc
Normal file
86
tests/TestBayesClassifier.cc
Normal file
@ -0,0 +1,86 @@
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include <string>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/classifiers/TAN.h"
|
||||
|
||||
|
||||
TEST_CASE("Test Cannot build dataset with wrong data vector", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
raw.yv.pop_back();
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
}
|
||||
TEST_CASE("Test Cannot build dataset with wrong data tensor", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto yshort = torch::zeros({ 149 }, torch::kInt32);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.featurest, raw.classNamet, raw.statest), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.featurest, raw.classNamet, raw.statest), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
}
|
||||
TEST_CASE("Invalid data type", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", false);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), "dataset (X, y) must be of type Integer");
|
||||
}
|
||||
TEST_CASE("Invalid number of features", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto Xt = torch::cat({ raw.Xt, torch::zeros({ 1, 150 }, torch::kInt32) }, 0);
|
||||
REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), "Classifier: X 5 and features 4 must have the same number of features");
|
||||
}
|
||||
TEST_CASE("Invalid class name", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, "duck", raw.statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, "duck", raw.statest), "class name not found in states");
|
||||
}
|
||||
TEST_CASE("Invalid feature name", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto statest = raw.statest;
|
||||
statest.erase("petallength");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), "feature [petallength] not found in states");
|
||||
}
|
||||
TEST_CASE("Topological order", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
auto order = model.topological_order();
|
||||
REQUIRE(order.size() == 4);
|
||||
REQUIRE(order[0] == "petallength");
|
||||
REQUIRE(order[1] == "sepallength");
|
||||
REQUIRE(order[2] == "sepalwidth");
|
||||
REQUIRE(order[3] == "petalwidth");
|
||||
}
|
||||
TEST_CASE("Not fitted model", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto message = "Classifier has not been fitted";
|
||||
// tensors
|
||||
REQUIRE_THROWS_AS(model.predict(raw.Xt), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(model.predict(raw.Xt), message);
|
||||
REQUIRE_THROWS_AS(model.predict_proba(raw.Xt), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(model.predict_proba(raw.Xt), message);
|
||||
REQUIRE_THROWS_AS(model.score(raw.Xt, raw.yt), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(model.score(raw.Xt, raw.yt), message);
|
||||
// vectors
|
||||
REQUIRE_THROWS_AS(model.predict(raw.Xv), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(model.predict(raw.Xv), message);
|
||||
REQUIRE_THROWS_AS(model.predict_proba(raw.Xv), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(model.predict_proba(raw.Xv), message);
|
||||
REQUIRE_THROWS_AS(model.score(raw.Xv, raw.yv), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(model.score(raw.Xv, raw.yv), message);
|
||||
}
|
@ -5,7 +5,7 @@
|
||||
#include "TestUtils.h"
|
||||
|
||||
|
||||
TEST_CASE("Metrics Test", "[BayesNet]")
|
||||
TEST_CASE("Metrics Test", "[Metrics]")
|
||||
{
|
||||
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
|
||||
map<std::string, pair<int, std::vector<int>>> resultsKBest = {
|
||||
@ -32,31 +32,41 @@ TEST_CASE("Metrics Test", "[BayesNet]")
|
||||
};
|
||||
auto raw = RawDatasets(file_name, true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.featurest, raw.classNamet, raw.classNumStates);
|
||||
bayesnet::Metrics metricsv(raw.Xv, raw.yv, raw.featurest, raw.classNamet, raw.classNumStates);
|
||||
|
||||
SECTION("Test Constructor")
|
||||
{
|
||||
REQUIRE(metrics.getScoresKBest().size() == 0);
|
||||
REQUIRE(metricsv.getScoresKBest().size() == 0);
|
||||
}
|
||||
|
||||
SECTION("Test SelectKBestWeighted")
|
||||
{
|
||||
std::vector<int> kBest = metrics.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first);
|
||||
std::vector<int> kBestv = metricsv.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first);
|
||||
REQUIRE(kBest.size() == resultsKBest.at(file_name).first);
|
||||
REQUIRE(kBestv.size() == resultsKBest.at(file_name).first);
|
||||
REQUIRE(kBest == resultsKBest.at(file_name).second);
|
||||
REQUIRE(kBestv == resultsKBest.at(file_name).second);
|
||||
}
|
||||
|
||||
SECTION("Test Mutual Information")
|
||||
{
|
||||
auto result = metrics.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights);
|
||||
auto resultv = metricsv.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights);
|
||||
REQUIRE(result == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon));
|
||||
REQUIRE(resultv == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon));
|
||||
}
|
||||
|
||||
SECTION("Test Maximum Spanning Tree")
|
||||
{
|
||||
auto weights_matrix = metrics.conditionalEdge(raw.weights);
|
||||
auto weights_matrixv = metricsv.conditionalEdge(raw.weights);
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
auto result = metrics.maximumSpanningTree(raw.featurest, weights_matrix, i);
|
||||
auto resultv = metricsv.maximumSpanningTree(raw.featurest, weights_matrixv, i);
|
||||
REQUIRE(result == resultsMST.at({ file_name, i }));
|
||||
REQUIRE(resultv == resultsMST.at({ file_name, i }));
|
||||
}
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
@ -15,17 +15,17 @@
|
||||
|
||||
const std::string ACTUAL_VERSION = "1.0.4";
|
||||
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[BayesNet]")
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
{
|
||||
map <pair<std::string, std::string>, float> scores{
|
||||
// Diabetes
|
||||
{{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
|
||||
{{"diabetes", "AODE"}, 0.82161}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
|
||||
{{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f},
|
||||
// Ecoli
|
||||
{{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
|
||||
{{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f},
|
||||
// Glass
|
||||
{{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
|
||||
{{"glass", "AODE"}, 0.79439}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
|
||||
{{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f},
|
||||
// Iris
|
||||
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
|
||||
@ -49,7 +49,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[BayesNet]")
|
||||
auto raw = RawDatasets(file_name, discretize);
|
||||
clf->fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
auto score = clf->score(raw.Xt, raw.yt);
|
||||
INFO("File: " + file_name);
|
||||
INFO("Classifier: " + name + " File: " + file_name);
|
||||
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
@ -60,7 +60,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[BayesNet]")
|
||||
}
|
||||
delete clf;
|
||||
}
|
||||
TEST_CASE("Models features", "[BayesNet]")
|
||||
TEST_CASE("Models features", "[Models]")
|
||||
{
|
||||
auto graph = std::vector<std::string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
|
||||
"class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
|
||||
@ -79,7 +79,7 @@ TEST_CASE("Models features", "[BayesNet]")
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.graph("Test") == graph);
|
||||
}
|
||||
TEST_CASE("Get num features & num edges", "[BayesNet]")
|
||||
TEST_CASE("Get num features & num edges", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::KDB(2);
|
||||
@ -87,7 +87,7 @@ TEST_CASE("Get num features & num edges", "[BayesNet]")
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 8);
|
||||
}
|
||||
TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]")
|
||||
TEST_CASE("BoostAODE feature_select CFS", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
@ -99,29 +99,51 @@ TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]")
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
}
|
||||
TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]")
|
||||
// Fits BoostAODE on the discretized "glass" dataset with the IWSS feature
// selector (threshold 0.5) and checks the size of the fitted model and the
// notes it reports afterwards.
TEST_CASE("BoostAODE feature_select IWSS", "[Models]")
{
    RawDatasets glass("glass", true);
    auto model = bayesnet::BoostAODE();
    model.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
    model.fit(glass.Xv, glass.yv, glass.featuresv, glass.classNamev, glass.statesv);

    // Structure of the fitted model.
    REQUIRE(model.getNumberOfNodes() == 90);
    REQUIRE(model.getNumberOfEdges() == 153);

    // Exactly two notes are expected: the selector summary and the model count.
    const auto notes = model.getNotes();
    REQUIRE(notes.size() == 2);
    REQUIRE(notes[0] == "Used features in initialization: 5 of 9 with IWSS");
    REQUIRE(notes[1] == "Number of models: 9");
}
|
||||
// Fits BoostAODE on the discretized "glass" dataset with the FCBF feature
// selector (threshold 1e-7) and checks the size of the fitted model and the
// notes it reports afterwards.
TEST_CASE("BoostAODE feature_select FCBF", "[Models]")
{
    RawDatasets glass("glass", true);
    auto model = bayesnet::BoostAODE();
    model.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
    model.fit(glass.Xv, glass.yv, glass.featuresv, glass.classNamev, glass.statesv);

    // Structure of the fitted model.
    REQUIRE(model.getNumberOfNodes() == 90);
    REQUIRE(model.getNumberOfEdges() == 153);

    // Exactly two notes are expected: the selector summary and the model count.
    const auto notes = model.getNotes();
    REQUIRE(notes.size() == 2);
    REQUIRE(notes[0] == "Used features in initialization: 5 of 9 with FCBF");
    REQUIRE(notes[1] == "Number of models: 9");
}
|
||||
TEST_CASE("BoostAODE test used features in train note and score", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
auto clf = bayesnet::BoostAODE(true);
|
||||
clf.setHyperparameters({
|
||||
{"order", "asc"},
|
||||
{"convergence", true},
|
||||
{"repeatSparent",true},
|
||||
{"select_features","CFS"},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
REQUIRE(clf.getNumberOfNodes() == 72);
|
||||
REQUIRE(clf.getNumberOfEdges() == 120);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 8");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 8");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon));
|
||||
REQUIRE(score == Catch::Approx(0.80078).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.80078).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Model predict_proba", "[BayesNet]")
|
||||
TEST_CASE("Model predict_proba", "[Models]")
|
||||
{
|
||||
std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
|
||||
auto res_prob_tan = std::vector<std::vector<double>>({
|
||||
@ -208,9 +230,9 @@ TEST_CASE("Model predict_proba", "[BayesNet]")
|
||||
delete clf;
|
||||
}
|
||||
}
|
||||
TEST_CASE("BoostAODE voting-proba", "[BayesNet]")
|
||||
TEST_CASE("BoostAODE voting-proba", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", false);
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::BoostAODE(false);
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto score_proba = clf.score(raw.Xv, raw.yv);
|
||||
@ -224,15 +246,59 @@ TEST_CASE("BoostAODE voting-proba", "[BayesNet]")
|
||||
REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon));
|
||||
clf.dump_cpt();
|
||||
REQUIRE(clf.dump_cpt() == "");
|
||||
REQUIRE(clf.topological_order() == std::vector<std::string>());
|
||||
}
|
||||
TEST_CASE("BoostAODE order asc, desc & random", "[BayesNet]")
|
||||
// Scores a single fitted AODE model twice on the discretized "glass" dataset:
// once as constructed, once after enabling the "predict_voting"
// hyperparameter, and pins the score and one probability entry for each mode.
// NOTE(review): the `false` constructor argument presumably selects the
// non-voting (probability) prediction mode - confirm against AODE's declaration.
TEST_CASE("AODE voting-proba", "[Models]")
{
    RawDatasets glass("glass", true);
    auto model = bayesnet::AODE(false);
    model.fit(glass.Xv, glass.yv, glass.featuresv, glass.classNamev, glass.statesv);

    // Predictions with the model as constructed.
    auto probaScore = model.score(glass.Xv, glass.yv);
    auto probaOutput = model.predict_proba(glass.Xv);

    // Switch the already fitted model to voting; no refit is performed.
    model.setHyperparameters({
        {"predict_voting",true},
        });
    auto votingScore = model.score(glass.Xv, glass.yv);
    auto votingOutput = model.predict_proba(glass.Xv);

    REQUIRE(probaScore == Catch::Approx(0.79439f).epsilon(glass.epsilon));
    REQUIRE(votingScore == Catch::Approx(0.78972f).epsilon(glass.epsilon));
    REQUIRE(votingOutput[67][0] == Catch::Approx(0.888889).epsilon(glass.epsilon));
    REQUIRE(probaOutput[67][0] == Catch::Approx(0.702184).epsilon(glass.epsilon));
    // The test expects an empty topological order from this classifier.
    REQUIRE(model.topological_order() == std::vector<std::string>());
}
|
||||
// Fits SPODELd on the non-discretized iris data through two fit() overloads -
// first from raw.dataset, then from the raw.Xt / raw.yt pair - and expects the
// same score from both fits.
TEST_CASE("SPODELd dataset", "[Models]")
{
    RawDatasets iris("iris", false);
    auto model = bayesnet::SPODELd(0);

    // Fit from the combined dataset member.
    model.fit(iris.dataset, iris.featuresv, iris.classNamev, iris.statesv);
    auto datasetScore = model.score(iris.Xt, iris.yt);

    // Refit the same object from the separate X / y members.
    model.fit(iris.Xt, iris.yt, iris.featurest, iris.classNamet, iris.statest);
    auto splitScore = model.score(iris.Xt, iris.yt);

    REQUIRE(datasetScore == Catch::Approx(0.97333f).epsilon(iris.epsilon));
    REQUIRE(splitScore == Catch::Approx(0.97333f).epsilon(iris.epsilon));
}
|
||||
// Fits KDB(2) on the discretized "glass" dataset, then refits the same object
// after setting the "k" and "theta" hyperparameters, and pins the score
// obtained from each fit.
TEST_CASE("KDB with hyperparameters", "[Models]")
{
    RawDatasets glass("glass", true);
    auto model = bayesnet::KDB(2);

    // Score with the constructor settings.
    model.fit(glass.Xv, glass.yv, glass.featuresv, glass.classNamev, glass.statesv);
    auto baselineScore = model.score(glass.Xv, glass.yv);

    // Score after overriding k and theta and fitting again.
    model.setHyperparameters({
        {"k", 3},
        {"theta", 0.7},
        });
    model.fit(glass.Xv, glass.yv, glass.featuresv, glass.classNamev, glass.statesv);
    auto tunedScore = model.score(glass.Xv, glass.yv);

    REQUIRE(baselineScore == Catch::Approx(0.827103).epsilon(glass.epsilon));
    REQUIRE(tunedScore == Catch::Approx(0.761682).epsilon(glass.epsilon));
}
|
||||
TEST_CASE("BoostAODE order asc, desc & random", "[Models]")
|
||||
{
|
||||
|
||||
auto raw = RawDatasets("glass", true);
|
||||
std::map<std::string, double> scores{
|
||||
{"asc", 0.83178f }, { "desc", 0.84579f }, { "rand", 0.83645f }
|
||||
{"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
|
||||
};
|
||||
for (const std::string& order : { "asc", "desc", "rand" }) {
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
@ -242,28 +308,8 @@ TEST_CASE("BoostAODE order asc, desc & random", "[BayesNet]")
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
INFO("order: " + order);
|
||||
INFO("BoostAODE order: " + order);
|
||||
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
TEST_CASE("BoostAODE predict_single", "[BayesNet]")
|
||||
{
|
||||
|
||||
auto raw = RawDatasets("glass", true);
|
||||
std::map<bool, double> scores{
|
||||
{true, 0.84579f }, { false, 0.80841f }
|
||||
};
|
||||
for (const bool kind : { true, false}) {
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({
|
||||
{"predict_single", kind}, {"order", "desc" },
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
INFO("kind: " + std::string(kind ? "true" : "false"));
|
||||
REQUIRE(score == Catch::Approx(scores[kind]).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(scores[kind]).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
|
@ -1,9 +1,12 @@
|
||||
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include <string>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/network/Network.h"
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
|
||||
void buildModel(bayesnet::Network& net, const std::vector<std::string>& features, const std::string& className)
|
||||
{
|
||||
@ -20,7 +23,7 @@ void buildModel(bayesnet::Network& net, const std::vector<std::string>& features
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
{
|
||||
|
||||
auto raw = RawDatasets("iris", true);
|
||||
@ -110,6 +113,22 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
net3.fit(raw.Xt, raw.yt, raw.weights, raw.featurest, raw.classNamet, raw.statest);
|
||||
REQUIRE(net.getStates() == net2.getStates());
|
||||
REQUIRE(net.getStates() == net3.getStates());
|
||||
REQUIRE(net.getFeatures() == net2.getFeatures());
|
||||
REQUIRE(net.getFeatures() == net3.getFeatures());
|
||||
REQUIRE(net.getClassName() == net2.getClassName());
|
||||
REQUIRE(net.getClassName() == net3.getClassName());
|
||||
REQUIRE(net.getNodes().size() == net2.getNodes().size());
|
||||
REQUIRE(net.getNodes().size() == net3.getNodes().size());
|
||||
REQUIRE(net.getEdges() == net2.getEdges());
|
||||
REQUIRE(net.getEdges() == net3.getEdges());
|
||||
REQUIRE(net.getNumEdges() == net2.getNumEdges());
|
||||
REQUIRE(net.getNumEdges() == net3.getNumEdges());
|
||||
REQUIRE(net.getClassNumStates() == net2.getClassNumStates());
|
||||
REQUIRE(net.getClassNumStates() == net3.getClassNumStates());
|
||||
REQUIRE(net.getSamples().size(0) == net2.getSamples().size(0));
|
||||
REQUIRE(net.getSamples().size(0) == net3.getSamples().size(0));
|
||||
REQUIRE(net.getSamples().size(1) == net2.getSamples().size(1));
|
||||
REQUIRE(net.getSamples().size(1) == net3.getSamples().size(1));
|
||||
// Check Conditional Probabilities tables
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
auto feature = features.at(i);
|
||||
@ -124,7 +143,6 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test show")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@ -138,7 +156,6 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test topological_sort")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@ -152,7 +169,6 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test graph")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@ -170,7 +186,6 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test predict")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
@ -180,7 +195,6 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test predict_proba")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
@ -202,10 +216,230 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test score")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto score = net.score(raw.Xv, raw.yv);
|
||||
REQUIRE(score == Catch::Approx(0.97333333).margin(threshold));
|
||||
}
|
||||
}
|
||||
SECTION("Copy constructor")
|
||||
{
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto net2 = bayesnet::Network(net);
|
||||
REQUIRE(net.getFeatures() == net2.getFeatures());
|
||||
REQUIRE(net.getEdges() == net2.getEdges());
|
||||
REQUIRE(net.getNumEdges() == net2.getNumEdges());
|
||||
REQUIRE(net.getStates() == net2.getStates());
|
||||
REQUIRE(net.getClassName() == net2.getClassName());
|
||||
REQUIRE(net.getClassNumStates() == net2.getClassNumStates());
|
||||
REQUIRE(net.getSamples().size(0) == net2.getSamples().size(0));
|
||||
REQUIRE(net.getSamples().size(1) == net2.getSamples().size(1));
|
||||
REQUIRE(net.getNodes().size() == net2.getNodes().size());
|
||||
for (const auto& feature : net.getFeatures()) {
|
||||
auto& node = net.getNodes().at(feature);
|
||||
auto& node2 = net2.getNodes().at(feature);
|
||||
REQUIRE(node->getName() == node2->getName());
|
||||
REQUIRE(node->getChildren().size() == node2->getChildren().size());
|
||||
REQUIRE(node->getParents().size() == node2->getParents().size());
|
||||
REQUIRE(node->getCPT().equal(node2->getCPT()));
|
||||
}
|
||||
}
|
||||
SECTION("Test oddities")
|
||||
{
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
// predict without fitting
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
auto test_tensor = bayesnet::vectorToTensor(test);
|
||||
REQUIRE_THROWS_AS(net.predict(test), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net.predict(test), "You must call fit() before calling predict()");
|
||||
REQUIRE_THROWS_AS(net.predict(test_tensor), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net.predict(test_tensor), "You must call fit() before calling predict()");
|
||||
REQUIRE_THROWS_AS(net.predict_proba(test), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net.predict_proba(test), "You must call fit() before calling predict_proba()");
|
||||
REQUIRE_THROWS_AS(net.score(raw.Xv, raw.yv), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net.score(raw.Xv, raw.yv), "You must call fit() before calling predict()");
|
||||
// predict with wrong data
|
||||
auto netx = bayesnet::Network();
|
||||
buildModel(netx, raw.featuresv, raw.classNamev);
|
||||
netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
std::vector<std::vector<int>> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} };
|
||||
auto test_tensor2 = bayesnet::vectorToTensor(test2, false);
|
||||
REQUIRE_THROWS_AS(netx.predict(test2), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test2), "Sample size (3) does not match the number of features (4)");
|
||||
REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "Sample size (3) does not match the number of features (4)");
|
||||
// fit with wrong data
|
||||
// Weights
|
||||
auto net2 = bayesnet::Network();
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.featuresv, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
std::string invalid_weights = "Weights (0) must have the same number of elements as samples (150) in Network::fit";
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.featuresv, raw.classNamev, raw.statesv), invalid_weights);
|
||||
// X & y
|
||||
std::string invalid_labels = "X and y must have the same number of samples in Network::fit (150 != 0)";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv), invalid_labels);
|
||||
// Features
|
||||
std::string invalid_features = "X and features must have the same number of features in Network::fit (4 != 0)";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.classNamev, raw.statesv), invalid_features);
|
||||
// Different number of features
|
||||
auto net3 = bayesnet::Network();
|
||||
auto test2y = { 1, 2, 3, 4, 5 };
|
||||
buildModel(net3, raw.featuresv, raw.classNamev);
|
||||
auto features3 = raw.featuresv;
|
||||
features3.pop_back();
|
||||
std::string invalid_features2 = "X and local features must have the same number of features in Network::fit (3 != 4)";
|
||||
REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.classNamev, raw.statesv), invalid_features2);
|
||||
// Uninitialized network
|
||||
std::string network_invalid = "The network has not been initialized. You must call addNode() before calling fit()";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), network_invalid);
|
||||
// Classname
|
||||
std::string invalid_classname = "Class Name not found in Network::features";
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), invalid_classname);
|
||||
// Invalid feature
|
||||
auto features2 = raw.featuresv;
|
||||
features2.pop_back();
|
||||
features2.push_back("duck");
|
||||
std::string invalid_feature = "Feature duck not found in Network::features";
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.classNamev, raw.statesv), invalid_feature);
|
||||
}
|
||||
|
||||
}
|
||||
// Adding a node with an empty name must be rejected with
// std::invalid_argument and the message checked below.
TEST_CASE("Test and empty Node", "[Network]")
{
    auto network = bayesnet::Network();
    const std::string expectedMessage = "Node name cannot be empty";
    REQUIRE_THROWS_AS(network.addNode(""), std::invalid_argument);
    REQUIRE_THROWS_WITH(network.addNode(""), expectedMessage);
}
|
||||
// Builds the chain A -> B -> C and verifies that closing it with C -> A is
// rejected with std::invalid_argument and the cycle message checked below.
TEST_CASE("Cicle in Network", "[Network]")
{
    auto network = bayesnet::Network();
    for (const char* name : { "A", "B", "C" }) {
        network.addNode(name);
    }
    network.addEdge("A", "B");
    network.addEdge("B", "C");

    const std::string expectedMessage = "Adding this edge forms a cycle in the graph.";
    REQUIRE_THROWS_AS(network.addEdge("C", "A"), std::invalid_argument);
    REQUIRE_THROWS_WITH(network.addEdge("C", "A"), expectedMessage);
}
|
||||
// getMaxThreads() must report 0.95f for a default-constructed Network and
// otherwise echo the value passed to the constructor.
TEST_CASE("Test max threads constructor", "[Network]")
{
    // No argument: the default value.
    auto defaultNetwork = bayesnet::Network();
    REQUIRE(defaultNetwork.getMaxThreads() == 0.95f);

    // Integral argument.
    auto wholeNetwork = bayesnet::Network(4);
    REQUIRE(wholeNetwork.getMaxThreads() == 4);

    // Fractional argument.
    auto fractionalNetwork = bayesnet::Network(1.75);
    REQUIRE(fractionalNetwork.getMaxThreads() == 1.75);
}
|
||||
// With only nodes A and B present, an edge that names the unknown node C must
// be rejected whether C is used as the child or as the parent.
TEST_CASE("Edges troubles", "[Network]")
{
    auto network = bayesnet::Network();
    network.addNode("A");
    network.addNode("B");

    // Unknown node on the child side.
    const std::string missingChild = "Child node C does not exist";
    REQUIRE_THROWS_AS(network.addEdge("A", "C"), std::invalid_argument);
    REQUIRE_THROWS_WITH(network.addEdge("A", "C"), missingChild);

    // Unknown node on the parent side.
    const std::string missingParent = "Parent node C does not exist";
    REQUIRE_THROWS_AS(network.addEdge("C", "A"), std::invalid_argument);
    REQUIRE_THROWS_WITH(network.addEdge("C", "A"), missingParent);
}
|
||||
TEST_CASE("Dump CPT", "[Network]")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto res = net.dump_cpt();
|
||||
std::string expected = R"(* class: (3) : [3]
|
||||
0.3333
|
||||
0.3333
|
||||
0.3333
|
||||
[ CPUFloatType{3} ]
|
||||
* petallength: (4) : [4, 3, 3]
|
||||
(1,.,.) =
|
||||
0.9388 0.1000 0.2000
|
||||
0.6250 0.0526 0.1667
|
||||
0.4000 0.0303 0.0196
|
||||
|
||||
(2,.,.) =
|
||||
0.0204 0.7000 0.4000
|
||||
0.1250 0.8421 0.1667
|
||||
0.2000 0.7273 0.0196
|
||||
|
||||
(3,.,.) =
|
||||
0.0204 0.1000 0.2000
|
||||
0.1250 0.0526 0.5000
|
||||
0.2000 0.1818 0.1373
|
||||
|
||||
(4,.,.) =
|
||||
0.0204 0.1000 0.2000
|
||||
0.1250 0.0526 0.1667
|
||||
0.2000 0.0606 0.8235
|
||||
[ CPUFloatType{4,3,3} ]
|
||||
* petalwidth: (3) : [3, 6, 3]
|
||||
(1,.,.) =
|
||||
0.5000 0.0417 0.0714
|
||||
0.3333 0.1111 0.0909
|
||||
0.5000 0.1000 0.2000
|
||||
0.7778 0.0909 0.0667
|
||||
0.8667 0.1000 0.0667
|
||||
0.9394 0.2500 0.1250
|
||||
|
||||
(2,.,.) =
|
||||
0.2500 0.9167 0.2857
|
||||
0.3333 0.7778 0.1818
|
||||
0.2500 0.8000 0.2000
|
||||
0.1111 0.8182 0.1333
|
||||
0.0667 0.7000 0.0667
|
||||
0.0303 0.5000 0.1250
|
||||
|
||||
(3,.,.) =
|
||||
0.2500 0.0417 0.6429
|
||||
0.3333 0.1111 0.7273
|
||||
0.2500 0.1000 0.6000
|
||||
0.1111 0.0909 0.8000
|
||||
0.0667 0.2000 0.8667
|
||||
0.0303 0.2500 0.7500
|
||||
[ CPUFloatType{3,6,3} ]
|
||||
* sepallength: (3) : [3, 3]
|
||||
0.8679 0.1321 0.0377
|
||||
0.0943 0.3019 0.0566
|
||||
0.0377 0.5660 0.9057
|
||||
[ CPUFloatType{3,3} ]
|
||||
* sepalwidth: (6) : [6, 3, 3]
|
||||
(1,.,.) =
|
||||
0.0392 0.5000 0.2857
|
||||
0.1000 0.4286 0.2500
|
||||
0.1429 0.2571 0.1887
|
||||
|
||||
(2,.,.) =
|
||||
0.0196 0.0833 0.1429
|
||||
0.1000 0.1429 0.2500
|
||||
0.1429 0.1429 0.1509
|
||||
|
||||
(3,.,.) =
|
||||
0.0392 0.0833 0.1429
|
||||
0.1000 0.1429 0.1250
|
||||
0.1429 0.1714 0.0566
|
||||
|
||||
(4,.,.) =
|
||||
0.1373 0.1667 0.1429
|
||||
0.1000 0.1905 0.1250
|
||||
0.1429 0.1429 0.2453
|
||||
|
||||
(5,.,.) =
|
||||
0.2549 0.0833 0.1429
|
||||
0.1000 0.0476 0.1250
|
||||
0.1429 0.2286 0.2453
|
||||
|
||||
(6,.,.) =
|
||||
0.5098 0.0833 0.1429
|
||||
0.5000 0.0476 0.1250
|
||||
0.2857 0.0571 0.1132
|
||||
[ CPUFloatType{6,3,3} ]
|
||||
)";
|
||||
REQUIRE(res == expected);
|
||||
}
|
||||
|
||||
|
84
tests/TestBayesNode.cc
Normal file
84
tests/TestBayesNode.cc
Normal file
@ -0,0 +1,84 @@
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <string>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/network/Network.h"
|
||||
|
||||
|
||||
|
||||
// Exercises the parent/child bookkeeping of a single Node: registration order,
// removal of one parent and one child, and clear().
TEST_CASE("Test Node children and parents", "[Node]")
{
    bayesnet::Node subject("Node");
    REQUIRE(subject.getName() == "Node");

    bayesnet::Node parent_1("P1");
    bayesnet::Node parent_2("P2");
    bayesnet::Node child_1("H1");
    bayesnet::Node child_2("H2");
    bayesnet::Node child_3("H3");

    subject.addParent(&parent_1);
    subject.addParent(&parent_2);
    subject.addChild(&child_1);
    subject.addChild(&child_2);
    subject.addChild(&child_3);

    // Relatives are reported in the order in which they were added.
    auto linkedParents = subject.getParents();
    auto linkedChildren = subject.getChildren();
    REQUIRE(linkedParents.size() == 2);
    REQUIRE(linkedChildren.size() == 3);
    REQUIRE(linkedParents[0]->getName() == "P1");
    REQUIRE(linkedParents[1]->getName() == "P2");
    REQUIRE(linkedChildren[0]->getName() == "H1");
    REQUIRE(linkedChildren[1]->getName() == "H2");
    REQUIRE(linkedChildren[2]->getName() == "H3");

    // Removing one of each leaves one parent and two children.
    subject.removeParent(&parent_1);
    subject.removeChild(&child_1);
    linkedParents = subject.getParents();
    linkedChildren = subject.getChildren();
    REQUIRE(linkedParents.size() == 1);
    REQUIRE(linkedChildren.size() == 2);

    // clear() drops every remaining relative.
    subject.clear();
    linkedParents = subject.getParents();
    linkedChildren = subject.getChildren();
    REQUIRE(linkedParents.size() == 0);
    REQUIRE(linkedChildren.size() == 0);
}
|
||||
// Checks Node::minFill() on a hand-wired graph of five nodes.
//
// Children registered below:
//   0 -> 1, 2, 3
//   1 -> 2, 4
//   2 -> 3, 4
//   3 -> 4
// Parents are registered separately on each node. node_2 lists node_4 as a
// child while node_4 does not list node_2 as a parent; the expected values at
// the end were recorded with exactly this wiring, so it must not be "fixed".
TEST_CASE("TEST MinFill method", "[Node]")
{
    bayesnet::Node node_0("0");
    bayesnet::Node node_1("1");
    bayesnet::Node node_2("2");
    bayesnet::Node node_3("3");
    bayesnet::Node node_4("4");

    // node 0: three children, no parents
    node_0.addChild(&node_1);
    node_0.addChild(&node_2);
    node_0.addChild(&node_3);

    // node 1: two children, one parent
    node_1.addChild(&node_2);
    node_1.addChild(&node_4);
    node_1.addParent(&node_0);

    // node 2: two children, two parents
    node_2.addChild(&node_3);
    node_2.addChild(&node_4);
    node_2.addParent(&node_0);
    node_2.addParent(&node_1);

    // node 3: one child, two parents
    node_3.addChild(&node_4);
    node_3.addParent(&node_0);
    node_3.addParent(&node_2);

    // node 4: no children, two parents
    node_4.addParent(&node_1);
    node_4.addParent(&node_3);

    REQUIRE(node_0.minFill() == 3);
    REQUIRE(node_1.minFill() == 3);
    REQUIRE(node_2.minFill() == 6);
    REQUIRE(node_3.minFill() == 3);
    REQUIRE(node_4.minFill() == 1);
}
|
71
tests/TestFeatureSelection.cc
Normal file
71
tests/TestFeatureSelection.cc
Normal file
@ -0,0 +1,71 @@
|
||||
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <cstddef>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "bayesnet/utils/BayesMetrics.h"
#include "bayesnet/feature_selection/CFS.h"
#include "bayesnet/feature_selection/FCBF.h"
#include "bayesnet/feature_selection/IWSS.h"
#include "TestUtils.h"
|
||||
|
||||
// Creates the feature selector named by `selector` ("CFS", "FCBF" or "IWSS")
// over the given dataset.
//
// raw       - dataset bundle supplying samples, feature names, class name,
//             class state count and weights.
// selector  - name of the algorithm to instantiate.
// threshold - forwarded to FCBF and IWSS; not used for CFS.
//
// Returns a heap-allocated selector that the caller must delete, or nullptr
// when `selector` is not one of the three known names.
bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector, double threshold)
{
    const auto featureCount = raw.featuresv.size();
    if (selector == "CFS") {
        return new bayesnet::CFS(raw.dataset, raw.featuresv, raw.classNamev, featureCount, raw.classNumStates, raw.weights);
    }
    if (selector == "FCBF") {
        return new bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, featureCount, raw.classNumStates, raw.weights, threshold);
    }
    if (selector == "IWSS") {
        return new bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, featureCount, raw.classNumStates, raw.weights, threshold);
    }
    return nullptr;
}
|
||||
|
||||
// Runs the CFS, IWSS and FCBF selectors over each discretized dataset produced
// by GENERATE and compares the selected feature indices and their scores with
// the reference table below.
TEST_CASE("Features Selected", "[FeatureSelection]")
{
    std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");

    auto raw = RawDatasets(file_name, true);

    SECTION("Test features selected, scores and sizes")
    {
        // {dataset, selector} -> {expected feature indices, expected scores}
        const std::map<std::pair<std::string, std::string>, std::pair<std::vector<int>, std::vector<double>>> results = {
            { {"glass", "CFS"}, { { 2, 3, 6, 1, 8, 4 }, {0.365513, 0.42895, 0.369809, 0.298294, 0.240952, 0.200915} } },
            { {"iris", "CFS"}, { { 3, 2, 1, 0 }, {0.870521, 0.890375, 0.588155, 0.41843} } },
            { {"ecoli", "CFS"}, { { 5, 0, 4, 2, 1, 6 }, {0.512319, 0.565381, 0.486025, 0.41087, 0.331423, 0.266251} } },
            { {"diabetes", "CFS"}, { { 1, 5, 7, 6, 4, 2 }, {0.132858, 0.151209, 0.14244, 0.126591, 0.106028, 0.0825904} } },
            { {"glass", "IWSS" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.42895, 0.359907, 0.273784, 0.223346} } },
            { {"iris", "IWSS"}, { { 3, 2, 0 }, {0.870521, 0.890375, 0.585426} }},
            { {"ecoli", "IWSS"}, { { 5, 6, 0, 1, 4 }, {0.512319, 0.550978, 0.475025, 0.382607, 0.308203} } },
            { {"diabetes", "IWSS"}, { { 1, 5, 4, 7, 3 }, {0.132858, 0.151209, 0.136576, 0.122097, 0.0802232} } },
            { {"glass", "FCBF" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.304911, 0.302109, 0.281621, 0.253297} } },
            { {"iris", "FCBF"}, {{ 3, 2 }, {0.870521, 0.816401} }},
            { {"ecoli", "FCBF"}, {{ 5, 0, 1, 4, 2 }, {0.512319, 0.350406, 0.260905, 0.203132, 0.11229} }},
            { {"diabetes", "FCBF"}, {{ 1, 5, 7, 6 }, {0.132858, 0.083191, 0.0480135, 0.0224186} }}
        };
        // Selector name paired with the threshold handed to build_selector.
        const std::vector<std::pair<std::string, double>> selectors = {
            { "CFS", 0.0 },
            { "IWSS", 0.5 },
            { "FCBF", 1e-7 }
        };
        for (const auto& item : selectors) {
            const std::string& selector = item.first;
            const double threshold = item.second;
            INFO("file_name: " << file_name << ", selector: " << selector);

            // Owning pointer: a failing REQUIRE leaves the test by throwing,
            // which would skip a manual delete and leak the selector.
            std::unique_ptr<bayesnet::FeatureSelect> featureSelector(build_selector(raw, selector, threshold));
            // build_selector returns nullptr for an unknown name; fail here
            // instead of dereferencing a null pointer.
            const bool selectorBuilt = featureSelector != nullptr;
            REQUIRE(selectorBuilt);
            featureSelector->fit();

            const auto& expected = results.at({ file_name, selector });
            // Features
            const std::vector<int>& expected_features = expected.first;
            std::vector<int> selected_features = featureSelector->getFeatures();
            REQUIRE(selected_features.size() == expected_features.size());
            REQUIRE(selected_features == expected_features);
            // Scores
            const std::vector<double>& expected_scores = expected.second;
            std::vector<double> selected_scores = featureSelector->getScores();
            REQUIRE(selected_scores.size() == selected_features.size());
            // Guard the indexed comparison below against a short reference row.
            REQUIRE(selected_scores.size() == expected_scores.size());
            for (std::size_t i = 0; i < selected_scores.size(); ++i) {
                REQUIRE(selected_scores[i] == Catch::Approx(expected_scores[i]).epsilon(raw.epsilon));
            }
        }
    }
}
|
25
update_coverage.py
Normal file
25
update_coverage.py
Normal file
@ -0,0 +1,25 @@
|
||||
"""Refresh the coverage badge line in README.md from an lcov tracefile.

Usage: python update_coverage.py <directory containing coverage.info>

The script runs ``lcov --summary`` on ``<directory>/coverage.info``, extracts
the percentage from the second line of its output and replaces every README
line containing the word "Coverage" with a freshly generated badge.
"""
import os
import shlex
import subprocess
import sys

readme_file = "README.md"

if len(sys.argv) < 2:
    # Fail with a usage message rather than an IndexError traceback.
    sys.exit("usage: update_coverage.py <directory containing coverage.info>")

print("Updating coverage...")
# Generate badge line
# Quote the tracefile path so spaces or shell metacharacters in the argument
# cannot break (or inject into) the pipeline.
tracefile = shlex.quote(sys.argv[1] + "/coverage.info")
output = subprocess.check_output(
    "lcov --summary " + tracefile + "|cut -d' ' -f4 |head -2|"
    "tail -1",
    shell=True,
)
# The extracted field is expected to end with '%'; the literal "25" appended in
# the URL below turns that into the URL-encoded percent sign "%25".
percentage = output.decode("utf-8").strip().replace(".", ",")
if not percentage:
    # The pipeline's exit status is that of `tail`, so an lcov failure shows up
    # only as empty output. Stop before writing a broken badge to the README.
    sys.exit("Could not read a coverage percentage from " + tracefile)
coverage_line = (
    f"![Static Badge](https://img.shields.io/badge/Coverage-{percentage}25-green)"
)
# Update README.md
with open(readme_file, "r") as f:
    lines = f.readlines()
with open(readme_file, "w") as f:
    for line in lines:
        if "Coverage" in line:
            f.write(coverage_line + "\n")
        else:
            f.write(line)
|
Loading…
Reference in New Issue
Block a user