Compare commits

..

25 Commits

Author SHA1 Message Date
46e929ff4d Merge pull request 'predict_single' (#22) from predict_single into main
Reviewed-on: #22

close #19
2024-03-06 16:16:15 +00:00
d858e26e4b Update version number and Changelog 2024-03-06 17:04:16 +01:00
0ee3eaed53 Update select features models significance 2024-03-05 12:10:58 +01:00
093c197f0a Replace constant strings in BoostAODE 2024-03-05 11:05:11 +01:00
78d7ea7c4d Add predict_single proposal detailed info 2024-03-03 22:56:01 +01:00
d6af1ffe8e Update gcovr config and fix some warnings 2024-02-28 11:51:37 +01:00
20669dd161 Translate BoostAODE.md to English 2024-02-27 20:29:01 +01:00
272dbad4f3 Update README and docs 2024-02-27 17:16:26 +01:00
8bccc3e4bc Update boostaode algorithm explain 2024-02-27 14:24:58 +01:00
903b143338 Refactor library structure and add sample 2024-02-27 13:06:13 +01:00
f10d0daf2e Update test 2024-02-27 10:16:20 +01:00
d39a17089e Begin implementing predict_single hyperparameter in BoostAODE 2024-02-26 20:29:08 +01:00
2e325cd114 Merge pull request 'change boostaode ascending hyperparameter to order {asc,desc,rand}' (#21) from baode_random into main
Reviewed-on: #21

This PR closes #18
2024-02-26 16:28:48 +00:00
fc3d63b7db change boostaode ascending hyperparameter to order {asc,desc,rand} 2024-02-26 17:07:57 +01:00
43dc79a345 Update version number in ChangeLog 2024-02-25 18:07:50 +01:00
b8589bcd0a Merge pull request 'Add the probabilities aggregation method to compute prediction with ensembles' (#16) from baode_proba into main
Reviewed-on: #16

As only the voting method was implemented, this approach computes the classifiers prediction using a weighted average of the probabilities computed by each model.
Added the predict_proba methods to BaseClassifier - Classifier and Ensemble classes.
Add a hyperparameter to decide the type of computation for ensembles voting - probability aggregation
2024-02-25 11:26:26 +00:00
3007e22a7d Add info to CHANGELOG
Update submodules
2024-02-24 21:33:28 +01:00
02e456befb Complete predict & predict_proba in ensemble 2024-02-24 18:36:09 +01:00
8477698d8d Complete predict & predict_proba with voting & probabilities 2024-02-23 23:11:14 +01:00
52abd2d670 Implement the proba branch and begin with the voting one 2024-02-23 20:36:11 +01:00
3116eaa763 Begin testing ensemble predict_proba 2024-02-22 18:44:40 +01:00
443e5cc882 Implement classifier.predict_proba & test 2024-02-22 11:45:40 +01:00
e1c4221c11 Add predict_voting and predict_prob to ensemble 2024-02-20 10:58:21 +01:00
a63a35df3f Fix epsilont early stopping in BoostAODE 2024-02-20 10:11:22 +01:00
c7555dac3f Add comments to BoostAODE algorithm 2024-02-19 22:58:15 +01:00
65 changed files with 995 additions and 534 deletions

View File

@@ -5,6 +5,7 @@ Checks: '-*,
cppcoreguidelines-*, cppcoreguidelines-*,
modernize-*, modernize-*,
performance-*, performance-*,
-modernize-use-nodiscard,
-cppcoreguidelines-pro-type-vararg, -cppcoreguidelines-pro-type-vararg,
-modernize-use-trailing-return-type, -modernize-use-trailing-return-type,
-bugprone-exception-escape' -bugprone-exception-escape'

2
.gitmodules vendored
View File

@@ -16,3 +16,5 @@
[submodule "lib/folding"] [submodule "lib/folding"]
path = lib/folding path = lib/folding
url = https://github.com/rmontanana/folding url = https://github.com/rmontanana/folding
main = main
update = merge

115
.vscode/launch.json vendored
View File

@@ -5,126 +5,21 @@
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "sample", "name": "sample",
"program": "${workspaceFolder}/build_debug/sample/BayesNetSample", "program": "${workspaceFolder}/build_release/sample/bayesnet_sample",
"args": [ "args": [
"-d", "${workspaceFolder}/tests/data/glass.arff"
"iris",
"-m",
"TANLd",
"-s",
"271",
"-p",
"/Users/rmontanana/Code/discretizbench/datasets/",
], ],
//"cwd": "${workspaceFolder}/build/sample/",
},
{
"type": "lldb",
"request": "launch",
"name": "experimentPy",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"STree",
"--stratified",
"-d",
"iris",
//"--discretize"
// "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "gridsearch",
"program": "${workspaceFolder}/build_debug/src/Platform/b_grid",
"args": [
"-m",
"KDB",
"--discretize",
"--continue",
"glass",
"--only",
"--compute"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "experimentBayes",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"TAN",
"--stratified",
"--discretize",
"-d",
"iris",
"--hyperparameters",
"{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "/home/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "best",
"program": "${workspaceFolder}/build_debug/src/Platform/b_best",
"args": [
"-m",
"BoostAODE",
"-s",
"accuracy",
"--build",
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "manage",
"program": "${workspaceFolder}/build_debug/src/Platform/b_manage",
"args": [
"-n",
"20"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "list",
"program": "${workspaceFolder}/build_debug/src/Platform/b_list",
"args": [],
//"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
}, },
{ {
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "test", "name": "test",
"program": "${workspaceFolder}/build_debug/tests/unit_tests", "program": "${workspaceFolder}/build_debug/tests/unit_tests_bayesnet",
"args": [ "args": [
"-c=\"Metrics Test\"", //"-c=\"Metrics Test\"",
// "-s", // "-s",
], ],
"cwd": "${workspaceFolder}/build/tests", "cwd": "${workspaceFolder}/build_debug/tests",
},
{
"name": "Build & debug active file",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build_debug/bayesnet",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "lldb",
"preLaunchTask": "CMake: build"
} }
] ]
} }

View File

@@ -5,6 +5,37 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.0.4]
### Added
- Change _ascending_ hyperparameter to _order_ with these possible values _{"asc", "desc", "rand"}_, Default is _"desc"_.
- Add the _predict_single_ hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true.
- sample app to show how to use the library (make sample)
### Changed
- Change the library structure adding folders for each group of classes (classifiers, ensembles, etc).
- The significances of the models generated under the feature selection algorithm are now computed after all the models have been generated and an &alpha;<sub>t</sub> value is computed and assigned to each model.
## [1.0.3]
### Added
- Voting / probability aggregation in Ensemble classes
- predict_proba method in Classifier
- predict_proba method in BoostAODE
- predict_voting parameter in BoostAODE constructor to use voting or probability to predict (default is voting)
- hyperparameter predict_voting to AODE, AODELd and BoostAODE (Ensemble child classes)
- tests to check predict & predict_proba coherence
## [1.0.2] - 2024-02-20
### Fixed
- Fix bug in BoostAODE: do not include the model if epsilon sub t is greater than 0.5
- Fix bug in BoostAODE: compare accuracy with previous accuracy instead of the first of the ensemble if convergence true
## [1.0.1] - 2024-02-12 ## [1.0.1] - 2024-02-12
### Added ### Added

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20) cmake_minimum_required(VERSION 3.20)
project(BayesNet project(BayesNet
VERSION 1.0.1 VERSION 1.0.4
DESCRIPTION "Bayesian Network and basic classifiers Library." DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet" HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX LANGUAGES CXX
@@ -36,6 +36,13 @@ option(CODE_COVERAGE "Collect coverage from test library" OFF)
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
include(AddGitSubmodule) include(AddGitSubmodule)
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
MESSAGE("Debug mode")
set(ENABLE_TESTING ON)
set(CODE_COVERAGE ON)
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
if (CODE_COVERAGE) if (CODE_COVERAGE)
enable_testing() enable_testing()
include(CodeCoverage) include(CodeCoverage)
@@ -58,14 +65,11 @@ add_git_submodule("lib/json")
# -------------- # --------------
add_subdirectory(config) add_subdirectory(config)
add_subdirectory(lib/Files) add_subdirectory(lib/Files)
add_subdirectory(src/BayesNet) add_subdirectory(sample)
add_subdirectory(src)
file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h)
file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cc ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cpp)
# Testing # Testing
# ------- # -------
if (ENABLE_TESTING) if (ENABLE_TESTING)
MESSAGE("Testing enabled") MESSAGE("Testing enabled")
add_git_submodule("lib/catch2") add_git_submodule("lib/catch2")

View File

@@ -1,6 +1,6 @@
SHELL := /bin/bash SHELL := /bin/bash
.DEFAULT_GOAL := help .DEFAULT_GOAL := help
.PHONY: coverage setup help buildr buildd test clean debug release .PHONY: coverage setup help buildr buildd test clean debug release sample
f_release = build_release f_release = build_release
f_debug = build_debug f_debug = build_debug
@@ -61,6 +61,13 @@ release: ## Build a Release version of the project
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release @cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
@echo ">>> Done"; @echo ">>> Done";
fname = "tests/data/iris.arff"
sample: ## Build sample
@echo ">>> Building Sample...";
cmake --build $(f_release) -t bayesnet_sample $(n_procs)
$(f_release)/sample/bayesnet_sample $(fname)
@echo ">>> Done";
opt = "" opt = ""
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet & Platform tests..."; @echo ">>> Running BayesNet & Platform tests...";

View File

@@ -19,4 +19,14 @@ make test
make coverage make coverage
``` ```
## 1. Introduction ### Sample app
```bash
make release
make sample
make sample fname=tests/data/glass.arff
```
## Models
### [BoostAODE](docs/BoostAODE.md)

71
docs/BoostAODE.md Normal file
View File

@@ -0,0 +1,71 @@
# BoostAODE Algorithm Operation
The algorithm is based on the AdaBoost algorithm with some new proposals that can be activated using the following hyperparameters.
## Hyperparameters
The hyperparameters defined in the algorithm are:
- ***repeatSparent*** (*boolean*): Allows dataset variables to be repeated as parents of an *SPODE*. Default value: *false*.
- ***maxModels*** (*int*): Maximum number of models (*SPODEs*) to build. This hyperparameter is only taken into account if ***repeatSparent*** is set to *true*. Default value: *0*.
- ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*.
- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The split is the first fold of a stratified 5-fold partition generated with a predetermined seed. The *convergence* check requires the difference between the accuracy obtained by the current model and that obtained by the previous model to be greater than *1e-4*; otherwise, one is added to the number of models that worsen the result (see next hyperparameter). Default value: *false*.
- ***tolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *0*.
- ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions. Once the models of the selected variables are built, the algorithm will update the weights using the ensemble and set the significance of all the models built with the same &alpha;<sub>t</sub>. Default value: *""*.
- ***threshold*** (*double*): Sets the necessary value for the IWSS and FCBF algorithms to function. Accepted values are:
- IWSS: $threshold \in [0, 0.5]$
- FCBF: $threshold \in [10^{-7}, 1]$
Default value is *-1* so every time any of those algorithms are called, the threshold has to be set to the desired value.
- ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to false, the weighted average of the probabilities of each model's prediction will be used. Default value: *true*.
- ***predict_single*** (*boolean*): Sets whether the algorithm will use single-model prediction in the learning process. If set to *false*, all models trained up to that point will be used to calculate the prediction necessary to update the weights in the learning process. Default value: *true*.
## Operation
The algorithm performs the following steps:
1. **Initialization**
- If ***select_features*** is set, as many *SPODEs* are created as variables selected by the corresponding feature selection algorithm, and these variables are marked as used.
- Initial weights of the examples are set to *1/m*.
1. **Main Training Loop:**
- Variables are sorted by their mutual information with the class variable and processed in ascending, descending or random order, according to the value of the *order* hyperparameter. If it is random, the variables are shuffled.
- If the parent repetition is not established, the variable is marked as used.
- A *SPODE* is created using the selected variable as the parent.
- The model is trained, and the class variable corresponding to the training dataset is calculated. The calculation can be done using the last trained model or the set of models trained up to that point, according to the value of the *predict_single* hyperparameter.
- The weights associated with the examples are updated using this expression:
- w<sub>i</sub> · e<sup>&alpha;<sub>t</sub></sup> (if the example has been misclassified)
- w<sub>i</sub> · e<sup>-&alpha;<sub>t</sub></sup> (if the example has been correctly classified)
- The model significance is set to &alpha;<sub>t</sub>.
- If the ***convergence*** hyperparameter is set, the accuracy value on the test dataset that we separated in an initial step is calculated.
1. **Exit Conditions:**
- &epsilon;<sub>t</sub> > 0.5 => misclassified examples are penalized.
- The number of models with worse accuracy is greater than ***tolerance*** and ***convergence*** is set.
- There are no more variables to create models, and ***repeatSparent*** is not set.
- Number of models > ***maxModels*** if ***repeatSparent*** is set.
### [Proposal for *predict_single = false*](./BoostAODE_train_predict.pdf)

Binary file not shown.

Binary file not shown.

View File

@@ -1,4 +1,4 @@
filter = src/ filter = src/
exclude-directories = build_debug/lib/ exclude-directories = build_debug/lib/
print-summary = yes print-summary = yes
sort-percentage = yes sort = uncovered-percent

14
sample/CMakeLists.txt Normal file
View File

@@ -0,0 +1,14 @@
include_directories(
${BayesNet_SOURCE_DIR}/src
${BayesNet_SOURCE_DIR}/src/classifiers
${BayesNet_SOURCE_DIR}/src/ensembles
${BayesNet_SOURCE_DIR}/src/bayesian_network
${BayesNet_SOURCE_DIR}/src/utils
${BayesNet_SOURCE_DIR}/src/feature_selection
${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/mdlp
${BayesNet_SOURCE_DIR}/lib/json/include
${CMAKE_BINARY_DIR}/configured_files/include
)
add_executable(bayesnet_sample sample.cc)
target_link_libraries(bayesnet_sample ArffFiles BayesNet)

62
sample/sample.cc Normal file
View File

@@ -0,0 +1,62 @@
#include "ArffFiles.h"
#include "CPPFImdlp.h"
#include "BoostAODE.h"
/**
 * Discretize every feature of a dataset with an mdlp::CPPFImdlp discretizer.
 *
 * @param X Continuous samples, one inner vector per feature.
 * @param y Class labels passed to the discretizer together with each feature.
 * @return One vector of discrete labels per feature, in the same order as X.
 */
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
    std::vector<mdlp::labels_t> Xd;
    Xd.reserve(X.size()); // exactly one discretized vector per feature
    auto fimdlp = mdlp::CPPFImdlp();
    // Range-for avoids the signed/unsigned comparison of `int i < X.size()`
    for (auto& feature : X) {
        fimdlp.fit(feature, y);
        // transform() returns a reference to the discretizer's result, so it is
        // copied into Xd before the next fit() is run on the same object
        Xd.push_back(fimdlp.transform(feature));
    }
    return Xd;
}
tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last)
{
auto handler = ArffFiles();
handler.load(name, class_last);
// Get Dataset X, y
std::vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
std::vector<std::string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
torch::Tensor Xd;
auto states = map<std::string, std::vector<int>>();
auto Xr = discretizeDataset(X, y);
Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
}
states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0);
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
}
int main(int argc, char* argv[])
{
if (argc < 2) {
std::cerr << "Usage: " << argv[0] << " <file_name>" << std::endl;
return 1;
}
std::string file_name = argv[1];
torch::Tensor X, y;
std::vector<std::string> features;
std::string className;
map<std::string, std::vector<int>> states;
auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
std::cout << "Library version: " << clf.getVersion() << std::endl;
tie(X, y, features, className, states) = loadDataset(file_name, true);
clf.fit(X, y, features, className, states);
auto score = clf.score(X, y);
std::cout << "File: " << file_name << " score: " << score << std::endl;
return 0;
}

View File

@@ -16,12 +16,15 @@ namespace bayesnet {
virtual ~BaseClassifier() = default; virtual ~BaseClassifier() = default;
torch::Tensor virtual predict(torch::Tensor& X) = 0; torch::Tensor virtual predict(torch::Tensor& X) = 0;
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0; std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
torch::Tensor virtual predict_proba(torch::Tensor& X) = 0;
std::vector<std::vector<double>> virtual predict_proba(std::vector<std::vector<int >>& X) = 0;
status_t virtual getStatus() const = 0; status_t virtual getStatus() const = 0;
float virtual score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0; float virtual score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0; float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes()const = 0; int virtual getNumberOfNodes()const = 0;
int virtual getNumberOfEdges()const = 0; int virtual getNumberOfEdges()const = 0;
int virtual getNumberOfStates() const = 0; int virtual getNumberOfStates() const = 0;
int virtual getClassNumStates() const = 0;
std::vector<std::string> virtual show() const = 0; std::vector<std::string> virtual show() const = 0;
std::vector<std::string> virtual graph(const std::string& title = "") const = 0; std::vector<std::string> virtual graph(const std::string& title = "") const = 0;
virtual std::string getVersion() = 0; virtual std::string getVersion() = 0;

View File

@@ -1,18 +0,0 @@
#include "AODE.h"
namespace bayesnet {
    // Default constructor: delegates to the Ensemble base constructor.
    AODE::AODE()
        : Ensemble()
    {
    }
    // Rebuild the ensemble with one SPODE per feature index and give every model
    // the same significance (1.0). The weights argument is not used here.
    void AODE::buildModel(const torch::Tensor& weights)
    {
        models.clear();
        for (int feature_idx = 0; feature_idx < features.size(); ++feature_idx) {
            models.push_back(std::make_unique<SPODE>(feature_idx));
        }
        n_models = models.size();
        significanceModels.assign(n_models, 1.0);
    }
    // Graph description of the ensemble; simply forwards to Ensemble::graph.
    std::vector<std::string> AODE::graph(const std::string& title) const
    {
        auto lines = Ensemble::graph(title);
        return lines;
    }
}

View File

@@ -1,13 +0,0 @@
include_directories(
${BayesNet_SOURCE_DIR}/lib/mdlp
${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR}/src/BayesNet
${CMAKE_BINARY_DIR}/configured_files/include
)
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc )
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,141 +0,0 @@
#include "Ensemble.h"
namespace bayesnet {
// Default constructor: hands a default-constructed Network to Classifier and starts with zero models.
Ensemble::Ensemble()
    : Classifier(Network()),
      n_models(0)
{
}
// Fit every model of the ensemble with the ensemble's dataset, features, class name
// and states. n_models is refreshed from the models container first.
// The weights argument is not used here.
void Ensemble::trainModel(const torch::Tensor& weights)
{
    n_models = models.size();
    for (auto& member : models) {
        member->fit(dataset, features, className, states);
    }
}
// Weighted voting over the predictions of the models.
// y_pred is an m x n_models int tensor with the prediction of every model for each sample.
// Returns, for each sample, the class value that accumulated the most significance.
std::vector<int> Ensemble::voting(torch::Tensor& y_pred)
{
    auto predictions = y_pred.accessor<int, 2>();
    std::vector<int> winners;
    int numClasses = states.at(className).size();
    const auto n_samples = y_pred.size(0);
    for (int sample = 0; sample < n_samples; ++sample) {
        // ballot[c] accumulates the significance of every model that predicted class c
        std::vector<double> ballot(numClasses, 0.0);
        for (int model = 0; model < n_models; ++model) {
            const int predicted = predictions[sample][model];
            ballot[predicted] += significanceModels.at(model);
        }
        // argsort ranks the classes in descending order of votes; keep the first one
        auto ranking = argsort(ballot);
        winners.push_back(ranking[0]);
    }
    return winners;
}
// Predict the class of every sample in X by weighted voting of the models.
// Each model predicts in its own thread; writes into the shared prediction
// tensor are serialized with a mutex.
// Throws std::logic_error if the ensemble has not been fitted.
torch::Tensor Ensemble::predict(torch::Tensor& X)
{
    if (!fitted) {
        throw std::logic_error("Ensemble has not been fitted");
    }
    // One row per X.size(1) entry, one column per model
    torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
    std::vector<std::thread> workers;
    std::mutex mtx;
    for (auto i = 0; i < n_models; ++i) {
        workers.emplace_back([&, i]() {
            auto ypredict = models[i]->predict(X);
            std::lock_guard<std::mutex> lock(mtx);
            y_pred.index_put_({ "...", i }, ypredict);
        });
    }
    // Wait for every model before voting
    for (auto& worker : workers) {
        worker.join();
    }
    auto winners = voting(y_pred);
    return torch::tensor(winners);
}
// Predict the class of every sample in X (one inner vector per feature) by
// weighted voting of the models.
// Throws std::logic_error if the ensemble has not been fitted and
// std::invalid_argument (a std::logic_error) if X is empty.
std::vector<int> Ensemble::predict(std::vector<std::vector<int>>& X)
{
    if (!fitted) {
        throw std::logic_error("Ensemble has not been fitted");
    }
    if (X.empty()) {
        // X[0] below would be undefined behaviour
        throw std::invalid_argument("Ensemble::predict: X is empty");
    }
    long m_ = X[0].size();
    // X already has the layout the models expect, so it is passed straight
    // through instead of being deep-copied into a temporary first
    torch::Tensor y_pred = torch::zeros({ m_, n_models }, torch::kInt32);
    for (auto i = 0; i < n_models; ++i) {
        y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(X), torch::kInt32));
    }
    return voting(y_pred);
}
// Fraction of entries where the ensemble prediction for X matches y.
// Throws std::logic_error if the ensemble has not been fitted.
float Ensemble::score(torch::Tensor& X, torch::Tensor& y)
{
    if (!fitted) {
        throw std::logic_error("Ensemble has not been fitted");
    }
    auto y_pred = predict(X);
    const auto total = y_pred.size(0);
    int hits = 0;
    for (int pos = 0; pos < total; ++pos) {
        const bool match = y_pred[pos].item<int>() == y[pos].item<int>();
        if (match) {
            ++hits;
        }
    }
    return static_cast<double>(hits) / total;
}
// Fraction of entries where the ensemble prediction for X matches y.
// Throws std::logic_error if the ensemble has not been fitted.
float Ensemble::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{
    if (!fitted) {
        throw std::logic_error("Ensemble has not been fitted");
    }
    auto y_pred = predict(X);
    const auto total = y_pred.size();
    int hits = 0;
    for (std::size_t pos = 0; pos < total; ++pos) {
        hits += (y_pred[pos] == y[pos]) ? 1 : 0;
    }
    return static_cast<double>(hits) / total;
}
// Concatenate, in model order, the lines returned by show() for the first n_models models.
std::vector<std::string> Ensemble::show() const
{
    std::vector<std::string> lines;
    for (int idx = 0; idx < n_models; ++idx) {
        for (const auto& line : models[idx]->show()) {
            lines.push_back(line);
        }
    }
    return lines;
}
// Concatenate, in model order, the graph of each of the first n_models models.
// Every model receives the title suffixed with "_" and its index in the ensemble.
std::vector<std::string> Ensemble::graph(const std::string& title) const
{
    std::vector<std::string> lines;
    for (int idx = 0; idx < n_models; ++idx) {
        const auto model_title = title + "_" + std::to_string(idx);
        for (const auto& line : models[idx]->graph(model_title)) {
            lines.push_back(line);
        }
    }
    return lines;
}
int Ensemble::getNumberOfNodes() const
{
int nodes = 0;
for (auto i = 0; i < n_models; ++i) {
nodes += models[i]->getNumberOfNodes();
}
return nodes;
}
// Sum of getNumberOfEdges() over the first n_models models.
int Ensemble::getNumberOfEdges() const
{
    int total = 0;
    int idx = 0;
    while (idx < n_models) {
        total += models[idx]->getNumberOfEdges();
        ++idx;
    }
    return total;
}
// Sum of getNumberOfStates() over the first n_models models.
int Ensemble::getNumberOfStates() const
{
    int total = 0;
    int idx = 0;
    while (idx < n_models) {
        total += models[idx]->getNumberOfStates();
        ++idx;
    }
    return total;
}
}

View File

@@ -1,25 +0,0 @@
#include "bayesnetUtils.h"
namespace bayesnet {
// Return the indices of nums ordered by descending value.
// The sort is stable, so equal values keep ascending index order; this makes the
// result (and therefore the winner of a tied vote) deterministic on every platform.
// An empty input yields an empty result. nums itself is not modified.
std::vector<int> argsort(std::vector<double>& nums)
{
    std::vector<int> indices(nums.size());
    std::iota(indices.begin(), indices.end(), 0);
    std::stable_sort(indices.begin(), indices.end(), [&nums](int i, int j) { return nums[i] > nums[j]; });
    return indices;
}
// Convert an m x n tensor into a vector of n columns, each holding m values.
// Elements are read through data_ptr<int>(), so the tensor is expected to hold ints.
std::vector<std::vector<int>> tensorToVector(torch::Tensor& tensor)
{
    std::vector<std::vector<int>> result;
    result.reserve(tensor.size(1));
    // Iterate over cols
    for (int i = 0; i < tensor.size(1); ++i) {
        // Selecting a column gives a strided view over the original storage;
        // contiguous() packs it so the m values can be read sequentially from
        // data_ptr (reading the view directly would walk along the row instead)
        auto col_tensor = tensor.index({ "...", i }).contiguous();
        auto* first = col_tensor.data_ptr<int>();
        result.emplace_back(first, first + tensor.size(0));
    }
    return result;
}
}

18
src/CMakeLists.txt Normal file
View File

@@ -0,0 +1,18 @@
include_directories(
${BayesNet_SOURCE_DIR}/lib/mdlp
${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR}/src
${BayesNet_SOURCE_DIR}/src/feature_selection
${BayesNet_SOURCE_DIR}/src/bayesian_network
${BayesNet_SOURCE_DIR}/src/classifiers
${BayesNet_SOURCE_DIR}/src/ensembles
${BayesNet_SOURCE_DIR}/src/utils
${CMAKE_BINARY_DIR}/configured_files/include
)
file(GLOB_RECURSE Sources "*.cc")
add_library(BayesNet ${Sources})
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")

View File

@@ -71,7 +71,7 @@ namespace bayesnet {
for (Node* child : nodes[nodeId]->getChildren()) { for (Node* child : nodes[nodeId]->getChildren()) {
if (visited.find(child->getName()) == visited.end() && isCyclic(child->getName(), visited, recStack)) if (visited.find(child->getName()) == visited.end() && isCyclic(child->getName(), visited, recStack))
return true; return true;
else if (recStack.find(child->getName()) != recStack.end()) if (recStack.find(child->getName()) != recStack.end())
return true; return true;
} }
} }
@@ -238,6 +238,7 @@ namespace bayesnet {
return predictions; return predictions;
} }
// Return mxn std::vector of probabilities // Return mxn std::vector of probabilities
// tsamples is nxm std::vector of samples
std::vector<std::vector<double>> Network::predict_proba(const std::vector<std::vector<int>>& tsamples) std::vector<std::vector<double>> Network::predict_proba(const std::vector<std::vector<int>>& tsamples)
{ {
if (!fitted) { if (!fitted) {

View File

@@ -7,23 +7,6 @@
namespace bayesnet { namespace bayesnet {
class Network { class Network {
private:
std::map<std::string, std::unique_ptr<Node>> nodes;
bool fitted;
float maxThreads = 0.95;
int classNumStates;
std::vector<std::string> features; // Including classname
std::string className;
double laplaceSmoothing;
torch::Tensor samples; // nxm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
std::vector<double> predict_sample(const std::vector<int>&);
std::vector<double> predict_sample(const torch::Tensor&);
std::vector<double> exactInference(std::map<std::string, int>&);
double computeFactor(std::map<std::string, int>&);
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void setStates(const std::map<std::string, std::vector<int>>&);
public: public:
Network(); Network();
explicit Network(float); explicit Network(float);
@@ -58,6 +41,23 @@ namespace bayesnet {
void initialize(); void initialize();
void dump_cpt() const; void dump_cpt() const;
inline std::string version() { return { project_version.begin(), project_version.end() }; } inline std::string version() { return { project_version.begin(), project_version.end() }; }
private:
std::map<std::string, std::unique_ptr<Node>> nodes;
bool fitted;
float maxThreads = 0.95;
int classNumStates;
std::vector<std::string> features; // Including classname
std::string className;
double laplaceSmoothing;
torch::Tensor samples; // nxm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
std::vector<double> predict_sample(const std::vector<int>&);
std::vector<double> predict_sample(const torch::Tensor&);
std::vector<double> exactInference(std::map<std::string, int>&);
double computeFactor(std::map<std::string, int>&);
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void setStates(const std::map<std::string, std::vector<int>>&);
}; };
} }
#endif #endif

View File

@@ -3,6 +3,7 @@
namespace bayesnet { namespace bayesnet {
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{ {
this->features = features; this->features = features;
@@ -87,14 +88,14 @@ namespace bayesnet {
torch::Tensor Classifier::predict(torch::Tensor& X) torch::Tensor Classifier::predict(torch::Tensor& X)
{ {
if (!fitted) { if (!fitted) {
throw std::logic_error("Classifier has not been fitted"); throw std::logic_error(CLASSIFIER_NOT_FITTED);
} }
return model.predict(X); return model.predict(X);
} }
std::vector<int> Classifier::predict(std::vector<std::vector<int>>& X) std::vector<int> Classifier::predict(std::vector<std::vector<int>>& X)
{ {
if (!fitted) { if (!fitted) {
throw std::logic_error("Classifier has not been fitted"); throw std::logic_error(CLASSIFIER_NOT_FITTED);
} }
auto m_ = X[0].size(); auto m_ = X[0].size();
auto n_ = X.size(); auto n_ = X.size();
@@ -105,18 +106,37 @@ namespace bayesnet {
auto yp = model.predict(Xd); auto yp = model.predict(Xd);
return yp; return yp;
} }
float Classifier::score(torch::Tensor& X, torch::Tensor& y) torch::Tensor Classifier::predict_proba(torch::Tensor& X)
{ {
if (!fitted) { if (!fitted) {
throw std::logic_error("Classifier has not been fitted"); throw std::logic_error(CLASSIFIER_NOT_FITTED);
} }
return model.predict_proba(X);
}
// Class probabilities for vector input.
// Throws std::logic_error when the classifier has not been fitted; otherwise
// hands a private copy of X to the underlying network and returns its answer.
std::vector<std::vector<double>> Classifier::predict_proba(std::vector<std::vector<int>>& X)
{
    if (!fitted) {
        throw std::logic_error(CLASSIFIER_NOT_FITTED);
    }
    // The network receives its own row-by-row copy of the caller's data.
    std::vector<std::vector<int>> Xd(X);
    return model.predict_proba(Xd);
}
float Classifier::score(torch::Tensor& X, torch::Tensor& y)
{
torch::Tensor y_pred = predict(X); torch::Tensor y_pred = predict(X);
return (y_pred == y).sum().item<float>() / y.size(0); return (y_pred == y).sum().item<float>() / y.size(0);
} }
float Classifier::score(std::vector<std::vector<int>>& X, std::vector<int>& y) float Classifier::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{ {
if (!fitted) { if (!fitted) {
throw std::logic_error("Classifier has not been fitted"); throw std::logic_error(CLASSIFIER_NOT_FITTED);
} }
return model.score(X, y); return model.score(X, y);
} }
@@ -145,6 +165,10 @@ namespace bayesnet {
{ {
return fitted ? model.getStates() : 0; return fitted ? model.getStates() : 0;
} }
// Value of model.getClassNumStates() once the classifier is fitted,
// 0 before that.
int Classifier::getClassNumStates() const
{
    if (!fitted) {
        return 0;
    }
    return model.getClassNumStates();
}
std::vector<std::string> Classifier::topological_order() std::vector<std::string> Classifier::topological_order()
{ {
return model.topological_sort(); return model.topological_sort();

View File

@@ -7,11 +7,34 @@
namespace bayesnet { namespace bayesnet {
class Classifier : public BaseClassifier { class Classifier : public BaseClassifier {
private: public:
Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights); Classifier(Network model);
virtual ~Classifier() = default;
Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override;
void addNodes();
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
int getClassNumStates() const override;
torch::Tensor predict(torch::Tensor& X) override;
std::vector<int> predict(std::vector<std::vector<int>>& X) override;
torch::Tensor predict_proba(torch::Tensor& X) override;
std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
status_t getStatus() const override { return status; }
std::string getVersion() override { return { project_version.begin(), project_version.end() }; };
float score(torch::Tensor& X, torch::Tensor& y) override;
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
std::vector<std::string> show() const override;
std::vector<std::string> topological_order() override;
std::vector<std::string> getNotes() const override { return notes; }
void dump_cpt() const override;
void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
protected: protected:
bool fitted; bool fitted;
int m, n; // m: number of samples, n: number of features unsigned int m, n; // m: number of samples, n: number of features
Network model; Network model;
Metrics metrics; Metrics metrics;
std::vector<std::string> features; std::vector<std::string> features;
@@ -24,28 +47,8 @@ namespace bayesnet {
virtual void buildModel(const torch::Tensor& weights) = 0; virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override;
void buildDataset(torch::Tensor& y); void buildDataset(torch::Tensor& y);
public: private:
Classifier(Network model); Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
virtual ~Classifier() = default;
Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override;
void addNodes();
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
torch::Tensor predict(torch::Tensor& X) override;
status_t getStatus() const override { return status; }
std::string getVersion() override { return { project_version.begin(), project_version.end() }; };
std::vector<int> predict(std::vector<std::vector<int>>& X) override;
float score(torch::Tensor& X, torch::Tensor& y) override;
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
std::vector<std::string> show() const override;
std::vector<std::string> topological_order() override;
std::vector<std::string> getNotes() const override { return notes; }
void dump_cpt() const override;
void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
}; };
} }
#endif #endif

34
src/ensembles/AODE.cc Normal file
View File

@@ -0,0 +1,34 @@
#include "AODE.h"
namespace bayesnet {
// Builds an AODE ensemble; predict_voting is forwarded unchanged to the
// Ensemble base constructor.
AODE::AODE(bool predict_voting) : Ensemble(predict_voting)
{
// "predict_voting" is the only hyperparameter name registered for AODE.
validHyperparameters = { "predict_voting" };
}
// Applies the hyperparameters understood by AODE.
// "predict_voting" is consumed when present; any key left over afterwards
// makes the call fail with std::invalid_argument.
void AODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
    // Work on a copy so recognised keys can be removed as they are consumed.
    nlohmann::json pending = hyperparameters_;
    if (pending.contains("predict_voting")) {
        predict_voting = pending["predict_voting"];
        pending.erase("predict_voting");
    }
    if (pending.empty()) {
        return;
    }
    throw std::invalid_argument("Invalid hyperparameters" + pending.dump());
}
// Rebuilds the ensemble with one SPODE per entry of `features`, each constructed
// with that entry's index, and gives every model a significance of 1.0.
// The weights argument is not used here.
void AODE::buildModel(const torch::Tensor& weights)
{
    models.clear();
    significanceModels.clear();
    const int total = static_cast<int>(features.size());
    int root = 0;
    while (root < total) {
        models.push_back(std::make_unique<SPODE>(root));
        ++root;
    }
    n_models = models.size();
    significanceModels = std::vector<double>(n_models, 1.0);
}
// Graph description of the ensemble; delegates entirely to Ensemble::graph.
std::vector<std::string> AODE::graph(const std::string& title) const
{
    std::vector<std::string> lines = Ensemble::graph(title);
    return lines;
}
}

View File

@@ -4,12 +4,13 @@
#include "SPODE.h" #include "SPODE.h"
namespace bayesnet { namespace bayesnet {
class AODE : public Ensemble { class AODE : public Ensemble {
public:
AODE(bool predict_voting = true);
virtual ~AODE() {};
void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& title = "AODE") const override;
protected: protected:
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
public:
AODE();
virtual ~AODE() {};
std::vector<std::string> graph(const std::string& title = "AODE") const override;
}; };
} }
#endif #endif

View File

@@ -1,7 +1,22 @@
#include "AODELd.h" #include "AODELd.h"
namespace bayesnet { namespace bayesnet {
AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {} AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
{
validHyperparameters = { "predict_voting" };
}
void AODELd::setHyperparameters(const nlohmann::json& hyperparameters_)
{
auto hyperparameters = hyperparameters_;
if (hyperparameters.contains("predict_voting")) {
predict_voting = hyperparameters["predict_voting"];
hyperparameters.erase("predict_voting");
}
if (!hyperparameters.empty()) {
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
}
}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{ {
checkInput(X_, y_); checkInput(X_, y_);

View File

@@ -6,15 +6,15 @@
namespace bayesnet { namespace bayesnet {
class AODELd : public Ensemble, public Proposal { class AODELd : public Ensemble, public Proposal {
public:
AODELd(bool predict_voting = true);
virtual ~AODELd() = default;
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
protected: protected:
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override;
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
public:
AODELd();
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
virtual ~AODELd() = default;
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
static inline std::string version() { return "0.0.1"; };
}; };
} }
#endif // !AODELD_H #endif // !AODELD_H

View File

@@ -1,6 +1,7 @@
#include <set> #include <set>
#include <functional> #include <functional>
#include <limits.h> #include <limits.h>
#include <tuple>
#include "BoostAODE.h" #include "BoostAODE.h"
#include "CFS.h" #include "CFS.h"
#include "FCBF.h" #include "FCBF.h"
@@ -8,15 +9,29 @@
#include "folding.hpp" #include "folding.hpp"
namespace bayesnet { namespace bayesnet {
BoostAODE::BoostAODE() : Ensemble() struct {
std::string CFS = "CFS";
std::string FCBF = "FCBF";
std::string IWSS = "IWSS";
}SelectFeatures;
struct {
std::string ASC = "asc";
std::string DESC = "desc";
std::string RAND = "rand";
}Orders;
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
{ {
validHyperparameters = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features", "tolerance" }; validHyperparameters = {
"repeatSparent", "maxModels", "order", "convergence", "threshold",
"select_features", "tolerance", "predict_voting", "predict_single"
};
} }
void BoostAODE::buildModel(const torch::Tensor& weights) void BoostAODE::buildModel(const torch::Tensor& weights)
{ {
// Models shall be built in trainModel // Models shall be built in trainModel
models.clear(); models.clear();
significanceModels.clear();
n_models = 0; n_models = 0;
// Prepare the validation dataset // Prepare the validation dataset
auto y_ = dataset.index({ -1, "..." }); auto y_ = dataset.index({ -1, "..." });
@@ -56,14 +71,22 @@ namespace bayesnet {
maxModels = hyperparameters["maxModels"]; maxModels = hyperparameters["maxModels"];
hyperparameters.erase("maxModels"); hyperparameters.erase("maxModels");
} }
if (hyperparameters.contains("ascending")) { if (hyperparameters.contains("order")) {
ascending = hyperparameters["ascending"]; std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
hyperparameters.erase("ascending"); order_algorithm = hyperparameters["order"];
if (std::find(algos.begin(), algos.end(), order_algorithm) == algos.end()) {
throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC + ", " + Orders.RAND + "]");
}
hyperparameters.erase("order");
} }
if (hyperparameters.contains("convergence")) { if (hyperparameters.contains("convergence")) {
convergence = hyperparameters["convergence"]; convergence = hyperparameters["convergence"];
hyperparameters.erase("convergence"); hyperparameters.erase("convergence");
} }
if (hyperparameters.contains("predict_single")) {
predict_single = hyperparameters["predict_single"];
hyperparameters.erase("predict_single");
}
if (hyperparameters.contains("threshold")) { if (hyperparameters.contains("threshold")) {
threshold = hyperparameters["threshold"]; threshold = hyperparameters["threshold"];
hyperparameters.erase("threshold"); hyperparameters.erase("threshold");
@@ -72,13 +95,17 @@ namespace bayesnet {
tolerance = hyperparameters["tolerance"]; tolerance = hyperparameters["tolerance"];
hyperparameters.erase("tolerance"); hyperparameters.erase("tolerance");
} }
if (hyperparameters.contains("predict_voting")) {
predict_voting = hyperparameters["predict_voting"];
hyperparameters.erase("predict_voting");
}
if (hyperparameters.contains("select_features")) { if (hyperparameters.contains("select_features")) {
auto selectedAlgorithm = hyperparameters["select_features"]; auto selectedAlgorithm = hyperparameters["select_features"];
std::vector<std::string> algos = { "IWSS", "FCBF", "CFS" }; std::vector<std::string> algos = { SelectFeatures.IWSS, SelectFeatures.CFS, SelectFeatures.CFS };
selectFeatures = true; selectFeatures = true;
algorithm = selectedAlgorithm; select_features_algorithm = selectedAlgorithm;
if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) { if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
throw std::invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]"); throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " + SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
} }
hyperparameters.erase("select_features"); hyperparameters.erase("select_features");
} }
@@ -86,28 +113,54 @@ namespace bayesnet {
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump()); throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
} }
} }
// Performs the sample re-weighting step of one boosting round.
//
// ytrain:  reference labels the predictions are compared against.
// ypred:   predicted labels for the same samples.
// weights: current sample weights; modified in place when the round is accepted.
//
// Returns { weights, alpha_t, terminate }:
//   - a reference to the `weights` argument,
//   - the significance computed for this round (0 when the round is rejected),
//   - terminate == true when the weighted error is above 0.5, in which case
//     the weights are left untouched.
std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
{
    bool terminate = false;
    double alpha_t = 0;
    auto mask_wrong = ypred != ytrain;
    auto mask_right = ypred == ytrain;
    // Weighted error: sum of the weights of the misclassified samples.
    auto masked_weights = weights * mask_wrong.to(weights.dtype());
    double epsilon_t = masked_weights.sum().item<double>();
    if (epsilon_t > 0.5) {
        // Inverse the weights policy (plot ln(wt))
        // "In each round of AdaBoost, there is a sanity check to ensure that the current base
        // learner is better than random guess" (Zhi-Hua Zhou, 2012)
        terminate = true;
    } else {
        // A round without errors gets a fixed significance of 1. The ratio is only
        // evaluated for a non-zero error, so it never divides by zero.
        alpha_t = epsilon_t == 0 ? 1 : 0.5 * log((1 - epsilon_t) / epsilon_t);
        // Step 3.2: Update weights for next classifier
        // NOTE(review): both updates below ADD the scaled weights (w += mask * exp(..) * w)
        // instead of multiplying by the factor as in textbook AdaBoost - confirm this is intended.
        // Step 3.2.1: Update weights of wrong samples
        weights += mask_wrong.to(weights.dtype()) * exp(alpha_t) * weights;
        // Step 3.2.2: Update weights of right samples
        weights += mask_right.to(weights.dtype()) * exp(-alpha_t) * weights;
        // Step 3.3: Normalise the weights
        double totalWeights = torch::sum(weights).item<double>();
        weights = weights / totalWeights;
    }
    return { weights, alpha_t, terminate };
}
std::unordered_set<int> BoostAODE::initializeModels() std::unordered_set<int> BoostAODE::initializeModels()
{ {
std::unordered_set<int> featuresUsed; std::unordered_set<int> featuresUsed;
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
int maxFeatures = 0; int maxFeatures = 0;
if (algorithm == "CFS") { if (select_features_algorithm == SelectFeatures.CFS) {
featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_); featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
} else if (algorithm == "IWSS") { } else if (select_features_algorithm == SelectFeatures.IWSS) {
if (threshold < 0 || threshold >0.5) { if (threshold < 0 || threshold >0.5) {
throw std::invalid_argument("Invalid threshold value for IWSS [0, 0.5]"); throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.IWSS + " [0, 0.5]");
} }
featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold); featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
} else if (algorithm == "FCBF") { } else if (select_features_algorithm == SelectFeatures.FCBF) {
if (threshold < 1e-7 || threshold > 1) { if (threshold < 1e-7 || threshold > 1) {
throw std::invalid_argument("Invalid threshold value [1e-7, 1]"); throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.FCBF + " [1e-7, 1]");
} }
featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold); featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
} }
featureSelector->fit(); featureSelector->fit();
auto cfsFeatures = featureSelector->getFeatures(); auto cfsFeatures = featureSelector->getFeatures();
for (const int& feature : cfsFeatures) { for (const int& feature : cfsFeatures) {
// std::cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << std::endl;
featuresUsed.insert(feature); featuresUsed.insert(feature);
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature); std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_);
@@ -115,35 +168,68 @@ namespace bayesnet {
significanceModels.push_back(1.0); significanceModels.push_back(1.0);
n_models++; n_models++;
} }
notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + algorithm); notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
delete featureSelector; delete featureSelector;
return featuresUsed; return featuresUsed;
} }
// Adds the probabilities predicted by `model` for X to the running table kept in
// prob_table and returns the arg-max along dimension 1 of that table.
// The first call after initialize_prob_table has been raised replaces the table
// instead of adding to it.
torch::Tensor BoostAODE::ensemble_predict(torch::Tensor& X, SPODE* model)
{
    const bool startFresh = initialize_prob_table;
    initialize_prob_table = false;
    auto contribution = model->predict_proba(X) * 1.0;
    if (startFresh) {
        prob_table = contribution;
    } else {
        prob_table += contribution;
    }
    // prob_table holds a sum of probabilities, not probabilities: it is never
    // divided by the total of the "weights" applied to each model's predict_proba
    // output. The arg-max below does not need that normalisation.
    return prob_table.argmax(1);
}
void BoostAODE::trainModel(const torch::Tensor& weights) void BoostAODE::trainModel(const torch::Tensor& weights)
{ {
// Algorithm based on the adaboost algorithm for classification
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
initialize_prob_table = true;
fitted = true;
double alpha_t = 0;
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
bool exitCondition = false;
std::unordered_set<int> featuresUsed; std::unordered_set<int> featuresUsed;
if (selectFeatures) { if (selectFeatures) {
featuresUsed = initializeModels(); featuresUsed = initializeModels();
auto ypred = predict(X_train);
std::tie(weights_, alpha_t, exitCondition) = update_weights(y_train, ypred, weights_);
// Update significance of the models
for (int i = 0; i < n_models; ++i) {
significanceModels[i] = alpha_t;
} }
if (maxModels == 0) if (exitCondition) {
return;
}
}
bool resetMaxModels = false;
if (maxModels == 0) {
maxModels = .1 * n > 10 ? .1 * n : n; maxModels = .1 * n > 10 ? .1 * n : n;
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); resetMaxModels = true; // Flag to unset maxModels
bool exitCondition = false; }
// Variables to control the accuracy finish condition // Variables to control the accuracy finish condition
double priorAccuracy = 0.0; double priorAccuracy = 0.0;
double delta = 1.0; double delta = 1.0;
double threshold = 1e-4; double convergence_threshold = 1e-4;
int count = 0; // number of times the accuracy is lower than the threshold int count = 0; // number of times the accuracy is lower than the convergence_threshold
fitted = true; // to enable predict
// Step 0: Set the finish condition // Step 0: Set the finish condition
// if not repeatSparent a finish condition is run out of features // if not repeatSparent a finish condition is run out of features
// n_models == maxModels // n_models == maxModels
// epsilon sub t > 0.5 => inverse the weights policy // epsilon sub t > 0.5 => inverse the weights policy
// validation error is not decreasing // validation error is not decreasing
bool ascending = order_algorithm == Orders.ASC;
std::mt19937 g{ 173 };
while (!exitCondition) { while (!exitCondition) {
// Step 1: Build ranking with mutual information // Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
std::unique_ptr<Classifier> model; if (order_algorithm == Orders.RAND) {
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
}
auto feature = featureSelection[0]; auto feature = featureSelection[0];
if (!repeatSparent || featuresUsed.size() < featureSelection.size()) { if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
bool used = true; bool used = true;
@@ -160,26 +246,22 @@ namespace bayesnet {
continue; continue;
} }
} }
featuresUsed.insert(feature); std::unique_ptr<Classifier> model;
model = std::make_unique<SPODE>(feature); model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_);
auto ypred = model->predict(X_train); torch::Tensor ypred;
if (predict_single) {
ypred = model->predict(X_train);
} else {
ypred = ensemble_predict(X_train, dynamic_cast<SPODE*>(model.get()));
}
// Step 3.1: Compute the classifier amout of say // Step 3.1: Compute the classifier amout of say
auto mask_wrong = ypred != y_train; std::tie(weights_, alpha_t, exitCondition) = update_weights(y_train, ypred, weights_);
auto mask_right = ypred == y_train; if (exitCondition) {
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype()); break;
double epsilon_t = masked_weights.sum().item<double>(); }
double wt = (1 - epsilon_t) / epsilon_t;
double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
// Step 3.2.2: Update weights of right samples
weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>();
weights_ = weights_ / totalWeights;
// Step 3.4: Store classifier and its accuracy to weigh its future vote // Step 3.4: Store classifier and its accuracy to weigh its future vote
featuresUsed.insert(feature);
models.push_back(std::move(model)); models.push_back(std::move(model));
significanceModels.push_back(alpha_t); significanceModels.push_back(alpha_t);
n_models++; n_models++;
@@ -191,17 +273,21 @@ namespace bayesnet {
} else { } else {
delta = accuracy - priorAccuracy; delta = accuracy - priorAccuracy;
} }
if (delta < threshold) { if (delta < convergence_threshold) {
count++; count++;
} }
priorAccuracy = accuracy;
} }
exitCondition = n_models >= maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance; exitCondition = n_models >= maxModels && repeatSparent || count > tolerance;
} }
if (featuresUsed.size() != features.size()) { if (featuresUsed.size() != features.size()) {
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size())); notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
status = WARNING; status = WARNING;
} }
notes.push_back("Number of models: " + std::to_string(n_models)); notes.push_back("Number of models: " + std::to_string(n_models));
if (resetMaxModels) {
maxModels = 0;
}
} }
std::vector<std::string> BoostAODE::graph(const std::string& title) const std::vector<std::string> BoostAODE::graph(const std::string& title) const
{ {

View File

@@ -7,7 +7,7 @@
namespace bayesnet { namespace bayesnet {
class BoostAODE : public Ensemble { class BoostAODE : public Ensemble {
public: public:
BoostAODE(); BoostAODE(bool predict_voting = true);
virtual ~BoostAODE() = default; virtual ~BoostAODE() = default;
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override; std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
void setHyperparameters(const nlohmann::json& hyperparameters) override; void setHyperparameters(const nlohmann::json& hyperparameters) override;
@@ -15,17 +15,21 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override;
private: private:
std::unordered_set<int> initializeModels();
torch::Tensor ensemble_predict(torch::Tensor& X, SPODE* model);
torch::Tensor dataset_; torch::Tensor dataset_;
torch::Tensor X_train, y_train, X_test, y_test; torch::Tensor X_train, y_train, X_test, y_test;
std::unordered_set<int> initializeModels();
// Hyperparameters // Hyperparameters
bool repeatSparent = false; // if true, a feature can be selected more than once bool repeatSparent = false; // if true, a feature can be selected more than once
int maxModels = 0; int maxModels = 0;
int tolerance = 0; int tolerance = 0;
bool ascending = false; //Process KBest features ascending or descending order bool predict_single = true; // wether the last model is used to predict in training or the whole ensemble
std::string order_algorithm; // order to process the KBest features asc, desc, rand
bool convergence = false; //if true, stop when the model does not improve bool convergence = false; //if true, stop when the model does not improve
bool selectFeatures = false; // if true, use feature selection bool selectFeatures = false; // if true, use feature selection
std::string algorithm = ""; // Selected feature selection algorithm std::string select_features_algorithm = "desc"; // Selected feature selection algorithm
bool initialize_prob_table; // if true, initialize the prob_table with the first model (used in train)
torch::Tensor prob_table; // Table of probabilities for ensemble predicting if predict_single is false
FeatureSelect* featureSelector = nullptr; FeatureSelect* featureSelector = nullptr;
double threshold = -1; double threshold = -1;
}; };

216
src/ensembles/Ensemble.cc Normal file
View File

@@ -0,0 +1,216 @@
#include "Ensemble.h"
namespace bayesnet {
// Creates an ensemble with no models on top of a default-constructed Network;
// predict_voting is stored as given.
Ensemble::Ensemble(bool predict_voting)
    : Classifier(Network()), n_models(0), predict_voting(predict_voting)
{
}
// Message of the std::logic_error thrown by the predict_proba overloads when
// the ensemble has not been fitted.
const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted";
// Fits every model currently held by the ensemble on the stored dataset and
// records how many models there are. The weights argument is not used here.
void Ensemble::trainModel(const torch::Tensor& weights)
{
    n_models = models.size();
    for (auto& member : models) {
        member->fit(dataset, features, className, states);
    }
}
// For each row of X, returns the position of its largest element
// (the first one on ties, 0 for an empty row).
std::vector<int> Ensemble::compute_arg_max(std::vector<std::vector<double>>& X)
{
    std::vector<int> winners;
    for (auto& row : X) {
        auto best = std::max_element(row.begin(), row.end());
        winners.push_back(std::distance(row.begin(), best));
    }
    return winners;
}
// Arg-max of X along dimension 1.
torch::Tensor Ensemble::compute_arg_max(torch::Tensor& X)
{
    return torch::argmax(X, /*dim=*/1);
}
// Turns per-model class predictions into weighted vote shares.
//
// votes: m x n_models tensor with the prediction of every model for each sample;
//        it is read through an <int, 2> accessor.
// Returns an m x n_class_states float32 tensor. Entry (i, c) is the sum of the
// significances of the models that predicted class c for sample i, divided by
// the sum of all significances.
torch::Tensor Ensemble::voting(torch::Tensor& votes)
{
    // Convert m x n_models tensor to a m x n_class_states with voting probabilities
    auto y_pred_ = votes.accessor<int, 2>();
    int numClasses = states.at(className).size();
    // votes is m x n_models with the prediction of every model for each sample
    auto result = torch::zeros({ votes.size(0), numClasses }, torch::kFloat32);
    auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
    for (int i = 0; i < votes.size(0); ++i) {
        // n_votes store in each index (value of class) the significance added by each model
        // i.e. n_votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions
        std::vector<double> n_votes(numClasses, 0.0);
        for (int j = 0; j < n_models; ++j) {
            n_votes[y_pred_[i][j]] += significanceModels.at(j);
        }
        result[i] = torch::tensor(n_votes);
    }
    // To only do one division and gain precision
    result /= sum;
    return result;
}
// Class probabilities for vector input.
// Throws std::logic_error when the ensemble has not been fitted. The result comes
// from predict_average_voting when predict_voting is set, and from
// predict_average_proba otherwise.
std::vector<std::vector<double>> Ensemble::predict_proba(std::vector<std::vector<int>>& X)
{
    if (!fitted) {
        throw std::logic_error(ENSEMBLE_NOT_FITTED);
    }
    if (predict_voting) {
        return predict_average_voting(X);
    }
    return predict_average_proba(X);
}
// Class probabilities for tensor input.
// Throws std::logic_error when the ensemble has not been fitted. The result comes
// from predict_average_voting when predict_voting is set, and from
// predict_average_proba otherwise.
torch::Tensor Ensemble::predict_proba(torch::Tensor& X)
{
    if (!fitted) {
        throw std::logic_error(ENSEMBLE_NOT_FITTED);
    }
    if (predict_voting) {
        return predict_average_voting(X);
    }
    return predict_average_proba(X);
}
// Predicted class per sample: the arg-max of each row returned by predict_proba.
std::vector<int> Ensemble::predict(std::vector<std::vector<int>>& X)
{
    std::vector<std::vector<double>> probabilities = predict_proba(X);
    return compute_arg_max(probabilities);
}
// Predicted class per sample: the arg-max along dimension 1 of predict_proba(X).
torch::Tensor Ensemble::predict(torch::Tensor& X)
{
    torch::Tensor probabilities = predict_proba(X);
    return compute_arg_max(probabilities);
}
// Weighted average of the per-model class probabilities (tensor input).
// Each model's predict_proba(X) output is scaled by its entry in
// significanceModels, the scaled outputs are summed, and the total is divided
// by the sum of all significances.
// NOTE(review): models[0] is dereferenced unconditionally, so this presumably
// requires at least one model - confirm callers guarantee it.
torch::Tensor Ensemble::predict_average_proba(torch::Tensor& X)
{
auto n_states = models[0]->getClassNumStates();
// Accumulator with X.size(1) rows and n_states columns, initialised to zero.
torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
auto threads{ std::vector<std::thread>() };
std::mutex mtx;
for (auto i = 0; i < n_models; ++i) {
// One thread per model; i is captured by value, everything else by reference.
threads.push_back(std::thread([&, i]() {
auto ypredict = models[i]->predict_proba(X);
// The prediction above runs outside the lock; only the accumulation is serialised.
std::lock_guard<std::mutex> lock(mtx);
y_pred += ypredict * significanceModels[i];
}));
}
// Wait for every worker before the accumulator is read.
for (auto& thread : threads) {
thread.join();
}
auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
y_pred /= sum;
return y_pred;
}
// Weighted average of the per-model class probabilities (vector input).
// Each model's predict_proba(X) output is scaled by its entry in
// significanceModels, the scaled outputs are summed element by element, and every
// element is finally divided by the sum of all significances.
// NOTE(review): X[0] and models[0] are accessed unconditionally, so this presumably
// requires a non-empty X and at least one model - confirm callers guarantee it.
std::vector<std::vector<double>> Ensemble::predict_average_proba(std::vector<std::vector<int>>& X)
{
auto n_states = models[0]->getClassNumStates();
// Accumulator with X[0].size() rows and n_states columns, initialised to zero.
std::vector<std::vector<double>> y_pred(X[0].size(), std::vector<double>(n_states, 0.0));
auto threads{ std::vector<std::thread>() };
std::mutex mtx;
for (auto i = 0; i < n_models; ++i) {
// One thread per model; i is captured by value, everything else by reference.
threads.push_back(std::thread([&, i]() {
auto ypredict = models[i]->predict_proba(X);
// The model output must have the same shape as the accumulator.
assert(ypredict.size() == y_pred.size());
assert(ypredict[0].size() == y_pred[0].size());
// The prediction above runs outside the lock; only the accumulation is serialised.
std::lock_guard<std::mutex> lock(mtx);
// Multiply each prediction by the significance of the model and then add it to the final prediction
for (auto j = 0; j < ypredict.size(); ++j) {
// The lambda's `significanceModels` capture is the single significance of model i,
// not the member vector of the same name.
std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(),
[significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
}
}));
}
// Wait for every worker before the accumulator is read.
for (auto& thread : threads) {
thread.join();
}
auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
// Divide each element of the prediction by the sum of the significances
for (auto j = 0; j < y_pred.size(); ++j) {
std::transform(y_pred[j].begin(), y_pred[j].end(), y_pred[j].begin(), [sum](double x) { return x / sum; });
}
return y_pred;
}
std::vector<std::vector<double>> Ensemble::predict_average_voting(std::vector<std::vector<int>>& X)
{
torch::Tensor Xt = bayesnet::vectorToTensor(X, false);
auto y_pred = predict_average_voting(Xt);
std::vector<std::vector<double>> result = tensorToVectorDouble(y_pred);
return result;
}
// Tensor flavour of the voting aggregation.
// Every model predicts on its own thread and writes its labels into its own
// column of an X.size(1) x n_models int tensor; that tensor is then handed to
// voting() to produce the result.
torch::Tensor Ensemble::predict_average_voting(torch::Tensor& X)
{
    torch::Tensor labels_by_model = torch::zeros({ X.size(1), n_models }, torch::kInt32);
    std::vector<std::thread> workers;
    std::mutex labels_guard;
    for (auto k = 0; k < n_models; ++k) {
        workers.emplace_back([&, k]() {
            // The prediction itself runs unlocked; only the column write is serialized
            auto model_labels = models[k]->predict(X);
            std::lock_guard<std::mutex> lock(labels_guard);
            labels_by_model.index_put_({ "...", k }, model_labels);
        });
    }
    for (auto& worker : workers) {
        worker.join();
    }
    return voting(labels_by_model);
}
// Accuracy on (X, y): fraction of predictions that equal the matching entry of y.
float Ensemble::score(torch::Tensor& X, torch::Tensor& y)
{
    auto predicted = predict(X);
    const auto n_predictions = predicted.size(0);
    int hits = 0;
    for (int idx = 0; idx < n_predictions; ++idx) {
        const bool matches = predicted[idx].item<int>() == y[idx].item<int>();
        if (matches) {
            ++hits;
        }
    }
    return static_cast<double>(hits) / n_predictions;
}
float Ensemble::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{
auto y_pred = predict(X);
int correct = 0;
for (int i = 0; i < y_pred.size(); ++i) {
if (y_pred[i] == y[i]) {
correct++;
}
}
return (double)correct / y_pred.size();
}
// Concatenates, in model order, the lines returned by show() for each of the
// first n_models models.
std::vector<std::string> Ensemble::show() const
{
    std::vector<std::string> lines;
    for (unsigned idx = 0; idx < n_models; ++idx) {
        const auto model_lines = models[idx]->show();
        lines.insert(lines.end(), model_lines.begin(), model_lines.end());
    }
    return lines;
}
// Concatenates, in model order, the graph() output of each of the first
// n_models models. Model i is titled "<title>_<i>".
std::vector<std::string> Ensemble::graph(const std::string& title) const
{
    std::vector<std::string> lines;
    for (auto idx = 0; idx < n_models; ++idx) {
        const auto model_lines = models[idx]->graph(title + "_" + std::to_string(idx));
        lines.insert(lines.end(), model_lines.begin(), model_lines.end());
    }
    return lines;
}
int Ensemble::getNumberOfNodes() const
{
int nodes = 0;
for (auto i = 0; i < n_models; ++i) {
nodes += models[i]->getNumberOfNodes();
}
return nodes;
}
// Sum of getNumberOfEdges() over the first n_models models.
int Ensemble::getNumberOfEdges() const
{
    int total = 0;
    for (unsigned idx = 0; idx < n_models; ++idx) {
        total = total + models[idx]->getNumberOfEdges();
    }
    return total;
}
// Sum of getNumberOfStates() over the first n_models models.
int Ensemble::getNumberOfStates() const
{
    int total = 0;
    for (unsigned idx = 0; idx < n_models; ++idx) {
        total = total + models[idx]->getNumberOfStates();
    }
    return total;
}
}

View File

@@ -7,19 +7,13 @@
namespace bayesnet { namespace bayesnet {
class Ensemble : public Classifier { class Ensemble : public Classifier {
private:
Ensemble& build(std::vector<std::string>& features, std::string className, std::map<std::string, std::vector<int>>& states);
protected:
unsigned n_models;
std::vector<std::unique_ptr<Classifier>> models;
std::vector<double> significanceModels;
void trainModel(const torch::Tensor& weights) override;
std::vector<int> voting(torch::Tensor& y_pred);
public: public:
Ensemble(); Ensemble(bool predict_voting = true);
virtual ~Ensemble() = default; virtual ~Ensemble() = default;
torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
std::vector<int> predict(std::vector<std::vector<int>>& X) override; std::vector<int> predict(std::vector<std::vector<int>>& X) override;
torch::Tensor predict_proba(torch::Tensor& X) override;
std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
float score(torch::Tensor& X, torch::Tensor& y) override; float score(torch::Tensor& X, torch::Tensor& y) override;
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override; float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
int getNumberOfNodes() const override; int getNumberOfNodes() const override;
@@ -34,6 +28,19 @@ namespace bayesnet {
void dump_cpt() const override void dump_cpt() const override
{ {
} }
protected:
torch::Tensor predict_average_voting(torch::Tensor& X);
std::vector<std::vector<double>> predict_average_voting(std::vector<std::vector<int>>& X);
torch::Tensor predict_average_proba(torch::Tensor& X);
std::vector<std::vector<double>> predict_average_proba(std::vector<std::vector<int>>& X);
torch::Tensor compute_arg_max(torch::Tensor& X);
std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X);
torch::Tensor voting(torch::Tensor& votes);
unsigned n_models;
std::vector<std::unique_ptr<Classifier>> models;
std::vector<double> significanceModels;
void trainModel(const torch::Tensor& weights) override;
bool predict_voting;
}; };
} }
#endif #endif

View File

@@ -50,7 +50,6 @@ namespace bayesnet {
} }
double FeatureSelect::computeMeritCFS() double FeatureSelect::computeMeritCFS()
{ {
double result;
double rcf = 0; double rcf = 0;
for (auto feature : selectedFeatures) { for (auto feature : selectedFeatures) {
rcf += suLabels[feature]; rcf += suLabels[feature];

View File

@@ -28,7 +28,7 @@ namespace bayesnet {
selectedFeatures.push_back(feature); selectedFeatures.push_back(feature);
// Compute merit with selectedFeatures // Compute merit with selectedFeatures
auto meritNew = computeMeritCFS(); auto meritNew = computeMeritCFS();
double delta = merit != 0.0 ? abs(merit - meritNew) / merit : 0.0; double delta = merit != 0.0 ? std::abs(merit - meritNew) / merit : 0.0;
if (meritNew > merit || delta < threshold) { if (meritNew > merit || delta < threshold) {
if (meritNew > merit) { if (meritNew > merit) {
merit = meritNew; merit = meritNew;

View File

@@ -0,0 +1,50 @@
#include "bayesnetUtils.h"
namespace bayesnet {
// Returns the positions of nums ordered from the largest value to the smallest.
// nums itself is left untouched; the relative order of equal values is unspecified.
std::vector<int> argsort(std::vector<double>& nums)
{
    std::vector<int> order(nums.size());
    std::iota(order.begin(), order.end(), 0);
    const auto larger_value_first = [&nums](int lhs, int rhs) { return nums[lhs] > nums[rhs]; };
    std::sort(order.begin(), order.end(), larger_value_first);
    return order;
}
// Converts an m x n tensor into an n x m std::vector: element [i][r] of the
// result is dtensor[r][i], i.e. one inner vector per tensor column.
// The tensor is read through data_ptr<int>(), so it is expected to hold
// 32-bit integers.
std::vector<std::vector<int>> tensorToVector(torch::Tensor& dtensor)
{
    const auto n_rows = dtensor.size(0);
    const auto n_cols = dtensor.size(1);
    std::vector<std::vector<int>> result;
    result.reserve(n_cols);
    // Iterate over cols
    for (int i = 0; i < n_cols; ++i) {
        // A column of a row-major tensor is a strided view: its elements are
        // not adjacent in memory. contiguous() packs them (and is a no-op when
        // they already are), so the linear pointer walk below copies the
        // column rather than whatever follows its first element in storage.
        auto col_tensor = dtensor.index({ "...", i }).contiguous();
        const int* first = col_tensor.data_ptr<int>();
        result.emplace_back(first, first + n_rows);
    }
    return result;
}
// Converts an m x n tensor into an m x n std::vector<std::vector<double>>:
// one inner vector per tensor row, each value widened from float to double.
// The tensor is read through data_ptr<float>(), so it is expected to hold
// 32-bit floats.
std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor)
{
    const auto n_rows = dtensor.size(0);
    const auto n_cols = dtensor.size(1);
    std::vector<std::vector<double>> result;
    result.reserve(n_rows);
    // Iterate over rows
    for (int i = 0; i < n_rows; ++i) {
        // contiguous() is a no-op for a row of a row-major tensor, but it keeps
        // the linear pointer walk below correct when the caller passes a
        // transposed or otherwise strided view.
        auto row_tensor = dtensor.index({ i, "..." }).contiguous();
        const float* first = row_tensor.data_ptr<float>();
        result.emplace_back(first, first + n_cols);
    }
    return result;
}
// Copies a nested std::vector into a 2-D int32 tensor.
//   transpose == true : an n x m vector becomes an m x n tensor (tensor[i][j] = vector[j][i])
//   transpose == false: the tensor has the same layout as the vector
// An empty outer vector yields a 0 x 0 tensor. All inner vectors are assumed
// to be as long as the first one.
torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector, bool transpose)
{
    long int m = 0;
    long int n = 0;
    // Only look at vector[0] when it exists
    if (!vector.empty()) {
        m = transpose ? vector[0].size() : vector.size();
        n = transpose ? vector.size() : vector[0].size();
    }
    auto tensor = torch::zeros({ m, n }, torch::kInt32);
    // Write through an accessor: tensor[i][j] = value would build a temporary
    // tensor for every single cell.
    auto cells = tensor.accessor<int, 2>();
    for (long int i = 0; i < m; ++i) {
        for (long int j = 0; j < n; ++j) {
            cells[i][j] = transpose ? vector[j][i] : vector[i][j];
        }
    }
    return tensor;
}
}

View File

@@ -4,6 +4,8 @@
#include <vector> #include <vector>
namespace bayesnet { namespace bayesnet {
std::vector<int> argsort(std::vector<double>& nums); std::vector<int> argsort(std::vector<double>& nums);
std::vector<std::vector<int>> tensorToVector(torch::Tensor& tensor); std::vector<std::vector<int>> tensorToVector(torch::Tensor& dtensor);
std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor);
torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector, bool transpose = true);
} }
#endif //BAYESNET_UTILS_H #endif //BAYESNET_UTILS_H

View File

@@ -1,16 +1,21 @@
if(ENABLE_TESTING) if(ENABLE_TESTING)
set(TEST_BAYESNET "unit_tests_bayesnet") set(TEST_BAYESNET "unit_tests_bayesnet")
include_directories( include_directories(
${BayesNet_SOURCE_DIR}/src/BayesNet ${BayesNet_SOURCE_DIR}/src
${BayesNet_SOURCE_DIR}/src/Platform ${BayesNet_SOURCE_DIR}/src/feature_selection
${BayesNet_SOURCE_DIR}/src/bayesian_network
${BayesNet_SOURCE_DIR}/src/classifiers
${BayesNet_SOURCE_DIR}/src/utils
${BayesNet_SOURCE_DIR}/src/ensembles
${BayesNet_SOURCE_DIR}/lib/Files ${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/mdlp ${BayesNet_SOURCE_DIR}/lib/mdlp
${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/json/include ${BayesNet_SOURCE_DIR}/lib/json/include
${CMAKE_BINARY_DIR}/configured_files/include ${CMAKE_BINARY_DIR}/configured_files/include
) )
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/src/*.cc")
set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCES}) set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCES})
add_executable(${TEST_BAYESNET} ${TEST_SOURCES_BAYESNET}) add_executable(${TEST_BAYESNET} ${TEST_SOURCES_BAYESNET})
target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain) target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
add_test(NAME ${TEST_BAYESNET} COMMAND ${TEST_BAYESNET}) add_test(NAME ${TEST_BAYESNET} COMMAND ${TEST_BAYESNET})
endif(ENABLE_TESTING) endif(ENABLE_TESTING)

View File

@@ -2,9 +2,6 @@
#include <catch2/catch_test_macros.hpp> #include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp> #include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp> #include <catch2/generators/catch_generators.hpp>
#include <vector>
#include <map>
#include <string>
#include "KDB.h" #include "KDB.h"
#include "TAN.h" #include "TAN.h"
#include "SPODE.h" #include "SPODE.h"
@@ -16,14 +13,11 @@
#include "AODELd.h" #include "AODELd.h"
#include "TestUtils.h" #include "TestUtils.h"
TEST_CASE("Library check version", "[BayesNet]") const std::string ACTUAL_VERSION = "1.0.3";
TEST_CASE("Test Bayesian Classifiers score & version", "[BayesNet]")
{ {
auto clf = bayesnet::KDB(2); map <pair<std::string, std::string>, float> scores{
REQUIRE(clf.getVersion() == "1.0.1");
}
TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
{
map <pair<std::string, std::string>, float> scores = {
// Diabetes // Diabetes
{{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, {{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
{{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f}, {{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f},
@@ -37,87 +31,34 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333}, {{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
{{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f} {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f}
}; };
std::map<std::string, bayesnet::BaseClassifier*> models{
{"AODE", new bayesnet::AODE()}, {"AODELd", new bayesnet::AODELd()},
{"BoostAODE", new bayesnet::BoostAODE()},
{"KDB", new bayesnet::KDB(2)}, {"KDBLd", new bayesnet::KDBLd(2)},
{"SPODE", new bayesnet::SPODE(1)}, {"SPODELd", new bayesnet::SPODELd(1)},
{"TAN", new bayesnet::TAN()}, {"TANLd", new bayesnet::TANLd()}
};
std::string name = GENERATE("AODE", "AODELd", "KDB", "KDBLd", "SPODE", "SPODELd", "TAN", "TANLd");
auto clf = models[name];
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); SECTION("Test " + name + " classifier")
auto raw = RawDatasets(file_name, false);
SECTION("Test TAN classifier (" + file_name + ")")
{ {
auto clf = bayesnet::TAN(); for (const std::string& file_name : { "glass", "iris", "ecoli", "diabetes" }) {
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); auto clf = models[name];
auto score = clf.score(raw.Xv, raw.yv); auto discretize = name.substr(name.length() - 2) != "Ld";
//scores[{file_name, "TAN"}] = score; auto raw = RawDatasets(file_name, discretize);
REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon)); clf->fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf->score(raw.Xt, raw.yt);
INFO("File: " + file_name);
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
} }
SECTION("Test TANLd classifier (" + file_name + ")") }
SECTION("Library check version")
{ {
auto clf = bayesnet::TANLd(); INFO("Checking version of " + name + " classifier");
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); REQUIRE(clf->getVersion() == ACTUAL_VERSION);
auto score = clf.score(raw.Xt, raw.yt);
//scores[{file_name, "TANLd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon));
} }
SECTION("Test KDB classifier (" + file_name + ")") delete clf;
{
auto clf = bayesnet::KDB(2);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
//scores[{file_name, "KDB"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDB"
}]).epsilon(raw.epsilon));
}
SECTION("Test KDBLd classifier (" + file_name + ")")
{
auto clf = bayesnet::KDBLd(2);
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
//scores[{file_name, "KDBLd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd"
}]).epsilon(raw.epsilon));
}
SECTION("Test SPODE classifier (" + file_name + ")")
{
auto clf = bayesnet::SPODE(1);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
// scores[{file_name, "SPODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon));
}
SECTION("Test SPODELd classifier (" + file_name + ")")
{
auto clf = bayesnet::SPODELd(1);
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
// scores[{file_name, "SPODELd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(raw.epsilon));
}
SECTION("Test AODE classifier (" + file_name + ")")
{
auto clf = bayesnet::AODE();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
// scores[{file_name, "AODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon));
}
SECTION("Test AODELd classifier (" + file_name + ")")
{
auto clf = bayesnet::AODELd();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
// scores[{file_name, "AODELd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon));
}
SECTION("Test BoostAODE classifier (" + file_name + ")")
{
auto clf = bayesnet::BoostAODE();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
// scores[{file_name, "BoostAODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon));
}
// for (auto scores : scores) {
// std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, ";
// }
} }
TEST_CASE("Models features", "[BayesNet]") TEST_CASE("Models features", "[BayesNet]")
{ {
@@ -133,6 +74,8 @@ TEST_CASE("Models features", "[BayesNet]")
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 7); REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.getNumberOfStates() == 19);
REQUIRE(clf.getClassNumStates() == 3);
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
REQUIRE(clf.graph("Test") == graph); REQUIRE(clf.graph("Test") == graph);
} }
@@ -156,15 +99,15 @@ TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]")
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS"); REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9"); REQUIRE(clf.getNotes()[1] == "Number of models: 9");
} }
TEST_CASE("BoostAODE test used features in train note", "[BayesNet]") TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]")
{ {
auto raw = RawDatasets("diabetes", true); auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostAODE(); auto clf = bayesnet::BoostAODE(true);
clf.setHyperparameters({ clf.setHyperparameters({
{"ascending",true}, {"order", "asc"},
{"convergence", true}, {"convergence", true},
{"repeatSparent",true}, {"repeatSparent",true},
{"select_features","CFS"} {"select_features","CFS"},
}); });
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 72); REQUIRE(clf.getNumberOfNodes() == 72);
@@ -173,4 +116,154 @@ TEST_CASE("BoostAODE test used features in train note", "[BayesNet]")
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8"); REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8");
REQUIRE(clf.getNotes()[2] == "Number of models: 8"); REQUIRE(clf.getNotes()[2] == "Number of models: 8");
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon));
}
// Checks predict_proba for several classifiers on the discretized iris dataset:
//  - vector and tensor overloads agree with each other and with predict()
//  - a window of rows starting at init_index matches the reference values below
TEST_CASE("Model predict_proba", "[BayesNet]")
{
    std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
    // Reference probabilities for rows init_index .. init_index + 8 of iris
    auto res_prob_tan = std::vector<std::vector<double>>({
        { 0.00375671, 0.994457, 0.00178621 },
        { 0.00137462, 0.992734, 0.00589123 },
        { 0.00137462, 0.992734, 0.00589123 },
        { 0.00137462, 0.992734, 0.00589123 },
        { 0.00218225, 0.992877, 0.00494094 },
        { 0.00494209, 0.0978534, 0.897205 },
        { 0.0054192, 0.974275, 0.0203054 },
        { 0.00433012, 0.985054, 0.0106159 },
        { 0.000860806, 0.996922, 0.00221698 }
        });
    auto res_prob_spode = std::vector<std::vector<double>>({
        {0.00419032, 0.994247, 0.00156265},
        {0.00172808, 0.993433, 0.00483862},
        {0.00172808, 0.993433, 0.00483862},
        {0.00172808, 0.993433, 0.00483862},
        {0.00279211, 0.993737, 0.00347077},
        {0.0120674, 0.357909, 0.630024},
        {0.00386239, 0.913919, 0.0822185},
        {0.0244389, 0.966447, 0.00911374},
        {0.003135, 0.991799, 0.0050661}
        });
    auto res_prob_baode = std::vector<std::vector<double>>({
        {0.00803291, 0.9676, 0.0243672},
        {0.00398714, 0.945126, 0.050887},
        {0.00398714, 0.945126, 0.050887},
        {0.00398714, 0.945126, 0.050887},
        {0.00189227, 0.859575, 0.138533},
        {0.0118341, 0.442149, 0.546017},
        {0.0216135, 0.785781, 0.192605},
        {0.0204803, 0.844276, 0.135244},
        {0.00576313, 0.961665, 0.0325716},
        });
    auto res_prob_voting = std::vector<std::vector<double>>({
        {0, 1, 0},
        {0, 1, 0},
        {0, 1, 0},
        {0, 1, 0},
        {0, 1, 0},
        {0, 0.447909, 0.552091},
        {0, 0.811482, 0.188517},
        {0, 1, 0},
        {0, 1, 0}
        });
    std::map<std::string, std::vector<std::vector<double>>> res_prob{ {"TAN", res_prob_tan}, {"SPODE", res_prob_spode} , {"BoostAODEproba", res_prob_baode }, {"BoostAODEvoting", res_prob_voting } };
    std::map<std::string, bayesnet::BaseClassifier*> models{ {"TAN", new bayesnet::TAN()}, {"SPODE", new bayesnet::SPODE(0)}, {"BoostAODEproba", new bayesnet::BoostAODE(false)}, {"BoostAODEvoting", new bayesnet::BoostAODE(true)} };
    int init_index = 78;
    auto raw = RawDatasets("iris", true);
    SECTION("Test " + model + " predict_proba")
    {
        auto clf = models[model];
        clf->fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
        auto y_pred_proba = clf->predict_proba(raw.Xv);
        auto yt_pred_proba = clf->predict_proba(raw.Xt);
        auto y_pred = clf->predict(raw.Xv);
        auto yt_pred = clf->predict(raw.Xt);
        REQUIRE(y_pred.size() == yt_pred.size(0));
        REQUIRE(y_pred.size() == y_pred_proba.size());
        REQUIRE(y_pred.size() == yt_pred_proba.size(0));
        REQUIRE(y_pred.size() == raw.yv.size());
        REQUIRE(y_pred_proba[0].size() == 3);
        REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size());
        for (int i = 0; i < y_pred_proba.size(); ++i) {
            auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end());
            int predictedClass = distance(y_pred_proba[i].begin(), maxElem);
            REQUIRE(predictedClass == y_pred[i]);
            // Check predict is coherent with predict_proba
            REQUIRE(yt_pred_proba[i].argmax().item<int>() == y_pred[i]);
            for (int j = 0; j < yt_pred_proba.size(1); j++) {
                REQUIRE(yt_pred_proba[i][j].item<double>() == Catch::Approx(y_pred_proba[i][j]).epsilon(raw.epsilon));
            }
        }
        // Check predict_proba values for vectors and tensors.
        // The bound is the number of reference rows of the selected model, not
        // the number of entries in the res_prob map, so that every reference
        // row is compared.
        const auto& expected = res_prob[model];
        for (int i = 0; i < expected.size(); i++) {
            REQUIRE(y_pred[i] == yt_pred[i].item<int>());
            for (int j = 0; j < 3; j++) {
                REQUIRE(expected[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon));
                REQUIRE(expected[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item<double>()).epsilon(raw.epsilon));
            }
        }
    }
    // Every run allocates all four classifiers, so release all of them rather
    // than only the one selected by GENERATE.
    for (auto& entry : models) {
        delete entry.second;
    }
}
// Fits one BoostAODE with probability aggregation, records score and
// probabilities, then switches the same fitted model to voting through the
// predict_voting hyperparameter and compares both modes against reference values.
TEST_CASE("BoostAODE voting-proba", "[BayesNet]")
{
    auto dataset = RawDatasets("iris", false);
    auto model = bayesnet::BoostAODE(false);
    model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
    // Probability aggregation (constructor argument false)
    auto proba_score = model.score(dataset.Xv, dataset.yv);
    auto proba_output = model.predict_proba(dataset.Xv);
    // Same model, now aggregating by voting
    model.setHyperparameters({
        {"predict_voting",true},
        });
    auto voting_score = model.score(dataset.Xv, dataset.yv);
    auto voting_output = model.predict_proba(dataset.Xv);
    REQUIRE(proba_score == Catch::Approx(0.97333).epsilon(dataset.epsilon));
    REQUIRE(voting_score == Catch::Approx(0.98).epsilon(dataset.epsilon));
    REQUIRE(voting_output[83][2] == Catch::Approx(0.552091).epsilon(dataset.epsilon));
    REQUIRE(proba_output[83][2] == Catch::Approx(0.546017).epsilon(dataset.epsilon));
    model.dump_cpt();
    REQUIRE(model.topological_order() == std::vector<std::string>());
}
// Trains a fresh BoostAODE on glass for each value of the "order"
// hyperparameter and checks the vector and tensor scores against the
// reference accuracy for that order.
TEST_CASE("BoostAODE order asc, desc & random", "[BayesNet]")
{
    auto dataset = RawDatasets("glass", true);
    std::map<std::string, double> expected_scores{
        {"asc", 0.83178f }, { "desc", 0.84579f }, { "rand", 0.83645f }
    };
    for (const std::string& order : { "asc", "desc", "rand" }) {
        auto model = bayesnet::BoostAODE();
        model.setHyperparameters({
            {"order", order},
            });
        model.fit(dataset.Xv, dataset.yv, dataset.featuresv, dataset.classNamev, dataset.statesv);
        auto vector_score = model.score(dataset.Xv, dataset.yv);
        auto tensor_score = model.score(dataset.Xt, dataset.yt);
        INFO("order: " + order);
        REQUIRE(vector_score == Catch::Approx(expected_scores[order]).epsilon(dataset.epsilon));
        REQUIRE(tensor_score == Catch::Approx(expected_scores[order]).epsilon(dataset.epsilon));
    }
}
TEST_CASE("BoostAODE predict_single", "[BayesNet]")
{
auto raw = RawDatasets("glass", true);
std::map<bool, double> scores{
{true, 0.84579f }, { false, 0.80841f }
};
for (const bool kind : { true, false}) {
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({
{"predict_single", kind}, {"order", "desc" },
});
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
INFO("kind: " + std::string(kind ? "true" : "false"));
REQUIRE(score == Catch::Approx(scores[kind]).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(scores[kind]).epsilon(raw.epsilon));
}
} }