Compare commits
21 commits: 0d6a081d01 ... v1.0.5

SHA1
b956aa3873
1f06631f69
6dd589bd61
6475f10825
7d906b24d1
464fe029ea
09a1369122
503ad687dc
8eeaa1beee
a2de1c9522
cf9b5716ac
1326891d6a
da2a969686
f9553a38d7
8b6121eaf2
fbbed8ad68
a1178554ff
d12a779bd9
a8fc29e2b2
50543e7929
9014649a0d
.clang-uml  (new file, 39 lines)
@@ -0,0 +1,39 @@
+compilation_database_dir: build_debug
+output_directory: diagrams
+diagrams:
+  BayesNet:
+    type: class
+    glob:
+      - bayesnet/*.h
+      - bayesnet/classifiers/*.h
+      - bayesnet/classifiers/*.cc
+      - bayesnet/ensembles/*.h
+      - bayesnet/ensembles/*.cc
+      - bayesnet/feature_selection/*.h
+      - bayesnet/feature_selection/*.cc
+      - bayesnet/network/*.h
+      - bayesnet/network/*.cc
+      - bayesnet/utils/*.h
+      - bayesnet/utils/*.cc
+    include:
+      # Only include entities from the following namespaces
+      namespaces:
+        - bayesnet
+    exclude:
+      access:
+        - private
+    plantuml:
+      style:
+        # Apply this style to all classes in the diagram
+        class: "#aliceblue;line:blue;line.dotted;text:blue"
+        # Apply this style to all packages in the diagram
+        package: "#back:grey"
+        # Make all template instantiation relations point upwards and draw them
+        # as green and dotted lines
+        instantiation: "up[#green,dotted]"
+      cmd: "/usr/bin/plantuml -tsvg \"diagrams/{}.puml\""
+      before:
+        - 'title clang-uml class diagram model'
+    mermaid:
+      before:
+        - 'classDiagram'
.github/workflows/main.yml  (new file, vendored, 12 lines)
@@ -0,0 +1,12 @@
+name: CI
+on: push
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - run: sudo apt-get install ninja-build cmake
+      - run: ninja --version
+      - run: cmake --version
+      - run: g++ --version
.vscode/launch.json  (vendored, 32 lines changed)
@@ -8,7 +8,7 @@
             "program": "${workspaceFolder}/build_release/sample/bayesnet_sample",
             "args": [
                 "${workspaceFolder}/tests/data/glass.arff"
-            ],
+            ]
         },
         {
             "type": "lldb",
@@ -16,11 +16,33 @@
             "name": "test",
             "program": "${workspaceFolder}/build_debug/tests/TestBayesNet",
             "args": [
-                "[Network]"
-                //"-c=\"Metrics Test\"",
-                // "-s",
+                "Block Update"
             ],
-            "cwd": "${workspaceFolder}/build_debug/tests",
+            "cwd": "${workspaceFolder}/build_debug/tests"
+        },
+        {
+            "name": "(gdb) Launch",
+            "type": "cppdbg",
+            "request": "launch",
+            "program": "enter program name, for example ${workspaceFolder}/a.out",
+            "args": [],
+            "stopAtEntry": false,
+            "cwd": "${fileDirname}",
+            "environment": [],
+            "externalConsole": false,
+            "MIMode": "gdb",
+            "setupCommands": [
+                {
+                    "description": "Enable pretty-printing for gdb",
+                    "text": "-enable-pretty-printing",
+                    "ignoreFailures": true
+                },
+                {
+                    "description": "Set Disassembly Flavor to Intel",
+                    "text": "-gdb-set disassembly-flavor intel",
+                    "ignoreFailures": true
+                }
+            ]
         }
     ]
 }
CHANGELOG.md  (15 lines changed)
@@ -5,23 +5,32 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [unreleased]
+## [1.0.5] 2024-04-20
 
 ### Added
 
 - Install command and instructions in README.md
+- Prefix to install command to install the package in the any location.
+- The 'block_update' hyperparameter to the BoostAODE class, to control the way weights/significances are updated. Default value is false.
+- Html report of coverage in the coverage folder. It is created with *make viewcoverage*
+- Badges of coverage and code quality (codacy) in README.md. Coverage badge is updated with *make viewcoverage*
+- Tests to reach 97% of coverage.
+- Copyright header to source files.
+- Diagrams to README.md: UML class diagram & dependency diagram
+- Action to create diagrams to Makefile: *make diagrams*
 
 ### Changed
 
 - Sample app now is a separate target in the Makefile and shows how to use the library with a sample dataset
 - The worse model count in BoostAODE is reset to 0 every time a new model produces better accuracy, so the tolerance of the model is meant to be the number of **consecutive** models that produce worse accuracy.
+- Default hyperparameter values in BoostAODE: bisection is true, maxTolerance is 3, convergence is true
 
 ## [1.0.4] 2024-03-06
 
 ### Added
 
-- Change _ascending_ hyperparameter to _order_ with these possible values _{"asc", "desc", "rand"}_, Default is _"desc"_.
-- Add the _predict_single_ hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true.
+- Change *ascending* hyperparameter to *order* with these possible values *{"asc", "desc", "rand"}*, Default is *"desc"*.
+- Add the *predict_single* hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true.
 - sample app to show how to use the library (make sample)
 
 ### Changed
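The `block_update` entry above only describes the new hyperparameter in prose. A minimal sketch of how it could be passed to `BoostAODE` through the `setHyperparameters` JSON interface that appears later in this diff follows; the include path is an assumption, while the key names mirror `validHyperparameters` in BoostAODE.cc.

```cpp
// Hedged sketch, not part of the diff: enabling the new 1.0.5 hyperparameters
// on a BoostAODE ensemble. Keys match validHyperparameters in BoostAODE.cc.
#include <nlohmann/json.hpp>
#include <bayesnet/ensembles/BoostAODE.h>   // include path assumed from the .clang-uml globs

int main() {
    bayesnet::BoostAODE clf;                 // predict_voting defaults to false
    nlohmann::json hyperparameters = {
        { "block_update", true },            // new in 1.0.5, defaults to false
        { "bisection", true },               // 1.0.5 default, shown explicitly for clarity
        { "maxTolerance", 3 },
        { "convergence", true }
    };
    clf.setHyperparameters(hyperparameters);
    // clf.fit(dataset, features, className, states, weights) would follow here.
    return 0;
}
```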
CMakeGraphVizOptions.cmake  (new file, 5 lines)
@@ -0,0 +1,5 @@
+# Set the default graph title
+set(GRAPHVIZ_GRAPH_NAME "BayesNet dependency graph")
+
+set(GRAPHVIZ_SHARED_LIBS OFF)
+set(GRAPHVIZ_STATIC_LIBS ON)

@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.20)
 
 project(BayesNet
-  VERSION 1.0.4
+  VERSION 1.0.5
   DESCRIPTION "Bayesian Network and basic classifiers Library."
   HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
   LANGUAGES CXX
Makefile  (32 lines changed)
@@ -1,11 +1,15 @@
 SHELL := /bin/bash
 .DEFAULT_GOAL := help
-.PHONY: viewcoverage coverage setup help install uninstall buildr buildd test clean debug release sample
+.PHONY: viewcoverage coverage setup help install uninstall diagrams buildr buildd test clean debug release sample updatebadge
 
 f_release = build_release
 f_debug = build_debug
+f_diagrams = diagrams
 app_targets = BayesNet
 test_targets = TestBayesNet
+clang-uml = clang-uml
+plantuml = plantuml
+dot = dot
 n_procs = -j 16
 
 define ClearTests
@@ -31,11 +35,21 @@ setup: ## Install dependencies for tests and coverage
 		pip install gcovr; \
 		sudo dnf install lcov;\
 	fi
+	@echo "* You should install plantuml & graphviz for the diagrams"
 
-dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
+diagrams: ## Create an UML class diagram & depnendency of the project (diagrams/BayesNet.png)
+	@which $(plantuml) || (echo ">>> Please install plantuml"; exit 1)
+	@which $(dot) || (echo ">>> Please install graphviz"; exit 1)
+	@which $(clang-uml) || (echo ">>> Please install clang-uml"; exit 1)
+	@export PLANTUML_LIMIT_SIZE=16384
+	@echo ">>> Creating UML class diagram of the project...";
+	@$(clang-uml) -p
+	@cd $(f_diagrams); \
+	$(plantuml) -tsvg BayesNet.puml
 	@echo ">>> Creating dependency graph diagram of the project...";
 	$(MAKE) debug
-	cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
+	cd $(f_debug) && cmake .. --graphviz=dependency.dot
+	@$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg
 
 buildd: ## Build the debug targets
 	cmake --build $(f_debug) -t $(app_targets) $(n_procs)
@@ -53,9 +67,10 @@ uninstall: ## Uninstall library
 	xargs rm < $(f_release)/install_manifest.txt
 	@echo ">>> Done";
 
+prefix = "/usr/local"
 install: ## Install library
 	@echo ">>> Installing BayesNet...";
-	@cmake --install $(f_release)
+	@cmake --install $(f_release) --prefix $(prefix)
 	@echo ">>> Done";
 
 debug: ## Build a debug version of the project
@@ -112,10 +127,15 @@ viewcoverage: ## Run tests, generate coverage report and upload it to codecov (b
 	lcov --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
 	lcov --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
 	lcov --remove coverage.info 'bayesnet/utils/loguru.*' --output-file coverage.info >/dev/null 2>&1; \
-	genhtml coverage.info --output-directory $(f_debug)/tests/coverage >/dev/null 2>&1; \
-	xdg-open $(f_debug)/tests/coverage/index.html || open $(f_debug)/tests/coverage/index.html 2>/dev/null
+	genhtml coverage.info --output-directory coverage >/dev/null 2>&1;
+	@$(MAKE) updatebadge
+	@xdg-open $(f_debug)/tests/coverage/index.html || open $(f_debug)/tests/coverage/index.html 2>/dev/null
 	@echo ">>> Done";
 
+updatebadge: ## Update the coverage badge in README.md
+	@echo ">>> Updating coverage badge..."
+	@env python update_coverage.py $(f_debug)/tests
+	@echo ">>> Done";
+
 help: ## Show help message
 	@IFS=$$'\n' ; \
README.md  (22 lines changed)
@@ -5,10 +5,20 @@
 
 [](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
 
+
+
 Bayesian Network Classifiers using libtorch from scratch
 
-## Installation
+## Dependencies
+
+The only external dependency is [libtorch](https://pytorch.org/cppdocs/installing.html) which can be installed with the following commands:
+
+```bash
+wget https://download.pytorch.org/libtorch/nightly/cpu/libtorch-shared-with-deps-latest.zip
+unzip libtorch-shared-with-deps-latest.zips
+```
+
+## Setup
 
 ### Release
 
@@ -38,3 +48,13 @@ make sample fname=tests/data/glass.arff
 ## Models
 
 ### [BoostAODE](docs/BoostAODE.md)
+
+## Diagrams
+
+### UML Class Diagram
+
+
+
+### Dependency Diagram
+
+
@@ -1,5 +1,10 @@
-#ifndef BASE_H
-#define BASE_H
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#pragma once
 #include <vector>
 #include <torch/torch.h>
 #include <nlohmann/json.hpp>
@@ -30,7 +35,7 @@ namespace bayesnet {
         virtual std::string getVersion() = 0;
         std::vector<std::string> virtual topological_order() = 0;
         std::vector<std::string> virtual getNotes() const = 0;
-        void virtual dump_cpt()const = 0;
+        std::string virtual dump_cpt()const = 0;
         virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
         std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
     protected:
@@ -38,4 +43,3 @@ namespace bayesnet {
         std::vector<std::string> validHyperparameters;
     };
 }
-#endif
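The second hunk above changes `dump_cpt()` from `void` to `std::string`. A small, hypothetical caller-side sketch of the new signature follows; the helper name and the include path are assumptions, the method and its constness are taken from the Classifier.h hunk later in this diff.

```cpp
// Hedged sketch (not from the diff): using the new dump_cpt() signature, which
// returns the CPTs as a std::string instead of printing them inside the library.
#include <iostream>
#include <string>
#include <bayesnet/classifiers/Classifier.h>   // include path assumed from the glob patterns above

void show_cpts(const bayesnet::Classifier& clf) {  // any fitted classifier
    std::string cpts = clf.dump_cpt();             // previously: void dump_cpt() printed directly
    std::cout << cpts << std::endl;                // the caller now decides where the text goes
}
```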
@@ -1,3 +1,10 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include <sstream>
 #include "bayesnet/utils/bayesnetUtils.h"
 #include "Classifier.h"
 
@@ -27,10 +34,11 @@ namespace bayesnet {
             dataset = torch::cat({ dataset, yresized }, 0);
         }
         catch (const std::exception& e) {
-            std::cerr << e.what() << '\n';
-            std::cout << "X dimensions: " << dataset.sizes() << "\n";
-            std::cout << "y dimensions: " << ytmp.sizes() << "\n";
-            exit(1);
+            std::stringstream oss;
+            oss << "* Error in X and y dimensions *\n";
+            oss << "X dimensions: " << dataset.sizes() << "\n";
+            oss << "y dimensions: " << ytmp.sizes();
+            throw std::runtime_error(oss.str());
         }
     }
     void Classifier::trainModel(const torch::Tensor& weights)
@@ -73,11 +81,11 @@ namespace bayesnet {
         if (torch::is_floating_point(dataset)) {
             throw std::invalid_argument("dataset (X, y) must be of type Integer");
         }
-        if (n != features.size()) {
-            throw std::invalid_argument("Classifier: X " + std::to_string(n) + " and features " + std::to_string(features.size()) + " must have the same number of features");
+        if (dataset.size(0) - 1 != features.size()) {
+            throw std::invalid_argument("Classifier: X " + std::to_string(dataset.size(0) - 1) + " and features " + std::to_string(features.size()) + " must have the same number of features");
         }
         if (states.find(className) == states.end()) {
-            throw std::invalid_argument("className not found in states");
+            throw std::invalid_argument("class name not found in states");
         }
         for (auto feature : features) {
             if (states.find(feature) == states.end()) {
@@ -173,12 +181,14 @@ namespace bayesnet {
     {
         return model.topological_sort();
     }
-    void Classifier::dump_cpt() const
+    std::string Classifier::dump_cpt() const
     {
-        model.dump_cpt();
+        return model.dump_cpt();
     }
     void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
     {
-        //For classifiers that don't have hyperparameters
+        if (!hyperparameters.empty()) {
+            throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
+        }
     }
 }
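With these hunks a dimension mismatch detected while building the dataset is reported with a `std::runtime_error` instead of printing and calling `exit(1)`, and an unrecognized hyperparameter raises `std::invalid_argument`. A minimal, hypothetical sketch of catching both follows; the helper is an assumption and the actual fit arguments are deliberately elided.

```cpp
#include <iostream>
#include <stdexcept>

// Hedged sketch, not from the diff: failures in the fit/build path now surface
// as exceptions the caller can handle. fitClassifier stands for any call that
// ends up in Classifier::fit / buildDataset (argument list elided on purpose).
template <typename FitCall>
void run_fit(FitCall&& fitClassifier) {
    try {
        fitClassifier();
    } catch (const std::runtime_error& e) {
        // e.g. the "* Error in X and y dimensions *" message built with std::stringstream above
        std::cerr << "fit failed: " << e.what() << std::endl;
    } catch (const std::invalid_argument& e) {
        // e.g. "Invalid hyperparameters..." or the feature-count check above
        std::cerr << "invalid input: " << e.what() << std::endl;
    }
}
```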
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef CLASSIFIER_H
 #define CLASSIFIER_H
 #include <torch/torch.h>
@@ -30,7 +36,7 @@ namespace bayesnet {
         std::vector<std::string> show() const override;
         std::vector<std::string> topological_order() override;
         std::vector<std::string> getNotes() const override { return notes; }
-        void dump_cpt() const override;
+        std::string dump_cpt() const override;
         void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
     protected:
         bool fitted;
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "KDB.h"
 
 namespace bayesnet {
@@ -6,14 +12,18 @@ namespace bayesnet {
         validHyperparameters = { "k", "theta" };
 
     }
-    void KDB::setHyperparameters(const nlohmann::json& hyperparameters)
+    void KDB::setHyperparameters(const nlohmann::json& hyperparameters_)
     {
+        auto hyperparameters = hyperparameters_;
         if (hyperparameters.contains("k")) {
            k = hyperparameters["k"];
+            hyperparameters.erase("k");
        }
        if (hyperparameters.contains("theta")) {
            theta = hyperparameters["theta"];
+            hyperparameters.erase("theta");
        }
+        Classifier::setHyperparameters(hyperparameters);
     }
     void KDB::buildModel(const torch::Tensor& weights)
     {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef KDB_H
 #define KDB_H
 #include <torch/torch.h>
@@ -14,7 +20,7 @@ namespace bayesnet {
     public:
         explicit KDB(int k, float theta = 0.03);
         virtual ~KDB() = default;
-        void setHyperparameters(const nlohmann::json& hyperparameters) override;
+        void setHyperparameters(const nlohmann::json& hyperparameters_) override;
         std::vector<std::string> graph(const std::string& name = "KDB") const override;
     };
 }
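The KDB hunks above introduce a consume-and-delegate pattern: the recognized keys are read and erased from a copy of the JSON, and whatever remains is handed to `Classifier::setHyperparameters`, which now throws on leftovers. A hypothetical sketch of the resulting behaviour follows; `max_depth` is an invented, unsupported key used only to trigger the error path, and the include path is an assumption.

```cpp
// Hedged sketch of the behaviour implied by the hunks above: recognized keys
// are consumed, anything left over is rejected by Classifier::setHyperparameters.
#include <stdexcept>
#include <nlohmann/json.hpp>
#include <bayesnet/classifiers/KDB.h>   // include path assumed

void configure(bayesnet::KDB& kdb) {
    kdb.setHyperparameters({ { "k", 2 }, { "theta", 0.1 } });        // accepted: both keys are consumed
    try {
        kdb.setHyperparameters({ { "k", 2 }, { "max_depth", 5 } });  // "max_depth" is hypothetical and unknown
    } catch (const std::invalid_argument& e) {
        // thrown by the base class with the remaining JSON dumped in the message
    }
}
```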
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "KDBLd.h"
 
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef KDBLD_H
 #define KDBLD_H
 #include "Proposal.h"

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <ArffFiles.h>
 #include "Proposal.h"
 

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef PROPOSAL_H
 #define PROPOSAL_H
 #include <string>

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "SPODE.h"
 
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef SPODE_H
 #define SPODE_H
 #include "Classifier.h"

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "SPODELd.h"
 
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef SPODELD_H
 #define SPODELD_H
 #include "SPODE.h"

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "TAN.h"
 
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef TAN_H
 #define TAN_H
 #include "Classifier.h"

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "TANLd.h"
 
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef TANLD_H
 #define TANLD_H
 #include "TAN.h"

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "AODE.h"
 
 namespace bayesnet {
@@ -13,9 +19,7 @@ namespace bayesnet {
             predict_voting = hyperparameters["predict_voting"];
             hyperparameters.erase("predict_voting");
         }
-        if (!hyperparameters.empty()) {
-            throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
-        }
+        Classifier::setHyperparameters(hyperparameters);
     }
     void AODE::buildModel(const torch::Tensor& weights)
     {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef AODE_H
 #define AODE_H
 #include "bayesnet/classifiers/SPODE.h"

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "AODELd.h"
 
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef AODELD_H
 #define AODELD_H
 #include "bayesnet/classifiers/Proposal.h"
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <set>
 #include <functional>
 #include <limits.h>
@@ -8,15 +14,13 @@
 #include "bayesnet/feature_selection/IWSS.h"
 #include "BoostAODE.h"
 
-#include "bayesnet/utils/loguru.cpp"
-
 namespace bayesnet {
 
     BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
     {
         validHyperparameters = {
             "maxModels", "bisection", "order", "convergence", "threshold",
-            "select_features", "maxTolerance", "predict_voting"
+            "select_features", "maxTolerance", "predict_voting", "block_update"
         };
 
     }
@@ -94,9 +98,11 @@ namespace bayesnet {
             }
             hyperparameters.erase("select_features");
         }
-        if (!hyperparameters.empty()) {
-            throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
+        if (hyperparameters.contains("block_update")) {
+            block_update = hyperparameters["block_update"];
+            hyperparameters.erase("block_update");
         }
+        Classifier::setHyperparameters(hyperparameters);
     }
     std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
     {
@@ -125,6 +131,102 @@
         }
         return { weights, alpha_t, terminate };
     }
+    std::tuple<torch::Tensor&, double, bool> BoostAODE::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
+    {
+        /* Update Block algorithm
+            k = # of models in block
+            n_models = # of models in ensemble to make predictions
+            n_models_bak = # models saved
+            models = vector of models to make predictions
+            models_bak = models not used to make predictions
+            significances_bak = backup of significances vector
+
+            Case list
+            A) k = 1, n_models = 1     => n = 0, n_models = n + k
+            B) k = 1, n_models = n + 1 => n_models = n + k
+            C) k > 1, n_models = k + 1 => n = 1, n_models = n + k
+            D) k > 1, n_models = k     => n = 0, n_models = n + k
+            E) k > 1, n_models = k + n => n_models = n + k
+
+            A, D) n = 0, k > 0, n_models == k
+            1. n_models_bak <- n_models
+            2. significances_bak <- significances
+            3. significances = vector(k, 1)
+            4. Don't move any classifiers out of models
+            5. n_models <- k
+            6. Make prediction, compute alpha, update weights
+            7. Don't restore any classifiers to models
+            8. significances <- significances_bak
+            9. Update last k significances
+            10. n_models <- n_models_bak
+
+            B, C, E) n > 0, k > 0, n_models == n + k
+            1. n_models_bak <- n_models
+            2. significances_bak <- significances
+            3. significances = vector(k, 1)
+            4. Move first n classifiers to models_bak
+            5. n_models <- k
+            6. Make prediction, compute alpha, update weights
+            7. Insert classifiers in models_bak to be the first n models
+            8. significances <- significances_bak
+            9. Update last k significances
+            10. n_models <- n_models_bak
+        */
+        //
+        // Make predict with only the last k models
+        //
+        std::unique_ptr<Classifier> model;
+        std::vector<std::unique_ptr<Classifier>> models_bak;
+        // 1. n_models_bak <- n_models 2. significances_bak <- significances
+        auto significance_bak = significanceModels;
+        auto n_models_bak = n_models;
+        // 3. significances = vector(k, 1)
+        significanceModels = std::vector<double>(k, 1.0);
+        // 4. Move first n classifiers to models_bak
+        // backup the first n_models - k models (if n_models == k, don't backup any)
+        for (int i = 0; i < n_models - k; ++i) {
+            model = std::move(models[0]);
+            models.erase(models.begin());
+            models_bak.push_back(std::move(model));
+        }
+        assert(models.size() == k);
+        // 5. n_models <- k
+        n_models = k;
+        // 6. Make prediction, compute alpha, update weights
+        auto ypred = predict(X_train);
+        //
+        // Update weights
+        //
+        double alpha_t;
+        bool terminate;
+        std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
+        //
+        // Restore the models if needed
+        //
+        // 7. Insert classifiers in models_bak to be the first n models
+        // if n_models_bak == k, don't restore any, because none of them were moved
+        if (k != n_models_bak) {
+            // Insert in the same order as they were extracted
+            int bak_size = models_bak.size();
+            for (int i = 0; i < bak_size; ++i) {
+                model = std::move(models_bak[bak_size - 1 - i]);
+                models_bak.erase(models_bak.end() - 1);
+                models.insert(models.begin(), std::move(model));
+            }
+        }
+        // 8. significances <- significances_bak
+        significanceModels = significance_bak;
+        //
+        // Update the significance of the last k models
+        //
+        // 9. Update last k significances
+        for (int i = 0; i < k; ++i) {
+            significanceModels[n_models_bak - k + i] = alpha_t;
+        }
+        // 10. n_models <- n_models_bak
+        n_models = n_models_bak;
+        return { weights, alpha_t, terminate };
+    }
     std::vector<int> BoostAODE::initializeModels()
     {
         std::vector<int> featuresUsed;
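As a sanity check on step 9 of the block-update routine above, here is a tiny standalone sketch (not part of the library) that runs the same index arithmetic with hypothetical numbers.

```cpp
// Standalone sketch of step 9 above with hypothetical numbers: an ensemble of
// n_models_bak = 5 models whose last k = 2 members were just trained as a block
// and therefore share the same alpha_t. Only the block's significances change.
#include <cassert>
#include <vector>

int main() {
    const int n_models_bak = 5;
    const int k = 2;
    const double alpha_t = 0.37;  // hypothetical amount of say for the block
    std::vector<double> significanceModels = { 0.9, 0.8, 0.7, 1.0, 1.0 };
    for (int i = 0; i < k; ++i) {
        significanceModels[n_models_bak - k + i] = alpha_t;
    }
    assert(significanceModels[2] == 0.7);      // models outside the block keep their weight
    assert(significanceModels[3] == alpha_t);  // indices 3 and 4 are the block
    assert(significanceModels[4] == alpha_t);
    return 0;
}
```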
@@ -146,15 +248,12 @@
         featureSelector->fit();
         auto cfsFeatures = featureSelector->getFeatures();
         auto scores = featureSelector->getScores();
-        for (int i = 0; i < cfsFeatures.size(); ++i) {
-            LOG_F(INFO, "Feature: %d Score: %f", cfsFeatures[i], scores[i]);
-        }
         for (const int& feature : cfsFeatures) {
             featuresUsed.push_back(feature);
             std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
             model->fit(dataset, features, className, states, weights_);
             models.push_back(std::move(model));
-            significanceModels.push_back(1.0);
+            significanceModels.push_back(1.0); // They will be updated later in trainModel
             n_models++;
         }
         notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
@@ -163,12 +262,6 @@
     }
     void BoostAODE::trainModel(const torch::Tensor& weights)
     {
-        //
-        // Logging setup
-        //
-        loguru::set_thread_name("BoostAODE");
-        loguru::g_stderr_verbosity = loguru::Verbosity_OFF;;
-        loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX);
         // Algorithm based on the adaboost algorithm for classification
         // as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
         fitted = true;
@@ -187,11 +280,6 @@
             if (finished) {
                 return;
             }
-            LOG_F(INFO, "Initial models: %d", n_models);
-            LOG_F(INFO, "Significances: ");
-            for (int i = 0; i < n_models; ++i) {
-                LOG_F(INFO, "i=%d significance=%f", i, significanceModels[i]);
-            }
         }
         int numItemsPack = 0; // The counter of the models inserted in the current pack
         // Variables to control the accuracy finish condition
@@ -208,7 +296,6 @@
         while (!finished) {
             // Step 1: Build ranking with mutual information
             auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
-            VLOG_SCOPE_F(1, "featureSelection.size: %zu featuresUsed.size: %zu", featureSelection.size(), featuresUsed.size());
             if (order_algorithm == Orders.RAND) {
                 std::shuffle(featureSelection.begin(), featureSelection.end(), g);
             }
@@ -219,21 +306,17 @@
             );
             int k = pow(2, tolerance);
             int counter = 0; // The model counter of the current pack
-            VLOG_SCOPE_F(1, "k=%d featureSelection.size: %zu", k, featureSelection.size());
             while (counter++ < k && featureSelection.size() > 0) {
-                VLOG_SCOPE_F(2, "counter: %d numItemsPack: %d", counter, numItemsPack);
                 auto feature = featureSelection[0];
                 featureSelection.erase(featureSelection.begin());
                 std::unique_ptr<Classifier> model;
                 model = std::make_unique<SPODE>(feature);
                 model->fit(dataset, features, className, states, weights_);
-                torch::Tensor ypred;
-                ypred = model->predict(X_train);
+                alpha_t = 0.0;
+                if (!block_update) {
+                    auto ypred = model->predict(X_train);
                 // Step 3.1: Compute the classifier amout of say
                 std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
-                if (finished) {
-                    VLOG_SCOPE_F(2, "** epsilon_t > 0.5 **");
-                    break;
                 }
                 // Step 3.4: Store classifier and its accuracy to weigh its future vote
                 numItemsPack++;
@@ -241,22 +324,21 @@
                 models.push_back(std::move(model));
                 significanceModels.push_back(alpha_t);
                 n_models++;
-                VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
+            }
+            if (block_update) {
+                std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
             }
             if (convergence && !finished) {
                 auto y_val_predict = predict(X_test);
                 double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
                 if (priorAccuracy == 0) {
                     priorAccuracy = accuracy;
-                    VLOG_SCOPE_F(3, "First accuracy: %f", priorAccuracy);
                 } else {
                     improvement = accuracy - priorAccuracy;
                 }
                 if (improvement < convergence_threshold) {
-                    VLOG_SCOPE_F(3, "(improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                     tolerance++;
                 } else {
-                    VLOG_SCOPE_F(3, "*(improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                     tolerance = 0; // Reset the counter if the model performs better
                     numItemsPack = 0;
                 }
@@ -264,20 +346,17 @@
                 priorAccuracy = std::max(accuracy, priorAccuracy);
                 // priorAccuracy = accuracy;
             }
-            VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
             finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
         }
         if (tolerance > maxTolerance) {
             if (numItemsPack < n_models) {
                 notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
-                VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
                 for (int i = 0; i < numItemsPack; ++i) {
                     significanceModels.pop_back();
                     models.pop_back();
                     n_models--;
                 }
             } else {
-                VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
                 notes.push_back("Convergence threshold reached & 0 models eliminated");
             }
         }
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef BOOSTAODE_H
 #define BOOSTAODE_H
 #include <map>
@@ -20,22 +26,24 @@ namespace bayesnet {
         BoostAODE(bool predict_voting = false);
         virtual ~BoostAODE() = default;
         std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
-        void setHyperparameters(const nlohmann::json& hyperparameters) override;
+        void setHyperparameters(const nlohmann::json& hyperparameters_) override;
     protected:
         void buildModel(const torch::Tensor& weights) override;
         void trainModel(const torch::Tensor& weights) override;
     private:
+        std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights);
         std::vector<int> initializeModels();
         torch::Tensor X_train, y_train, X_test, y_test;
         // Hyperparameters
-        bool bisection = false; // if true, use bisection stratety to add k models at once to the ensemble
-        int maxTolerance = 1;
+        bool bisection = true; // if true, use bisection stratety to add k models at once to the ensemble
+        int maxTolerance = 3;
         std::string order_algorithm; // order to process the KBest features asc, desc, rand
-        bool convergence = false; //if true, stop when the model does not improve
+        bool convergence = true; //if true, stop when the model does not improve
         bool selectFeatures = false; // if true, use feature selection
         std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
         FeatureSelect* featureSelector = nullptr;
         double threshold = -1;
+        bool block_update = false;
     };
 }
 #endif
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "Ensemble.h"
 
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef ENSEMBLE_H
 #define ENSEMBLE_H
 #include <torch/torch.h>
@@ -25,8 +31,9 @@ namespace bayesnet {
         {
             return std::vector<std::string>();
         }
-        void dump_cpt() const override
+        std::string dump_cpt() const override
         {
+            return "";
         }
     protected:
         torch::Tensor predict_average_voting(torch::Tensor& X);
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <limits>
 #include "bayesnet/utils/bayesnetUtils.h"
 #include "CFS.h"

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef CFS_H
 #define CFS_H
 #include <torch/torch.h>

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "bayesnet/utils/bayesnetUtils.h"
 #include "FCBF.h"
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef FCBF_H
 #define FCBF_H
 #include <torch/torch.h>

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <limits>
 #include "bayesnet/utils/bayesnetUtils.h"
 #include "FeatureSelect.h"

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef FEATURE_SELECT_H
 #define FEATURE_SELECT_H
 #include <torch/torch.h>

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <limits>
 #include "bayesnet/utils/bayesnetUtils.h"
 #include "IWSS.h"

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef IWSS_H
 #define IWSS_H
 #include <vector>

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <thread>
 #include <mutex>
 #include <sstream>

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef NETWORK_H
 #define NETWORK_H
 #include <map>

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "Node.h"
 
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef NODE_H
 #define NODE_H
 #include <unordered_set>

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "Mst.h"
 #include "BayesMetrics.h"
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef BAYESNET_METRICS_H
 #define BAYESNET_METRICS_H
 #include <vector>
@@ -1,3 +1,10 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include <sstream>
 #include <vector>
 #include <list>
 #include "Mst.h"
@@ -45,15 +52,6 @@ namespace bayesnet {
         }
     }
 }
-void Graph::display_mst()
-{
-    std::cout << "Edge :" << " Weight" << std::endl;
-    for (int i = 0; i < T.size(); i++) {
-        std::cout << T[i].second.first << " - " << T[i].second.second << " : "
-            << T[i].first;
-        std::cout << std::endl;
-    }
-}

 void insertElement(std::list<int>& variables, int variable)
 {
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef MST_H
 #define MST_H
 #include <vector>
@@ -5,29 +11,28 @@
 #include <torch/torch.h>
 namespace bayesnet {
     class MST {
-    private:
-        torch::Tensor weights;
-        std::vector<std::string> features;
-        int root = 0;
     public:
         MST() = default;
         MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
         std::vector<std::pair<int, int>> maximumSpanningTree();
+    private:
+        torch::Tensor weights;
+        std::vector<std::string> features;
+        int root = 0;
     };
     class Graph {
-    private:
-        int V;      // number of nodes in graph
-        std::vector <std::pair<float, std::pair<int, int>>> G; // std::vector for graph
-        std::vector <std::pair<float, std::pair<int, int>>> T; // std::vector for mst
-        std::vector<int> parent;
     public:
         explicit Graph(int V);
         void addEdge(int u, int v, float wt);
         int find_set(int i);
         void union_set(int u, int v);
         void kruskal_algorithm();
-        void display_mst();
         std::vector <std::pair<float, std::pair<int, int>>> get_mst() { return T; }
+    private:
+        int V;      // number of nodes in graph
+        std::vector <std::pair<float, std::pair<int, int>>> G; // std::vector for graph
+        std::vector <std::pair<float, std::pair<int, int>>> T; // std::vector for mst
+        std::vector<int> parent;
     };
 }
 #endif
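The Mst.h change only moves the private members after the public interface and drops the unused display_mst() helper, so callers of MST and Graph are unaffected. A minimal usage sketch of the Graph helper follows; it is illustrative only: the include path and the toy edge weights are assumptions, while the declarations themselves come from Mst.h as shown above.

```cpp
#include "bayesnet/utils/Mst.h"   // path assumed from the repository layout

int main()
{
    // Four nodes and four weighted edges; kruskal_algorithm() keeps the
    // spanning-tree edges in T, which get_mst() returns as {weight, {u, v}} pairs.
    bayesnet::Graph g(4);
    g.addEdge(0, 1, 0.9f);
    g.addEdge(1, 2, 0.4f);
    g.addEdge(2, 3, 0.7f);
    g.addEdge(0, 3, 0.1f);
    g.kruskal_algorithm();
    auto tree = g.get_mst();
    // A connected graph with 4 nodes yields 3 tree edges.
    return tree.size() == 3 ? 0 : 1;
}
```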
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+

 #include "bayesnetUtils.h"
 namespace bayesnet {

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef BAYESNET_UTILS_H
 #define BAYESNET_UTILS_H
 #include <vector>
File diff suppressed because it is too large (Load Diff)
Binary file not shown.
412
diagrams/BayesNet.puml
Normal file
@@ -0,0 +1,412 @@
@startuml
title clang-uml class diagram model
class "bayesnet::Metrics" as C_0000736965376885623323
class C_0000736965376885623323 #aliceblue;line:blue;line.dotted;text:blue {
+Metrics() = default : void
+Metrics(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
+Metrics(const std::vector<std::vector<int>> & vsamples, const std::vector<int> & labels, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
..
+SelectKBestWeighted(const torch::Tensor & weights, bool ascending = false, unsigned int k = 0) : std::vector<int>
+conditionalEdge(const torch::Tensor & weights) : torch::Tensor
+conditionalEdgeWeights(std::vector<float> & weights) : std::vector<float>
#doCombinations<T>(const std::vector<T> & source) : std::vector<std::pair<T, T> >
#entropy(const torch::Tensor & feature, const torch::Tensor & weights) : double
+getScoresKBest() const : std::vector<double>
+maximumSpanningTree(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : std::vector<std::pair<int,int>>
+mutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & weights) : double
#pop_first<T>(std::vector<T> & v) : T
__
#className : std::string
#features : std::vector<std::string>
#samples : torch::Tensor
}
class "bayesnet::Node" as C_0001303524929067080934
class C_0001303524929067080934 #aliceblue;line:blue;line.dotted;text:blue {
+Node(const std::string &) : void
..
+addChild(Node *) : void
+addParent(Node *) : void
+clear() : void
+computeCPT(const torch::Tensor & dataset, const std::vector<std::string> & features, const double laplaceSmoothing, const torch::Tensor & weights) : void
+getCPT() : torch::Tensor &
+getChildren() : std::vector<Node *> &
+getFactorValue(std::map<std::string,int> &) : float
+getName() const : std::string
+getNumStates() const : int
+getParents() : std::vector<Node *> &
+graph(const std::string & clasName) : std::vector<std::string>
+minFill() : unsigned int
+removeChild(Node *) : void
+removeParent(Node *) : void
+setNumStates(int) : void
__
}
class "bayesnet::Network" as C_0001186707649890429575
class C_0001186707649890429575 #aliceblue;line:blue;line.dotted;text:blue {
+Network() : void
+Network(float) : void
+Network(const Network &) : void
+~Network() = default : void
..
+addEdge(const std::string &, const std::string &) : void
+addNode(const std::string &) : void
+dump_cpt() const : std::string
+fit(const torch::Tensor & samples, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
+fit(const torch::Tensor & X, const torch::Tensor & y, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
+fit(const std::vector<std::vector<int>> & input_data, const std::vector<int> & labels, const std::vector<double> & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
+getClassName() const : std::string
+getClassNumStates() const : int
+getEdges() const : std::vector<std::pair<std::string,std::string>>
+getFeatures() const : std::vector<std::string>
+getMaxThreads() const : float
+getNodes() : std::map<std::string,std::unique_ptr<Node>> &
+getNumEdges() const : int
+getSamples() : torch::Tensor &
+getStates() const : int
+graph(const std::string & title) const : std::vector<std::string>
+initialize() : void
+predict(const std::vector<std::vector<int>> &) : std::vector<int>
+predict(const torch::Tensor &) : torch::Tensor
+predict_proba(const std::vector<std::vector<int>> &) : std::vector<std::vector<double>>
+predict_proba(const torch::Tensor &) : torch::Tensor
+predict_tensor(const torch::Tensor & samples, const bool proba) : torch::Tensor
+score(const std::vector<std::vector<int>> &, const std::vector<int> &) : double
+show() const : std::vector<std::string>
+topological_sort() : std::vector<std::string>
+version() : std::string
__
}
enum "bayesnet::status_t" as C_0000738420730783851375
enum C_0000738420730783851375 {
NORMAL
WARNING
ERROR
}
abstract "bayesnet::BaseClassifier" as C_0000327135989451974539
abstract C_0000327135989451974539 #aliceblue;line:blue;line.dotted;text:blue {
+~BaseClassifier() = default : void
..
{abstract} +dump_cpt() const = 0 : std::string
{abstract} +fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights) = 0 : BaseClassifier &
{abstract} +fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
{abstract} +getClassNumStates() const = 0 : int
{abstract} +getNotes() const = 0 : std::vector<std::string>
{abstract} +getNumberOfEdges() const = 0 : int
{abstract} +getNumberOfNodes() const = 0 : int
{abstract} +getNumberOfStates() const = 0 : int
{abstract} +getStatus() const = 0 : status_t
+getValidHyperparameters() : std::vector<std::string> &
{abstract} +getVersion() = 0 : std::string
{abstract} +graph(const std::string & title = "") const = 0 : std::vector<std::string>
{abstract} +predict(std::vector<std::vector<int>> & X) = 0 : std::vector<int>
{abstract} +predict(torch::Tensor & X) = 0 : torch::Tensor
{abstract} +predict_proba(std::vector<std::vector<int>> & X) = 0 : std::vector<std::vector<double>>
{abstract} +predict_proba(torch::Tensor & X) = 0 : torch::Tensor
{abstract} +score(std::vector<std::vector<int>> & X, std::vector<int> & y) = 0 : float
{abstract} +score(torch::Tensor & X, torch::Tensor & y) = 0 : float
{abstract} +setHyperparameters(const nlohmann::json & hyperparameters) = 0 : void
{abstract} +show() const = 0 : std::vector<std::string>
{abstract} +topological_order() = 0 : std::vector<std::string>
{abstract} #trainModel(const torch::Tensor & weights) = 0 : void
__
#validHyperparameters : std::vector<std::string>
}
abstract "bayesnet::Classifier" as C_0002043996622900301644
abstract C_0002043996622900301644 #aliceblue;line:blue;line.dotted;text:blue {
+Classifier(Network model) : void
+~Classifier() = default : void
..
+addNodes() : void
#buildDataset(torch::Tensor & y) : void
{abstract} #buildModel(const torch::Tensor & weights) = 0 : void
#checkFitParameters() : void
+dump_cpt() const : std::string
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
+fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights) : Classifier &
+getClassNumStates() const : int
+getNotes() const : std::vector<std::string>
+getNumberOfEdges() const : int
+getNumberOfNodes() const : int
+getNumberOfStates() const : int
+getStatus() const : status_t
+getVersion() : std::string
+predict(std::vector<std::vector<int>> & X) : std::vector<int>
+predict(torch::Tensor & X) : torch::Tensor
+predict_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
+predict_proba(torch::Tensor & X) : torch::Tensor
+score(torch::Tensor & X, torch::Tensor & y) : float
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
+setHyperparameters(const nlohmann::json & hyperparameters) : void
+show() const : std::vector<std::string>
+topological_order() : std::vector<std::string>
#trainModel(const torch::Tensor & weights) : void
__
#className : std::string
#dataset : torch::Tensor
#features : std::vector<std::string>
#fitted : bool
#m : unsigned int
#metrics : Metrics
#model : Network
#n : unsigned int
#notes : std::vector<std::string>
#states : std::map<std::string,std::vector<int>>
#status : status_t
}
class "bayesnet::KDB" as C_0001112865019015250005
class C_0001112865019015250005 #aliceblue;line:blue;line.dotted;text:blue {
+KDB(int k, float theta = 0.03) : void
+~KDB() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & name = "KDB") const : std::vector<std::string>
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
__
}
class "bayesnet::TAN" as C_0001760994424884323017
class C_0001760994424884323017 #aliceblue;line:blue;line.dotted;text:blue {
+TAN() : void
+~TAN() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & name = "TAN") const : std::vector<std::string>
__
}
class "bayesnet::Proposal" as C_0002219995589162262979
class C_0002219995589162262979 #aliceblue;line:blue;line.dotted;text:blue {
+Proposal(torch::Tensor & pDataset, std::vector<std::string> & features_, std::string & className_) : void
+~Proposal() : void
..
#checkInput(const torch::Tensor & X, const torch::Tensor & y) : void
#fit_local_discretization(const torch::Tensor & y) : std::map<std::string,std::vector<int>>
#localDiscretizationProposal(const std::map<std::string,std::vector<int>> & states, Network & model) : std::map<std::string,std::vector<int>>
#prepareX(torch::Tensor & X) : torch::Tensor
__
#Xf : torch::Tensor
#discretizers : map<std::string,mdlp::CPPFImdlp *>
#y : torch::Tensor
}
class "bayesnet::TANLd" as C_0001668829096702037834
class C_0001668829096702037834 #aliceblue;line:blue;line.dotted;text:blue {
+TANLd() : void
+~TANLd() = default : void
..
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : TANLd &
+graph(const std::string & name = "TAN") const : std::vector<std::string>
+predict(torch::Tensor & X) : torch::Tensor
{static} +version() : std::string
__
}
abstract "bayesnet::FeatureSelect" as C_0001695326193250580823
abstract C_0001695326193250580823 #aliceblue;line:blue;line.dotted;text:blue {
+FeatureSelect(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
+~FeatureSelect() : void
..
#computeMeritCFS() : double
#computeSuFeatures(const int a, const int b) : double
#computeSuLabels() : void
{abstract} +fit() = 0 : void
+getFeatures() const : std::vector<int>
+getScores() const : std::vector<double>
#initialize() : void
#symmetricalUncertainty(int a, int b) : double
__
#fitted : bool
#maxFeatures : int
#selectedFeatures : std::vector<int>
#selectedScores : std::vector<double>
#suFeatures : std::map<std::pair<int,int>,double>
#suLabels : std::vector<double>
#weights : const torch::Tensor &
}
class "bayesnet::CFS" as C_0000011627355691342494
class C_0000011627355691342494 #aliceblue;line:blue;line.dotted;text:blue {
+CFS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
+~CFS() : void
..
+fit() : void
__
}
class "bayesnet::FCBF" as C_0000144682015341746929
class C_0000144682015341746929 #aliceblue;line:blue;line.dotted;text:blue {
+FCBF(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
+~FCBF() : void
..
+fit() : void
__
}
class "bayesnet::IWSS" as C_0000008268514674428553
class C_0000008268514674428553 #aliceblue;line:blue;line.dotted;text:blue {
+IWSS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
+~IWSS() : void
..
+fit() : void
__
}
class "bayesnet::SPODE" as C_0000512022813807538451
class C_0000512022813807538451 #aliceblue;line:blue;line.dotted;text:blue {
+SPODE(int root) : void
+~SPODE() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
__
}
class "bayesnet::Ensemble" as C_0001985241386355360576
class C_0001985241386355360576 #aliceblue;line:blue;line.dotted;text:blue {
+Ensemble(bool predict_voting = true) : void
+~Ensemble() = default : void
..
#compute_arg_max(std::vector<std::vector<double>> & X) : std::vector<int>
#compute_arg_max(torch::Tensor & X) : torch::Tensor
+dump_cpt() const : std::string
+getNumberOfEdges() const : int
+getNumberOfNodes() const : int
+getNumberOfStates() const : int
+graph(const std::string & title) const : std::vector<std::string>
+predict(std::vector<std::vector<int>> & X) : std::vector<int>
+predict(torch::Tensor & X) : torch::Tensor
#predict_average_proba(torch::Tensor & X) : torch::Tensor
#predict_average_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
#predict_average_voting(torch::Tensor & X) : torch::Tensor
#predict_average_voting(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
+predict_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
+predict_proba(torch::Tensor & X) : torch::Tensor
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
+score(torch::Tensor & X, torch::Tensor & y) : float
+show() const : std::vector<std::string>
+topological_order() : std::vector<std::string>
#trainModel(const torch::Tensor & weights) : void
#voting(torch::Tensor & votes) : torch::Tensor
__
#models : std::vector<std::unique_ptr<Classifier>>
#n_models : unsigned int
#predict_voting : bool
#significanceModels : std::vector<double>
}
class "bayesnet::(anonymous_45089536)" as C_0001186398587753535158
class C_0001186398587753535158 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_45090163)" as C_0000602764946063116717
class C_0000602764946063116717 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::BoostAODE" as C_0000358471592399852382
class C_0000358471592399852382 #aliceblue;line:blue;line.dotted;text:blue {
+BoostAODE(bool predict_voting = false) : void
+~BoostAODE() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & title = "BoostAODE") const : std::vector<std::string>
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
#trainModel(const torch::Tensor & weights) : void
__
}
class "bayesnet::MST" as C_0000131858426172291700
class C_0000131858426172291700 #aliceblue;line:blue;line.dotted;text:blue {
+MST() = default : void
+MST(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : void
..
+maximumSpanningTree() : std::vector<std::pair<int,int>>
__
}
class "bayesnet::Graph" as C_0001197041682001898467
class C_0001197041682001898467 #aliceblue;line:blue;line.dotted;text:blue {
+Graph(int V) : void
..
+addEdge(int u, int v, float wt) : void
+find_set(int i) : int
+get_mst() : std::vector<std::pair<float,std::pair<int,int>>>
+kruskal_algorithm() : void
+union_set(int u, int v) : void
__
}
class "bayesnet::KDBLd" as C_0000344502277874806837
class C_0000344502277874806837 #aliceblue;line:blue;line.dotted;text:blue {
+KDBLd(int k) : void
+~KDBLd() = default : void
..
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : KDBLd &
+graph(const std::string & name = "KDB") const : std::vector<std::string>
+predict(torch::Tensor & X) : torch::Tensor
{static} +version() : std::string
__
}
class "bayesnet::AODE" as C_0000786111576121788282
class C_0000786111576121788282 #aliceblue;line:blue;line.dotted;text:blue {
+AODE(bool predict_voting = false) : void
+~AODE() : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & title = "AODE") const : std::vector<std::string>
+setHyperparameters(const nlohmann::json & hyperparameters) : void
__
}
class "bayesnet::SPODELd" as C_0001369655639257755354
class C_0001369655639257755354 #aliceblue;line:blue;line.dotted;text:blue {
+SPODELd(int root) : void
+~SPODELd() = default : void
..
+commonFit(const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
+predict(torch::Tensor & X) : torch::Tensor
{static} +version() : std::string
__
}
class "bayesnet::AODELd" as C_0000487273479333793647
class C_0000487273479333793647 #aliceblue;line:blue;line.dotted;text:blue {
+AODELd(bool predict_voting = true) : void
+~AODELd() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+fit(torch::Tensor & X_, torch::Tensor & y_, const std::vector<std::string> & features_, const std::string & className_, std::map<std::string,std::vector<int>> & states_) : AODELd &
+graph(const std::string & name = "AODELd") const : std::vector<std::string>
#trainModel(const torch::Tensor & weights) : void
__
}
C_0001303524929067080934 --> C_0001303524929067080934 : -parents
C_0001303524929067080934 --> C_0001303524929067080934 : -children
C_0001186707649890429575 o-- C_0001303524929067080934 : -nodes
C_0000327135989451974539 ..> C_0000738420730783851375
C_0002043996622900301644 o-- C_0001186707649890429575 : #model
C_0002043996622900301644 o-- C_0000736965376885623323 : #metrics
C_0002043996622900301644 o-- C_0000738420730783851375 : #status
C_0000327135989451974539 <|-- C_0002043996622900301644
C_0002043996622900301644 <|-- C_0001112865019015250005
C_0002043996622900301644 <|-- C_0001760994424884323017
C_0002219995589162262979 ..> C_0001186707649890429575
C_0001760994424884323017 <|-- C_0001668829096702037834
C_0002219995589162262979 <|-- C_0001668829096702037834
C_0000736965376885623323 <|-- C_0001695326193250580823
C_0001695326193250580823 <|-- C_0000011627355691342494
C_0001695326193250580823 <|-- C_0000144682015341746929
C_0001695326193250580823 <|-- C_0000008268514674428553
C_0002043996622900301644 <|-- C_0000512022813807538451
C_0001985241386355360576 o-- C_0002043996622900301644 : #models
C_0002043996622900301644 <|-- C_0001985241386355360576
C_0000358471592399852382 --> C_0001695326193250580823 : -featureSelector
C_0001985241386355360576 <|-- C_0000358471592399852382
C_0001112865019015250005 <|-- C_0000344502277874806837
C_0002219995589162262979 <|-- C_0000344502277874806837
C_0001985241386355360576 <|-- C_0000786111576121788282
C_0000512022813807538451 <|-- C_0001369655639257755354
C_0002219995589162262979 <|-- C_0001369655639257755354
C_0001985241386355360576 <|-- C_0000487273479333793647
C_0002219995589162262979 <|-- C_0000487273479333793647

'Generated with clang-uml, version 0.5.1
'LLVM version clang version 17.0.6 (Fedora 17.0.6-2.fc39)
@enduml
1
diagrams/BayesNet.svg
Normal file
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 139 KiB
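The diagram above summarizes the public API of the library. The snippet below is a minimal end-to-end sketch of that API; the header path comes from the test sources later in this diff, while the two-feature toy dataset and its states map are invented for illustration only.

```cpp
#include <map>
#include <string>
#include <vector>
#include "bayesnet/classifiers/TAN.h"

int main()
{
    // Two already-discretized features, three samples, binary class.
    // X is laid out as features x samples, matching the library's convention.
    std::vector<std::vector<int>> X = { { 0, 1, 1 }, { 1, 0, 1 } };
    std::vector<int> y = { 0, 1, 1 };
    std::vector<std::string> features = { "f1", "f2" };
    std::string className = "class";
    std::map<std::string, std::vector<int>> states = {
        { "f1", { 0, 1 } }, { "f2", { 0, 1 } }, { "class", { 0, 1 } }
    };
    auto model = bayesnet::TAN();
    model.fit(X, y, features, className, states);   // vector overload of fit()
    auto predictions = model.predict(X);             // returns std::vector<int>
    return predictions.size() == y.size() ? 0 : 1;
}
```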
128
diagrams/dependency.svg
Normal file
@@ -0,0 +1,128 @@
[Graphviz-generated SVG "BayesNet" dependency graph: BayesNet links against libc10.so, libkineto.a, mdlp and torch_library; torch_library pulls in libtorch.so and torch_cpu_library, which in turn depends on libtorch_cpu.so, caffe2::mkl and dummy (protobuf::libprotobuf). Raw SVG markup not reproduced here.]
After Width: | Height: | Size: 7.1 KiB
@@ -1,18 +1,18 @@
 # BoostAODE Algorithm Operation

-## Algorithm
-
 ## Hyperparameters

 The hyperparameters defined in the algorithm are:

-- ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *false*.
+- ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *true*.

 - ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*.

-- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is made by taking the first partition generated by a process of generating a 5 fold partition with stratification using a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *false*.
-
-- ***maxTolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *1*. If ***bisection*** is set to *true*, the value of this hyperparameter will be the exponent of base 2 used to compute the number of models to insert at once.
+- ***block_update*** (*boolean*): Sets whether the algorithm will update the weights of the models in blocks. If set to false, the algorithm will update the weights of the models one by one. Default value: *false*.
+
+- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is made by taking the first partition generated by a process of generating a 5 fold partition with stratification using a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *true*.
+
+- ***maxTolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. If ***bisection*** is set to *true*, the value of this hyperparameter will be the exponent of base 2 used to compute the number of models to insert at once. Default value: *3*.

 - ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions. Once the models of the selected variables are built, the algorithm will update the weights using the ensemble and set the significance of all the models built with the same α<sub>t</sub>. Default value: *""*.
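Taken together, a typical configuration touching these hyperparameters could look as follows. This is a minimal sketch: the keys come from the list above and the setHyperparameters(const nlohmann::json &) signature appears in the class diagram, but the header path and the chosen values are illustrative assumptions.

```cpp
#include <nlohmann/json.hpp>
#include "bayesnet/ensembles/BoostAODE.h"

int main()
{
    auto clf = bayesnet::BoostAODE();
    // Keys follow the hyperparameter list above; values are just an example.
    nlohmann::json hyperparameters = {
        { "bisection", true },
        { "order", "desc" },
        { "block_update", false },
        { "convergence", true },
        { "maxTolerance", 3 },
        { "select_features", "CFS" }
    };
    clf.setHyperparameters(hyperparameters);
    return 0;
}
```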
@@ -26,42 +26,4 @@ The hyperparameters defined in the algorithm are:

 ## Operation

-The algorithm performs the following steps:
+### [Algorithm](./algorithm.md)
-
-1. **Initialization**
-
-   - If ***select_features*** is set, as many *SPODEs* are created as variables selected by the corresponding feature selection algorithm, and these variables are marked as used.
-
-   - Initial weights of the examples are set to *1/m*.
-
-1. **Main Training Loop:**
-
-   - Variables are sorted by mutual information order with the class variable and processed in ascending, descending or random order, according to the value of the *order* hyperparameter. If it is random, the variables are shuffled.
-
-   - If the parent repetition is not established, the variable is marked as used.
-
-   - A *SPODE* is created using the selected variable as the parent.
-
-   - The model is trained, and the class variable corresponding to the training dataset is calculated. The calculation can be done using the last trained model or the set of models trained up to that point, according to the value of the *predict_single* hyperparameter.
-
-   - The weights associated with the examples are updated using this expression:
-
-     - w<sub>i</sub> · e<sup>α<sub>t</sub></sup> (if the example has been misclassified)
-
-     - w<sub>i</sub> · e<sup>-α<sub>t</sub></sup> (if the example has been correctly classified)
-
-   - The model significance is set to α<sub>t</sub>.
-
-   - If the ***convergence*** hyperparameter is set, the accuracy value on the test dataset that we separated in an initial step is calculated.
-
-1. **Exit Conditions:**
-
-   - ε<sub>t</sub> > 0.5 => misclassified examples are penalized.
-
-   - Number of models with worse accuracy greater than ***tolerance*** and ***convergence*** established.
-
-   - There are no more variables to create models, and ***repeatSparent*** is not set.
-
-   - Number of models > ***maxModels*** if ***repeatSparent*** is set.
-
-### [Proposal for *predict_single = false*](./BoostAODE_train_predict.pdf)
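The weight update described in the removed notes above is the usual AdaBoost-style rule. The sketch below shows that single step with torch tensors; it is illustrative only, and the SAMME expression for α<sub>t</sub> is an assumption made for the example, since the exact formula used by BoostAODE is not stated here.

```cpp
#include <cmath>
#include <utility>
#include <torch/torch.h>

// Illustrative AdaBoost-style weight update, as described in the notes above.
// The alpha_t formula (SAMME) is an assumption; the library may compute it differently.
std::pair<torch::Tensor, double> update_weights_example(
    const torch::Tensor& y_true,   // int64 tensor of true labels, shape [m]
    const torch::Tensor& y_pred,   // int64 tensor of predicted labels, shape [m]
    torch::Tensor weights,         // double tensor of sample weights, shape [m]
    int n_classes)
{
    auto misclassified = (y_true != y_pred).to(torch::kDouble);
    // Weighted training error of the current model.
    double epsilon_t = (weights * misclassified).sum().item<double>() / weights.sum().item<double>();
    // SAMME significance: grows as the model beats random guessing (assumed formula).
    double alpha_t = std::log((1 - epsilon_t) / epsilon_t) + std::log(n_classes - 1);
    // w_i * e^{alpha_t} for misclassified samples, w_i * e^{-alpha_t} for correct ones.
    weights = weights * torch::exp(alpha_t * (2 * misclassified - 1));
    // Renormalize so the weights sum to 1 (common practice, assumed here).
    weights = weights / weights.sum();
    return { weights, alpha_t };
}
```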
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "ArffFiles.h"
 #include <fstream>
 #include <sstream>

@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef ARFFFILES_H
 #define ARFFFILES_H


@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef CPPFIMDLP_H
 #define CPPFIMDLP_H


@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef CCMETRICS_H
 #define CCMETRICS_H


@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef TYPES_H
 #define TYPES_H


@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <ArffFiles.h>
 #include <CPPFImdlp.h>
 #include <bayesnet/ensembles/BoostAODE.h>
@@ -8,12 +8,17 @@ if(ENABLE_TESTING)
        ${CMAKE_BINARY_DIR}/configured_files/include
    )
    file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
-   add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestUtils.cc ${BayesNet_SOURCES})
+   add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc
+       TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc
+       TestUtils.cc TestBayesEnsemble.cc ${BayesNet_SOURCES})
    target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
    add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
    add_test(NAME Network COMMAND TestBayesNet "[Network]")
    add_test(NAME Node COMMAND TestBayesNet "[Node]")
    add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
    add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
+   add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
+   add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
    add_test(NAME Models COMMAND TestBayesNet "[Models]")
+   add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
endif(ENABLE_TESTING)
125
tests/TestBayesClassifier.cc
Normal file
125
tests/TestBayesClassifier.cc
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
// ***************************************************************
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX-FileType: SOURCE
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
// ***************************************************************
|
||||||
|
|
||||||
|
#include <catch2/catch_test_macros.hpp>
|
||||||
|
#include <catch2/matchers/catch_matchers.hpp>
|
||||||
|
#include <string>
|
||||||
|
#include "TestUtils.h"
|
||||||
|
#include "bayesnet/classifiers/TAN.h"
|
||||||
|
#include "bayesnet/classifiers/KDB.h"
|
||||||
|
#include "bayesnet/classifiers/KDBLd.h"
|
||||||
|
|
||||||
|
|
||||||
|
TEST_CASE("Test Cannot build dataset with wrong data vector", "[Classifier]")
|
||||||
|
{
|
||||||
|
auto model = bayesnet::TAN();
|
||||||
|
auto raw = RawDatasets("iris", true);
|
||||||
|
raw.yv.pop_back();
|
||||||
|
REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv), std::runtime_error);
|
||||||
|
    REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
}
TEST_CASE("Test Cannot build dataset with wrong data tensor", "[Classifier]")
{
    auto model = bayesnet::TAN();
    auto raw = RawDatasets("iris", true);
    auto yshort = torch::zeros({ 149 }, torch::kInt32);
    REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.featurest, raw.classNamet, raw.statest), std::runtime_error);
    REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.featurest, raw.classNamet, raw.statest), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
}
TEST_CASE("Invalid data type", "[Classifier]")
{
    auto model = bayesnet::TAN();
    auto raw = RawDatasets("iris", false);
    REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), std::invalid_argument);
    REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), "dataset (X, y) must be of type Integer");
}
TEST_CASE("Invalid number of features", "[Classifier]")
{
    auto model = bayesnet::TAN();
    auto raw = RawDatasets("iris", true);
    auto Xt = torch::cat({ raw.Xt, torch::zeros({ 1, 150 }, torch::kInt32) }, 0);
    REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), std::invalid_argument);
    REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), "Classifier: X 5 and features 4 must have the same number of features");
}
TEST_CASE("Invalid class name", "[Classifier]")
{
    auto model = bayesnet::TAN();
    auto raw = RawDatasets("iris", true);
    REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, "duck", raw.statest), std::invalid_argument);
    REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, "duck", raw.statest), "class name not found in states");
}
TEST_CASE("Invalid feature name", "[Classifier]")
{
    auto model = bayesnet::TAN();
    auto raw = RawDatasets("iris", true);
    auto statest = raw.statest;
    statest.erase("petallength");
    REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), std::invalid_argument);
    REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), "feature [petallength] not found in states");
}
TEST_CASE("Invalid hyperparameter", "[Classifier]")
{
    auto model = bayesnet::KDB(2);
    auto raw = RawDatasets("iris", true);
    REQUIRE_THROWS_AS(model.setHyperparameters({ { "alpha", "0.0" } }), std::invalid_argument);
    REQUIRE_THROWS_WITH(model.setHyperparameters({ { "alpha", "0.0" } }), "Invalid hyperparameters{\"alpha\":\"0.0\"}");
}
TEST_CASE("Topological order", "[Classifier]")
{
    auto model = bayesnet::TAN();
    auto raw = RawDatasets("iris", true);
    model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    auto order = model.topological_order();
    REQUIRE(order.size() == 4);
    REQUIRE(order[0] == "petallength");
    REQUIRE(order[1] == "sepallength");
    REQUIRE(order[2] == "sepalwidth");
    REQUIRE(order[3] == "petalwidth");
}
TEST_CASE("Dump_cpt", "[Classifier]")
{
    auto model = bayesnet::TAN();
    auto raw = RawDatasets("iris", true);
    model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    auto cpt = model.dump_cpt();
    REQUIRE(cpt.size() == 1713);
}
TEST_CASE("Not fitted model", "[Classifier]")
{
    auto model = bayesnet::TAN();
    auto raw = RawDatasets("iris", true);
    auto message = "Classifier has not been fitted";
    // tensors
    REQUIRE_THROWS_AS(model.predict(raw.Xt), std::logic_error);
    REQUIRE_THROWS_WITH(model.predict(raw.Xt), message);
    REQUIRE_THROWS_AS(model.predict_proba(raw.Xt), std::logic_error);
    REQUIRE_THROWS_WITH(model.predict_proba(raw.Xt), message);
    REQUIRE_THROWS_AS(model.score(raw.Xt, raw.yt), std::logic_error);
    REQUIRE_THROWS_WITH(model.score(raw.Xt, raw.yt), message);
    // vectors
    REQUIRE_THROWS_AS(model.predict(raw.Xv), std::logic_error);
    REQUIRE_THROWS_WITH(model.predict(raw.Xv), message);
    REQUIRE_THROWS_AS(model.predict_proba(raw.Xv), std::logic_error);
    REQUIRE_THROWS_WITH(model.predict_proba(raw.Xv), message);
    REQUIRE_THROWS_AS(model.score(raw.Xv, raw.yv), std::logic_error);
    REQUIRE_THROWS_WITH(model.score(raw.Xv, raw.yv), message);
}
TEST_CASE("KDB Graph", "[Classifier]")
{
    auto model = bayesnet::KDB(2);
    auto raw = RawDatasets("iris", true);
    model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    auto graph = model.graph();
    REQUIRE(graph.size() == 15);
}
TEST_CASE("KDBLd Graph", "[Classifier]")
{
    auto model = bayesnet::KDBLd(2);
    auto raw = RawDatasets("iris", false);
    model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    auto graph = model.graph();
    REQUIRE(graph.size() == 15);
}
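Taken together, the cases above pin down the public classifier API: fit() accepts either torch tensors (Xt, yt) or STL vectors (Xv, yv) plus the feature names, class name and states map, hyperparameters are validated in setHyperparameters(), and predict(), predict_proba() and score() refuse to run before fit(). A minimal usage sketch distilled from these tests follows; it assumes the same RawDatasets helper from TestUtils.h and a build that links against the library, and it omits error handling.

#include <iostream>
#include "bayesnet/classifiers/TAN.h"
#include "TestUtils.h" // RawDatasets helper used throughout the tests (assumed available)

int main()
{
    auto raw = RawDatasets("iris", true);        // discretized iris, as in the tests above
    auto model = bayesnet::TAN();
    // Vector overload; the tensor overload fit(raw.Xt, raw.yt, ...) behaves the same way.
    model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    auto y_pred = model.predict(raw.Xv);         // one class index per sample
    auto accuracy = model.score(raw.Xv, raw.yv); // accuracy on the training data
    std::cout << "samples: " << y_pred.size() << " accuracy: " << accuracy << "\n";
    for (const auto& line : model.graph("Test")) // Graphviz-style description of the model
        std::cout << line;
    return 0;
}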
126 tests/TestBayesEnsemble.cc Normal file
@@ -0,0 +1,126 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <type_traits>
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "bayesnet/ensembles/BoostAODE.h"
#include "bayesnet/ensembles/AODE.h"
#include "bayesnet/ensembles/AODELd.h"
#include "TestUtils.h"

TEST_CASE("Topological Order", "[Ensemble]")
{
    auto raw = RawDatasets("glass", true);
    auto clf = bayesnet::BoostAODE();
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    auto order = clf.topological_order();
    REQUIRE(order.size() == 0);
}
TEST_CASE("Dump CPT", "[Ensemble]")
{
    auto raw = RawDatasets("glass", true);
    auto clf = bayesnet::BoostAODE();
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    auto dump = clf.dump_cpt();
    REQUIRE(dump == "");
}
TEST_CASE("Number of States", "[Ensemble]")
{
    auto clf = bayesnet::BoostAODE();
    auto raw = RawDatasets("iris", true);
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    REQUIRE(clf.getNumberOfStates() == 76);
}
TEST_CASE("Show", "[Ensemble]")
{
    auto clf = bayesnet::BoostAODE();
    auto raw = RawDatasets("iris", true);
    clf.setHyperparameters({
        {"bisection", false},
        {"maxTolerance", 1},
        {"convergence", false},
    });
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    std::vector<std::string> expected = {
        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
        "petallength -> sepallength, sepalwidth, petalwidth, ",
        "petalwidth -> ",
        "sepallength -> ",
        "sepalwidth -> ",
        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
        "petallength -> ",
        "petalwidth -> sepallength, sepalwidth, petallength, ",
        "sepallength -> ",
        "sepalwidth -> ",
        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
        "petallength -> ",
        "petalwidth -> ",
        "sepallength -> sepalwidth, petallength, petalwidth, ",
        "sepalwidth -> ",
        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
        "petallength -> ",
        "petalwidth -> ",
        "sepallength -> ",
        "sepalwidth -> sepallength, petallength, petalwidth, ",
    };
    auto show = clf.show();
    REQUIRE(show.size() == expected.size());
    for (size_t i = 0; i < show.size(); i++)
        REQUIRE(show[i] == expected[i]);
}
TEST_CASE("Graph", "[Ensemble]")
{
    auto clf = bayesnet::BoostAODE();
    auto raw = RawDatasets("iris", true);
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    auto graph = clf.graph();
    REQUIRE(graph.size() == 56);
    auto clf2 = bayesnet::AODE();
    clf2.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    graph = clf2.graph();
    REQUIRE(graph.size() == 56);
    raw = RawDatasets("glass", false);
    auto clf3 = bayesnet::AODELd();
    clf3.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    graph = clf3.graph();
    REQUIRE(graph.size() == 261);
}
TEST_CASE("Compute ArgMax", "[Ensemble]")
{
    class TestEnsemble : public bayesnet::BoostAODE {
    public:
        TestEnsemble() : bayesnet::BoostAODE() {}
        torch::Tensor compute_arg_max(torch::Tensor& X) { return Ensemble::compute_arg_max(X); }
        std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X) { return Ensemble::compute_arg_max(X); }
    };
    TestEnsemble clf;
    std::vector<std::vector<double>> X = {
        {0.1f, 0.2f, 0.3f},
        {0.4f, 0.9f, 0.6f},
        {0.7f, 0.8f, 0.9f},
        {0.5f, 0.2f, 0.1f},
        {0.3f, 0.7f, 0.2f},
        {0.5f, 0.5f, 0.2f}
    };
    std::vector<int> expected = { 2, 1, 2, 0, 1, 0 };
    auto argmax = clf.compute_arg_max(X);
    REQUIRE(argmax.size() == expected.size());
    REQUIRE(argmax == expected);
    auto Xt = torch::zeros({ 6, 3 }, torch::kFloat32);
    Xt[0][0] = 0.1f; Xt[0][1] = 0.2f; Xt[0][2] = 0.3f;
    Xt[1][0] = 0.4f; Xt[1][1] = 0.9f; Xt[1][2] = 0.6f;
    Xt[2][0] = 0.7f; Xt[2][1] = 0.8f; Xt[2][2] = 0.9f;
    Xt[3][0] = 0.5f; Xt[3][1] = 0.2f; Xt[3][2] = 0.1f;
    Xt[4][0] = 0.3f; Xt[4][1] = 0.7f; Xt[4][2] = 0.2f;
    Xt[5][0] = 0.5f; Xt[5][1] = 0.5f; Xt[5][2] = 0.2f;
    auto argmaxt = clf.compute_arg_max(Xt);
    REQUIRE(argmaxt.size(0) == expected.size());
    for (int i = 0; i < argmaxt.size(0); i++)
        REQUIRE(argmaxt[i].item<int>() == expected[i]);
}
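The TestEnsemble subclass above exists only to expose the protected Ensemble::compute_arg_max overloads, which reduce a matrix of per-class probabilities to one predicted class index per row, with ties resolved to the first maximum (that is what the {0.5, 0.5, 0.2} row checks). The following standalone sketch reproduces that row-wise argmax contract with the standard library only; it is illustrative and not the library's implementation.

#include <algorithm>
#include <iostream>
#include <vector>

// Row-wise argmax: the behaviour the "Compute ArgMax" test verifies.
std::vector<int> arg_max_rows(const std::vector<std::vector<double>>& X)
{
    std::vector<int> result;
    result.reserve(X.size());
    for (const auto& row : X) {
        auto it = std::max_element(row.begin(), row.end()); // first maximum wins on ties
        result.push_back(static_cast<int>(std::distance(row.begin(), it)));
    }
    return result;
}

int main()
{
    std::vector<std::vector<double>> X = {
        {0.1, 0.2, 0.3}, {0.4, 0.9, 0.6}, {0.7, 0.8, 0.9},
        {0.5, 0.2, 0.1}, {0.3, 0.7, 0.2}, {0.5, 0.5, 0.2}
    };
    for (int c : arg_max_rows(X))
        std::cout << c << ' ';                              // prints: 2 1 2 0 1 0
    std::cout << '\n';
    return 0;
}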
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/catch_approx.hpp>
 #include <catch2/generators/catch_generators.hpp>
@@ -1,7 +1,14 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <type_traits>
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/catch_approx.hpp>
 #include <catch2/generators/catch_generators.hpp>
+#include <catch2/matchers/catch_matchers.hpp>
 #include "bayesnet/classifiers/KDB.h"
 #include "bayesnet/classifiers/TAN.h"
 #include "bayesnet/classifiers/SPODE.h"
@@ -13,7 +20,7 @@
 #include "bayesnet/ensembles/BoostAODE.h"
 #include "TestUtils.h"
 
-const std::string ACTUAL_VERSION = "1.0.4";
+const std::string ACTUAL_VERSION = "1.0.5";
 
 TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
 {
@@ -51,6 +58,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
 auto score = clf->score(raw.Xt, raw.yt);
 INFO("Classifier: " + name + " File: " + file_name);
 REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
+REQUIRE(clf->getStatus() == bayesnet::NORMAL);
 }
 }
 SECTION("Library check version")
@@ -60,7 +68,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
 }
 delete clf;
 }
-TEST_CASE("Models features", "[Models]")
+TEST_CASE("Models features & Graph", "[Models]")
 {
 auto graph = std::vector<std::string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
 "class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
@@ -69,6 +77,8 @@ TEST_CASE("Models features", "[Models]")
 "sepallength -> sepalwidth", "sepalwidth [shape=circle] \n", "sepalwidth -> petalwidth", "}\n"
 }
 );
+SECTION("Test TAN")
+{
 auto raw = RawDatasets("iris", true);
 auto clf = bayesnet::TAN();
 clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
@@ -79,6 +89,19 @@ TEST_CASE("Models features", "[Models]")
 REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
 REQUIRE(clf.graph("Test") == graph);
 }
+SECTION("Test TANLd")
+{
+auto clf = bayesnet::TANLd();
+auto raw = RawDatasets("iris", false);
+clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
+REQUIRE(clf.getNumberOfNodes() == 5);
+REQUIRE(clf.getNumberOfEdges() == 7);
+REQUIRE(clf.getNumberOfStates() == 19);
+REQUIRE(clf.getClassNumStates() == 3);
+REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
+REQUIRE(clf.graph("Test") == graph);
+}
+}
 TEST_CASE("Get num features & num edges", "[Models]")
 {
 auto raw = RawDatasets("iris", true);
@@ -87,62 +110,7 @@ TEST_CASE("Get num features & num edges", "[Models]")
 REQUIRE(clf.getNumberOfNodes() == 5);
 REQUIRE(clf.getNumberOfEdges() == 8);
 }
-TEST_CASE("BoostAODE feature_select CFS", "[Models]")
-{
-auto raw = RawDatasets("glass", true);
-auto clf = bayesnet::BoostAODE();
-clf.setHyperparameters({ {"select_features", "CFS"} });
-clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-REQUIRE(clf.getNumberOfNodes() == 90);
-REQUIRE(clf.getNumberOfEdges() == 153);
-REQUIRE(clf.getNotes().size() == 2);
-REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
-REQUIRE(clf.getNotes()[1] == "Number of models: 9");
-}
-TEST_CASE("BoostAODE feature_select IWSS", "[Models]")
-{
-auto raw = RawDatasets("glass", true);
-auto clf = bayesnet::BoostAODE();
-clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
-clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-REQUIRE(clf.getNumberOfNodes() == 90);
-REQUIRE(clf.getNumberOfEdges() == 153);
-REQUIRE(clf.getNotes().size() == 2);
-REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with IWSS");
-REQUIRE(clf.getNotes()[1] == "Number of models: 9");
-}
-TEST_CASE("BoostAODE feature_select FCBF", "[Models]")
-{
-auto raw = RawDatasets("glass", true);
-auto clf = bayesnet::BoostAODE();
-clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
-clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-REQUIRE(clf.getNumberOfNodes() == 90);
-REQUIRE(clf.getNumberOfEdges() == 153);
-REQUIRE(clf.getNotes().size() == 2);
-REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with FCBF");
-REQUIRE(clf.getNotes()[1] == "Number of models: 9");
-}
-TEST_CASE("BoostAODE test used features in train note and score", "[Models]")
-{
-auto raw = RawDatasets("diabetes", true);
-auto clf = bayesnet::BoostAODE(true);
-clf.setHyperparameters({
-{"order", "asc"},
-{"convergence", true},
-{"select_features","CFS"},
-});
-clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-REQUIRE(clf.getNumberOfNodes() == 72);
-REQUIRE(clf.getNumberOfEdges() == 120);
-REQUIRE(clf.getNotes().size() == 2);
-REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
-REQUIRE(clf.getNotes()[1] == "Number of models: 8");
-auto score = clf.score(raw.Xv, raw.yv);
-auto scoret = clf.score(raw.Xt, raw.yt);
-REQUIRE(score == Catch::Approx(0.80078).epsilon(raw.epsilon));
-REQUIRE(scoret == Catch::Approx(0.80078).epsilon(raw.epsilon));
-}
 TEST_CASE("Model predict_proba", "[Models]")
 {
 std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
@@ -169,15 +137,15 @@ TEST_CASE("Model predict_proba", "[Models]")
 {0.003135, 0.991799, 0.0050661}
 });
 auto res_prob_baode = std::vector<std::vector<double>>({
-{0.00803291, 0.9676, 0.0243672},
+{0.0112349, 0.962274, 0.0264907},
-{0.00398714, 0.945126, 0.050887},
+{0.00371025, 0.950592, 0.0456973},
-{0.00398714, 0.945126, 0.050887},
+{0.00371025, 0.950592, 0.0456973},
-{0.00398714, 0.945126, 0.050887},
+{0.00371025, 0.950592, 0.0456973},
-{0.00189227, 0.859575, 0.138533},
+{0.00369275, 0.84967, 0.146637},
-{0.0118341, 0.442149, 0.546017},
+{0.0252205, 0.113564, 0.861215},
-{0.0216135, 0.785781, 0.192605},
+{0.0284828, 0.770524, 0.200993},
-{0.0204803, 0.844276, 0.135244},
+{0.0213182, 0.857189, 0.121493},
-{0.00576313, 0.961665, 0.0325716},
+{0.00868436, 0.949494, 0.0418215}
 });
 auto res_prob_voting = std::vector<std::vector<double>>({
 {0, 1, 0},
@@ -185,8 +153,8 @@ TEST_CASE("Model predict_proba", "[Models]")
 {0, 1, 0},
 {0, 1, 0},
 {0, 1, 0},
-{0, 0.447909, 0.552091},
+{0, 0, 1},
-{0, 0.811482, 0.188517},
+{0, 1, 0},
 {0, 1, 0},
 {0, 1, 0}
 });
@@ -209,7 +177,7 @@ TEST_CASE("Model predict_proba", "[Models]")
 REQUIRE(y_pred.size() == raw.yv.size());
 REQUIRE(y_pred_proba[0].size() == 3);
 REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size());
-for (int i = 0; i < y_pred_proba.size(); ++i) {
+for (int i = 0; i < 9; ++i) {
 auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end());
 int predictedClass = distance(y_pred_proba[i].begin(), maxElem);
 REQUIRE(predictedClass == y_pred[i]);
@@ -220,7 +188,7 @@ TEST_CASE("Model predict_proba", "[Models]")
 }
 }
 // Check predict_proba values for vectors and tensors
-for (int i = 0; i < res_prob.size(); i++) {
+for (int i = 0; i < 9; i++) {
 REQUIRE(y_pred[i] == yt_pred[i].item<int>());
 for (int j = 0; j < 3; j++) {
 REQUIRE(res_prob[model][i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon));
@@ -230,25 +198,7 @@ TEST_CASE("Model predict_proba", "[Models]")
 delete clf;
 }
 }
-TEST_CASE("BoostAODE voting-proba", "[Models]")
-{
-auto raw = RawDatasets("iris", true);
-auto clf = bayesnet::BoostAODE(false);
-clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-auto score_proba = clf.score(raw.Xv, raw.yv);
-auto pred_proba = clf.predict_proba(raw.Xv);
-clf.setHyperparameters({
-{"predict_voting",true},
-});
-auto score_voting = clf.score(raw.Xv, raw.yv);
-auto pred_voting = clf.predict_proba(raw.Xv);
-REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
-REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
-REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon));
-REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon));
-clf.dump_cpt();
-REQUIRE(clf.topological_order() == std::vector<std::string>());
-}
 TEST_CASE("AODE voting-proba", "[Models]")
 {
 auto raw = RawDatasets("glass", true);
@@ -294,22 +244,27 @@ TEST_CASE("KDB with hyperparameters", "[Models]")
 REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
 REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
 }
-TEST_CASE("BoostAODE order asc, desc & random", "[Models]")
+TEST_CASE("Incorrect type of data for SPODELd", "[Models]")
 {
-auto raw = RawDatasets("glass", true);
+auto raw = RawDatasets("iris", true);
-std::map<std::string, double> scores{
+auto clf = bayesnet::SPODELd(0);
-{"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
+REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest), std::runtime_error);
-};
-for (const std::string& order : { "asc", "desc", "rand" }) {
-auto clf = bayesnet::BoostAODE();
-clf.setHyperparameters({
-{"order", order},
-});
-clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
-auto score = clf.score(raw.Xv, raw.yv);
-auto scoret = clf.score(raw.Xt, raw.yt);
-INFO("BoostAODE order: " + order);
-REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
-REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
-}
 }
+TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
+{
+auto clf = bayesnet::AODE();
+auto raw = RawDatasets("iris", true);
+std::string message = "Ensemble has not been fitted";
+REQUIRE_THROWS_AS(clf.predict(raw.Xv), std::logic_error);
+REQUIRE_THROWS_AS(clf.predict_proba(raw.Xv), std::logic_error);
+REQUIRE_THROWS_AS(clf.predict(raw.Xt), std::logic_error);
+REQUIRE_THROWS_AS(clf.predict_proba(raw.Xt), std::logic_error);
+REQUIRE_THROWS_AS(clf.score(raw.Xv, raw.yv), std::logic_error);
+REQUIRE_THROWS_AS(clf.score(raw.Xt, raw.yt), std::logic_error);
+REQUIRE_THROWS_WITH(clf.predict(raw.Xv), message);
+REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xv), message);
+REQUIRE_THROWS_WITH(clf.predict(raw.Xt), message);
+REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xt), message);
+REQUIRE_THROWS_WITH(clf.score(raw.Xv, raw.yv), message);
+REQUIRE_THROWS_WITH(clf.score(raw.Xt, raw.yt), message);
+}
 }
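One pattern worth noting in this file: the locally-discretized variants (TANLd here, and SPODELd, KDBLd and AODELd elsewhere in the suite) are consistently fitted on RawDatasets(..., false), i.e. the continuous tensors, while the new "Incorrect type of data for SPODELd" case expects a std::runtime_error when the already-discretized dataset is passed instead. That reading is inferred from the tests alone rather than from any library documentation.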
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/catch_approx.hpp>
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/catch_approx.hpp>
 #include <catch2/generators/catch_generators.hpp>
184 tests/TestBoostAODE.cc Normal file
@@ -0,0 +1,184 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <type_traits>
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "bayesnet/ensembles/BoostAODE.h"
#include "TestUtils.h"

TEST_CASE("Feature_select CFS", "[BoostAODE]")
{
    auto raw = RawDatasets("glass", true);
    auto clf = bayesnet::BoostAODE();
    clf.setHyperparameters({ {"select_features", "CFS"} });
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    REQUIRE(clf.getNumberOfNodes() == 90);
    REQUIRE(clf.getNumberOfEdges() == 153);
    REQUIRE(clf.getNotes().size() == 2);
    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("Feature_select IWSS", "[BoostAODE]")
{
    auto raw = RawDatasets("glass", true);
    auto clf = bayesnet::BoostAODE();
    clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    REQUIRE(clf.getNumberOfNodes() == 90);
    REQUIRE(clf.getNumberOfEdges() == 153);
    REQUIRE(clf.getNotes().size() == 2);
    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("Feature_select FCBF", "[BoostAODE]")
{
    auto raw = RawDatasets("glass", true);
    auto clf = bayesnet::BoostAODE();
    clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    REQUIRE(clf.getNumberOfNodes() == 90);
    REQUIRE(clf.getNumberOfEdges() == 153);
    REQUIRE(clf.getNotes().size() == 2);
    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with FCBF");
    REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("Test used features in train note and score", "[BoostAODE]")
{
    auto raw = RawDatasets("diabetes", true);
    auto clf = bayesnet::BoostAODE(true);
    clf.setHyperparameters({
        {"order", "asc"},
        {"convergence", true},
        {"select_features","CFS"},
    });
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    REQUIRE(clf.getNumberOfNodes() == 72);
    REQUIRE(clf.getNumberOfEdges() == 120);
    REQUIRE(clf.getNotes().size() == 2);
    REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
    REQUIRE(clf.getNotes()[1] == "Number of models: 8");
    auto score = clf.score(raw.Xv, raw.yv);
    auto scoret = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(0.80078).epsilon(raw.epsilon));
    REQUIRE(scoret == Catch::Approx(0.80078).epsilon(raw.epsilon));
}
TEST_CASE("Voting vs proba", "[BoostAODE]")
{
    auto raw = RawDatasets("iris", true);
    auto clf = bayesnet::BoostAODE(false);
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    auto score_proba = clf.score(raw.Xv, raw.yv);
    auto pred_proba = clf.predict_proba(raw.Xv);
    clf.setHyperparameters({
        {"predict_voting",true},
    });
    auto score_voting = clf.score(raw.Xv, raw.yv);
    auto pred_voting = clf.predict_proba(raw.Xv);
    REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
    REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
    REQUIRE(pred_voting[83][2] == Catch::Approx(1.0).epsilon(raw.epsilon));
    REQUIRE(pred_proba[83][2] == Catch::Approx(0.86121525).epsilon(raw.epsilon));
    REQUIRE(clf.dump_cpt() == "");
    REQUIRE(clf.topological_order() == std::vector<std::string>());
}
TEST_CASE("Order asc, desc & random", "[BoostAODE]")
{
    auto raw = RawDatasets("glass", true);
    std::map<std::string, double> scores{
        {"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
    };
    for (const std::string& order : { "asc", "desc", "rand" }) {
        auto clf = bayesnet::BoostAODE();
        clf.setHyperparameters({
            {"order", order},
            {"bisection", false},
            {"maxTolerance", 1},
            {"convergence", false},
        });
        clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
        auto score = clf.score(raw.Xv, raw.yv);
        auto scoret = clf.score(raw.Xt, raw.yt);
        INFO("BoostAODE order: " + order);
        REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
        REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
    }
}
TEST_CASE("Oddities", "[BoostAODE]")
{
    auto clf = bayesnet::BoostAODE();
    auto raw = RawDatasets("iris", true);
    auto bad_hyper = nlohmann::json{
        { { "order", "duck" } },
        { { "select_features", "duck" } },
        { { "maxTolerance", 0 } },
        { { "maxTolerance", 5 } },
    };
    for (const auto& hyper : bad_hyper.items()) {
        INFO("BoostAODE hyper: " + hyper.value().dump());
        REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
    }
    REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0 } }), std::invalid_argument);
    auto bad_hyper_fit = nlohmann::json{
        { { "select_features","IWSS" }, { "threshold", -0.01 } },
        { { "select_features","IWSS" }, { "threshold", 0.51 } },
        { { "select_features","FCBF" }, { "threshold", 1e-8 } },
        { { "select_features","FCBF" }, { "threshold", 1.01 } },
    };
    for (const auto& hyper : bad_hyper_fit.items()) {
        INFO("BoostAODE hyper: " + hyper.value().dump());
        clf.setHyperparameters(hyper.value());
        REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv), std::invalid_argument);
    }
}

TEST_CASE("Bisection", "[BoostAODE]")
{
    auto clf = bayesnet::BoostAODE();
    auto raw = RawDatasets("mfeat-factors", true);
    clf.setHyperparameters({
        {"bisection", true},
        {"maxTolerance", 3},
        {"convergence", true},
        {"block_update", false},
    });
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    REQUIRE(clf.getNumberOfNodes() == 217);
    REQUIRE(clf.getNumberOfEdges() == 431);
    REQUIRE(clf.getNotes().size() == 3);
    REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
    REQUIRE(clf.getNotes()[1] == "Used features in train: 16 of 216");
    REQUIRE(clf.getNotes()[2] == "Number of models: 1");
    auto score = clf.score(raw.Xv, raw.yv);
    auto scoret = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
    REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
}

TEST_CASE("Block Update", "[BoostAODE]")
{
    auto clf = bayesnet::BoostAODE();
    auto raw = RawDatasets("mfeat-factors", true);
    clf.setHyperparameters({
        {"bisection", true},
        {"block_update", true},
        {"maxTolerance", 3},
        {"convergence", true},
    });
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    REQUIRE(clf.getNumberOfNodes() == 217);
    REQUIRE(clf.getNumberOfEdges() == 431);
    REQUIRE(clf.getNotes().size() == 3);
    REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
    REQUIRE(clf.getNotes()[1] == "Used features in train: 16 of 216");
    REQUIRE(clf.getNotes()[2] == "Number of models: 1");
    auto score = clf.score(raw.Xv, raw.yv);
    auto scoret = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
    REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
}
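The BoostAODE cases above double as a summary of its hyperparameters: select_features (CFS, IWSS or FCBF, the latter two with a threshold), order (asc, desc or rand), bisection, maxTolerance, convergence, block_update and predict_voting, all passed as a JSON object to setHyperparameters() before fit(). A hedged usage sketch assembled from those tests is shown below; it reuses the RawDatasets helper from TestUtils.h, combines options that the tests only exercise separately, and the reported figures will depend on the dataset and library version.

#include <iostream>
#include "bayesnet/ensembles/BoostAODE.h"
#include "TestUtils.h" // RawDatasets helper from the test suite (assumed available)

int main()
{
    auto raw = RawDatasets("glass", true);
    auto clf = bayesnet::BoostAODE();   // constructed as in the tests above
    clf.setHyperparameters({
        { "select_features", "CFS" },   // or "IWSS"/"FCBF" together with a "threshold"
        { "order", "asc" },             // "asc", "desc" or "rand"
        { "bisection", false },
        { "maxTolerance", 1 },
        { "convergence", false },
    });
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    for (const auto& note : clf.getNotes())               // e.g. "Number of models: 9"
        std::cout << note << "\n";
    std::cout << "score: " << clf.score(raw.Xv, raw.yv) << "\n";
    return 0;
}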
@@ -1,6 +1,13 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/catch_approx.hpp>
 #include <catch2/generators/catch_generators.hpp>
+#include <catch2/matchers/catch_matchers.hpp>
 #include "bayesnet/utils/BayesMetrics.h"
 #include "bayesnet/feature_selection/CFS.h"
 #include "bayesnet/feature_selection/FCBF.h"
@@ -69,3 +76,14 @@ TEST_CASE("Features Selected", "[FeatureSelection]")
 }
 }
 }
+TEST_CASE("Oddities", "[FeatureSelection]")
+{
+auto raw = RawDatasets("iris", true);
+// FCBF Limits
+REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument);
+REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7");
+REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument);
+REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]");
+REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument);
+REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]");
+}
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #include "TestUtils.h"
 #include "bayesnet/config.h"
 
@@ -1,3 +1,9 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
 #ifndef TEST_UTILS_H
 #define TEST_UTILS_H
 #include <torch/torch.h>
File diff suppressed because it is too large
35 update_coverage.py Normal file
@@ -0,0 +1,35 @@
# ***************************************************************
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
# SPDX-FileType: SOURCE
# SPDX-License-Identifier: MIT
# ***************************************************************

import subprocess
import sys

readme_file = "README.md"
print("Updating coverage...")
# Generate badge line
output = subprocess.check_output(
    "lcov --summary " + sys.argv[1] + "/coverage.info|cut -d' ' -f4 |head -2|"
    "tail -1",
    shell=True,
)
value = float(output.decode("utf-8").strip().replace("%", ""))
if value < 90:
    print("⛔Coverage is less than 90%. I won't update the badge.")
    sys.exit(1)
percentage = output.decode("utf-8").strip().replace(".", ",")
coverage_line = (
    f""
)
# Update README.md
with open(readme_file, "r") as f:
    lines = f.readlines()
with open(readme_file, "w") as f:
    for line in lines:
        if "Coverage" in line:
            f.write(coverage_line + "\n")
        else:
            f.write(line)
print(f"✅Coverage updated with value: {percentage}")
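Judging from the lcov call and the use of sys.argv[1], this script is presumably run after a coverage build as python update_coverage.py <build_dir>, with coverage.info already generated in that directory; it then rewrites the line containing "Coverage" in README.md with a freshly built badge and refuses to touch it when total line coverage falls below 90%.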