Compare commits
47 Commits
Author | SHA1 | Date | |
---|---|---|---|
b956aa3873
|
|||
1f06631f69
|
|||
6dd589bd61
|
|||
6475f10825
|
|||
7d906b24d1 | |||
464fe029ea
|
|||
09a1369122
|
|||
503ad687dc
|
|||
8eeaa1beee
|
|||
a2de1c9522
|
|||
cf9b5716ac
|
|||
1326891d6a
|
|||
da2a969686
|
|||
f9553a38d7
|
|||
8b6121eaf2
|
|||
fbbed8ad68
|
|||
a1178554ff
|
|||
d12a779bd9 | |||
a8fc29e2b2
|
|||
50543e7929
|
|||
9014649a0d
|
|||
0d6a081d01
|
|||
46cb8d30eb
|
|||
cb26ef2562
|
|||
df45fddd45
|
|||
a1f9086780
|
|||
e55365c41c
|
|||
de23303801
|
|||
56b5158ff3
|
|||
a5a29eb66f
|
|||
d5eba5710a
|
|||
8c61840d81
|
|||
bc0b938cfc
|
|||
58d5a35a35
|
|||
45c048f635
|
|||
6e854dfda3
|
|||
5826702fc7
|
|||
42e2be3263
|
|||
827b0dd893
|
|||
882d905a28
|
|||
422129802a
|
|||
eb97a5a14b
|
|||
eb72f13bf0
|
|||
5db168d87b
|
|||
8f3bb47cfd
|
|||
1986d05c34
|
|||
7c98ba9bea
|
39
.clang-uml
Normal file
39
.clang-uml
Normal file
@@ -0,0 +1,39 @@
|
||||
compilation_database_dir: build_debug
|
||||
output_directory: diagrams
|
||||
diagrams:
|
||||
BayesNet:
|
||||
type: class
|
||||
glob:
|
||||
- bayesnet/*.h
|
||||
- bayesnet/classifiers/*.h
|
||||
- bayesnet/classifiers/*.cc
|
||||
- bayesnet/ensembles/*.h
|
||||
- bayesnet/ensembles/*.cc
|
||||
- bayesnet/feature_selection/*.h
|
||||
- bayesnet/feature_selection/*.cc
|
||||
- bayesnet/network/*.h
|
||||
- bayesnet/network/*.cc
|
||||
- bayesnet/utils/*.h
|
||||
- bayesnet/utils/*.cc
|
||||
include:
|
||||
# Only include entities from the following namespaces
|
||||
namespaces:
|
||||
- bayesnet
|
||||
exclude:
|
||||
access:
|
||||
- private
|
||||
plantuml:
|
||||
style:
|
||||
# Apply this style to all classes in the diagram
|
||||
class: "#aliceblue;line:blue;line.dotted;text:blue"
|
||||
# Apply this style to all packages in the diagram
|
||||
package: "#back:grey"
|
||||
# Make all template instantiation relations point upwards and draw them
|
||||
# as green and dotted lines
|
||||
instantiation: "up[#green,dotted]"
|
||||
cmd: "/usr/bin/plantuml -tsvg \"diagrams/{}.puml\""
|
||||
before:
|
||||
- 'title clang-uml class diagram model'
|
||||
mermaid:
|
||||
before:
|
||||
- 'classDiagram'
|
12
.github/workflows/main.yml
vendored
Normal file
12
.github/workflows/main.yml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
name: CI
|
||||
on: push
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- run: sudo apt-get install ninja-build cmake
|
||||
- run: ninja --version
|
||||
- run: cmake --version
|
||||
- run: g++ --version
|
38
.vscode/c_cpp_properties.json
vendored
38
.vscode/c_cpp_properties.json
vendored
@@ -3,15 +3,47 @@
|
||||
{
|
||||
"name": "Mac",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**"
|
||||
"/Users/rmontanana/Code/BayesNet/**"
|
||||
],
|
||||
"defines": [],
|
||||
"macFrameworkPath": [
|
||||
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks"
|
||||
"/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include"
|
||||
],
|
||||
"cStandard": "c17",
|
||||
"cppStandard": "c++17",
|
||||
"compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json"
|
||||
"compileCommands": "",
|
||||
"intelliSenseMode": "macos-clang-arm64",
|
||||
"mergeConfigurations": false,
|
||||
"browse": {
|
||||
"path": [
|
||||
"/Users/rmontanana/Code/BayesNet/**",
|
||||
"${workspaceFolder}"
|
||||
],
|
||||
"limitSymbolsToIncludedHeaders": true
|
||||
},
|
||||
"configurationProvider": "ms-vscode.cmake-tools"
|
||||
},
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"/home/rmontanana/Code/BayesNet/**",
|
||||
"/home/rmontanana/Code/libtorch/include/torch/csrc/api/include/",
|
||||
"/home/rmontanana/Code/BayesNet/lib/"
|
||||
],
|
||||
"defines": [],
|
||||
"cStandard": "c17",
|
||||
"cppStandard": "c++17",
|
||||
"intelliSenseMode": "linux-gcc-x64",
|
||||
"mergeConfigurations": false,
|
||||
"compilerPath": "/usr/bin/g++",
|
||||
"browse": {
|
||||
"path": [
|
||||
"/home/rmontanana/Code/BayesNet/**",
|
||||
"${workspaceFolder}"
|
||||
],
|
||||
"limitSymbolsToIncludedHeaders": true
|
||||
},
|
||||
"configurationProvider": "ms-vscode.cmake-tools"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
|
33
.vscode/launch.json
vendored
33
.vscode/launch.json
vendored
@@ -8,18 +8,41 @@
|
||||
"program": "${workspaceFolder}/build_release/sample/bayesnet_sample",
|
||||
"args": [
|
||||
"${workspaceFolder}/tests/data/glass.arff"
|
||||
],
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "test",
|
||||
"program": "${workspaceFolder}/build_debug/tests/unit_tests_bayesnet",
|
||||
"program": "${workspaceFolder}/build_debug/tests/TestBayesNet",
|
||||
"args": [
|
||||
//"-c=\"Metrics Test\"",
|
||||
// "-s",
|
||||
"Block Update"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/build_debug/tests",
|
||||
"cwd": "${workspaceFolder}/build_debug/tests"
|
||||
},
|
||||
{
|
||||
"name": "(gdb) Launch",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "enter program name, for example ${workspaceFolder}/a.out",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${fileDirname}",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"MIMode": "gdb",
|
||||
"setupCommands": [
|
||||
{
|
||||
"description": "Enable pretty-printing for gdb",
|
||||
"text": "-enable-pretty-printing",
|
||||
"ignoreFailures": true
|
||||
},
|
||||
{
|
||||
"description": "Set Disassembly Flavor to Intel",
|
||||
"text": "-gdb-set disassembly-flavor intel",
|
||||
"ignoreFailures": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
16
CHANGELOG.md
16
CHANGELOG.md
@@ -5,22 +5,32 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [unreleased]
|
||||
## [1.0.5] 2024-04-20
|
||||
|
||||
### Added
|
||||
|
||||
- Install command and instructions in README.md
|
||||
- Prefix to the install command, so the package can be installed in any location.
|
||||
- The 'block_update' hyperparameter to the BoostAODE class, to control the way weights/significances are updated. Default value is false.
|
||||
- HTML report of coverage in the coverage folder. It is created with *make viewcoverage*
|
||||
- Badges of coverage and code quality (codacy) in README.md. Coverage badge is updated with *make viewcoverage*
|
||||
- Tests to reach 97% of coverage.
|
||||
- Copyright header to source files.
|
||||
- Diagrams to README.md: UML class diagram & dependency diagram
|
||||
- Action to create diagrams to Makefile: *make diagrams*
|
||||
|
||||
### Changed
|
||||
|
||||
- Sample app now is a separate target in the Makefile and shows how to use the library with a sample dataset
|
||||
- The worse model count in BoostAODE is reset to 0 every time a new model produces better accuracy, so the tolerance of the model is meant to be the number of **consecutive** models that produce worse accuracy.
|
||||
- Default hyperparameter values in BoostAODE: bisection is true, maxTolerance is 3, convergence is true
|
||||
|
||||
## [1.0.4] 2024-03-06
|
||||
|
||||
### Added
|
||||
|
||||
- Change _ascending_ hyperparameter to _order_ with these possible values _{"asc", "desc", "rand"}_, Default is _"desc"_.
|
||||
- Add the _predict_single_ hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true.
|
||||
- Change *ascending* hyperparameter to *order* with these possible values *{"asc", "desc", "rand"}*, Default is *"desc"*.
|
||||
- Add the *predict_single* hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true.
|
||||
- sample app to show how to use the library (make sample)
|
||||
|
||||
### Changed
|
||||
|
5
CMakeGraphVizOptions.cmake
Normal file
5
CMakeGraphVizOptions.cmake
Normal file
@@ -0,0 +1,5 @@
|
||||
# Set the default graph title
|
||||
set(GRAPHVIZ_GRAPH_NAME "BayesNet dependency graph")
|
||||
|
||||
set(GRAPHVIZ_SHARED_LIBS OFF)
|
||||
set(GRAPHVIZ_STATIC_LIBS ON)
|
@@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(BayesNet
|
||||
VERSION 1.0.4
|
||||
VERSION 1.0.5
|
||||
DESCRIPTION "Bayesian Network and basic classifiers Library."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
|
||||
LANGUAGES CXX
|
||||
@@ -25,6 +25,8 @@ set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -O0 -g")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||
# Options
|
||||
# -------
|
||||
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
|
||||
@@ -48,7 +50,6 @@ if (CODE_COVERAGE)
|
||||
enable_testing()
|
||||
include(CodeCoverage)
|
||||
MESSAGE("Code coverage enabled")
|
||||
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
|
||||
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
endif (CODE_COVERAGE)
|
||||
|
||||
|
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) <year> <copyright holders>
|
||||
Copyright (c) 2023 Ricardo Montañana Gómez
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
|
48
Makefile
48
Makefile
@@ -1,11 +1,15 @@
|
||||
SHELL := /bin/bash
|
||||
.DEFAULT_GOAL := help
|
||||
.PHONY: coverage setup help buildr buildd test clean debug release sample
|
||||
.PHONY: viewcoverage coverage setup help install uninstall diagrams buildr buildd test clean debug release sample updatebadge
|
||||
|
||||
f_release = build_release
|
||||
f_debug = build_debug
|
||||
f_diagrams = diagrams
|
||||
app_targets = BayesNet
|
||||
test_targets = unit_tests_bayesnet
|
||||
test_targets = TestBayesNet
|
||||
clang-uml = clang-uml
|
||||
plantuml = plantuml
|
||||
dot = dot
|
||||
n_procs = -j 16
|
||||
|
||||
define ClearTests
|
||||
@@ -29,12 +33,23 @@ setup: ## Install dependencies for tests and coverage
|
||||
fi
|
||||
@if [ "$(shell uname)" = "Linux" ]; then \
|
||||
pip install gcovr; \
|
||||
sudo dnf install lcov;\
|
||||
fi
|
||||
@echo "* You should install plantuml & graphviz for the diagrams"
|
||||
|
||||
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
|
||||
diagrams: ## Create a UML class diagram & dependency diagram of the project (diagrams/BayesNet.svg)
|
||||
@which $(plantuml) || (echo ">>> Please install plantuml"; exit 1)
|
||||
@which $(dot) || (echo ">>> Please install graphviz"; exit 1)
|
||||
@which $(clang-uml) || (echo ">>> Please install clang-uml"; exit 1)
|
||||
@export PLANTUML_LIMIT_SIZE=16384
|
||||
@echo ">>> Creating UML class diagram of the project...";
|
||||
@$(clang-uml) -p
|
||||
@cd $(f_diagrams); \
|
||||
$(plantuml) -tsvg BayesNet.puml
|
||||
@echo ">>> Creating dependency graph diagram of the project...";
|
||||
$(MAKE) debug
|
||||
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
|
||||
cd $(f_debug) && cmake .. --graphviz=dependency.dot
|
||||
@$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg
|
||||
|
||||
buildd: ## Build the debug targets
|
||||
cmake --build $(f_debug) -t $(app_targets) $(n_procs)
|
||||
@@ -52,9 +67,10 @@ uninstall: ## Uninstall library
|
||||
xargs rm < $(f_release)/install_manifest.txt
|
||||
@echo ">>> Done";
|
||||
|
||||
prefix = "/usr/local"
|
||||
install: ## Install library
|
||||
@echo ">>> Installing BayesNet...";
|
||||
@cmake --install $(f_release)
|
||||
@cmake --install $(f_release) --prefix $(prefix)
|
||||
@echo ">>> Done";
|
||||
|
||||
debug: ## Build a debug version of the project
|
||||
@@ -85,9 +101,11 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
|
||||
@$(MAKE) clean
|
||||
@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
|
||||
@for t in $(test_targets); do \
|
||||
echo ">>> Running $$t...";\
|
||||
if [ -f $(f_debug)/tests/$$t ]; then \
|
||||
cd $(f_debug)/tests ; \
|
||||
./$$t $(opt) ; \
|
||||
cd ../.. ; \
|
||||
fi ; \
|
||||
done
|
||||
@echo ">>> Done";
|
||||
@@ -98,6 +116,26 @@ coverage: ## Run tests and generate coverage report (build/index.html)
|
||||
@gcovr $(f_debug)/tests
|
||||
@echo ">>> Done";
|
||||
|
||||
viewcoverage: ## Run tests, generate the HTML coverage report and open it (build_debug/tests/coverage/index.html)
|
||||
@echo ">>> Building tests with coverage..."
|
||||
@$(MAKE) coverage
|
||||
@echo ">>> Building report..."
|
||||
@cd $(f_debug)/tests; \
|
||||
lcov --directory . --capture --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'bayesnet/utils/loguru.*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
genhtml coverage.info --output-directory coverage >/dev/null 2>&1;
|
||||
@$(MAKE) updatebadge
|
||||
@xdg-open $(f_debug)/tests/coverage/index.html || open $(f_debug)/tests/coverage/index.html 2>/dev/null
|
||||
@echo ">>> Done";
|
||||
|
||||
updatebadge: ## Update the coverage badge in README.md
|
||||
@echo ">>> Updating coverage badge..."
|
||||
@env python update_coverage.py $(f_debug)/tests
|
||||
@echo ">>> Done";
|
||||
|
||||
help: ## Show help message
|
||||
@IFS=$$'\n' ; \
|
||||
|
28
README.md
28
README.md
@@ -1,10 +1,24 @@
|
||||
# BayesNet
|
||||
|
||||
[](https://opensource.org/licenses/MIT)
|
||||

|
||||
[](<https://opensource.org/licenses/MIT>)
|
||||

|
||||
[](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
||||

|
||||

|
||||
|
||||
Bayesian Network Classifiers using libtorch from scratch
|
||||
|
||||
## Installation
|
||||
## Dependencies
|
||||
|
||||
The only external dependency is [libtorch](https://pytorch.org/cppdocs/installing.html) which can be installed with the following commands:
|
||||
|
||||
```bash
|
||||
wget https://download.pytorch.org/libtorch/nightly/cpu/libtorch-shared-with-deps-latest.zip
|
||||
unzip libtorch-shared-with-deps-latest.zip
|
||||
```
|
||||
|
||||
## Setup
|
||||
|
||||
### Release
|
||||
|
||||
@@ -34,3 +48,13 @@ make sample fname=tests/data/glass.arff
|
||||
## Models
|
||||
|
||||
### [BoostAODE](docs/BoostAODE.md)
|
||||
|
||||
## Diagrams
|
||||
|
||||
### UML Class Diagram
|
||||
|
||||

|
||||
|
||||
### Dependency Diagram
|
||||
|
||||

|
||||
|
@@ -1,5 +1,10 @@
|
||||
#ifndef BASE_H
|
||||
#define BASE_H
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <torch/torch.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
@@ -30,7 +35,7 @@ namespace bayesnet {
|
||||
virtual std::string getVersion() = 0;
|
||||
std::vector<std::string> virtual topological_order() = 0;
|
||||
std::vector<std::string> virtual getNotes() const = 0;
|
||||
void virtual dump_cpt()const = 0;
|
||||
std::string virtual dump_cpt()const = 0;
|
||||
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
|
||||
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
|
||||
protected:
|
||||
@@ -38,4 +43,3 @@ namespace bayesnet {
|
||||
std::vector<std::string> validHyperparameters;
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,3 +1,10 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <sstream>
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "Classifier.h"
|
||||
|
||||
@@ -10,7 +17,7 @@ namespace bayesnet {
|
||||
this->className = className;
|
||||
this->states = states;
|
||||
m = dataset.size(1);
|
||||
n = dataset.size(0) - 1;
|
||||
n = features.size();
|
||||
checkFitParameters();
|
||||
auto n_classes = states.at(className).size();
|
||||
metrics = Metrics(dataset, features, className, n_classes);
|
||||
@@ -27,10 +34,11 @@ namespace bayesnet {
|
||||
dataset = torch::cat({ dataset, yresized }, 0);
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
std::cout << "X dimensions: " << dataset.sizes() << "\n";
|
||||
std::cout << "y dimensions: " << ytmp.sizes() << "\n";
|
||||
exit(1);
|
||||
std::stringstream oss;
|
||||
oss << "* Error in X and y dimensions *\n";
|
||||
oss << "X dimensions: " << dataset.sizes() << "\n";
|
||||
oss << "y dimensions: " << ytmp.sizes();
|
||||
throw std::runtime_error(oss.str());
|
||||
}
|
||||
}
|
||||
void Classifier::trainModel(const torch::Tensor& weights)
|
||||
@@ -73,11 +81,11 @@ namespace bayesnet {
|
||||
if (torch::is_floating_point(dataset)) {
|
||||
throw std::invalid_argument("dataset (X, y) must be of type Integer");
|
||||
}
|
||||
if (n != features.size()) {
|
||||
throw std::invalid_argument("Classifier: X " + std::to_string(n) + " and features " + std::to_string(features.size()) + " must have the same number of features");
|
||||
if (dataset.size(0) - 1 != features.size()) {
|
||||
throw std::invalid_argument("Classifier: X " + std::to_string(dataset.size(0) - 1) + " and features " + std::to_string(features.size()) + " must have the same number of features");
|
||||
}
|
||||
if (states.find(className) == states.end()) {
|
||||
throw std::invalid_argument("className not found in states");
|
||||
throw std::invalid_argument("class name not found in states");
|
||||
}
|
||||
for (auto feature : features) {
|
||||
if (states.find(feature) == states.end()) {
|
||||
@@ -173,12 +181,14 @@ namespace bayesnet {
|
||||
{
|
||||
return model.topological_sort();
|
||||
}
|
||||
void Classifier::dump_cpt() const
|
||||
std::string Classifier::dump_cpt() const
|
||||
{
|
||||
model.dump_cpt();
|
||||
return model.dump_cpt();
|
||||
}
|
||||
void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
|
||||
{
|
||||
//For classifiers that don't have hyperparameters
|
||||
if (!hyperparameters.empty()) {
|
||||
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CLASSIFIER_H
|
||||
#define CLASSIFIER_H
|
||||
#include <torch/torch.h>
|
||||
@@ -30,7 +36,7 @@ namespace bayesnet {
|
||||
std::vector<std::string> show() const override;
|
||||
std::vector<std::string> topological_order() override;
|
||||
std::vector<std::string> getNotes() const override { return notes; }
|
||||
void dump_cpt() const override;
|
||||
std::string dump_cpt() const override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
|
||||
protected:
|
||||
bool fitted;
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "KDB.h"
|
||||
|
||||
namespace bayesnet {
|
||||
@@ -6,14 +12,18 @@ namespace bayesnet {
|
||||
validHyperparameters = { "k", "theta" };
|
||||
|
||||
}
|
||||
void KDB::setHyperparameters(const nlohmann::json& hyperparameters)
|
||||
void KDB::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("k")) {
|
||||
k = hyperparameters["k"];
|
||||
hyperparameters.erase("k");
|
||||
}
|
||||
if (hyperparameters.contains("theta")) {
|
||||
theta = hyperparameters["theta"];
|
||||
hyperparameters.erase("theta");
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
void KDB::buildModel(const torch::Tensor& weights)
|
||||
{
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef KDB_H
|
||||
#define KDB_H
|
||||
#include <torch/torch.h>
|
||||
@@ -14,7 +20,7 @@ namespace bayesnet {
|
||||
public:
|
||||
explicit KDB(int k, float theta = 0.03);
|
||||
virtual ~KDB() = default;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
||||
std::vector<std::string> graph(const std::string& name = "KDB") const override;
|
||||
};
|
||||
}
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "KDBLd.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef KDBLD_H
|
||||
#define KDBLD_H
|
||||
#include "Proposal.h"
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <ArffFiles.h>
|
||||
#include "Proposal.h"
|
||||
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef PROPOSAL_H
|
||||
#define PROPOSAL_H
|
||||
#include <string>
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "SPODE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef SPODE_H
|
||||
#define SPODE_H
|
||||
#include "Classifier.h"
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "SPODELd.h"
|
||||
|
||||
namespace bayesnet {
|
||||
@@ -5,25 +11,23 @@ namespace bayesnet {
|
||||
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
{
|
||||
checkInput(X_, y_);
|
||||
features = features_;
|
||||
className = className_;
|
||||
Xf = X_;
|
||||
y = y_;
|
||||
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
|
||||
states = fit_local_discretization(y);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
|
||||
SPODE::fit(dataset, features, className, states);
|
||||
states = localDiscretizationProposal(states, model);
|
||||
return *this;
|
||||
return commonFit(features_, className_, states_);
|
||||
}
|
||||
|
||||
SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
{
|
||||
if (!torch::is_floating_point(dataset)) {
|
||||
throw std::runtime_error("Dataset must be a floating point tensor");
|
||||
}
|
||||
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
|
||||
y = dataset.index({ -1, "..." }).clone();
|
||||
y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
|
||||
return commonFit(features_, className_, states_);
|
||||
}
|
||||
|
||||
SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
{
|
||||
features = features_;
|
||||
className = className_;
|
||||
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
|
||||
@@ -34,7 +38,6 @@ namespace bayesnet {
|
||||
states = localDiscretizationProposal(states, model);
|
||||
return *this;
|
||||
}
|
||||
|
||||
torch::Tensor SPODELd::predict(torch::Tensor& X)
|
||||
{
|
||||
auto Xt = prepareX(X);
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef SPODELD_H
|
||||
#define SPODELD_H
|
||||
#include "SPODE.h"
|
||||
@@ -10,6 +16,7 @@ namespace bayesnet {
|
||||
virtual ~SPODELd() = default;
|
||||
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states);
|
||||
std::vector<std::string> graph(const std::string& name = "SPODE") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
static inline std::string version() { return "0.0.1"; };
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "TAN.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef TAN_H
|
||||
#define TAN_H
|
||||
#include "Classifier.h"
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "TANLd.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef TANLD_H
|
||||
#define TANLD_H
|
||||
#include "TAN.h"
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "AODE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
@@ -13,9 +19,7 @@ namespace bayesnet {
|
||||
predict_voting = hyperparameters["predict_voting"];
|
||||
hyperparameters.erase("predict_voting");
|
||||
}
|
||||
if (!hyperparameters.empty()) {
|
||||
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
void AODE::buildModel(const torch::Tensor& weights)
|
||||
{
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef AODE_H
|
||||
#define AODE_H
|
||||
#include "bayesnet/classifiers/SPODE.h"
|
||||
@@ -5,7 +11,7 @@
|
||||
namespace bayesnet {
|
||||
class AODE : public Ensemble {
|
||||
public:
|
||||
AODE(bool predict_voting = true);
|
||||
AODE(bool predict_voting = false);
|
||||
virtual ~AODE() {};
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
std::vector<std::string> graph(const std::string& title = "AODE") const override;
|
||||
|
@@ -1,21 +1,14 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "AODELd.h"
|
||||
|
||||
namespace bayesnet {
|
||||
AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
|
||||
{
|
||||
validHyperparameters = { "predict_voting" };
|
||||
|
||||
}
|
||||
void AODELd::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("predict_voting")) {
|
||||
predict_voting = hyperparameters["predict_voting"];
|
||||
hyperparameters.erase("predict_voting");
|
||||
}
|
||||
if (!hyperparameters.empty()) {
|
||||
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
|
||||
}
|
||||
}
|
||||
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
{
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef AODELD_H
|
||||
#define AODELD_H
|
||||
#include "bayesnet/classifiers/Proposal.h"
|
||||
@@ -10,7 +16,6 @@ namespace bayesnet {
|
||||
AODELd(bool predict_voting = true);
|
||||
virtual ~AODELd() = default;
|
||||
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
|
||||
protected:
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <set>
|
||||
#include <functional>
|
||||
#include <limits.h>
|
||||
@@ -9,21 +15,12 @@
|
||||
#include "BoostAODE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
struct {
|
||||
std::string CFS = "CFS";
|
||||
std::string FCBF = "FCBF";
|
||||
std::string IWSS = "IWSS";
|
||||
}SelectFeatures;
|
||||
struct {
|
||||
std::string ASC = "asc";
|
||||
std::string DESC = "desc";
|
||||
std::string RAND = "rand";
|
||||
}Orders;
|
||||
|
||||
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
|
||||
{
|
||||
validHyperparameters = {
|
||||
"repeatSparent", "maxModels", "order", "convergence", "threshold",
|
||||
"select_features", "tolerance", "predict_voting", "predict_single"
|
||||
"maxModels", "bisection", "order", "convergence", "threshold",
|
||||
"select_features", "maxTolerance", "predict_voting", "block_update"
|
||||
};
|
||||
|
||||
}
|
||||
@@ -38,8 +35,6 @@ namespace bayesnet {
|
||||
if (convergence) {
|
||||
// Prepare train & validation sets from train data
|
||||
auto fold = folding::StratifiedKFold(5, y_, 271);
|
||||
dataset_ = torch::clone(dataset);
|
||||
// save input dataset
|
||||
auto [train, test] = fold.getFold(0);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
@@ -51,9 +46,9 @@ namespace bayesnet {
|
||||
dataset = X_train;
|
||||
m = X_train.size(1);
|
||||
auto n_classes = states.at(className).size();
|
||||
metrics = Metrics(dataset, features, className, n_classes);
|
||||
// Build dataset with train data
|
||||
buildDataset(y_train);
|
||||
metrics = Metrics(dataset, features, className, n_classes);
|
||||
} else {
|
||||
// Use all data to train
|
||||
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
|
||||
@@ -63,14 +58,6 @@ namespace bayesnet {
|
||||
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("repeatSparent")) {
|
||||
repeatSparent = hyperparameters["repeatSparent"];
|
||||
hyperparameters.erase("repeatSparent");
|
||||
}
|
||||
if (hyperparameters.contains("maxModels")) {
|
||||
maxModels = hyperparameters["maxModels"];
|
||||
hyperparameters.erase("maxModels");
|
||||
}
|
||||
if (hyperparameters.contains("order")) {
|
||||
std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
|
||||
order_algorithm = hyperparameters["order"];
|
||||
@@ -83,17 +70,19 @@ namespace bayesnet {
|
||||
convergence = hyperparameters["convergence"];
|
||||
hyperparameters.erase("convergence");
|
||||
}
|
||||
if (hyperparameters.contains("predict_single")) {
|
||||
predict_single = hyperparameters["predict_single"];
|
||||
hyperparameters.erase("predict_single");
|
||||
if (hyperparameters.contains("bisection")) {
|
||||
bisection = hyperparameters["bisection"];
|
||||
hyperparameters.erase("bisection");
|
||||
}
|
||||
if (hyperparameters.contains("threshold")) {
|
||||
threshold = hyperparameters["threshold"];
|
||||
hyperparameters.erase("threshold");
|
||||
}
|
||||
if (hyperparameters.contains("tolerance")) {
|
||||
tolerance = hyperparameters["tolerance"];
|
||||
hyperparameters.erase("tolerance");
|
||||
if (hyperparameters.contains("maxTolerance")) {
|
||||
maxTolerance = hyperparameters["maxTolerance"];
|
||||
if (maxTolerance < 1 || maxTolerance > 4)
|
||||
throw std::invalid_argument("Invalid maxTolerance value, must be greater in [1, 4]");
|
||||
hyperparameters.erase("maxTolerance");
|
||||
}
|
||||
if (hyperparameters.contains("predict_voting")) {
|
||||
predict_voting = hyperparameters["predict_voting"];
|
||||
@@ -109,9 +98,11 @@ namespace bayesnet {
|
||||
}
|
||||
hyperparameters.erase("select_features");
|
||||
}
|
||||
if (!hyperparameters.empty()) {
|
||||
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
|
||||
if (hyperparameters.contains("block_update")) {
|
||||
block_update = hyperparameters["block_update"];
|
||||
hyperparameters.erase("block_update");
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
|
||||
{
|
||||
@@ -140,9 +131,105 @@ namespace bayesnet {
|
||||
}
|
||||
return { weights, alpha_t, terminate };
|
||||
}
|
||||
std::unordered_set<int> BoostAODE::initializeModels()
|
||||
std::tuple<torch::Tensor&, double, bool> BoostAODE::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
|
||||
{
|
||||
std::unordered_set<int> featuresUsed;
|
||||
/* Update Block algorithm
|
||||
k = # of models in block
|
||||
n_models = # of models in ensemble to make predictions
|
||||
n_models_bak = # models saved
|
||||
models = vector of models to make predictions
|
||||
models_bak = models not used to make predictions
|
||||
significances_bak = backup of significances vector
|
||||
|
||||
Case list
|
||||
A) k = 1, n_models = 1 => n = 0 , n_models = n + k
|
||||
B) k = 1, n_models = n + 1 => n_models = n + k
|
||||
C) k > 1, n_models = k + 1 => n= 1, n_models = n + k
|
||||
D) k > 1, n_models = k => n = 0, n_models = n + k
|
||||
E) k > 1, n_models = k + n => n_models = n + k
|
||||
|
||||
A, D) n=0, k > 0, n_models == k
|
||||
1. n_models_bak <- n_models
|
||||
2. significances_bak <- significances
|
||||
3. significances = vector(k, 1)
|
||||
4. Don’t move any classifiers out of models
|
||||
5. n_models <- k
|
||||
6. Make prediction, compute alpha, update weights
|
||||
7. Don’t restore any classifiers to models
|
||||
8. significances <- significances_bak
|
||||
9. Update last k significances
|
||||
10. n_models <- n_models_bak
|
||||
|
||||
B, C, E) n > 0, k > 0, n_models == n + k
|
||||
1. n_models_bak <- n_models
|
||||
2. significances_bak <- significances
|
||||
3. significances = vector(k, 1)
|
||||
4. Move first n classifiers to models_bak
|
||||
5. n_models <- k
|
||||
6. Make prediction, compute alpha, update weights
|
||||
7. Insert classifiers in models_bak to be the first n models
|
||||
8. significances <- significances_bak
|
||||
9. Update last k significances
|
||||
10. n_models <- n_models_bak
|
||||
*/
|
||||
//
|
||||
// Make predict with only the last k models
|
||||
//
|
||||
std::unique_ptr<Classifier> model;
|
||||
std::vector<std::unique_ptr<Classifier>> models_bak;
|
||||
// 1. n_models_bak <- n_models 2. significances_bak <- significances
|
||||
auto significance_bak = significanceModels;
|
||||
auto n_models_bak = n_models;
|
||||
// 3. significances = vector(k, 1)
|
||||
significanceModels = std::vector<double>(k, 1.0);
|
||||
// 4. Move first n classifiers to models_bak
|
||||
// backup the first n_models - k models (if n_models == k, don't backup any)
|
||||
for (int i = 0; i < n_models - k; ++i) {
|
||||
model = std::move(models[0]);
|
||||
models.erase(models.begin());
|
||||
models_bak.push_back(std::move(model));
|
||||
}
|
||||
assert(models.size() == k);
|
||||
// 5. n_models <- k
|
||||
n_models = k;
|
||||
// 6. Make prediction, compute alpha, update weights
|
||||
auto ypred = predict(X_train);
|
||||
//
|
||||
// Update weights
|
||||
//
|
||||
double alpha_t;
|
||||
bool terminate;
|
||||
std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
|
||||
//
|
||||
// Restore the models if needed
|
||||
//
|
||||
// 7. Insert classifiers in models_bak to be the first n models
|
||||
// if n_models_bak == k, don't restore any, because none of them were moved
|
||||
if (k != n_models_bak) {
|
||||
// Insert in the same order as they were extracted
|
||||
int bak_size = models_bak.size();
|
||||
for (int i = 0; i < bak_size; ++i) {
|
||||
model = std::move(models_bak[bak_size - 1 - i]);
|
||||
models_bak.erase(models_bak.end() - 1);
|
||||
models.insert(models.begin(), std::move(model));
|
||||
}
|
||||
}
|
||||
// 8. significances <- significances_bak
|
||||
significanceModels = significance_bak;
|
||||
//
|
||||
// Update the significance of the last k models
|
||||
//
|
||||
// 9. Update last k significances
|
||||
for (int i = 0; i < k; ++i) {
|
||||
significanceModels[n_models_bak - k + i] = alpha_t;
|
||||
}
|
||||
// 10. n_models <- n_models_bak
|
||||
n_models = n_models_bak;
|
||||
return { weights, alpha_t, terminate };
|
||||
}
|
||||
std::vector<int> BoostAODE::initializeModels()
|
||||
{
|
||||
std::vector<int> featuresUsed;
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
int maxFeatures = 0;
|
||||
if (select_features_algorithm == SelectFeatures.CFS) {
|
||||
@@ -160,134 +247,124 @@ namespace bayesnet {
|
||||
}
|
||||
featureSelector->fit();
|
||||
auto cfsFeatures = featureSelector->getFeatures();
|
||||
auto scores = featureSelector->getScores();
|
||||
for (const int& feature : cfsFeatures) {
|
||||
featuresUsed.insert(feature);
|
||||
featuresUsed.push_back(feature);
|
||||
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(1.0);
|
||||
significanceModels.push_back(1.0); // They will be updated later in trainModel
|
||||
n_models++;
|
||||
}
|
||||
notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||
delete featureSelector;
|
||||
return featuresUsed;
|
||||
}
|
||||
torch::Tensor BoostAODE::ensemble_predict(torch::Tensor& X, SPODE* model)
|
||||
{
|
||||
if (initialize_prob_table) {
|
||||
initialize_prob_table = false;
|
||||
prob_table = model->predict_proba(X) * 1.0;
|
||||
} else {
|
||||
prob_table += model->predict_proba(X) * 1.0;
|
||||
}
|
||||
// prob_table doesn't store probabilities but the sum of them
|
||||
// to have them we need to divide by the sum of the "weights" used to
|
||||
// consider the results obtained in the model's predict_proba.
|
||||
return prob_table.argmax(1);
|
||||
}
|
||||
void BoostAODE::trainModel(const torch::Tensor& weights)
|
||||
{
|
||||
// Algorithm based on the adaboost algorithm for classification
|
||||
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
|
||||
initialize_prob_table = true;
|
||||
fitted = true;
|
||||
double alpha_t = 0;
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
bool exitCondition = false;
|
||||
std::unordered_set<int> featuresUsed;
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels();
|
||||
auto ypred = predict(X_train);
|
||||
std::tie(weights_, alpha_t, exitCondition) = update_weights(y_train, ypred, weights_);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
// Update significance of the models
|
||||
for (int i = 0; i < n_models; ++i) {
|
||||
significanceModels[i] = alpha_t;
|
||||
}
|
||||
if (exitCondition) {
|
||||
if (finished) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
bool resetMaxModels = false;
|
||||
if (maxModels == 0) {
|
||||
maxModels = .1 * n > 10 ? .1 * n : n;
|
||||
resetMaxModels = true; // Flag to unset maxModels
|
||||
}
|
||||
int numItemsPack = 0; // The counter of the models inserted in the current pack
|
||||
// Variables to control the accuracy finish condition
|
||||
double priorAccuracy = 0.0;
|
||||
double delta = 1.0;
|
||||
double improvement = 1.0;
|
||||
double convergence_threshold = 1e-4;
|
||||
int count = 0; // number of times the accuracy is lower than the convergence_threshold
|
||||
int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
|
||||
// Step 0: Set the finish condition
|
||||
// if not repeatSparent a finish condition is run out of features
|
||||
// n_models == maxModels
|
||||
// epsilon sub t > 0.5 => inverse the weights policy
|
||||
// validation error is not decreasing
|
||||
// run out of features
|
||||
bool ascending = order_algorithm == Orders.ASC;
|
||||
std::mt19937 g{ 173 };
|
||||
while (!exitCondition) {
|
||||
while (!finished) {
|
||||
// Step 1: Build ranking with mutual information
|
||||
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
|
||||
if (order_algorithm == Orders.RAND) {
|
||||
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
|
||||
}
|
||||
// Remove used features
|
||||
featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x)
|
||||
{ return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
|
||||
end(featureSelection)
|
||||
);
|
||||
int k = pow(2, tolerance);
|
||||
int counter = 0; // The model counter of the current pack
|
||||
while (counter++ < k && featureSelection.size() > 0) {
|
||||
auto feature = featureSelection[0];
|
||||
if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
|
||||
bool used = true;
|
||||
for (const auto& feat : featureSelection) {
|
||||
if (std::find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
|
||||
continue;
|
||||
}
|
||||
used = false;
|
||||
feature = feat;
|
||||
break;
|
||||
}
|
||||
if (used) {
|
||||
exitCondition = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
featureSelection.erase(featureSelection.begin());
|
||||
std::unique_ptr<Classifier> model;
|
||||
model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
torch::Tensor ypred;
|
||||
if (predict_single) {
|
||||
ypred = model->predict(X_train);
|
||||
} else {
|
||||
ypred = ensemble_predict(X_train, dynamic_cast<SPODE*>(model.get()));
|
||||
}
|
||||
alpha_t = 0.0;
|
||||
if (!block_update) {
|
||||
auto ypred = model->predict(X_train);
|
||||
// Step 3.1: Compute the classifier amount of say
|
||||
std::tie(weights_, alpha_t, exitCondition) = update_weights(y_train, ypred, weights_);
|
||||
if (exitCondition) {
|
||||
break;
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
}
|
||||
// Step 3.4: Store classifier and its accuracy to weigh its future vote
|
||||
featuresUsed.insert(feature);
|
||||
numItemsPack++;
|
||||
featuresUsed.push_back(feature);
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(alpha_t);
|
||||
n_models++;
|
||||
if (convergence) {
|
||||
}
|
||||
if (block_update) {
|
||||
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
|
||||
}
|
||||
if (convergence && !finished) {
|
||||
auto y_val_predict = predict(X_test);
|
||||
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
|
||||
if (priorAccuracy == 0) {
|
||||
priorAccuracy = accuracy;
|
||||
} else {
|
||||
delta = accuracy - priorAccuracy;
|
||||
improvement = accuracy - priorAccuracy;
|
||||
}
|
||||
if (delta < convergence_threshold) {
|
||||
count++;
|
||||
if (improvement < convergence_threshold) {
|
||||
tolerance++;
|
||||
} else {
|
||||
tolerance = 0; // Reset the counter if the model performs better
|
||||
numItemsPack = 0;
|
||||
}
|
||||
priorAccuracy = accuracy;
|
||||
// Keep the best accuracy until now as the prior accuracy
|
||||
priorAccuracy = std::max(accuracy, priorAccuracy);
|
||||
// priorAccuracy = accuracy;
|
||||
}
|
||||
finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
|
||||
}
|
||||
if (tolerance > maxTolerance) {
|
||||
if (numItemsPack < n_models) {
|
||||
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
|
||||
for (int i = 0; i < numItemsPack; ++i) {
|
||||
significanceModels.pop_back();
|
||||
models.pop_back();
|
||||
n_models--;
|
||||
}
|
||||
} else {
|
||||
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
||||
}
|
||||
exitCondition = n_models >= maxModels && repeatSparent || count > tolerance;
|
||||
}
|
||||
if (featuresUsed.size() != features.size()) {
|
||||
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
|
||||
status = WARNING;
|
||||
}
|
||||
notes.push_back("Number of models: " + std::to_string(n_models));
|
||||
if (resetMaxModels) {
|
||||
maxModels = 0;
|
||||
}
|
||||
}
|
||||
std::vector<std::string> BoostAODE::graph(const std::string& title) const
|
||||
{
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef BOOSTAODE_H
|
||||
#define BOOSTAODE_H
|
||||
#include <map>
|
||||
@@ -5,33 +11,39 @@
|
||||
#include "bayesnet/feature_selection/FeatureSelect.h"
|
||||
#include "Ensemble.h"
|
||||
namespace bayesnet {
|
||||
struct {
|
||||
std::string CFS = "CFS";
|
||||
std::string FCBF = "FCBF";
|
||||
std::string IWSS = "IWSS";
|
||||
}SelectFeatures;
|
||||
struct {
|
||||
std::string ASC = "asc";
|
||||
std::string DESC = "desc";
|
||||
std::string RAND = "rand";
|
||||
}Orders;
|
||||
class BoostAODE : public Ensemble {
|
||||
public:
|
||||
BoostAODE(bool predict_voting = true);
|
||||
BoostAODE(bool predict_voting = false);
|
||||
virtual ~BoostAODE() = default;
|
||||
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
||||
protected:
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
private:
|
||||
std::unordered_set<int> initializeModels();
|
||||
torch::Tensor ensemble_predict(torch::Tensor& X, SPODE* model);
|
||||
torch::Tensor dataset_;
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights);
|
||||
std::vector<int> initializeModels();
|
||||
torch::Tensor X_train, y_train, X_test, y_test;
|
||||
// Hyperparameters
|
||||
bool repeatSparent = false; // if true, a feature can be selected more than once
|
||||
int maxModels = 0;
|
||||
int tolerance = 0;
|
||||
bool predict_single = true; // whether the last model is used to predict in training or the whole ensemble
|
||||
bool bisection = true; // if true, use bisection strategy to add k models at once to the ensemble
|
||||
int maxTolerance = 3;
|
||||
std::string order_algorithm; // order to process the KBest features asc, desc, rand
|
||||
bool convergence = false; //if true, stop when the model does not improve
|
||||
bool convergence = true; //if true, stop when the model does not improve
|
||||
bool selectFeatures = false; // if true, use feature selection
|
||||
std::string select_features_algorithm = "desc"; // Selected feature selection algorithm
|
||||
bool initialize_prob_table; // if true, initialize the prob_table with the first model (used in train)
|
||||
torch::Tensor prob_table; // Table of probabilities for ensemble predicting if predict_single is false
|
||||
std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
|
||||
FeatureSelect* featureSelector = nullptr;
|
||||
double threshold = -1;
|
||||
bool block_update = false;
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "Ensemble.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef ENSEMBLE_H
|
||||
#define ENSEMBLE_H
|
||||
#include <torch/torch.h>
|
||||
@@ -25,8 +31,9 @@ namespace bayesnet {
|
||||
{
|
||||
return std::vector<std::string>();
|
||||
}
|
||||
void dump_cpt() const override
|
||||
std::string dump_cpt() const override
|
||||
{
|
||||
return "";
|
||||
}
|
||||
protected:
|
||||
torch::Tensor predict_average_voting(torch::Tensor& X);
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <limits>
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "CFS.h"
|
||||
@@ -11,7 +17,7 @@ namespace bayesnet {
|
||||
auto feature = featureOrder[0];
|
||||
selectedFeatures.push_back(feature);
|
||||
selectedScores.push_back(suLabels[feature]);
|
||||
selectedFeatures.erase(selectedFeatures.begin());
|
||||
featureOrder.erase(featureOrder.begin());
|
||||
while (continueCondition) {
|
||||
double merit = std::numeric_limits<double>::lowest();
|
||||
int bestFeature = -1;
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CFS_H
|
||||
#define CFS_H
|
||||
#include <torch/torch.h>
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "FCBF.h"
|
||||
namespace bayesnet {
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef FCBF_H
|
||||
#define FCBF_H
|
||||
#include <torch/torch.h>
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <limits>
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "FeatureSelect.h"
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef FEATURE_SELECT_H
|
||||
#define FEATURE_SELECT_H
|
||||
#include <torch/torch.h>
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <limits>
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "IWSS.h"
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef IWSS_H
|
||||
#define IWSS_H
|
||||
#include <vector>
|
||||
|
@@ -1,27 +1,41 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include "Network.h"
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
namespace bayesnet {
|
||||
Network::Network() : features(std::vector<std::string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
|
||||
Network::Network(float maxT) : features(std::vector<std::string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
|
||||
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.
|
||||
getmaxThreads()), fitted(other.fitted)
|
||||
Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
|
||||
{
|
||||
}
|
||||
Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
|
||||
{
|
||||
|
||||
}
|
||||
Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
|
||||
maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples)
|
||||
{
|
||||
if (samples.defined())
|
||||
samples = samples.clone();
|
||||
for (const auto& node : other.nodes) {
|
||||
nodes[node.first] = std::make_unique<Node>(*node.second);
|
||||
}
|
||||
}
|
||||
void Network::initialize()
|
||||
{
|
||||
features = std::vector<std::string>();
|
||||
features.clear();
|
||||
className = "";
|
||||
classNumStates = 0;
|
||||
fitted = false;
|
||||
nodes.clear();
|
||||
samples = torch::Tensor();
|
||||
}
|
||||
float Network::getmaxThreads()
|
||||
float Network::getMaxThreads() const
|
||||
{
|
||||
return maxThreads;
|
||||
}
|
||||
@@ -114,11 +128,14 @@ namespace bayesnet {
|
||||
if (n_features != featureNames.size()) {
|
||||
throw std::invalid_argument("X and features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(featureNames.size()) + ")");
|
||||
}
|
||||
if (features.size() == 0) {
|
||||
throw std::invalid_argument("The network has not been initialized. You must call addNode() before calling fit()");
|
||||
}
|
||||
if (n_features != features.size() - 1) {
|
||||
throw std::invalid_argument("X and local features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
if (find(features.begin(), features.end(), className) == features.end()) {
|
||||
throw std::invalid_argument("className not found in Network::features");
|
||||
throw std::invalid_argument("Class Name not found in Network::features");
|
||||
}
|
||||
for (auto& feature : featureNames) {
|
||||
if (find(features.begin(), features.end(), feature) == features.end()) {
|
||||
@@ -404,11 +421,13 @@ namespace bayesnet {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
void Network::dump_cpt() const
|
||||
std::string Network::dump_cpt() const
|
||||
{
|
||||
std::stringstream oss;
|
||||
for (auto& node : nodes) {
|
||||
std::cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
|
||||
std::cout << node.second->getCPT() << std::endl;
|
||||
oss << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
|
||||
oss << node.second->getCPT() << std::endl;
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
}
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef NETWORK_H
|
||||
#define NETWORK_H
|
||||
#include <map>
|
||||
@@ -10,10 +16,10 @@ namespace bayesnet {
|
||||
public:
|
||||
Network();
|
||||
explicit Network(float);
|
||||
explicit Network(Network&);
|
||||
explicit Network(const Network&);
|
||||
~Network() = default;
|
||||
torch::Tensor& getSamples();
|
||||
float getmaxThreads();
|
||||
float getMaxThreads() const;
|
||||
void addNode(const std::string&);
|
||||
void addEdge(const std::string&, const std::string&);
|
||||
std::map<std::string, std::unique_ptr<Node>>& getNodes();
|
||||
@@ -39,7 +45,7 @@ namespace bayesnet {
|
||||
std::vector<std::string> show() const;
|
||||
std::vector<std::string> graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format
|
||||
void initialize();
|
||||
void dump_cpt() const;
|
||||
std::string dump_cpt() const;
|
||||
inline std::string version() { return { project_version.begin(), project_version.end() }; }
|
||||
private:
|
||||
std::map<std::string, std::unique_ptr<Node>> nodes;
|
||||
@@ -49,7 +55,7 @@ namespace bayesnet {
|
||||
std::vector<std::string> features; // Including classname
|
||||
std::string className;
|
||||
double laplaceSmoothing;
|
||||
torch::Tensor samples; // nxm tensor used to fit the model
|
||||
torch::Tensor samples; // n+1xm tensor used to fit the model
|
||||
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
|
||||
std::vector<double> predict_sample(const std::vector<int>&);
|
||||
std::vector<double> predict_sample(const torch::Tensor&);
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "Node.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef NODE_H
|
||||
#define NODE_H
|
||||
#include <unordered_set>
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "Mst.h"
|
||||
#include "BayesMetrics.h"
|
||||
namespace bayesnet {
|
||||
@@ -9,12 +15,12 @@ namespace bayesnet {
|
||||
, classNumStates(classNumStates)
|
||||
{
|
||||
}
|
||||
//samples is nxm std::vector used to fit the model
|
||||
//samples is n+1xm std::vector used to fit the model
|
||||
Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
|
||||
: features(features)
|
||||
, className(className)
|
||||
, classNumStates(classNumStates)
|
||||
, samples(torch::zeros({ static_cast<int>(vsamples[0].size()), static_cast<int>(vsamples.size() + 1) }, torch::kInt32))
|
||||
, samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
|
||||
{
|
||||
for (int i = 0; i < vsamples.size(); ++i) {
|
||||
samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32));
|
||||
@@ -24,7 +30,7 @@ namespace bayesnet {
|
||||
std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
|
||||
{
|
||||
// Return the K Best features
|
||||
auto n = samples.size(0) - 1;
|
||||
auto n = features.size();
|
||||
if (k == 0) {
|
||||
k = n;
|
||||
}
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef BAYESNET_METRICS_H
|
||||
#define BAYESNET_METRICS_H
|
||||
#include <vector>
|
||||
@@ -5,11 +11,16 @@
|
||||
#include <torch/torch.h>
|
||||
namespace bayesnet {
|
||||
class Metrics {
|
||||
private:
|
||||
int classNumStates = 0;
|
||||
std::vector<double> scoresKBest;
|
||||
std::vector<int> featuresKBest; // sorted indices of the features
|
||||
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
|
||||
public:
|
||||
Metrics() = default;
|
||||
Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
|
||||
Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
|
||||
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
|
||||
std::vector<double> getScoresKBest() const;
|
||||
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
|
||||
std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
|
||||
torch::Tensor conditionalEdge(const torch::Tensor& weights);
|
||||
std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
|
||||
protected:
|
||||
torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
|
||||
std::string className;
|
||||
@@ -34,16 +45,11 @@ namespace bayesnet {
|
||||
v.erase(v.begin());
|
||||
return temp;
|
||||
}
|
||||
public:
|
||||
Metrics() = default;
|
||||
Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
|
||||
Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
|
||||
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
|
||||
std::vector<double> getScoresKBest() const;
|
||||
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
|
||||
std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
|
||||
torch::Tensor conditionalEdge(const torch::Tensor& weights);
|
||||
std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
|
||||
private:
|
||||
int classNumStates = 0;
|
||||
std::vector<double> scoresKBest;
|
||||
std::vector<int> featuresKBest; // sorted indices of the features
|
||||
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,3 +1,10 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include "Mst.h"
|
||||
@@ -45,15 +52,6 @@ namespace bayesnet {
|
||||
}
|
||||
}
|
||||
}
|
||||
void Graph::display_mst()
|
||||
{
|
||||
std::cout << "Edge :" << " Weight" << std::endl;
|
||||
for (int i = 0; i < T.size(); i++) {
|
||||
std::cout << T[i].second.first << " - " << T[i].second.second << " : "
|
||||
<< T[i].first;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void insertElement(std::list<int>& variables, int variable)
|
||||
{
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef MST_H
|
||||
#define MST_H
|
||||
#include <vector>
|
||||
@@ -5,29 +11,28 @@
|
||||
#include <torch/torch.h>
|
||||
namespace bayesnet {
|
||||
class MST {
|
||||
private:
|
||||
torch::Tensor weights;
|
||||
std::vector<std::string> features;
|
||||
int root = 0;
|
||||
public:
|
||||
MST() = default;
|
||||
MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
|
||||
std::vector<std::pair<int, int>> maximumSpanningTree();
|
||||
private:
|
||||
torch::Tensor weights;
|
||||
std::vector<std::string> features;
|
||||
int root = 0;
|
||||
};
|
||||
class Graph {
|
||||
private:
|
||||
int V; // number of nodes in graph
|
||||
std::vector <std::pair<float, std::pair<int, int>>> G; // std::vector for graph
|
||||
std::vector <std::pair<float, std::pair<int, int>>> T; // std::vector for mst
|
||||
std::vector<int> parent;
|
||||
public:
|
||||
explicit Graph(int V);
|
||||
void addEdge(int u, int v, float wt);
|
||||
int find_set(int i);
|
||||
void union_set(int u, int v);
|
||||
void kruskal_algorithm();
|
||||
void display_mst();
|
||||
std::vector <std::pair<float, std::pair<int, int>>> get_mst() { return T; }
|
||||
private:
|
||||
int V; // number of nodes in graph
|
||||
std::vector <std::pair<float, std::pair<int, int>>> G; // std::vector for graph
|
||||
std::vector <std::pair<float, std::pair<int, int>>> T; // std::vector for mst
|
||||
std::vector<int> parent;
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
|
||||
#include "bayesnetUtils.h"
|
||||
namespace bayesnet {
|
||||
@@ -10,18 +16,6 @@ namespace bayesnet {
|
||||
sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
|
||||
return indices;
|
||||
}
|
||||
std::vector<std::vector<int>> tensorToVector(torch::Tensor& dtensor)
|
||||
{
|
||||
// convert mxn tensor to nxm std::vector
|
||||
std::vector<std::vector<int>> result;
|
||||
// Iterate over cols
|
||||
for (int i = 0; i < dtensor.size(1); ++i) {
|
||||
auto col_tensor = dtensor.index({ "...", i });
|
||||
auto col = std::vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + dtensor.size(0));
|
||||
result.push_back(col);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor)
|
||||
{
|
||||
// convert mxn tensor to mxn std::vector
|
||||
|
@@ -1,10 +1,15 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef BAYESNET_UTILS_H
|
||||
#define BAYESNET_UTILS_H
|
||||
#include <vector>
|
||||
#include <torch/torch.h>
|
||||
namespace bayesnet {
|
||||
std::vector<int> argsort(std::vector<double>& nums);
|
||||
std::vector<std::vector<int>> tensorToVector(torch::Tensor& dtensor);
|
||||
std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor);
|
||||
torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector, bool transpose = true);
|
||||
}
|
||||
|
Binary file not shown.
412
diagrams/BayesNet.puml
Normal file
412
diagrams/BayesNet.puml
Normal file
@@ -0,0 +1,412 @@
|
||||
@startuml
|
||||
title clang-uml class diagram model
|
||||
class "bayesnet::Metrics" as C_0000736965376885623323
|
||||
class C_0000736965376885623323 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Metrics() = default : void
|
||||
+Metrics(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
|
||||
+Metrics(const std::vector<std::vector<int>> & vsamples, const std::vector<int> & labels, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
|
||||
..
|
||||
+SelectKBestWeighted(const torch::Tensor & weights, bool ascending = false, unsigned int k = 0) : std::vector<int>
|
||||
+conditionalEdge(const torch::Tensor & weights) : torch::Tensor
|
||||
+conditionalEdgeWeights(std::vector<float> & weights) : std::vector<float>
|
||||
#doCombinations<T>(const std::vector<T> & source) : std::vector<std::pair<T, T> >
|
||||
#entropy(const torch::Tensor & feature, const torch::Tensor & weights) : double
|
||||
+getScoresKBest() const : std::vector<double>
|
||||
+maximumSpanningTree(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : std::vector<std::pair<int,int>>
|
||||
+mutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & weights) : double
|
||||
#pop_first<T>(std::vector<T> & v) : T
|
||||
__
|
||||
#className : std::string
|
||||
#features : std::vector<std::string>
|
||||
#samples : torch::Tensor
|
||||
}
|
||||
class "bayesnet::Node" as C_0001303524929067080934
|
||||
class C_0001303524929067080934 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Node(const std::string &) : void
|
||||
..
|
||||
+addChild(Node *) : void
|
||||
+addParent(Node *) : void
|
||||
+clear() : void
|
||||
+computeCPT(const torch::Tensor & dataset, const std::vector<std::string> & features, const double laplaceSmoothing, const torch::Tensor & weights) : void
|
||||
+getCPT() : torch::Tensor &
|
||||
+getChildren() : std::vector<Node *> &
|
||||
+getFactorValue(std::map<std::string,int> &) : float
|
||||
+getName() const : std::string
|
||||
+getNumStates() const : int
|
||||
+getParents() : std::vector<Node *> &
|
||||
+graph(const std::string & clasName) : std::vector<std::string>
|
||||
+minFill() : unsigned int
|
||||
+removeChild(Node *) : void
|
||||
+removeParent(Node *) : void
|
||||
+setNumStates(int) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::Network" as C_0001186707649890429575
|
||||
class C_0001186707649890429575 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Network() : void
|
||||
+Network(float) : void
|
||||
+Network(const Network &) : void
|
||||
+~Network() = default : void
|
||||
..
|
||||
+addEdge(const std::string &, const std::string &) : void
|
||||
+addNode(const std::string &) : void
|
||||
+dump_cpt() const : std::string
|
||||
+fit(const torch::Tensor & samples, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
|
||||
+fit(const torch::Tensor & X, const torch::Tensor & y, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
|
||||
+fit(const std::vector<std::vector<int>> & input_data, const std::vector<int> & labels, const std::vector<double> & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
|
||||
+getClassName() const : std::string
|
||||
+getClassNumStates() const : int
|
||||
+getEdges() const : std::vector<std::pair<std::string,std::string>>
|
||||
+getFeatures() const : std::vector<std::string>
|
||||
+getMaxThreads() const : float
|
||||
+getNodes() : std::map<std::string,std::unique_ptr<Node>> &
|
||||
+getNumEdges() const : int
|
||||
+getSamples() : torch::Tensor &
|
||||
+getStates() const : int
|
||||
+graph(const std::string & title) const : std::vector<std::string>
|
||||
+initialize() : void
|
||||
+predict(const std::vector<std::vector<int>> &) : std::vector<int>
|
||||
+predict(const torch::Tensor &) : torch::Tensor
|
||||
+predict_proba(const std::vector<std::vector<int>> &) : std::vector<std::vector<double>>
|
||||
+predict_proba(const torch::Tensor &) : torch::Tensor
|
||||
+predict_tensor(const torch::Tensor & samples, const bool proba) : torch::Tensor
|
||||
+score(const std::vector<std::vector<int>> &, const std::vector<int> &) : double
|
||||
+show() const : std::vector<std::string>
|
||||
+topological_sort() : std::vector<std::string>
|
||||
+version() : std::string
|
||||
__
|
||||
}
|
||||
enum "bayesnet::status_t" as C_0000738420730783851375
|
||||
enum C_0000738420730783851375 {
|
||||
NORMAL
|
||||
WARNING
|
||||
ERROR
|
||||
}
|
||||
abstract "bayesnet::BaseClassifier" as C_0000327135989451974539
|
||||
abstract C_0000327135989451974539 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+~BaseClassifier() = default : void
|
||||
..
|
||||
{abstract} +dump_cpt() const = 0 : std::string
|
||||
{abstract} +fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
|
||||
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
|
||||
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights) = 0 : BaseClassifier &
|
||||
{abstract} +fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
|
||||
{abstract} +getClassNumStates() const = 0 : int
|
||||
{abstract} +getNotes() const = 0 : std::vector<std::string>
|
||||
{abstract} +getNumberOfEdges() const = 0 : int
|
||||
{abstract} +getNumberOfNodes() const = 0 : int
|
||||
{abstract} +getNumberOfStates() const = 0 : int
|
||||
{abstract} +getStatus() const = 0 : status_t
|
||||
+getValidHyperparameters() : std::vector<std::string> &
|
||||
{abstract} +getVersion() = 0 : std::string
|
||||
{abstract} +graph(const std::string & title = "") const = 0 : std::vector<std::string>
|
||||
{abstract} +predict(std::vector<std::vector<int>> & X) = 0 : std::vector<int>
|
||||
{abstract} +predict(torch::Tensor & X) = 0 : torch::Tensor
|
||||
{abstract} +predict_proba(std::vector<std::vector<int>> & X) = 0 : std::vector<std::vector<double>>
|
||||
{abstract} +predict_proba(torch::Tensor & X) = 0 : torch::Tensor
|
||||
{abstract} +score(std::vector<std::vector<int>> & X, std::vector<int> & y) = 0 : float
|
||||
{abstract} +score(torch::Tensor & X, torch::Tensor & y) = 0 : float
|
||||
{abstract} +setHyperparameters(const nlohmann::json & hyperparameters) = 0 : void
|
||||
{abstract} +show() const = 0 : std::vector<std::string>
|
||||
{abstract} +topological_order() = 0 : std::vector<std::string>
|
||||
{abstract} #trainModel(const torch::Tensor & weights) = 0 : void
|
||||
__
|
||||
#validHyperparameters : std::vector<std::string>
|
||||
}
|
||||
abstract "bayesnet::Classifier" as C_0002043996622900301644
|
||||
abstract C_0002043996622900301644 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Classifier(Network model) : void
|
||||
+~Classifier() = default : void
|
||||
..
|
||||
+addNodes() : void
|
||||
#buildDataset(torch::Tensor & y) : void
|
||||
{abstract} #buildModel(const torch::Tensor & weights) = 0 : void
|
||||
#checkFitParameters() : void
|
||||
+dump_cpt() const : std::string
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
|
||||
+fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights) : Classifier &
|
||||
+getClassNumStates() const : int
|
||||
+getNotes() const : std::vector<std::string>
|
||||
+getNumberOfEdges() const : int
|
||||
+getNumberOfNodes() const : int
|
||||
+getNumberOfStates() const : int
|
||||
+getStatus() const : status_t
|
||||
+getVersion() : std::string
|
||||
+predict(std::vector<std::vector<int>> & X) : std::vector<int>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
+predict_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
|
||||
+predict_proba(torch::Tensor & X) : torch::Tensor
|
||||
+score(torch::Tensor & X, torch::Tensor & y) : float
|
||||
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
+show() const : std::vector<std::string>
|
||||
+topological_order() : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights) : void
|
||||
__
|
||||
#className : std::string
|
||||
#dataset : torch::Tensor
|
||||
#features : std::vector<std::string>
|
||||
#fitted : bool
|
||||
#m : unsigned int
|
||||
#metrics : Metrics
|
||||
#model : Network
|
||||
#n : unsigned int
|
||||
#notes : std::vector<std::string>
|
||||
#states : std::map<std::string,std::vector<int>>
|
||||
#status : status_t
|
||||
}
|
||||
class "bayesnet::KDB" as C_0001112865019015250005
|
||||
class C_0001112865019015250005 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDB(int k, float theta = 0.03) : void
|
||||
+~KDB() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "KDB") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::TAN" as C_0001760994424884323017
|
||||
class C_0001760994424884323017 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+TAN() : void
|
||||
+~TAN() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "TAN") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Proposal" as C_0002219995589162262979
|
||||
class C_0002219995589162262979 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Proposal(torch::Tensor & pDataset, std::vector<std::string> & features_, std::string & className_) : void
|
||||
+~Proposal() : void
|
||||
..
|
||||
#checkInput(const torch::Tensor & X, const torch::Tensor & y) : void
|
||||
#fit_local_discretization(const torch::Tensor & y) : std::map<std::string,std::vector<int>>
|
||||
#localDiscretizationProposal(const std::map<std::string,std::vector<int>> & states, Network & model) : std::map<std::string,std::vector<int>>
|
||||
#prepareX(torch::Tensor & X) : torch::Tensor
|
||||
__
|
||||
#Xf : torch::Tensor
|
||||
#discretizers : map<std::string,mdlp::CPPFImdlp *>
|
||||
#y : torch::Tensor
|
||||
}
|
||||
class "bayesnet::TANLd" as C_0001668829096702037834
|
||||
class C_0001668829096702037834 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+TANLd() : void
|
||||
+~TANLd() = default : void
|
||||
..
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : TANLd &
|
||||
+graph(const std::string & name = "TAN") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
abstract "bayesnet::FeatureSelect" as C_0001695326193250580823
|
||||
abstract C_0001695326193250580823 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FeatureSelect(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~FeatureSelect() : void
|
||||
..
|
||||
#computeMeritCFS() : double
|
||||
#computeSuFeatures(const int a, const int b) : double
|
||||
#computeSuLabels() : void
|
||||
{abstract} +fit() = 0 : void
|
||||
+getFeatures() const : std::vector<int>
|
||||
+getScores() const : std::vector<double>
|
||||
#initialize() : void
|
||||
#symmetricalUncertainty(int a, int b) : double
|
||||
__
|
||||
#fitted : bool
|
||||
#maxFeatures : int
|
||||
#selectedFeatures : std::vector<int>
|
||||
#selectedScores : std::vector<double>
|
||||
#suFeatures : std::map<std::pair<int,int>,double>
|
||||
#suLabels : std::vector<double>
|
||||
#weights : const torch::Tensor &
|
||||
}
|
||||
class "bayesnet::CFS" as C_0000011627355691342494
|
||||
class C_0000011627355691342494 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+CFS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~CFS() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::FCBF" as C_0000144682015341746929
|
||||
class C_0000144682015341746929 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FCBF(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
|
||||
+~FCBF() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::IWSS" as C_0000008268514674428553
|
||||
class C_0000008268514674428553 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+IWSS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
|
||||
+~IWSS() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPODE" as C_0000512022813807538451
|
||||
class C_0000512022813807538451 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODE(int root) : void
|
||||
+~SPODE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Ensemble" as C_0001985241386355360576
|
||||
class C_0001985241386355360576 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Ensemble(bool predict_voting = true) : void
|
||||
+~Ensemble() = default : void
|
||||
..
|
||||
#compute_arg_max(std::vector<std::vector<double>> & X) : std::vector<int>
|
||||
#compute_arg_max(torch::Tensor & X) : torch::Tensor
|
||||
+dump_cpt() const : std::string
|
||||
+getNumberOfEdges() const : int
|
||||
+getNumberOfNodes() const : int
|
||||
+getNumberOfStates() const : int
|
||||
+graph(const std::string & title) const : std::vector<std::string>
|
||||
+predict(std::vector<std::vector<int>> & X) : std::vector<int>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
#predict_average_proba(torch::Tensor & X) : torch::Tensor
|
||||
#predict_average_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
|
||||
#predict_average_voting(torch::Tensor & X) : torch::Tensor
|
||||
#predict_average_voting(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
|
||||
+predict_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
|
||||
+predict_proba(torch::Tensor & X) : torch::Tensor
|
||||
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
|
||||
+score(torch::Tensor & X, torch::Tensor & y) : float
|
||||
+show() const : std::vector<std::string>
|
||||
+topological_order() : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights) : void
|
||||
#voting(torch::Tensor & votes) : torch::Tensor
|
||||
__
|
||||
#models : std::vector<std::unique_ptr<Classifier>>
|
||||
#n_models : unsigned int
|
||||
#predict_voting : bool
|
||||
#significanceModels : std::vector<double>
|
||||
}
|
||||
class "bayesnet::(anonymous_45089536)" as C_0001186398587753535158
|
||||
class C_0001186398587753535158 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_45090163)" as C_0000602764946063116717
|
||||
class C_0000602764946063116717 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::BoostAODE" as C_0000358471592399852382
|
||||
class C_0000358471592399852382 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+BoostAODE(bool predict_voting = false) : void
|
||||
+~BoostAODE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & title = "BoostAODE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
#trainModel(const torch::Tensor & weights) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::MST" as C_0000131858426172291700
|
||||
class C_0000131858426172291700 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+MST() = default : void
|
||||
+MST(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : void
|
||||
..
|
||||
+maximumSpanningTree() : std::vector<std::pair<int,int>>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Graph" as C_0001197041682001898467
|
||||
class C_0001197041682001898467 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Graph(int V) : void
|
||||
..
|
||||
+addEdge(int u, int v, float wt) : void
|
||||
+find_set(int i) : int
|
||||
+get_mst() : std::vector<std::pair<float,std::pair<int,int>>>
|
||||
+kruskal_algorithm() : void
|
||||
+union_set(int u, int v) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::KDBLd" as C_0000344502277874806837
|
||||
class C_0000344502277874806837 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDBLd(int k) : void
|
||||
+~KDBLd() = default : void
|
||||
..
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : KDBLd &
|
||||
+graph(const std::string & name = "KDB") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::AODE" as C_0000786111576121788282
|
||||
class C_0000786111576121788282 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODE(bool predict_voting = false) : void
|
||||
+~AODE() : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & title = "AODE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPODELd" as C_0001369655639257755354
|
||||
class C_0001369655639257755354 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODELd(int root) : void
|
||||
+~SPODELd() = default : void
|
||||
..
|
||||
+commonFit(const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
|
||||
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::AODELd" as C_0000487273479333793647
|
||||
class C_0000487273479333793647 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODELd(bool predict_voting = true) : void
|
||||
+~AODELd() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+fit(torch::Tensor & X_, torch::Tensor & y_, const std::vector<std::string> & features_, const std::string & className_, std::map<std::string,std::vector<int>> & states_) : AODELd &
|
||||
+graph(const std::string & name = "AODELd") const : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights) : void
|
||||
__
|
||||
}
|
||||
C_0001303524929067080934 --> C_0001303524929067080934 : -parents
|
||||
C_0001303524929067080934 --> C_0001303524929067080934 : -children
|
||||
C_0001186707649890429575 o-- C_0001303524929067080934 : -nodes
|
||||
C_0000327135989451974539 ..> C_0000738420730783851375
|
||||
C_0002043996622900301644 o-- C_0001186707649890429575 : #model
|
||||
C_0002043996622900301644 o-- C_0000736965376885623323 : #metrics
|
||||
C_0002043996622900301644 o-- C_0000738420730783851375 : #status
|
||||
C_0000327135989451974539 <|-- C_0002043996622900301644
|
||||
C_0002043996622900301644 <|-- C_0001112865019015250005
|
||||
C_0002043996622900301644 <|-- C_0001760994424884323017
|
||||
C_0002219995589162262979 ..> C_0001186707649890429575
|
||||
C_0001760994424884323017 <|-- C_0001668829096702037834
|
||||
C_0002219995589162262979 <|-- C_0001668829096702037834
|
||||
C_0000736965376885623323 <|-- C_0001695326193250580823
|
||||
C_0001695326193250580823 <|-- C_0000011627355691342494
|
||||
C_0001695326193250580823 <|-- C_0000144682015341746929
|
||||
C_0001695326193250580823 <|-- C_0000008268514674428553
|
||||
C_0002043996622900301644 <|-- C_0000512022813807538451
|
||||
C_0001985241386355360576 o-- C_0002043996622900301644 : #models
|
||||
C_0002043996622900301644 <|-- C_0001985241386355360576
|
||||
C_0000358471592399852382 --> C_0001695326193250580823 : -featureSelector
|
||||
C_0001985241386355360576 <|-- C_0000358471592399852382
|
||||
C_0001112865019015250005 <|-- C_0000344502277874806837
|
||||
C_0002219995589162262979 <|-- C_0000344502277874806837
|
||||
C_0001985241386355360576 <|-- C_0000786111576121788282
|
||||
C_0000512022813807538451 <|-- C_0001369655639257755354
|
||||
C_0002219995589162262979 <|-- C_0001369655639257755354
|
||||
C_0001985241386355360576 <|-- C_0000487273479333793647
|
||||
C_0002219995589162262979 <|-- C_0000487273479333793647
|
||||
|
||||
'Generated with clang-uml, version 0.5.1
|
||||
'LLVM version clang version 17.0.6 (Fedora 17.0.6-2.fc39)
|
||||
@enduml
|
1
diagrams/BayesNet.svg
Normal file
1
diagrams/BayesNet.svg
Normal file
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 139 KiB |
128
diagrams/dependency.svg
Normal file
128
diagrams/dependency.svg
Normal file
@@ -0,0 +1,128 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<!-- Generated by graphviz version 8.1.0 (20230707.0739)
|
||||
-->
|
||||
<!-- Title: BayesNet Pages: 1 -->
|
||||
<svg width="1632pt" height="288pt"
|
||||
viewBox="0.00 0.00 1631.95 287.80" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 283.8)">
|
||||
<title>BayesNet</title>
|
||||
<polygon fill="white" stroke="none" points="-4,4 -4,-283.8 1627.95,-283.8 1627.95,4 -4,4"/>
|
||||
<!-- node1 -->
|
||||
<g id="node1" class="node">
|
||||
<title>node1</title>
|
||||
<polygon fill="none" stroke="black" points="826.43,-254.35 826.43,-269.26 796.69,-279.8 754.63,-279.8 724.89,-269.26 724.89,-254.35 754.63,-243.8 796.69,-243.8 826.43,-254.35"/>
|
||||
<text text-anchor="middle" x="775.66" y="-257.53" font-family="Times,serif" font-size="12.00">BayesNet</text>
|
||||
</g>
|
||||
<!-- node2 -->
|
||||
<g id="node2" class="node">
|
||||
<title>node2</title>
|
||||
<polygon fill="none" stroke="black" points="413.32,-185.8 372.39,-201.03 206.66,-207.8 40.93,-201.03 0,-185.8 114.69,-173.59 298.64,-173.59 413.32,-185.8"/>
|
||||
<text text-anchor="middle" x="206.66" y="-185.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libc10.so</text>
|
||||
</g>
|
||||
<!-- node1->node2 -->
|
||||
<g id="edge1" class="edge">
|
||||
<title>node1->node2</title>
|
||||
<path fill="none" stroke="black" d="M724.41,-254.5C634.7,-243.46 447.04,-220.38 324.01,-205.24"/>
|
||||
<polygon fill="black" stroke="black" points="324.77,-201.69 314.42,-203.94 323.92,-208.63 324.77,-201.69"/>
|
||||
</g>
|
||||
<!-- node3 -->
|
||||
<g id="node3" class="node">
|
||||
<title>node3</title>
|
||||
<polygon fill="none" stroke="black" points="857.68,-185.8 815.49,-201.03 644.66,-207.8 473.84,-201.03 431.65,-185.8 549.86,-173.59 739.46,-173.59 857.68,-185.8"/>
|
||||
<text text-anchor="middle" x="644.66" y="-185.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libkineto.a</text>
|
||||
</g>
|
||||
<!-- node1->node3 -->
|
||||
<g id="edge2" class="edge">
|
||||
<title>node1->node3</title>
|
||||
<path fill="none" stroke="black" d="M747.56,-245.79C729.21,-235.98 704.97,-223.03 684.63,-212.16"/>
|
||||
<polygon fill="black" stroke="black" points="686.47,-208.64 676,-207.02 683.17,-214.82 686.47,-208.64"/>
|
||||
</g>
|
||||
<!-- node4 -->
|
||||
<g id="node4" class="node">
|
||||
<title>node4</title>
|
||||
<polygon fill="none" stroke="black" points="939.33,-182.35 939.33,-197.26 920.78,-207.8 894.54,-207.8 875.99,-197.26 875.99,-182.35 894.54,-171.8 920.78,-171.8 939.33,-182.35"/>
|
||||
<text text-anchor="middle" x="907.66" y="-185.53" font-family="Times,serif" font-size="12.00">mdlp</text>
|
||||
</g>
|
||||
<!-- node1->node4 -->
|
||||
<g id="edge3" class="edge">
|
||||
<title>node1->node4</title>
|
||||
<path fill="none" stroke="black" d="M803.66,-245.96C824.66,-234.82 853.45,-219.56 875.41,-207.91"/>
|
||||
<polygon fill="black" stroke="black" points="876.78,-210.61 883.97,-202.84 873.5,-204.43 876.78,-210.61"/>
|
||||
</g>
|
||||
<!-- node9 -->
|
||||
<g id="node5" class="node">
|
||||
<title>node9</title>
|
||||
<polygon fill="none" stroke="black" points="1107.74,-195.37 1032.66,-207.8 957.58,-195.37 986.26,-175.24 1079.06,-175.24 1107.74,-195.37"/>
|
||||
<text text-anchor="middle" x="1032.66" y="-185.53" font-family="Times,serif" font-size="12.00">torch_library</text>
|
||||
</g>
|
||||
<!-- node1->node9 -->
|
||||
<g id="edge4" class="edge">
|
||||
<title>node1->node9</title>
|
||||
<path fill="none" stroke="black" d="M815.25,-250.02C860.25,-237.77 933.77,-217.74 982.68,-204.42"/>
|
||||
<polygon fill="black" stroke="black" points="983.3,-207.61 992.02,-201.6 981.46,-200.85 983.3,-207.61"/>
|
||||
</g>
|
||||
<!-- node10 -->
|
||||
<g id="node6" class="node">
|
||||
<title>node10</title>
|
||||
<polygon fill="none" stroke="black" points="1159.81,-113.8 1086.89,-129.03 791.66,-135.8 496.43,-129.03 423.52,-113.8 627.82,-101.59 955.5,-101.59 1159.81,-113.8"/>
|
||||
<text text-anchor="middle" x="791.66" y="-113.53" font-family="Times,serif" font-size="12.00">-Wl,--no-as-needed,"/home/rmontanana/Code/libtorch/lib/libtorch.so" -Wl,--as-needed</text>
|
||||
</g>
|
||||
<!-- node9->node10 -->
|
||||
<g id="edge5" class="edge">
|
||||
<title>node9->node10</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M985.62,-175.14C949.2,-164.56 898.31,-149.78 857.79,-138.01"/>
|
||||
<polygon fill="black" stroke="black" points="859.04,-134.44 848.46,-135.01 857.09,-141.16 859.04,-134.44"/>
|
||||
</g>
|
||||
<!-- node5 -->
|
||||
<g id="node7" class="node">
|
||||
<title>node5</title>
|
||||
<polygon fill="none" stroke="black" points="1371.56,-123.37 1274.66,-135.8 1177.77,-123.37 1214.78,-103.24 1334.55,-103.24 1371.56,-123.37"/>
|
||||
<text text-anchor="middle" x="1274.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch_cpu_library</text>
|
||||
</g>
|
||||
<!-- node9->node5 -->
|
||||
<g id="edge6" class="edge">
|
||||
<title>node9->node5</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1079.61,-175.22C1120.66,-163.35 1180.2,-146.13 1222.68,-133.84"/>
|
||||
<polygon fill="black" stroke="black" points="1223.46,-136.97 1232.09,-130.83 1221.51,-130.24 1223.46,-136.97"/>
|
||||
</g>
|
||||
<!-- node6 -->
|
||||
<g id="node8" class="node">
|
||||
<title>node6</title>
|
||||
<polygon fill="none" stroke="black" points="1191.4,-27.9 1114.6,-43.12 803.66,-49.9 492.72,-43.12 415.93,-27.9 631.1,-15.68 976.22,-15.68 1191.4,-27.9"/>
|
||||
<text text-anchor="middle" x="803.66" y="-27.63" font-family="Times,serif" font-size="12.00">-Wl,--no-as-needed,"/home/rmontanana/Code/libtorch/lib/libtorch_cpu.so" -Wl,--as-needed</text>
|
||||
</g>
|
||||
<!-- node5->node6 -->
|
||||
<g id="edge7" class="edge">
|
||||
<title>node5->node6</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1210.16,-105.31C1130.55,-91.13 994.37,-66.87 901.77,-50.38"/>
|
||||
<polygon fill="black" stroke="black" points="902.44,-46.77 891.98,-48.46 901.22,-53.66 902.44,-46.77"/>
|
||||
</g>
|
||||
<!-- node7 -->
|
||||
<g id="node9" class="node">
|
||||
<title>node7</title>
|
||||
<polygon fill="none" stroke="black" points="1339.72,-37.46 1274.66,-49.9 1209.61,-37.46 1234.46,-17.34 1314.87,-17.34 1339.72,-37.46"/>
|
||||
<text text-anchor="middle" x="1274.66" y="-27.63" font-family="Times,serif" font-size="12.00">caffe2::mkl</text>
|
||||
</g>
|
||||
<!-- node5->node7 -->
|
||||
<g id="edge8" class="edge">
|
||||
<title>node5->node7</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1274.66,-102.95C1274.66,-91.56 1274.66,-75.07 1274.66,-60.95"/>
|
||||
<polygon fill="black" stroke="black" points="1278.16,-61.27 1274.66,-51.27 1271.16,-61.27 1278.16,-61.27"/>
|
||||
</g>
|
||||
<!-- node8 -->
|
||||
<g id="node10" class="node">
|
||||
<title>node8</title>
|
||||
<polygon fill="none" stroke="black" points="1623.95,-41.76 1490.66,-63.8 1357.37,-41.76 1408.28,-6.09 1573.04,-6.09 1623.95,-41.76"/>
|
||||
<text text-anchor="middle" x="1490.66" y="-34.75" font-family="Times,serif" font-size="12.00">dummy</text>
|
||||
<text text-anchor="middle" x="1490.66" y="-20.5" font-family="Times,serif" font-size="12.00">(protobuf::libprotobuf)</text>
|
||||
</g>
|
||||
<!-- node5->node8 -->
|
||||
<g id="edge9" class="edge">
|
||||
<title>node5->node8</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1310.82,-102.76C1341.68,-90.77 1386.88,-73.21 1424.25,-58.7"/>
|
||||
<polygon fill="black" stroke="black" points="1425.01,-61.77 1433.06,-54.89 1422.47,-55.25 1425.01,-61.77"/>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 7.1 KiB |
@@ -1,20 +1,18 @@
|
||||
# BoostAODE Algorithm Operation
|
||||
|
||||
The algorithm is based on the AdaBoost algorithm with some new proposals that can be activated using the following hyperparameters.
|
||||
|
||||
## Hyperparameters
|
||||
|
||||
The hyperparameters defined in the algorithm are:
|
||||
|
||||
- ***repeatSparent*** (*boolean*): Allows dataset variables to be repeated as parents of an *SPODE*. Default value: *false*.
|
||||
|
||||
- ***maxModels*** (*int*): Maximum number of models (*SPODEs*) to build. This hyperparameter is only taken into account if ***repeatSparent*** is set to *true*. Default value: *0*.
|
||||
- ***bisection*** (*boolean*): If set to *true*, allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *true*.
|
||||
|
||||
- ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*.
|
||||
|
||||
- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is made by taking the first partition generated by a process of generating a 5 fold partition with stratification using a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *false*.
|
||||
- ***block_update*** (*boolean*): Sets whether the algorithm will update the weights of the models in blocks. If set to false, the algorithm will update the weights of the models one by one. Default value: *false*.
|
||||
|
||||
- ***tolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *0*.
|
||||
- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to true, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition will become a validation partition in this case). The partition is made by taking the first partition generated by a process of generating a 5 fold partition with stratification using a predetermined seed. The exit condition used in this *convergence* is that the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; otherwise, one will be added to the number of models that worsen the result (see next hyperparameter). Default value: *true*.
|
||||
|
||||
- ***maxTolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. If ***bisection*** is set to *true*, the value of this hyperparameter will be the exponent of base 2 used to compute the number of models to insert at once. Default value: *3*.
|
||||
|
||||
- ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions. Once the models of the selected variables are built, the algorithm will update the weights using the ensemble and set the significance of all the models built with the same α<sub>t</sub>. Default value: *""*.
|
||||
|
||||
@@ -24,48 +22,8 @@ The hyperparameters defined in the algorithm are:
|
||||
|
||||
Default value is *-1* so every time any of those algorithms are called, the threshold has to be set to the desired value.
|
||||
|
||||
- ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to false, the weighted average of the probabilities of each model's prediction will be used. Default value: *true*.
|
||||
|
||||
- ***predict_single*** (*boolean*): Sets whether the algorithm will use single-model prediction in the learning process. If set to *false*, all models trained up to that point will be used to calculate the prediction necessary to update the weights in the learning process. Default value: *true*.
|
||||
- ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to false, the weighted average of the probabilities of each model's prediction will be used. Default value: *false*.
|
||||
|
||||
## Operation
|
||||
|
||||
The algorithm performs the following steps:
|
||||
|
||||
1. **Initialization**
|
||||
|
||||
- If ***select_features*** is set, as many *SPODEs* are created as variables selected by the corresponding feature selection algorithm, and these variables are marked as used.
|
||||
|
||||
- Initial weights of the examples are set to *1/m*.
|
||||
|
||||
1. **Main Training Loop:**
|
||||
|
||||
- Variables are sorted by mutual information order with the class variable and processed in ascending, descending or random order, according to the value of the *order* hyperparameter. If it is random, the variables are shuffled.
|
||||
|
||||
- If the parent repetition is not established, the variable is marked as used.
|
||||
|
||||
- A *SPODE* is created using the selected variable as the parent.
|
||||
|
||||
- The model is trained, and the class variable corresponding to the training dataset is calculated. The calculation can be done using the last trained model or the set of models trained up to that point, according to the value of the *predict_single* hyperparameter.
|
||||
|
||||
- The weights associated with the examples are updated using this expression:
|
||||
|
||||
- w<sub>i</sub> · e<sup>α<sub>t</sub></sup> (if the example has been misclassified)
|
||||
|
||||
- w<sub>i</sub> · e<sup>-α<sub>t</sub></sup> (if the example has been correctly classified)
|
||||
|
||||
- The model significance is set to α<sub>t</sub>.
|
||||
|
||||
- If the ***convergence*** hyperparameter is set, the accuracy value on the test dataset that we separated in an initial step is calculated.
|
||||
|
||||
1. **Exit Conditions:**
|
||||
|
||||
- ε<sub>t</sub> > 0.5 => misclassified examples are penalized.
|
||||
|
||||
- Number of models with worse accuracy greater than ***tolerance*** and ***convergence*** established.
|
||||
|
||||
- There are no more variables to create models, and ***repeatSparent*** is not set.
|
||||
|
||||
- Number of models > ***maxModels*** if ***repeatSparent*** is set.
|
||||
|
||||
### [Proposal for *predict_single = false*](./BoostAODE_train_predict.pdf)
|
||||
### [Algorithm](./algorithm.md)
|
||||
|
118
docs/algorithm.md
Normal file
118
docs/algorithm.md
Normal file
@@ -0,0 +1,118 @@
|
||||
# Algorithm
|
||||
|
||||
- // notation
|
||||
|
||||
- $n$ features ${\cal{X}} = \{X_1, \dots, X_n\}$ and the class $Y$
|
||||
|
||||
- $m$ instances.
|
||||
|
||||
- $D = \{ (x_1^i, \dots, x_n^i, y^i) \}_{i=1}^{m}$
|
||||
|
||||
- $W$ a weights vector. $W_0$ are the initial weights.
|
||||
|
||||
- $D[W]$ dataset with weights $W$ for the instances.
|
||||
|
||||
1. // initialization
|
||||
|
||||
2. $W_0 \leftarrow (w_1, \dots, w_m) \leftarrow 1/m$
|
||||
|
||||
3. $W \leftarrow W_0$
|
||||
|
||||
4. $Vars \leftarrow {\cal{X}}$
|
||||
|
||||
5. $\delta \leftarrow 10^{-4}$
|
||||
|
||||
6. $convergence \leftarrow True$ // hyperparameter
|
||||
|
||||
7. $maxTolerance \leftarrow 3$ // hyperparameter
|
||||
|
||||
8. $bisection \leftarrow False$ // hyperparameter
|
||||
|
||||
9. $finished \leftarrow False$
|
||||
|
||||
10. $AODE \leftarrow \emptyset$ // the ensemble
|
||||
|
||||
11. $tolerance \leftarrow 0$
|
||||
|
||||
12. $numItemsPack \leftarrow 0$
|
||||
|
||||
13. $maxAccuracy \leftarrow -1$
|
||||
|
||||
14.
|
||||
|
||||
15. // main loop
|
||||
|
||||
16. While $(\lnot finished)$
|
||||
|
||||
1. $\pi \leftarrow SortFeatures(Vars, criterion, D[W])$
|
||||
|
||||
2. $k \leftarrow 2^{tolerance}$
|
||||
|
||||
3. if ($tolerance == 0$) $numItemsPack \leftarrow0$
|
||||
|
||||
4. $P \leftarrow Head(\pi,k)$ // first k features in order
|
||||
|
||||
5. $spodes \leftarrow \emptyset$
|
||||
|
||||
6. $i \leftarrow 0$
|
||||
|
||||
7. While ($i < size(P)$)
|
||||
|
||||
1. $X \leftarrow P[i]$
|
||||
|
||||
2. $i \leftarrow i + 1$
|
||||
|
||||
3. $numItemsPack \leftarrow numItemsPack + 1$
|
||||
|
||||
4. $Vars.remove(X)$
|
||||
|
||||
5. $spode \leftarrow BuildSpode(X, {\cal{X}}, D[W])$
|
||||
|
||||
6. $\hat{y}[] \leftarrow spode.Predict(D)$
|
||||
|
||||
7. $\epsilon \leftarrow error(\hat{y}[], y[])$
|
||||
|
||||
8. $\alpha \leftarrow \frac{1}{2} ln \left ( \frac{1-\epsilon}{\epsilon} \right )$
|
||||
|
||||
9. if ($\epsilon > 0.5$)
|
||||
|
||||
1. $finished \leftarrow True$
|
||||
|
||||
2. break
|
||||
|
||||
10. $spodes.add( (spode,\alpha) )$
|
||||
|
||||
11. $W \leftarrow UpdateWeights(W,\alpha,y[],\hat{y}[])$
|
||||
|
||||
8. $AODE.add( spodes )$
|
||||
|
||||
9. if ($convergence \land \lnot finished$)
|
||||
|
||||
1. $\hat{y}[] \leftarrow AODE.Predict(D)$
|
||||
|
||||
2. $actualAccuracy \leftarrow accuracy(\hat{y}[], y[])$
|
||||
|
||||
3. $if (maxAccuracy == -1)\; maxAccuracy \leftarrow actualAccuracy$
|
||||
|
||||
4. if $((actualAccuracy - maxAccuracy) < \delta)$ // result doesn't
|
||||
improve enough
|
||||
|
||||
1. $tolerance \leftarrow tolerance + 1$
|
||||
|
||||
5. else
|
||||
|
||||
1. $tolerance \leftarrow 0$
|
||||
|
||||
2. $numItemsPack \leftarrow 0$
|
||||
|
||||
10. If
|
||||
$(Vars == \emptyset \lor tolerance>maxTolerance) \; finished \leftarrow True$
|
||||
|
||||
11. $maxAccuracy \leftarrow max(maxAccuracy, actualAccuracy)$
|
||||
|
||||
17. if ($tolerance > maxTolerance$) // algorithm finished because of
|
||||
lack of convergence
|
||||
|
||||
1. $removeModels(AODE, numItemsPack)$
|
||||
|
||||
18. Return $AODE$
|
80
docs/algorithm.tex
Normal file
80
docs/algorithm.tex
Normal file
@@ -0,0 +1,80 @@
|
||||
\section{Algorithm}
|
||||
\begin{itemize}
|
||||
\item[] // notation
|
||||
\item $n$ features ${\cal{X}} = \{X_1, \dots, X_n\}$ and the class $Y$
|
||||
\item $m$ instances.
|
||||
\item $D = \{ (x_1^i, \dots, x_n^i, y^i) \}_{i=1}^{m}$
|
||||
\item $W$ a weights vector. $W_0$ are the initial weights.
|
||||
\item $D[W]$ dataset with weights $W$ for the instances.
|
||||
\end{itemize}
|
||||
\bigskip
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item[] // initialization
|
||||
\item $W_0 \leftarrow (w_1, \dots, w_m) \leftarrow 1/m$
|
||||
\item $W \leftarrow W_0$
|
||||
\item $Vars \leftarrow {\cal{X}}$
|
||||
\item $\delta \leftarrow 10^{-4}$
|
||||
\item $convergence \leftarrow True$ // hyperparameter
|
||||
\item $maxTolerance \leftarrow 3$ // hyperparameter
|
||||
\item $bisection \leftarrow False$ // hyperparameter
|
||||
\item $finished \leftarrow False$
|
||||
\item $AODE \leftarrow \emptyset$ \hspace*{2cm} // the ensemble
|
||||
\item $tolerance \leftarrow 0$
|
||||
\item $numItemsPack \leftarrow 0$
|
||||
\item $maxAccuracy \leftarrow -1$
|
||||
\item[]
|
||||
\newpage
|
||||
\item[] // main loop
|
||||
\item While $(\lnot finished)$
|
||||
\begin{enumerate}
|
||||
\item $\pi \leftarrow SortFeatures(Vars, criterion, D[W])$
|
||||
\item $k \leftarrow 2^{tolerance}$
|
||||
\item if ($tolerance == 0$) $numItemsPack \leftarrow0$
|
||||
\item $P \leftarrow Head(\pi,k)$ \hspace*{2cm} // first k features in order
|
||||
\item $spodes \leftarrow \emptyset$
|
||||
\item $i \leftarrow 0$
|
||||
\item While ($ i < size(P)$)
|
||||
\begin{enumerate}
|
||||
\item $X \leftarrow P[i]$
|
||||
\item $i \leftarrow i + 1$
|
||||
\item $numItemsPack \leftarrow numItemsPack + 1$
|
||||
\item $Vars.remove(X)$
|
||||
\item $spode \leftarrow BuildSpode(X, {\cal{X}}, D[W])$
|
||||
\item $\hat{y}[] \leftarrow spode.Predict(D)$
|
||||
\item $\epsilon \leftarrow error(\hat{y}[], y[])$
|
||||
\item $\alpha \leftarrow \frac{1}{2} ln \left ( \frac{1-\epsilon}{\epsilon} \right )$
|
||||
\item if ($\epsilon > 0.5$)
|
||||
\begin{enumerate}
|
||||
\item $finished \leftarrow True$
|
||||
\item break
|
||||
\end{enumerate}
|
||||
\item $spodes.add( (spode,\alpha) )$
|
||||
\item $W \leftarrow UpdateWeights(W,\alpha,y[],\hat{y}[])$
|
||||
\end{enumerate}
|
||||
\item $AODE.add( spodes )$
|
||||
\item if ($convergence \land \lnot finished$)
|
||||
\begin{enumerate}
|
||||
\item $\hat{y}[] \leftarrow AODE.Predict(D)$
|
||||
\item $actualAccuracy \leftarrow accuracy(\hat{y}[], y[])$
|
||||
\item $if (maxAccuracy == -1)\; maxAccuracy \leftarrow actualAccuracy$
|
||||
\item if $((actualAccuracy - maxAccuracy) < \delta)$\hspace*{2cm} // result doesn't improve enough
|
||||
\begin{enumerate}
|
||||
\item $tolerance \leftarrow tolerance + 1$
|
||||
\end{enumerate}
|
||||
\item else
|
||||
\begin{enumerate}
|
||||
\item $tolerance \leftarrow 0$
|
||||
\item $numItemsPack \leftarrow 0$
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
\item If $(Vars == \emptyset \lor tolerance>maxTolerance) \; finished \leftarrow True$
|
||||
\item $maxAccuracy \leftarrow max(maxAccuracy, actualAccuracy)$
|
||||
\end{enumerate}
|
||||
\item if ($tolerance > maxTolerance$) \hspace*{1cm} // algorithm finished because of lack of convergence
|
||||
\begin{enumerate}
|
||||
\item $removeModels(AODE, numItemsPack)$
|
||||
\end{enumerate}
|
||||
\item Return $AODE$
|
||||
\end{enumerate}
|
@@ -1,4 +1,5 @@
|
||||
filter = bayesnet/
|
||||
exclude-directories = build_debug/lib/
|
||||
exclude = bayesnet/utils/loguru.*
|
||||
print-summary = yes
|
||||
sort = uncovered-percent
|
||||
|
Submodule lib/catch2 updated: 8ac8190e49...bff6e35e2b
Submodule lib/folding updated: 37316a54e0...71d6055be4
2
lib/json
2
lib/json
Submodule lib/json updated: 0457de21cf...199dea11b1
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "ArffFiles.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CCMETRICS_H
|
||||
#define CCMETRICS_H
|
||||
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <ArffFiles.h>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <bayesnet/ensembles/BoostAODE.h>
|
||||
|
@@ -1,5 +1,4 @@
|
||||
if(ENABLE_TESTING)
|
||||
set(TEST_BAYESNET "unit_tests_bayesnet")
|
||||
include_directories(
|
||||
${BayesNet_SOURCE_DIR}/lib/Files
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp
|
||||
@@ -9,8 +8,17 @@ if(ENABLE_TESTING)
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
|
||||
set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCES})
|
||||
add_executable(${TEST_BAYESNET} ${TEST_SOURCES_BAYESNET})
|
||||
target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
|
||||
add_test(NAME ${TEST_BAYESNET} COMMAND ${TEST_BAYESNET})
|
||||
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc
|
||||
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc
|
||||
TestUtils.cc TestBayesEnsemble.cc ${BayesNet_SOURCES})
|
||||
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
|
||||
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
|
||||
add_test(NAME Network COMMAND TestBayesNet "[Network]")
|
||||
add_test(NAME Node COMMAND TestBayesNet "[Node]")
|
||||
add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
|
||||
add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
|
||||
add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
|
||||
add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
|
||||
add_test(NAME Models COMMAND TestBayesNet "[Models]")
|
||||
add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
|
||||
endif(ENABLE_TESTING)
|
||||
|
125
tests/TestBayesClassifier.cc
Normal file
125
tests/TestBayesClassifier.cc
Normal file
@@ -0,0 +1,125 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include <string>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/classifiers/TAN.h"
|
||||
#include "bayesnet/classifiers/KDB.h"
|
||||
#include "bayesnet/classifiers/KDBLd.h"
|
||||
|
||||
|
||||
TEST_CASE("Test Cannot build dataset with wrong data vector", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
raw.yv.pop_back();
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
}
|
||||
TEST_CASE("Test Cannot build dataset with wrong data tensor", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto yshort = torch::zeros({ 149 }, torch::kInt32);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.featurest, raw.classNamet, raw.statest), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.featurest, raw.classNamet, raw.statest), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
}
|
||||
TEST_CASE("Invalid data type", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", false);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), "dataset (X, y) must be of type Integer");
|
||||
}
|
||||
TEST_CASE("Invalid number of features", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto Xt = torch::cat({ raw.Xt, torch::zeros({ 1, 150 }, torch::kInt32) }, 0);
|
||||
REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), "Classifier: X 5 and features 4 must have the same number of features");
|
||||
}
|
||||
TEST_CASE("Invalid class name", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, "duck", raw.statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, "duck", raw.statest), "class name not found in states");
|
||||
}
|
||||
// fit() must throw std::invalid_argument when a listed feature has no entry
// in the states map.
TEST_CASE("Invalid feature name", "[Classifier]")
{
    auto raw = RawDatasets("iris", true);
    auto model = bayesnet::TAN();
    // Work on a copy so the fixture's own states stay intact, then drop one feature.
    auto incomplete_states = raw.statest;
    incomplete_states.erase("petallength");
    const std::string expected_message = "feature [petallength] not found in states";
    auto fit_missing_state = [&]() {
        model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, incomplete_states);
    };
    REQUIRE_THROWS_AS(fit_missing_state(), std::invalid_argument);
    REQUIRE_THROWS_WITH(fit_missing_state(), expected_message);
}
|
||||
// setHyperparameters() must reject the "alpha" key on a KDB(2) model with
// std::invalid_argument, echoing the offending JSON in the message.
// No dataset is needed: the exception is raised before any fit() call, so the
// previously loaded (and unused) iris fixture has been dropped.
TEST_CASE("Invalid hyperparameter", "[Classifier]")
{
    auto model = bayesnet::KDB(2);
    REQUIRE_THROWS_AS(model.setHyperparameters({ { "alpha", "0.0" } }), std::invalid_argument);
    REQUIRE_THROWS_WITH(model.setHyperparameters({ { "alpha", "0.0" } }), "Invalid hyperparameters{\"alpha\":\"0.0\"}");
}
|
||||
// After fitting TAN on iris, topological_order() must list the four features
// in exactly the order checked below.
TEST_CASE("Topological order", "[Classifier]")
{
    const std::vector<std::string> expected_order = { "petallength", "sepallength", "sepalwidth", "petalwidth" };
    auto raw = RawDatasets("iris", true);
    auto model = bayesnet::TAN();
    model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    auto order = model.topological_order();
    REQUIRE(order.size() == 4);
    // Position-by-position check so a failure names the offending slot.
    for (std::size_t pos = 0; pos < expected_order.size(); ++pos) {
        REQUIRE(order[pos] == expected_order[pos]);
    }
}
|
||||
// dump_cpt() on a TAN model fitted on iris must return a dump of size 1713.
TEST_CASE("Dump_cpt", "[Classifier]")
{
    auto raw = RawDatasets("iris", true);
    auto model = bayesnet::TAN();
    model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    // Only the size of the dump is pinned, not its content.
    REQUIRE(model.dump_cpt().size() == 1713);
}
|
||||
// predict(), predict_proba() and score() must all throw std::logic_error with
// the same message when called on a TAN model that was never fitted, for both
// the tensor and the vector overloads.
TEST_CASE("Not fitted model", "[Classifier]")
{
    const std::string message = "Classifier has not been fitted";
    auto raw = RawDatasets("iris", true);
    auto model = bayesnet::TAN();
    // Tensor overloads
    REQUIRE_THROWS_AS(model.predict(raw.Xt), std::logic_error);
    REQUIRE_THROWS_WITH(model.predict(raw.Xt), message);
    REQUIRE_THROWS_AS(model.predict_proba(raw.Xt), std::logic_error);
    REQUIRE_THROWS_WITH(model.predict_proba(raw.Xt), message);
    REQUIRE_THROWS_AS(model.score(raw.Xt, raw.yt), std::logic_error);
    REQUIRE_THROWS_WITH(model.score(raw.Xt, raw.yt), message);
    // Vector overloads
    REQUIRE_THROWS_AS(model.predict(raw.Xv), std::logic_error);
    REQUIRE_THROWS_WITH(model.predict(raw.Xv), message);
    REQUIRE_THROWS_AS(model.predict_proba(raw.Xv), std::logic_error);
    REQUIRE_THROWS_WITH(model.predict_proba(raw.Xv), message);
    REQUIRE_THROWS_AS(model.score(raw.Xv, raw.yv), std::logic_error);
    REQUIRE_THROWS_WITH(model.score(raw.Xv, raw.yv), message);
}
|
||||
// graph() on a KDB(2) model fitted on iris (vector overload) must return 15 entries.
TEST_CASE("KDB Graph", "[Classifier]")
{
    auto raw = RawDatasets("iris", true);
    auto model = bayesnet::KDB(2);
    model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    // Only the number of entries is pinned, not their text.
    REQUIRE(model.graph().size() == 15);
}
|
||||
// graph() on a KDBLd(2) model fitted on iris (tensor overload, second
// RawDatasets flag false) must return 15 entries.
TEST_CASE("KDBLd Graph", "[Classifier]")
{
    auto raw = RawDatasets("iris", false);
    auto model = bayesnet::KDBLd(2);
    model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    // Only the number of entries is pinned, not their text.
    REQUIRE(model.graph().size() == 15);
}
|
126
tests/TestBayesEnsemble.cc
Normal file
126
tests/TestBayesEnsemble.cc
Normal file
@@ -0,0 +1,126 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
#include "bayesnet/ensembles/AODE.h"
|
||||
#include "bayesnet/ensembles/AODELd.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
|
||||
// topological_order() on a fitted BoostAODE ensemble must return an empty list.
TEST_CASE("Topological Order", "[Ensemble]")
{
    auto clf = bayesnet::BoostAODE();
    auto raw = RawDatasets("glass", true);
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    REQUIRE(clf.topological_order().size() == 0);
}
|
||||
// dump_cpt() on a fitted BoostAODE ensemble must return an empty string.
TEST_CASE("Dump CPT", "[Ensemble]")
{
    auto clf = bayesnet::BoostAODE();
    auto raw = RawDatasets("glass", true);
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    auto cpt_dump = clf.dump_cpt();
    REQUIRE(cpt_dump == "");
}
|
||||
// getNumberOfStates() on BoostAODE fitted on iris must report 76.
TEST_CASE("Number of States", "[Ensemble]")
{
    auto raw = RawDatasets("iris", true);
    auto clf = bayesnet::BoostAODE();
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    const auto n_states = clf.getNumberOfStates();
    REQUIRE(n_states == 76);
}
|
||||
// show() on BoostAODE fitted on iris (bisection and convergence off,
// maxTolerance 1) must produce the 20 lines below: four groups of five
// "node -> children" lines.
TEST_CASE("Show", "[Ensemble]")
{
    const std::vector<std::string> expected = {
        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
        "petallength -> sepallength, sepalwidth, petalwidth, ",
        "petalwidth -> ",
        "sepallength -> ",
        "sepalwidth -> ",
        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
        "petallength -> ",
        "petalwidth -> sepallength, sepalwidth, petallength, ",
        "sepallength -> ",
        "sepalwidth -> ",
        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
        "petallength -> ",
        "petalwidth -> ",
        "sepallength -> sepalwidth, petallength, petalwidth, ",
        "sepalwidth -> ",
        "class -> sepallength, sepalwidth, petallength, petalwidth, ",
        "petallength -> ",
        "petalwidth -> ",
        "sepallength -> ",
        "sepalwidth -> sepallength, petallength, petalwidth, ",
    };
    auto raw = RawDatasets("iris", true);
    auto clf = bayesnet::BoostAODE();
    clf.setHyperparameters({
        {"bisection", false},
        {"maxTolerance", 1},
        {"convergence", false},
    });
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    auto lines = clf.show();
    // Size first, so the indexed comparison below cannot run past `expected`.
    REQUIRE(lines.size() == expected.size());
    for (size_t row = 0; row < lines.size(); ++row) {
        REQUIRE(lines[row] == expected[row]);
    }
}
|
||||
// graph() sizes for three ensembles: BoostAODE and AODE on iris (56 entries
// each) and AODELd on glass (261 entries).
TEST_CASE("Graph", "[Ensemble]")
{
    auto raw = RawDatasets("iris", true);

    // BoostAODE, vector overload.
    auto clf = bayesnet::BoostAODE();
    clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    auto graph = clf.graph();
    REQUIRE(graph.size() == 56);

    // AODE on the same data.
    auto clf2 = bayesnet::AODE();
    clf2.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
    graph = clf2.graph();
    REQUIRE(graph.size() == 56);

    // AODELd, tensor overload; the fixture is reloaded with the second flag false.
    raw = RawDatasets("glass", false);
    auto clf3 = bayesnet::AODELd();
    clf3.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    graph = clf3.graph();
    REQUIRE(graph.size() == 261);
}
|
||||
// Checks both overloads of Ensemble::compute_arg_max (vector-of-rows and
// tensor) against the same 6x3 fixture. The tensor is filled from the vector
// so the two inputs cannot drift apart.
TEST_CASE("Compute ArgMax", "[Ensemble]")
{
    // Thin subclass that forwards to Ensemble::compute_arg_max so the test can
    // call it directly (presumably it is not publicly accessible; verify in Ensemble.h).
    class TestEnsemble : public bayesnet::BoostAODE {
    public:
        TestEnsemble() : bayesnet::BoostAODE() {}
        torch::Tensor compute_arg_max(torch::Tensor& X) { return Ensemble::compute_arg_max(X); }
        std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X) { return Ensemble::compute_arg_max(X); }
    };
    TestEnsemble clf;
    std::vector<std::vector<double>> X = {
        {0.1f, 0.2f, 0.3f},
        {0.4f, 0.9f, 0.6f},
        {0.7f, 0.8f, 0.9f},
        {0.5f, 0.2f, 0.1f},
        {0.3f, 0.7f, 0.2f},
        {0.5f, 0.5f, 0.2f}
    };
    // The last row has a tie between columns 0 and 1; the expected answer is 0.
    std::vector<int> expected = { 2, 1, 2, 0, 1, 0 };

    // Vector overload.
    auto argmax = clf.compute_arg_max(X);
    REQUIRE(argmax.size() == expected.size());
    REQUIRE(argmax == expected);

    // Tensor overload: copy the fixture element by element into a float32 tensor.
    const auto n_rows = static_cast<int64_t>(X.size());
    const auto n_cols = static_cast<int64_t>(X[0].size());
    auto Xt = torch::zeros({ n_rows, n_cols }, torch::kFloat32);
    for (int64_t r = 0; r < n_rows; ++r) {
        for (int64_t c = 0; c < n_cols; ++c) {
            Xt[r][c] = X[r][c];
        }
    }
    auto argmaxt = clf.compute_arg_max(Xt);
    // Tensor::size() is signed; cast once to avoid a signed/unsigned comparison.
    REQUIRE(argmaxt.size(0) == static_cast<int64_t>(expected.size()));
    for (int64_t i = 0; i < argmaxt.size(0); ++i) {
        REQUIRE(argmaxt[i].item<int>() == expected[i]);
    }
}
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
@@ -5,7 +11,7 @@
|
||||
#include "TestUtils.h"
|
||||
|
||||
|
||||
TEST_CASE("Metrics Test", "[BayesNet]")
|
||||
TEST_CASE("Metrics Test", "[Metrics]")
|
||||
{
|
||||
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
|
||||
map<std::string, pair<int, std::vector<int>>> resultsKBest = {
|
||||
@@ -32,31 +38,41 @@ TEST_CASE("Metrics Test", "[BayesNet]")
|
||||
};
|
||||
auto raw = RawDatasets(file_name, true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.featurest, raw.classNamet, raw.classNumStates);
|
||||
bayesnet::Metrics metricsv(raw.Xv, raw.yv, raw.featurest, raw.classNamet, raw.classNumStates);
|
||||
|
||||
SECTION("Test Constructor")
|
||||
{
|
||||
REQUIRE(metrics.getScoresKBest().size() == 0);
|
||||
REQUIRE(metricsv.getScoresKBest().size() == 0);
|
||||
}
|
||||
|
||||
SECTION("Test SelectKBestWeighted")
|
||||
{
|
||||
std::vector<int> kBest = metrics.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first);
|
||||
std::vector<int> kBestv = metricsv.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first);
|
||||
REQUIRE(kBest.size() == resultsKBest.at(file_name).first);
|
||||
REQUIRE(kBestv.size() == resultsKBest.at(file_name).first);
|
||||
REQUIRE(kBest == resultsKBest.at(file_name).second);
|
||||
REQUIRE(kBestv == resultsKBest.at(file_name).second);
|
||||
}
|
||||
|
||||
SECTION("Test Mutual Information")
|
||||
{
|
||||
auto result = metrics.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights);
|
||||
auto resultv = metricsv.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights);
|
||||
REQUIRE(result == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon));
|
||||
REQUIRE(resultv == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon));
|
||||
}
|
||||
|
||||
SECTION("Test Maximum Spanning Tree")
|
||||
{
|
||||
auto weights_matrix = metrics.conditionalEdge(raw.weights);
|
||||
auto weights_matrixv = metricsv.conditionalEdge(raw.weights);
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
auto result = metrics.maximumSpanningTree(raw.featurest, weights_matrix, i);
|
||||
auto resultv = metricsv.maximumSpanningTree(raw.featurest, weights_matrixv, i);
|
||||
REQUIRE(result == resultsMST.at({ file_name, i }));
|
||||
REQUIRE(resultv == resultsMST.at({ file_name, i }));
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,7 +1,14 @@
|
||||
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include "bayesnet/classifiers/KDB.h"
|
||||
#include "bayesnet/classifiers/TAN.h"
|
||||
#include "bayesnet/classifiers/SPODE.h"
|
||||
@@ -13,19 +20,19 @@
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
const std::string ACTUAL_VERSION = "1.0.4";
|
||||
const std::string ACTUAL_VERSION = "1.0.5";
|
||||
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[BayesNet]")
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
{
|
||||
map <pair<std::string, std::string>, float> scores{
|
||||
// Diabetes
|
||||
{{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
|
||||
{{"diabetes", "AODE"}, 0.82161}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
|
||||
{{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f},
|
||||
// Ecoli
|
||||
{{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
|
||||
{{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f},
|
||||
// Glass
|
||||
{{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
|
||||
{{"glass", "AODE"}, 0.79439}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
|
||||
{{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f},
|
||||
// Iris
|
||||
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
|
||||
@@ -49,8 +56,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[BayesNet]")
|
||||
auto raw = RawDatasets(file_name, discretize);
|
||||
clf->fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
auto score = clf->score(raw.Xt, raw.yt);
|
||||
INFO("File: " + file_name);
|
||||
INFO("Classifier: " + name + " File: " + file_name);
|
||||
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
|
||||
REQUIRE(clf->getStatus() == bayesnet::NORMAL);
|
||||
}
|
||||
}
|
||||
SECTION("Library check version")
|
||||
@@ -60,7 +68,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[BayesNet]")
|
||||
}
|
||||
delete clf;
|
||||
}
|
||||
TEST_CASE("Models features", "[BayesNet]")
|
||||
TEST_CASE("Models features & Graph", "[Models]")
|
||||
{
|
||||
auto graph = std::vector<std::string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
|
||||
"class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
|
||||
@@ -69,6 +77,8 @@ TEST_CASE("Models features", "[BayesNet]")
|
||||
"sepallength -> sepalwidth", "sepalwidth [shape=circle] \n", "sepalwidth -> petalwidth", "}\n"
|
||||
}
|
||||
);
|
||||
SECTION("Test TAN")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::TAN();
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
@@ -78,8 +88,21 @@ TEST_CASE("Models features", "[BayesNet]")
|
||||
REQUIRE(clf.getClassNumStates() == 3);
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.graph("Test") == graph);
|
||||
}
|
||||
SECTION("Test TANLd")
|
||||
{
|
||||
auto clf = bayesnet::TANLd();
|
||||
auto raw = RawDatasets("iris", false);
|
||||
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||
REQUIRE(clf.getNumberOfStates() == 19);
|
||||
REQUIRE(clf.getClassNumStates() == 3);
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.graph("Test") == graph);
|
||||
}
|
||||
}
|
||||
TEST_CASE("Get num features & num edges", "[BayesNet]")
|
||||
TEST_CASE("Get num features & num edges", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::KDB(2);
|
||||
@@ -87,41 +110,8 @@ TEST_CASE("Get num features & num edges", "[BayesNet]")
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 8);
|
||||
}
|
||||
TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({ {"select_features", "CFS"} });
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 153);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
}
|
||||
TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]")
|
||||
{
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
auto clf = bayesnet::BoostAODE(true);
|
||||
clf.setHyperparameters({
|
||||
{"order", "asc"},
|
||||
{"convergence", true},
|
||||
{"repeatSparent",true},
|
||||
{"select_features","CFS"},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
REQUIRE(clf.getNumberOfNodes() == 72);
|
||||
REQUIRE(clf.getNumberOfEdges() == 120);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 8");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Model predict_proba", "[BayesNet]")
|
||||
|
||||
TEST_CASE("Model predict_proba", "[Models]")
|
||||
{
|
||||
std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
|
||||
auto res_prob_tan = std::vector<std::vector<double>>({
|
||||
@@ -147,15 +137,15 @@ TEST_CASE("Model predict_proba", "[BayesNet]")
|
||||
{0.003135, 0.991799, 0.0050661}
|
||||
});
|
||||
auto res_prob_baode = std::vector<std::vector<double>>({
|
||||
{0.00803291, 0.9676, 0.0243672},
|
||||
{0.00398714, 0.945126, 0.050887},
|
||||
{0.00398714, 0.945126, 0.050887},
|
||||
{0.00398714, 0.945126, 0.050887},
|
||||
{0.00189227, 0.859575, 0.138533},
|
||||
{0.0118341, 0.442149, 0.546017},
|
||||
{0.0216135, 0.785781, 0.192605},
|
||||
{0.0204803, 0.844276, 0.135244},
|
||||
{0.00576313, 0.961665, 0.0325716},
|
||||
{0.0112349, 0.962274, 0.0264907},
|
||||
{0.00371025, 0.950592, 0.0456973},
|
||||
{0.00371025, 0.950592, 0.0456973},
|
||||
{0.00371025, 0.950592, 0.0456973},
|
||||
{0.00369275, 0.84967, 0.146637},
|
||||
{0.0252205, 0.113564, 0.861215},
|
||||
{0.0284828, 0.770524, 0.200993},
|
||||
{0.0213182, 0.857189, 0.121493},
|
||||
{0.00868436, 0.949494, 0.0418215}
|
||||
});
|
||||
auto res_prob_voting = std::vector<std::vector<double>>({
|
||||
{0, 1, 0},
|
||||
@@ -163,8 +153,8 @@ TEST_CASE("Model predict_proba", "[BayesNet]")
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 0.447909, 0.552091},
|
||||
{0, 0.811482, 0.188517},
|
||||
{0, 0, 1},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0}
|
||||
});
|
||||
@@ -187,7 +177,7 @@ TEST_CASE("Model predict_proba", "[BayesNet]")
|
||||
REQUIRE(y_pred.size() == raw.yv.size());
|
||||
REQUIRE(y_pred_proba[0].size() == 3);
|
||||
REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size());
|
||||
for (int i = 0; i < y_pred_proba.size(); ++i) {
|
||||
for (int i = 0; i < 9; ++i) {
|
||||
auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end());
|
||||
int predictedClass = distance(y_pred_proba[i].begin(), maxElem);
|
||||
REQUIRE(predictedClass == y_pred[i]);
|
||||
@@ -198,7 +188,7 @@ TEST_CASE("Model predict_proba", "[BayesNet]")
|
||||
}
|
||||
}
|
||||
// Check predict_proba values for vectors and tensors
|
||||
for (int i = 0; i < res_prob.size(); i++) {
|
||||
for (int i = 0; i < 9; i++) {
|
||||
REQUIRE(y_pred[i] == yt_pred[i].item<int>());
|
||||
for (int j = 0; j < 3; j++) {
|
||||
REQUIRE(res_prob[model][i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon));
|
||||
@@ -208,10 +198,11 @@ TEST_CASE("Model predict_proba", "[BayesNet]")
|
||||
delete clf;
|
||||
}
|
||||
}
|
||||
TEST_CASE("BoostAODE voting-proba", "[BayesNet]")
|
||||
|
||||
TEST_CASE("AODE voting-proba", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", false);
|
||||
auto clf = bayesnet::BoostAODE(false);
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::AODE(false);
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto score_proba = clf.score(raw.Xv, raw.yv);
|
||||
auto pred_proba = clf.predict_proba(raw.Xv);
|
||||
@@ -220,50 +211,60 @@ TEST_CASE("BoostAODE voting-proba", "[BayesNet]")
|
||||
});
|
||||
auto score_voting = clf.score(raw.Xv, raw.yv);
|
||||
auto pred_voting = clf.predict_proba(raw.Xv);
|
||||
REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
|
||||
REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_voting[83][2] == Catch::Approx(0.552091).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_proba[83][2] == Catch::Approx(0.546017).epsilon(raw.epsilon));
|
||||
clf.dump_cpt();
|
||||
REQUIRE(score_proba == Catch::Approx(0.79439f).epsilon(raw.epsilon));
|
||||
REQUIRE(score_voting == Catch::Approx(0.78972f).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_voting[67][0] == Catch::Approx(0.888889).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_proba[67][0] == Catch::Approx(0.702184).epsilon(raw.epsilon));
|
||||
REQUIRE(clf.topological_order() == std::vector<std::string>());
|
||||
}
|
||||
TEST_CASE("BoostAODE order asc, desc & random", "[BayesNet]")
|
||||
TEST_CASE("SPODELd dataset", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", false);
|
||||
auto clf = bayesnet::SPODELd(0);
|
||||
// raw.dataset.to(torch::kFloat32);
|
||||
clf.fit(raw.dataset, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto score = clf.score(raw.Xt, raw.yt);
|
||||
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("KDB with hyperparameters", "[Models]")
|
||||
{
|
||||
|
||||
auto raw = RawDatasets("glass", true);
|
||||
std::map<std::string, double> scores{
|
||||
{"asc", 0.83178f }, { "desc", 0.84579f }, { "rand", 0.83645f }
|
||||
};
|
||||
for (const std::string& order : { "asc", "desc", "rand" }) {
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({
|
||||
{"order", order},
|
||||
});
|
||||
auto clf = bayesnet::KDB(2);
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
INFO("order: " + order);
|
||||
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
TEST_CASE("BoostAODE predict_single", "[BayesNet]")
|
||||
{
|
||||
|
||||
auto raw = RawDatasets("glass", true);
|
||||
std::map<bool, double> scores{
|
||||
{true, 0.84579f }, { false, 0.80841f }
|
||||
};
|
||||
for (const bool kind : { true, false}) {
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({
|
||||
{"predict_single", kind}, {"order", "desc" },
|
||||
{"k", 3},
|
||||
{"theta", 0.7},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
INFO("kind: " + std::string(kind ? "true" : "false"));
|
||||
REQUIRE(score == Catch::Approx(scores[kind]).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(scores[kind]).epsilon(raw.epsilon));
|
||||
}
|
||||
auto scoret = clf.score(raw.Xv, raw.yv);
|
||||
REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
|
||||
}
|
||||
// SPODELd::fit on the dataset loaded with the second RawDatasets flag set to
// true must throw std::runtime_error.
TEST_CASE("Incorrect type of data for SPODELd", "[Models]")
{
    auto clf = bayesnet::SPODELd(0);
    auto raw = RawDatasets("iris", true);
    auto fit_dataset = [&]() {
        clf.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest);
    };
    REQUIRE_THROWS_AS(fit_dataset(), std::runtime_error);
}
|
||||
// An AODE ensemble that was never fitted must throw std::logic_error with the
// message "Ensemble has not been fitted" from predict(), predict_proba() and
// score(), for both vector and tensor inputs.
TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
{
    const std::string message = "Ensemble has not been fitted";
    auto raw = RawDatasets("iris", true);
    auto clf = bayesnet::AODE();
    // Exception type
    REQUIRE_THROWS_AS(clf.predict(raw.Xv), std::logic_error);
    REQUIRE_THROWS_AS(clf.predict_proba(raw.Xv), std::logic_error);
    REQUIRE_THROWS_AS(clf.predict(raw.Xt), std::logic_error);
    REQUIRE_THROWS_AS(clf.predict_proba(raw.Xt), std::logic_error);
    REQUIRE_THROWS_AS(clf.score(raw.Xv, raw.yv), std::logic_error);
    REQUIRE_THROWS_AS(clf.score(raw.Xt, raw.yt), std::logic_error);
    // Exception message
    REQUIRE_THROWS_WITH(clf.predict(raw.Xv), message);
    REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xv), message);
    REQUIRE_THROWS_WITH(clf.predict(raw.Xt), message);
    REQUIRE_THROWS_WITH(clf.predict_proba(raw.Xt), message);
    REQUIRE_THROWS_WITH(clf.score(raw.Xv, raw.yv), message);
    REQUIRE_THROWS_WITH(clf.score(raw.Xt, raw.yt), message);
}
|
@@ -1,9 +1,18 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include <string>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/network/Network.h"
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
|
||||
void buildModel(bayesnet::Network& net, const std::vector<std::string>& features, const std::string& className)
|
||||
{
|
||||
@@ -20,7 +29,7 @@ void buildModel(bayesnet::Network& net, const std::vector<std::string>& features
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
{
|
||||
|
||||
auto raw = RawDatasets("iris", true);
|
||||
@@ -110,6 +119,22 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
net3.fit(raw.Xt, raw.yt, raw.weights, raw.featurest, raw.classNamet, raw.statest);
|
||||
REQUIRE(net.getStates() == net2.getStates());
|
||||
REQUIRE(net.getStates() == net3.getStates());
|
||||
REQUIRE(net.getFeatures() == net2.getFeatures());
|
||||
REQUIRE(net.getFeatures() == net3.getFeatures());
|
||||
REQUIRE(net.getClassName() == net2.getClassName());
|
||||
REQUIRE(net.getClassName() == net3.getClassName());
|
||||
REQUIRE(net.getNodes().size() == net2.getNodes().size());
|
||||
REQUIRE(net.getNodes().size() == net3.getNodes().size());
|
||||
REQUIRE(net.getEdges() == net2.getEdges());
|
||||
REQUIRE(net.getEdges() == net3.getEdges());
|
||||
REQUIRE(net.getNumEdges() == net2.getNumEdges());
|
||||
REQUIRE(net.getNumEdges() == net3.getNumEdges());
|
||||
REQUIRE(net.getClassNumStates() == net2.getClassNumStates());
|
||||
REQUIRE(net.getClassNumStates() == net3.getClassNumStates());
|
||||
REQUIRE(net.getSamples().size(0) == net2.getSamples().size(0));
|
||||
REQUIRE(net.getSamples().size(0) == net3.getSamples().size(0));
|
||||
REQUIRE(net.getSamples().size(1) == net2.getSamples().size(1));
|
||||
REQUIRE(net.getSamples().size(1) == net3.getSamples().size(1));
|
||||
// Check Conditional Probabilities tables
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
auto feature = features.at(i);
|
||||
@@ -124,7 +149,6 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test show")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@@ -138,7 +162,6 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test topological_sort")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@@ -152,7 +175,6 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test graph")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@@ -170,7 +192,6 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test predict")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
@@ -180,7 +201,6 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test predict_proba")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
@@ -202,10 +222,230 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
|
||||
}
|
||||
SECTION("Test score")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto score = net.score(raw.Xv, raw.yv);
|
||||
REQUIRE(score == Catch::Approx(0.97333333).margin(threshold));
|
||||
}
|
||||
SECTION("Copy constructor")
|
||||
{
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto net2 = bayesnet::Network(net);
|
||||
REQUIRE(net.getFeatures() == net2.getFeatures());
|
||||
REQUIRE(net.getEdges() == net2.getEdges());
|
||||
REQUIRE(net.getNumEdges() == net2.getNumEdges());
|
||||
REQUIRE(net.getStates() == net2.getStates());
|
||||
REQUIRE(net.getClassName() == net2.getClassName());
|
||||
REQUIRE(net.getClassNumStates() == net2.getClassNumStates());
|
||||
REQUIRE(net.getSamples().size(0) == net2.getSamples().size(0));
|
||||
REQUIRE(net.getSamples().size(1) == net2.getSamples().size(1));
|
||||
REQUIRE(net.getNodes().size() == net2.getNodes().size());
|
||||
for (const auto& feature : net.getFeatures()) {
|
||||
auto& node = net.getNodes().at(feature);
|
||||
auto& node2 = net2.getNodes().at(feature);
|
||||
REQUIRE(node->getName() == node2->getName());
|
||||
REQUIRE(node->getChildren().size() == node2->getChildren().size());
|
||||
REQUIRE(node->getParents().size() == node2->getParents().size());
|
||||
REQUIRE(node->getCPT().equal(node2->getCPT()));
|
||||
}
|
||||
}
|
||||
SECTION("Test oddities")
|
||||
{
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
// predict without fitting
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
auto test_tensor = bayesnet::vectorToTensor(test);
|
||||
REQUIRE_THROWS_AS(net.predict(test), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net.predict(test), "You must call fit() before calling predict()");
|
||||
REQUIRE_THROWS_AS(net.predict(test_tensor), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net.predict(test_tensor), "You must call fit() before calling predict()");
|
||||
REQUIRE_THROWS_AS(net.predict_proba(test), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net.predict_proba(test), "You must call fit() before calling predict_proba()");
|
||||
REQUIRE_THROWS_AS(net.score(raw.Xv, raw.yv), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net.score(raw.Xv, raw.yv), "You must call fit() before calling predict()");
|
||||
// predict with wrong data
|
||||
auto netx = bayesnet::Network();
|
||||
buildModel(netx, raw.featuresv, raw.classNamev);
|
||||
netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
std::vector<std::vector<int>> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} };
|
||||
auto test_tensor2 = bayesnet::vectorToTensor(test2, false);
|
||||
REQUIRE_THROWS_AS(netx.predict(test2), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test2), "Sample size (3) does not match the number of features (4)");
|
||||
REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "Sample size (3) does not match the number of features (4)");
|
||||
// fit with wrong data
|
||||
// Weights
|
||||
auto net2 = bayesnet::Network();
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.featuresv, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
std::string invalid_weights = "Weights (0) must have the same number of elements as samples (150) in Network::fit";
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.featuresv, raw.classNamev, raw.statesv), invalid_weights);
|
||||
// X & y
|
||||
std::string invalid_labels = "X and y must have the same number of samples in Network::fit (150 != 0)";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv), invalid_labels);
|
||||
// Features
|
||||
std::string invalid_features = "X and features must have the same number of features in Network::fit (4 != 0)";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.classNamev, raw.statesv), invalid_features);
|
||||
// Different number of features
|
||||
auto net3 = bayesnet::Network();
|
||||
auto test2y = { 1, 2, 3, 4, 5 };
|
||||
buildModel(net3, raw.featuresv, raw.classNamev);
|
||||
auto features3 = raw.featuresv;
|
||||
features3.pop_back();
|
||||
std::string invalid_features2 = "X and local features must have the same number of features in Network::fit (3 != 4)";
|
||||
REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.classNamev, raw.statesv), invalid_features2);
|
||||
// Uninitialized network
|
||||
std::string network_invalid = "The network has not been initialized. You must call addNode() before calling fit()";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), network_invalid);
|
||||
// Classname
|
||||
std::string invalid_classname = "Class Name not found in Network::features";
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), invalid_classname);
|
||||
// Invalid feature
|
||||
auto features2 = raw.featuresv;
|
||||
features2.pop_back();
|
||||
features2.push_back("duck");
|
||||
std::string invalid_feature = "Feature duck not found in Network::features";
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.classNamev, raw.statesv), invalid_feature);
|
||||
}
|
||||
|
||||
}
|
||||
TEST_CASE("Test and empty Node", "[Network]")
{
    // A node needs a non-empty name: addNode("") is expected to be rejected
    // with std::invalid_argument carrying a fixed diagnostic message.
    bayesnet::Network network{};
    const std::string expected_message = "Node name cannot be empty";
    REQUIRE_THROWS_AS(network.addNode(""), std::invalid_argument);
    REQUIRE_THROWS_WITH(network.addNode(""), expected_message);
}
|
||||
TEST_CASE("Cicle in Network", "[Network]")
{
    // Build the chain A -> B -> C, then check that closing it with C -> A
    // is refused because it would introduce a cycle.
    bayesnet::Network graph{};
    for (const char* name : { "A", "B", "C" }) {
        graph.addNode(name);
    }
    graph.addEdge("A", "B");
    graph.addEdge("B", "C");

    const std::string cycle_message = "Adding this edge forms a cycle in the graph.";
    REQUIRE_THROWS_AS(graph.addEdge("C", "A"), std::invalid_argument);
    REQUIRE_THROWS_WITH(graph.addEdge("C", "A"), cycle_message);
}
|
||||
TEST_CASE("Test max threads constructor", "[Network]")
{
    // getMaxThreads() must echo whatever the constructor was given; a
    // default-constructed network is expected to report 0.95f.
    bayesnet::Network default_net{};
    REQUIRE(default_net.getMaxThreads() == 0.95f);

    bayesnet::Network whole_net(4);
    REQUIRE(whole_net.getMaxThreads() == 4);

    bayesnet::Network fractional_net(1.75);
    REQUIRE(fractional_net.getMaxThreads() == 1.75);
}
|
||||
TEST_CASE("Edges troubles", "[Network]")
{
    // Only "A" and "B" exist; any edge touching the unknown node "C" must be
    // rejected, with a message telling whether the child or the parent is missing.
    bayesnet::Network graph{};
    graph.addNode("A");
    graph.addNode("B");

    const std::string missing_child = "Child node C does not exist";
    const std::string missing_parent = "Parent node C does not exist";

    REQUIRE_THROWS_AS(graph.addEdge("A", "C"), std::invalid_argument);
    REQUIRE_THROWS_WITH(graph.addEdge("A", "C"), missing_child);
    REQUIRE_THROWS_AS(graph.addEdge("C", "A"), std::invalid_argument);
    REQUIRE_THROWS_WITH(graph.addEdge("C", "A"), missing_parent);
}
|
||||
TEST_CASE("Dump CPT", "[Network]")
|
||||
{
|
||||
// Fits a network (structure created by buildModel) on the "iris" dataset and
// checks that dump_cpt() returns exactly the text stored in `expected`.
// The comparison is an exact string equality, so the literal below is
// whitespace-sensitive and must not be reformatted.
auto net = bayesnet::Network();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
auto res = net.dump_cpt();
|
||||
std::string expected = R"(* class: (3) : [3]
|
||||
0.3333
|
||||
0.3333
|
||||
0.3333
|
||||
[ CPUFloatType{3} ]
|
||||
* petallength: (4) : [4, 3, 3]
|
||||
(1,.,.) =
|
||||
0.9388 0.1000 0.2000
|
||||
0.6250 0.0526 0.1667
|
||||
0.4000 0.0303 0.0196
|
||||
|
||||
(2,.,.) =
|
||||
0.0204 0.7000 0.4000
|
||||
0.1250 0.8421 0.1667
|
||||
0.2000 0.7273 0.0196
|
||||
|
||||
(3,.,.) =
|
||||
0.0204 0.1000 0.2000
|
||||
0.1250 0.0526 0.5000
|
||||
0.2000 0.1818 0.1373
|
||||
|
||||
(4,.,.) =
|
||||
0.0204 0.1000 0.2000
|
||||
0.1250 0.0526 0.1667
|
||||
0.2000 0.0606 0.8235
|
||||
[ CPUFloatType{4,3,3} ]
|
||||
* petalwidth: (3) : [3, 6, 3]
|
||||
(1,.,.) =
|
||||
0.5000 0.0417 0.0714
|
||||
0.3333 0.1111 0.0909
|
||||
0.5000 0.1000 0.2000
|
||||
0.7778 0.0909 0.0667
|
||||
0.8667 0.1000 0.0667
|
||||
0.9394 0.2500 0.1250
|
||||
|
||||
(2,.,.) =
|
||||
0.2500 0.9167 0.2857
|
||||
0.3333 0.7778 0.1818
|
||||
0.2500 0.8000 0.2000
|
||||
0.1111 0.8182 0.1333
|
||||
0.0667 0.7000 0.0667
|
||||
0.0303 0.5000 0.1250
|
||||
|
||||
(3,.,.) =
|
||||
0.2500 0.0417 0.6429
|
||||
0.3333 0.1111 0.7273
|
||||
0.2500 0.1000 0.6000
|
||||
0.1111 0.0909 0.8000
|
||||
0.0667 0.2000 0.8667
|
||||
0.0303 0.2500 0.7500
|
||||
[ CPUFloatType{3,6,3} ]
|
||||
* sepallength: (3) : [3, 3]
|
||||
0.8679 0.1321 0.0377
|
||||
0.0943 0.3019 0.0566
|
||||
0.0377 0.5660 0.9057
|
||||
[ CPUFloatType{3,3} ]
|
||||
* sepalwidth: (6) : [6, 3, 3]
|
||||
(1,.,.) =
|
||||
0.0392 0.5000 0.2857
|
||||
0.1000 0.4286 0.2500
|
||||
0.1429 0.2571 0.1887
|
||||
|
||||
(2,.,.) =
|
||||
0.0196 0.0833 0.1429
|
||||
0.1000 0.1429 0.2500
|
||||
0.1429 0.1429 0.1509
|
||||
|
||||
(3,.,.) =
|
||||
0.0392 0.0833 0.1429
|
||||
0.1000 0.1429 0.1250
|
||||
0.1429 0.1714 0.0566
|
||||
|
||||
(4,.,.) =
|
||||
0.1373 0.1667 0.1429
|
||||
0.1000 0.1905 0.1250
|
||||
0.1429 0.1429 0.2453
|
||||
|
||||
(5,.,.) =
|
||||
0.2549 0.0833 0.1429
|
||||
0.1000 0.0476 0.1250
|
||||
0.1429 0.2286 0.2453
|
||||
|
||||
(6,.,.) =
|
||||
0.5098 0.0833 0.1429
|
||||
0.5000 0.0476 0.1250
|
||||
0.2857 0.0571 0.1132
|
||||
[ CPUFloatType{6,3,3} ]
|
||||
)";
|
||||
// Exact match: any change in CPT values or in the dump format fails here.
REQUIRE(res == expected);
|
||||
}
|
||||
|
||||
|
90
tests/TestBayesNode.cc
Normal file
90
tests/TestBayesNode.cc
Normal file
@@ -0,0 +1,90 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <string>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/network/Network.h"
|
||||
|
||||
|
||||
|
||||
TEST_CASE("Test Node children and parents", "[Node]")
{
    // Exercise the parent/child bookkeeping of a single node: attach two
    // parents and three children, remove one of each, then clear everything.
    bayesnet::Node subject("Node");
    REQUIRE(subject.getName() == "Node");

    bayesnet::Node first_parent("P1");
    bayesnet::Node second_parent("P2");
    bayesnet::Node first_child("H1");
    bayesnet::Node second_child("H2");
    bayesnet::Node third_child("H3");

    subject.addParent(&first_parent);
    subject.addParent(&second_parent);
    subject.addChild(&first_child);
    subject.addChild(&second_child);
    subject.addChild(&third_child);

    // Relatives are reported in insertion order.
    auto linked_parents = subject.getParents();
    auto linked_children = subject.getChildren();
    REQUIRE(linked_parents.size() == 2);
    REQUIRE(linked_children.size() == 3);
    REQUIRE(linked_parents[0]->getName() == "P1");
    REQUIRE(linked_parents[1]->getName() == "P2");
    REQUIRE(linked_children[0]->getName() == "H1");
    REQUIRE(linked_children[1]->getName() == "H2");
    REQUIRE(linked_children[2]->getName() == "H3");

    // Dropping one parent and one child shrinks each list by one.
    subject.removeParent(&first_parent);
    subject.removeChild(&first_child);
    linked_parents = subject.getParents();
    linked_children = subject.getChildren();
    REQUIRE(linked_parents.size() == 1);
    REQUIRE(linked_children.size() == 2);

    // clear() leaves the node without any relatives.
    subject.clear();
    linked_parents = subject.getParents();
    linked_children = subject.getChildren();
    REQUIRE(linked_parents.size() == 0);
    REQUIRE(linked_children.size() == 0);
}
|
||||
TEST_CASE("TEST MinFill method", "[Node]")
{
    // Checks Node::minFill() on a hand-wired five-node graph.
    // Child links registered below:
    //   0 -> 1,2,3
    //   1 -> 2,4
    //   2 -> 3,4
    //   3 -> 4
    // Parent links are registered separately; note that node 4 only lists
    // nodes 1 and 3 as parents, and the expected values rely on exactly this wiring.
    bayesnet::Node v0("0");
    bayesnet::Node v1("1");
    bayesnet::Node v2("2");
    bayesnet::Node v3("3");
    bayesnet::Node v4("4");

    // Links of node 0
    v0.addChild(&v1);
    v0.addChild(&v2);
    v0.addChild(&v3);

    // Links of node 1
    v1.addChild(&v2);
    v1.addChild(&v4);
    v1.addParent(&v0);

    // Links of node 2
    v2.addChild(&v3);
    v2.addChild(&v4);
    v2.addParent(&v0);
    v2.addParent(&v1);

    // Links of node 3
    v3.addChild(&v4);
    v3.addParent(&v0);
    v3.addParent(&v2);

    // Links of node 4
    v4.addParent(&v1);
    v4.addParent(&v3);

    REQUIRE(v0.minFill() == 3);
    REQUIRE(v1.minFill() == 3);
    REQUIRE(v2.minFill() == 6);
    REQUIRE(v3.minFill() == 3);
    REQUIRE(v4.minFill() == 1);
}
|
184
tests/TestBoostAODE.cc
Normal file
184
tests/TestBoostAODE.cc
Normal file
@@ -0,0 +1,184 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
|
||||
TEST_CASE("Feature_select CFS", "[BoostAODE]")
{
    // BoostAODE initialised through CFS feature selection on the "glass"
    // dataset: verify the model size and the notes reported after fitting.
    RawDatasets data("glass", true);
    bayesnet::BoostAODE model{};
    const nlohmann::json hyperparameters = { {"select_features", "CFS"} };
    model.setHyperparameters(hyperparameters);
    model.fit(data.Xv, data.yv, data.featuresv, data.classNamev, data.statesv);

    REQUIRE(model.getNumberOfNodes() == 90);
    REQUIRE(model.getNumberOfEdges() == 153);

    const auto notes = model.getNotes();
    REQUIRE(notes.size() == 2);
    REQUIRE(notes[0] == "Used features in initialization: 6 of 9 with CFS");
    REQUIRE(notes[1] == "Number of models: 9");
}
|
||||
TEST_CASE("Feature_select IWSS", "[BoostAODE]")
{
    // BoostAODE initialised through IWSS feature selection (threshold 0.5) on
    // the "glass" dataset: verify the model size and the notes after fitting.
    RawDatasets data("glass", true);
    bayesnet::BoostAODE model{};
    const nlohmann::json hyperparameters = { {"select_features", "IWSS"}, {"threshold", 0.5 } };
    model.setHyperparameters(hyperparameters);
    model.fit(data.Xv, data.yv, data.featuresv, data.classNamev, data.statesv);

    REQUIRE(model.getNumberOfNodes() == 90);
    REQUIRE(model.getNumberOfEdges() == 153);

    const auto notes = model.getNotes();
    REQUIRE(notes.size() == 2);
    REQUIRE(notes[0] == "Used features in initialization: 4 of 9 with IWSS");
    REQUIRE(notes[1] == "Number of models: 9");
}
|
||||
TEST_CASE("Feature_select FCBF", "[BoostAODE]")
{
    // BoostAODE initialised through FCBF feature selection (threshold 1e-7) on
    // the "glass" dataset: verify the model size and the notes after fitting.
    RawDatasets data("glass", true);
    bayesnet::BoostAODE model{};
    const nlohmann::json hyperparameters = { {"select_features", "FCBF"}, {"threshold", 1e-7 } };
    model.setHyperparameters(hyperparameters);
    model.fit(data.Xv, data.yv, data.featuresv, data.classNamev, data.statesv);

    REQUIRE(model.getNumberOfNodes() == 90);
    REQUIRE(model.getNumberOfEdges() == 153);

    const auto notes = model.getNotes();
    REQUIRE(notes.size() == 2);
    REQUIRE(notes[0] == "Used features in initialization: 5 of 9 with FCBF");
    REQUIRE(notes[1] == "Number of models: 9");
}
|
||||
TEST_CASE("Test used features in train note and score", "[BoostAODE]")
{
    // Ascending order + convergence + CFS initialisation on "diabetes":
    // check model size, the reported notes, and that scoring through the
    // vector and the tensor interfaces yields the same accuracy.
    RawDatasets data("diabetes", true);
    bayesnet::BoostAODE model(true);
    const nlohmann::json hyperparameters = {
        {"order", "asc"},
        {"convergence", true},
        {"select_features","CFS"},
    };
    model.setHyperparameters(hyperparameters);
    model.fit(data.Xv, data.yv, data.featuresv, data.classNamev, data.statesv);

    REQUIRE(model.getNumberOfNodes() == 72);
    REQUIRE(model.getNumberOfEdges() == 120);

    const auto notes = model.getNotes();
    REQUIRE(notes.size() == 2);
    REQUIRE(notes[0] == "Used features in initialization: 6 of 8 with CFS");
    REQUIRE(notes[1] == "Number of models: 8");

    const auto vector_score = model.score(data.Xv, data.yv);
    const auto tensor_score = model.score(data.Xt, data.yt);
    REQUIRE(vector_score == Catch::Approx(0.80078).epsilon(data.epsilon));
    REQUIRE(tensor_score == Catch::Approx(0.80078).epsilon(data.epsilon));
}
|
||||
TEST_CASE("Voting vs proba", "[BoostAODE]")
{
    // Fit once with predict_voting disabled, record score and probabilities,
    // then switch predict_voting on for the same fitted model and compare.
    RawDatasets data("iris", true);
    bayesnet::BoostAODE model(false);
    model.fit(data.Xv, data.yv, data.featuresv, data.classNamev, data.statesv);

    // Probability-based prediction (before the hyperparameter change).
    const auto proba_score = model.score(data.Xv, data.yv);
    const auto proba_output = model.predict_proba(data.Xv);

    const nlohmann::json voting_on = {
        {"predict_voting",true},
    };
    model.setHyperparameters(voting_on);

    // Voting-based prediction (after the hyperparameter change).
    const auto voting_score = model.score(data.Xv, data.yv);
    const auto voting_output = model.predict_proba(data.Xv);

    REQUIRE(proba_score == Catch::Approx(0.97333).epsilon(data.epsilon));
    REQUIRE(voting_score == Catch::Approx(0.98).epsilon(data.epsilon));
    REQUIRE(voting_output[83][2] == Catch::Approx(1.0).epsilon(data.epsilon));
    REQUIRE(proba_output[83][2] == Catch::Approx(0.86121525).epsilon(data.epsilon));

    // The ensemble is expected to expose neither a CPT dump nor a topological order.
    REQUIRE(model.dump_cpt() == "");
    REQUIRE(model.topological_order() == std::vector<std::string>());
}
|
||||
TEST_CASE("Order asc, desc & random", "[BoostAODE]")
{
    // For each feature ordering, fit a fresh BoostAODE on "glass" and compare
    // the accuracy obtained through both scoring interfaces with the stored value.
    RawDatasets data("glass", true);
    std::map<std::string, double> expected_scores{
        {"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
    };
    const std::vector<std::string> orders = { "asc", "desc", "rand" };
    for (const std::string& order : orders) {
        bayesnet::BoostAODE model{};
        const nlohmann::json hyperparameters = {
            {"order", order},
            {"bisection", false},
            {"maxTolerance", 1},
            {"convergence", false},
        };
        model.setHyperparameters(hyperparameters);
        model.fit(data.Xv, data.yv, data.featuresv, data.classNamev, data.statesv);

        const auto vector_score = model.score(data.Xv, data.yv);
        const auto tensor_score = model.score(data.Xt, data.yt);
        INFO("BoostAODE order: " + order);
        const double expected = expected_scores[order];
        REQUIRE(vector_score == Catch::Approx(expected).epsilon(data.epsilon));
        REQUIRE(tensor_score == Catch::Approx(expected).epsilon(data.epsilon));
    }
}
|
||||
TEST_CASE("Oddities", "[BoostAODE]")
{
    // Invalid hyperparameters fall in two groups: values refused immediately
    // by setHyperparameters(), and thresholds that are accepted there but
    // make fit() throw.
    bayesnet::BoostAODE model{};
    RawDatasets data("iris", true);

    // Group 1: rejected when the hyperparameters are set.
    const auto rejected_on_set = nlohmann::json{
        { { "order", "duck" } },
        { { "select_features", "duck" } },
        { { "maxTolerance", 0 } },
        { { "maxTolerance", 5 } },
    };
    for (const auto& entry : rejected_on_set.items()) {
        INFO("BoostAODE hyper: " + entry.value().dump());
        REQUIRE_THROWS_AS(model.setHyperparameters(entry.value()), std::invalid_argument);
    }
    REQUIRE_THROWS_AS(model.setHyperparameters({ {"maxTolerance", 0 } }), std::invalid_argument);

    // Group 2: out-of-range thresholds, rejected only when fitting.
    const auto rejected_on_fit = nlohmann::json{
        { { "select_features","IWSS" }, { "threshold", -0.01 } },
        { { "select_features","IWSS" }, { "threshold", 0.51 } },
        { { "select_features","FCBF" }, { "threshold", 1e-8 } },
        { { "select_features","FCBF" }, { "threshold", 1.01 } },
    };
    for (const auto& entry : rejected_on_fit.items()) {
        INFO("BoostAODE hyper: " + entry.value().dump());
        model.setHyperparameters(entry.value());
        REQUIRE_THROWS_AS(model.fit(data.Xv, data.yv, data.featuresv, data.classNamev, data.statesv), std::invalid_argument);
    }
}
|
||||
|
||||
TEST_CASE("Bisection", "[BoostAODE]")
{
    // Bisection with convergence enabled and block_update disabled on
    // "mfeat-factors": check model size, the three reported notes and scores.
    bayesnet::BoostAODE model{};
    RawDatasets data("mfeat-factors", true);
    const nlohmann::json hyperparameters = {
        {"bisection", true},
        {"maxTolerance", 3},
        {"convergence", true},
        {"block_update", false},
    };
    model.setHyperparameters(hyperparameters);
    model.fit(data.Xv, data.yv, data.featuresv, data.classNamev, data.statesv);

    REQUIRE(model.getNumberOfNodes() == 217);
    REQUIRE(model.getNumberOfEdges() == 431);

    const auto notes = model.getNotes();
    REQUIRE(notes.size() == 3);
    REQUIRE(notes[0] == "Convergence threshold reached & 15 models eliminated");
    REQUIRE(notes[1] == "Used features in train: 16 of 216");
    REQUIRE(notes[2] == "Number of models: 1");

    const auto vector_score = model.score(data.Xv, data.yv);
    const auto tensor_score = model.score(data.Xt, data.yt);
    REQUIRE(vector_score == Catch::Approx(1.0f).epsilon(data.epsilon));
    REQUIRE(tensor_score == Catch::Approx(1.0f).epsilon(data.epsilon));
}
|
||||
|
||||
TEST_CASE("Block Update", "[BoostAODE]")
{
    // Same scenario as the bisection test but with block_update enabled:
    // the model size, notes and scores are expected to match these values.
    bayesnet::BoostAODE model{};
    RawDatasets data("mfeat-factors", true);
    const nlohmann::json hyperparameters = {
        {"bisection", true},
        {"block_update", true},
        {"maxTolerance", 3},
        {"convergence", true},
    };
    model.setHyperparameters(hyperparameters);
    model.fit(data.Xv, data.yv, data.featuresv, data.classNamev, data.statesv);

    REQUIRE(model.getNumberOfNodes() == 217);
    REQUIRE(model.getNumberOfEdges() == 431);

    const auto notes = model.getNotes();
    REQUIRE(notes.size() == 3);
    REQUIRE(notes[0] == "Convergence threshold reached & 15 models eliminated");
    REQUIRE(notes[1] == "Used features in train: 16 of 216");
    REQUIRE(notes[2] == "Number of models: 1");

    const auto vector_score = model.score(data.Xv, data.yv);
    const auto tensor_score = model.score(data.Xt, data.yt);
    REQUIRE(vector_score == Catch::Approx(1.0f).epsilon(data.epsilon));
    REQUIRE(tensor_score == Catch::Approx(1.0f).epsilon(data.epsilon));
}
|
89
tests/TestFeatureSelection.cc
Normal file
89
tests/TestFeatureSelection.cc
Normal file
@@ -0,0 +1,89 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include "bayesnet/utils/BayesMetrics.h"
|
||||
#include "bayesnet/feature_selection/CFS.h"
|
||||
#include "bayesnet/feature_selection/FCBF.h"
|
||||
#include "bayesnet/feature_selection/IWSS.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector, double threshold)
{
    // Test factory: allocates the feature selector named by `selector`
    // ("CFS", "FCBF" or "IWSS") over the data held in `raw`.
    // `threshold` is forwarded to FCBF and IWSS only; CFS takes none.
    // Returns nullptr for any other name. The caller owns the returned
    // object and must delete it.
    if (selector == "CFS")
        return new bayesnet::CFS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights);
    if (selector == "FCBF")
        return new bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, threshold);
    if (selector == "IWSS")
        return new bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, threshold);
    return nullptr;
}
|
||||
|
||||
TEST_CASE("Features Selected", "[FeatureSelection]")
{
    // Runs once per dataset name produced by GENERATE.
    std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");

    auto raw = RawDatasets(file_name, true);

    SECTION("Test features selected, scores and sizes")
    {
        // Expected outcome per (dataset, selector): selected feature indices
        // and, position by position, the score of each selected feature.
        const std::map<std::pair<std::string, std::string>, std::pair<std::vector<int>, std::vector<double>>> results = {
            { {"glass", "CFS"}, { { 2, 3, 6, 1, 8, 4 }, {0.365513, 0.42895, 0.369809, 0.298294, 0.240952, 0.200915} } },
            { {"iris", "CFS"}, { { 3, 2, 1, 0 }, {0.870521, 0.890375, 0.588155, 0.41843} } },
            { {"ecoli", "CFS"}, { { 5, 0, 4, 2, 1, 6 }, {0.512319, 0.565381, 0.486025, 0.41087, 0.331423, 0.266251} } },
            { {"diabetes", "CFS"}, { { 1, 5, 7, 6, 4, 2 }, {0.132858, 0.151209, 0.14244, 0.126591, 0.106028, 0.0825904} } },
            { {"glass", "IWSS" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.42895, 0.359907, 0.273784, 0.223346} } },
            { {"iris", "IWSS"}, { { 3, 2, 0 }, {0.870521, 0.890375, 0.585426} }},
            { {"ecoli", "IWSS"}, { { 5, 6, 0, 1, 4 }, {0.512319, 0.550978, 0.475025, 0.382607, 0.308203} } },
            { {"diabetes", "IWSS"}, { { 1, 5, 4, 7, 3 }, {0.132858, 0.151209, 0.136576, 0.122097, 0.0802232} } },
            { {"glass", "FCBF" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.304911, 0.302109, 0.281621, 0.253297} } },
            { {"iris", "FCBF"}, {{ 3, 2 }, {0.870521, 0.816401} }},
            { {"ecoli", "FCBF"}, {{ 5, 0, 1, 4, 2 }, {0.512319, 0.350406, 0.260905, 0.203132, 0.11229} }},
            { {"diabetes", "FCBF"}, {{ 1, 5, 7, 6 }, {0.132858, 0.083191, 0.0480135, 0.0224186} }}
        };
        // Selector name paired with the threshold handed to build_selector.
        const std::vector<std::pair<std::string, double>> selectors = {
            { "CFS", 0.0 },
            { "IWSS", 0.5 },
            { "FCBF", 1e-7 }
        };
        // Iterate by reference: no need to copy each (name, threshold) pair.
        for (const auto& item : selectors) {
            const std::string& selector = item.first;
            const double threshold = item.second;
            INFO("file_name: " << file_name << ", selector: " << selector);

            bayesnet::FeatureSelect* featureSelector = build_selector(raw, selector, threshold);
            // build_selector returns nullptr for unknown names; fail cleanly
            // instead of dereferencing a null pointer.
            REQUIRE(featureSelector != nullptr);
            featureSelector->fit();

            // Copy the results out and release the selector BEFORE asserting:
            // a failing REQUIRE aborts the test case, which previously skipped
            // the trailing delete and leaked the selector.
            const std::vector<int> selected_features = featureSelector->getFeatures();
            const std::vector<double> selected_scores = featureSelector->getScores();
            delete featureSelector;
            featureSelector = nullptr;

            const auto& expected = results.at({ file_name, selector });
            const std::vector<int>& expected_features = expected.first;
            const std::vector<double>& expected_scores = expected.second;

            // Features
            REQUIRE(selected_features.size() == expected_features.size());
            REQUIRE(selected_features == expected_features);

            // Scores
            REQUIRE(selected_scores.size() == selected_features.size());
            // Guards the indexed access below against a malformed table row.
            REQUIRE(selected_scores.size() == expected_scores.size());
            for (std::size_t i = 0; i < selected_scores.size(); ++i) {
                REQUIRE(selected_scores[i] == Catch::Approx(expected_scores[i]).epsilon(raw.epsilon));
            }
        }
    }
}
|
||||
TEST_CASE("Oddities", "[FeatureSelection]")
{
    // Out-of-range thresholds must be refused at construction time, with
    // std::invalid_argument and a selector-specific message.
    RawDatasets raw("iris", true);

    // Each helper only attempts the construction, so the assertion macros
    // below can observe the exception thrown by the constructor.
    auto construct_fcbf = [&raw](double threshold) {
        bayesnet::FCBF rejected(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, threshold);
        (void)rejected;
    };
    auto construct_iwss = [&raw](double threshold) {
        bayesnet::IWSS rejected(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, threshold);
        (void)rejected;
    };
    const std::string fcbf_message = "Threshold cannot be less than 1e-7";
    const std::string iwss_message = "Threshold has to be in [0, 0.5]";

    // FCBF: threshold below the lower limit
    REQUIRE_THROWS_AS(construct_fcbf(1e-8), std::invalid_argument);
    REQUIRE_THROWS_WITH(construct_fcbf(1e-8), fcbf_message);

    // IWSS: threshold below and above the accepted interval
    REQUIRE_THROWS_AS(construct_iwss(-1e4), std::invalid_argument);
    REQUIRE_THROWS_WITH(construct_iwss(-1e4), iwss_message);
    REQUIRE_THROWS_AS(construct_iwss(0.501), std::invalid_argument);
    REQUIRE_THROWS_WITH(construct_iwss(0.501), iwss_message);
}
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/config.h"
|
||||
|
||||
|
@@ -1,3 +1,9 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef TEST_UTILS_H
|
||||
#define TEST_UTILS_H
|
||||
#include <torch/torch.h>
|
||||
|
File diff suppressed because it is too large
Load Diff
35
update_coverage.py
Normal file
35
update_coverage.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# ***************************************************************
|
||||
# SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
# SPDX-FileType: SOURCE
|
||||
# SPDX-License-Identifier: MIT
|
||||
# ***************************************************************
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
readme_file = "README.md"


def extract_percentage(summary: str) -> str:
    """Return the coverage figure from the text printed by ``lcov --summary``.

    Mirrors the former shell pipeline ``cut -d' ' -f4 | head -2 | tail -1``:
    only the first two lines are considered, the last of them is used, and
    its fourth space-separated field is returned (e.g. ``"97.2%"``).
    A line without any space is returned whole, as ``cut`` does; a line with
    fewer than four fields, or empty input, yields an empty string.
    """
    first_two = summary.splitlines()[:2]
    if not first_two:
        return ""
    line = first_two[-1]
    if " " not in line:
        return line.strip()
    fields = line.split(" ")
    return fields[3].strip() if len(fields) > 3 else ""


def replace_coverage_line(lines: list, coverage_line: str) -> list:
    """Return *lines* with every line containing "Coverage" replaced.

    Replaced lines become ``coverage_line`` plus a newline; all other lines
    are returned unchanged.
    """
    return [
        coverage_line + "\n" if "Coverage" in line else line for line in lines
    ]


def main(argv: list) -> int:
    """Refresh the coverage line of the README from an lcov tracefile.

    ``argv[1]`` is the directory that contains ``coverage.info``.
    Returns the process exit status: 0 on success, 1 when the coverage is
    below 90 (the README is left untouched), 2 when the argument is missing.
    Raises ``subprocess.CalledProcessError`` if lcov fails and ``ValueError``
    if its output cannot be parsed as a number.
    """
    if len(argv) < 2:
        print(f"Usage: {argv[0] if argv else 'update_coverage.py'} <build_dir>",
              file=sys.stderr)
        return 2
    print("Updating coverage...")
    # Generate badge line.
    # lcov is called with an argument list (no shell), so a directory name
    # with spaces or shell metacharacters cannot alter the command.
    output = subprocess.check_output(
        ["lcov", "--summary", argv[1] + "/coverage.info"]
    )
    raw_percentage = extract_percentage(output.decode("utf-8"))
    value = float(raw_percentage.replace("%", ""))
    if value < 90:
        print("⛔Coverage is less than 90%. I won't update the badge.")
        return 1
    percentage = raw_percentage.replace(".", ",")
    # NOTE(review): this literal is empty in the text under review, so the
    # matching README lines are replaced by a blank line. It presumably held
    # badge markup built from `percentage` -- confirm against the repository.
    coverage_line = (
        f""
    )
    # Update README.md
    with open(readme_file, "r", encoding="utf-8") as f:
        lines = f.readlines()
    with open(readme_file, "w", encoding="utf-8") as f:
        f.writelines(replace_coverage_line(lines, coverage_line))
    print(f"✅Coverage updated with value: {percentage}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
Reference in New Issue
Block a user