Compare commits
32 Commits
Author | SHA1 | Date | |
---|---|---|---|
43ceefd2c9 | |||
e6501502d1 | |||
d84adf6172 | |||
268a86cbe0 | |||
fc4c93b299 | |||
86f2bc44fc | |||
f0f3d9ad6e | |||
9a323cd7a3 | |||
cb949ac7e5 | |||
2c297ea15d | |||
4e4b6e67f4 | |||
82847774ee | |||
d0955d9369 | |||
2d34eb8c89 | |||
0159c397fa | |||
0bbc8328a9 | |||
35ca862eca | |||
26eb58b104 | |||
6fcc15d39a | |||
9a14133be5 | |||
59c1cf5b3b | |||
8e9090d283 | |||
02bcab01be | |||
716748e18c | |||
0b31780d39 | |||
fa26aa80f7 | |||
3eb61905fb | |||
ca0ae4dacf | |||
b34869cc61 | |||
27a3e5a5e0 | |||
684443a788 | |||
6d9badc33b |
@ -1,4 +1,4 @@
|
||||
compilation_database_dir: build_debug
|
||||
compilation_database_dir: build_Debug
|
||||
output_directory: diagrams
|
||||
diagrams:
|
||||
BayesNet:
|
||||
|
@ -1,6 +1,6 @@
|
||||
FROM mcr.microsoft.com/devcontainers/cpp:ubuntu22.04
|
||||
|
||||
ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.22.2"
|
||||
ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.29.3"
|
||||
|
||||
# Optionally install the cmake for vcpkg
|
||||
COPY ./reinstall-cmake.sh /tmp/
|
||||
@ -23,7 +23,7 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test
|
||||
RUN apt-get update
|
||||
|
||||
# Install GCC 13.1
|
||||
RUN apt-get install -y gcc-13 g++-13
|
||||
RUN apt-get install -y gcc-13 g++-13 doxygen
|
||||
|
||||
# Install lcov 2.1
|
||||
RUN wget --quiet https://github.com/linux-test-project/lcov/releases/download/v2.1/lcov-2.1.tar.gz && \
|
||||
|
8
.gitmodules
vendored
8
.gitmodules
vendored
@ -1,8 +1,3 @@
|
||||
[submodule "lib/mdlp"]
|
||||
path = lib/mdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
||||
main = main
|
||||
update = merge
|
||||
[submodule "lib/json"]
|
||||
path = lib/json
|
||||
url = https://github.com/nlohmann/json.git
|
||||
@ -21,3 +16,6 @@
|
||||
[submodule "tests/lib/Files"]
|
||||
path = tests/lib/Files
|
||||
url = https://github.com/rmontanana/ArffFiles
|
||||
[submodule "lib/mdlp"]
|
||||
path = lib/mdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
||||
|
6
.vscode/launch.json
vendored
6
.vscode/launch.json
vendored
@ -14,11 +14,11 @@
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "test",
|
||||
"program": "${workspaceFolder}/build_debug/tests/TestBayesNet",
|
||||
"program": "${workspaceFolder}/build_Debug/tests/TestBayesNet",
|
||||
"args": [
|
||||
"[Node]"
|
||||
"No features selected"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/build_debug/tests"
|
||||
"cwd": "${workspaceFolder}/build_Debug/tests"
|
||||
},
|
||||
{
|
||||
"name": "(gdb) Launch",
|
||||
|
23
CHANGELOG.md
23
CHANGELOG.md
@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [1.0.6] 2024-11-23
|
||||
|
||||
### Fixed
|
||||
|
||||
- Prevent existing edges to be added to the network in the `add_edge` method.
|
||||
- Don't allow to add nodes or edges on already fiited networks.
|
||||
- Number of threads spawned
|
||||
- Network class tests
|
||||
|
||||
### Added
|
||||
|
||||
- Library logo generated with <https://openart.ai> to README.md
|
||||
@ -14,14 +23,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- *convergence_best* hyperparameter to the BoostAODE class, to control the way the prior accuracy is computed if convergence is set. Default value is *false*.
|
||||
- SPnDE model.
|
||||
- A2DE model.
|
||||
- BoostA2DE model.
|
||||
- A2DE & SPnDE tests.
|
||||
- Add tests to reach 99% of coverage.
|
||||
- Add tests to check the correct version of the mdlp, folding and json libraries.
|
||||
- Library documentation generated with Doxygen.
|
||||
- Link to documentation in the README.md.
|
||||
- Three types of smoothing the Bayesian Network ORIGINAL, LAPLACE and CESTNIK.
|
||||
|
||||
### Internal
|
||||
|
||||
- Fixed doxygen optional dependency
|
||||
- Add env parallel variable to Makefile
|
||||
- Add CountingSemaphore class to manage the number of threads spawned.
|
||||
- Ignore CUDA language in CMake CodeCoverage module.
|
||||
- Update mdlp library as a git submodule.
|
||||
- Create library ShuffleArffFile to limit the number of samples with a parameter and shuffle them.
|
||||
- Refactor catch2 library location to test/lib
|
||||
- Refactor loadDataset function in tests.
|
||||
@ -32,6 +48,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Add a Makefile target (doc) to generate the documentation.
|
||||
- Add a Makefile target (doc-install) to install the documentation.
|
||||
|
||||
### Libraries versions
|
||||
|
||||
- mdlp: 2.0.1
|
||||
- Folding: 1.1.0
|
||||
- json: 3.11
|
||||
- ArffFiles: 1.1.0
|
||||
|
||||
## [1.0.5] 2024-04-20
|
||||
|
||||
### Added
|
||||
|
@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(BayesNet
|
||||
VERSION 1.0.5.1
|
||||
VERSION 1.0.6
|
||||
DESCRIPTION "Bayesian Network and basic classifiers Library."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
|
||||
LANGUAGES CXX
|
||||
@ -26,7 +26,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -fno-elide-constructors")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
|
||||
endif()
|
||||
@ -49,11 +49,12 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
set(CODE_COVERAGE ON)
|
||||
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
|
||||
|
||||
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||
message(STATUS "Languages=${LANGUAGES}")
|
||||
if (CODE_COVERAGE)
|
||||
enable_testing()
|
||||
include(CodeCoverage)
|
||||
MESSAGE("Code coverage enabled")
|
||||
MESSAGE(STATUS "Code coverage enabled")
|
||||
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
endif (CODE_COVERAGE)
|
||||
|
||||
@ -63,6 +64,7 @@ endif (ENABLE_CLANG_TIDY)
|
||||
|
||||
# External libraries - dependencies of BayesNet
|
||||
# ---------------------------------------------
|
||||
|
||||
# include(FetchContent)
|
||||
add_git_submodule("lib/json")
|
||||
add_git_submodule("lib/mdlp")
|
||||
@ -75,7 +77,7 @@ add_subdirectory(bayesnet)
|
||||
# Testing
|
||||
# -------
|
||||
if (ENABLE_TESTING)
|
||||
MESSAGE("Testing enabled")
|
||||
MESSAGE(STATUS "Testing enabled")
|
||||
add_subdirectory(tests/lib/catch2)
|
||||
include(CTest)
|
||||
add_subdirectory(tests)
|
||||
@ -93,10 +95,14 @@ install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h DES
|
||||
# Documentation
|
||||
# -------------
|
||||
find_package(Doxygen)
|
||||
set(DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs)
|
||||
set(doxyfile_in ${DOC_DIR}/Doxyfile.in)
|
||||
set(doxyfile ${DOC_DIR}/Doxyfile)
|
||||
configure_file(${doxyfile_in} ${doxyfile} @ONLY)
|
||||
doxygen_add_docs(doxygen
|
||||
WORKING_DIRECTORY ${DOC_DIR}
|
||||
if (Doxygen_FOUND)
|
||||
set(DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs)
|
||||
set(doxyfile_in ${DOC_DIR}/Doxyfile.in)
|
||||
set(doxyfile ${DOC_DIR}/Doxyfile)
|
||||
configure_file(${doxyfile_in} ${doxyfile} @ONLY)
|
||||
doxygen_add_docs(doxygen
|
||||
WORKING_DIRECTORY ${DOC_DIR}
|
||||
CONFIG_FILE ${doxyfile})
|
||||
else (Doxygen_FOUND)
|
||||
MESSAGE("* Doxygen not found")
|
||||
endif (Doxygen_FOUND)
|
||||
|
13
Makefile
13
Makefile
@ -12,7 +12,6 @@ plantuml = plantuml
|
||||
lcov = lcov
|
||||
genhtml = genhtml
|
||||
dot = dot
|
||||
n_procs = -j 16
|
||||
docsrcdir = docs/manual
|
||||
mansrcdir = docs/man3
|
||||
mandestdir = /usr/local/share/man
|
||||
@ -44,7 +43,7 @@ setup: ## Install dependencies for tests and coverage
|
||||
fi
|
||||
@echo "* You should install plantuml & graphviz for the diagrams"
|
||||
|
||||
diagrams: ## Create an UML class diagram & depnendency of the project (diagrams/BayesNet.png)
|
||||
diagrams: ## Create an UML class diagram & dependency of the project (diagrams/BayesNet.png)
|
||||
@which $(plantuml) || (echo ">>> Please install plantuml"; exit 1)
|
||||
@which $(dot) || (echo ">>> Please install graphviz"; exit 1)
|
||||
@which $(clang-uml) || (echo ">>> Please install clang-uml"; exit 1)
|
||||
@ -59,10 +58,10 @@ diagrams: ## Create an UML class diagram & depnendency of the project (diagrams/
|
||||
@$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg
|
||||
|
||||
buildd: ## Build the debug targets
|
||||
cmake --build $(f_debug) -t $(app_targets) $(n_procs)
|
||||
cmake --build $(f_debug) -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
|
||||
|
||||
buildr: ## Build the release targets
|
||||
cmake --build $(f_release) -t $(app_targets) $(n_procs)
|
||||
cmake --build $(f_release) -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
|
||||
|
||||
clean: ## Clean the tests info
|
||||
@echo ">>> Cleaning Debug BayesNet tests...";
|
||||
@ -106,7 +105,7 @@ opt = ""
|
||||
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
|
||||
@echo ">>> Running BayesNet tests...";
|
||||
@$(MAKE) clean
|
||||
@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
|
||||
@cmake --build $(f_debug) -t $(test_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
|
||||
@for t in $(test_targets); do \
|
||||
echo ">>> Running $$t...";\
|
||||
if [ -f $(f_debug)/tests/$$t ]; then \
|
||||
@ -119,7 +118,7 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
|
||||
|
||||
coverage: ## Run tests and generate coverage report (build/index.html)
|
||||
@echo ">>> Building tests with coverage..."
|
||||
@which $(lcov) || (echo ">>> Please install lcov"; exit 1)
|
||||
@which $(lcov) || (echo ">>ease install lcov"; exit 1)
|
||||
@if [ ! -f $(f_debug)/tests/coverage.info ] ; then $(MAKE) test ; fi
|
||||
@echo ">>> Building report..."
|
||||
@cd $(f_debug)/tests; \
|
||||
@ -173,7 +172,7 @@ docdir = ""
|
||||
doc-install: ## Install documentation
|
||||
@echo ">>> Installing documentation..."
|
||||
@if [ "$(docdir)" = "" ]; then \
|
||||
echo "docdir parameter has to be set when calling doc-install"; \
|
||||
echo "docdir parameter has to be set when calling doc-install, i.e. docdir=../bayesnet_help"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@if [ ! -d $(docdir) ]; then \
|
||||
|
@ -7,9 +7,10 @@
|
||||
[![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||
[![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
|
||||
[![Coverage Badge](https://img.shields.io/badge/Coverage-97,3%25-green)](html/index.html)
|
||||
[![Coverage Badge](https://img.shields.io/badge/Coverage-99,1%25-green)](html/index.html)
|
||||
[![DOI](https://zenodo.org/badge/667782806.svg)](https://doi.org/10.5281/zenodo.14210344)
|
||||
|
||||
Bayesian Network Classifiers using libtorch from scratch
|
||||
Bayesian Network Classifiers library
|
||||
|
||||
## Dependencies
|
||||
|
||||
@ -71,6 +72,8 @@ make sample fname=tests/data/glass.arff
|
||||
|
||||
#### - AODE
|
||||
|
||||
#### - A2DE
|
||||
|
||||
#### - [BoostAODE](docs/BoostAODE.md)
|
||||
|
||||
#### - BoostA2DE
|
||||
|
@ -8,16 +8,18 @@
|
||||
#include <vector>
|
||||
#include <torch/torch.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "bayesnet/network/Network.h"
|
||||
|
||||
namespace bayesnet {
|
||||
enum status_t { NORMAL, WARNING, ERROR };
|
||||
class BaseClassifier {
|
||||
public:
|
||||
// X is nxm std::vector, y is nx1 std::vector
|
||||
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
|
||||
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||
// X is nxm tensor, y is nx1 tensor
|
||||
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
|
||||
virtual ~BaseClassifier() = default;
|
||||
torch::Tensor virtual predict(torch::Tensor& X) = 0;
|
||||
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
|
||||
@ -39,7 +41,7 @@ namespace bayesnet {
|
||||
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
|
||||
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
|
||||
protected:
|
||||
virtual void trainModel(const torch::Tensor& weights) = 0;
|
||||
virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
|
||||
std::vector<std::string> validHyperparameters;
|
||||
};
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
include_directories(
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
||||
${BayesNet_SOURCE_DIR}/lib/folding
|
||||
${BayesNet_SOURCE_DIR}/lib/json/include
|
||||
${BayesNet_SOURCE_DIR}
|
||||
@ -9,4 +9,4 @@ include_directories(
|
||||
file(GLOB_RECURSE Sources "*.cc")
|
||||
|
||||
add_library(BayesNet ${Sources})
|
||||
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
|
||||
target_link_libraries(BayesNet fimdlp "${TORCH_LIBRARIES}")
|
||||
|
@ -11,7 +11,7 @@
|
||||
namespace bayesnet {
|
||||
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
|
||||
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
|
||||
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
|
||||
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
this->features = features;
|
||||
this->className = className;
|
||||
@ -23,7 +23,7 @@ namespace bayesnet {
|
||||
metrics = Metrics(dataset, features, className, n_classes);
|
||||
model.initialize();
|
||||
buildModel(weights);
|
||||
trainModel(weights);
|
||||
trainModel(weights, smoothing);
|
||||
fitted = true;
|
||||
return *this;
|
||||
}
|
||||
@ -41,20 +41,20 @@ namespace bayesnet {
|
||||
throw std::runtime_error(oss.str());
|
||||
}
|
||||
}
|
||||
void Classifier::trainModel(const torch::Tensor& weights)
|
||||
void Classifier::trainModel(const torch::Tensor& weights, Smoothing_t smoothing)
|
||||
{
|
||||
model.fit(dataset, weights, features, className, states);
|
||||
model.fit(dataset, weights, features, className, states, smoothing);
|
||||
}
|
||||
// X is nxm where n is the number of features and m the number of samples
|
||||
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
|
||||
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
dataset = X;
|
||||
buildDataset(y);
|
||||
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
|
||||
return build(features, className, states, weights);
|
||||
return build(features, className, states, weights, smoothing);
|
||||
}
|
||||
// X is nxm where n is the number of features and m the number of samples
|
||||
Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
|
||||
Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
|
||||
for (int i = 0; i < X.size(); ++i) {
|
||||
@ -63,18 +63,18 @@ namespace bayesnet {
|
||||
auto ytmp = torch::tensor(y, torch::kInt32);
|
||||
buildDataset(ytmp);
|
||||
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
|
||||
return build(features, className, states, weights);
|
||||
return build(features, className, states, weights, smoothing);
|
||||
}
|
||||
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
|
||||
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
this->dataset = dataset;
|
||||
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
|
||||
return build(features, className, states, weights);
|
||||
return build(features, className, states, weights, smoothing);
|
||||
}
|
||||
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
|
||||
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
this->dataset = dataset;
|
||||
return build(features, className, states, weights);
|
||||
return build(features, className, states, weights, smoothing);
|
||||
}
|
||||
void Classifier::checkFitParameters()
|
||||
{
|
||||
|
@ -8,7 +8,6 @@
|
||||
#define CLASSIFIER_H
|
||||
#include <torch/torch.h>
|
||||
#include "bayesnet/utils/BayesMetrics.h"
|
||||
#include "bayesnet/network/Network.h"
|
||||
#include "bayesnet/BaseClassifier.h"
|
||||
|
||||
namespace bayesnet {
|
||||
@ -16,10 +15,10 @@ namespace bayesnet {
|
||||
public:
|
||||
Classifier(Network model);
|
||||
virtual ~Classifier() = default;
|
||||
Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
|
||||
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
|
||||
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
|
||||
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override;
|
||||
Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
void addNodes();
|
||||
int getNumberOfNodes() const override;
|
||||
int getNumberOfEdges() const override;
|
||||
@ -51,10 +50,10 @@ namespace bayesnet {
|
||||
std::vector<std::string> notes; // Used to store messages occurred during the fit process
|
||||
void checkFitParameters();
|
||||
virtual void buildModel(const torch::Tensor& weights) = 0;
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
void buildDataset(torch::Tensor& y);
|
||||
private:
|
||||
Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
|
||||
Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
namespace bayesnet {
|
||||
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
|
||||
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
checkInput(X_, y_);
|
||||
features = features_;
|
||||
@ -19,7 +19,7 @@ namespace bayesnet {
|
||||
states = fit_local_discretization(y);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
|
||||
KDB::fit(dataset, features, className, states);
|
||||
KDB::fit(dataset, features, className, states, smoothing);
|
||||
states = localDiscretizationProposal(states, model);
|
||||
return *this;
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ namespace bayesnet {
|
||||
public:
|
||||
explicit KDBLd(int k);
|
||||
virtual ~KDBLd() = default;
|
||||
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
std::vector<std::string> graph(const std::string& name = "KDB") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
static inline std::string version() { return "0.0.1"; };
|
||||
|
@ -70,7 +70,7 @@ namespace bayesnet {
|
||||
states[pFeatures[index]] = xStates;
|
||||
}
|
||||
const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
|
||||
model.fit(pDataset, weights, pFeatures, pClassName, states);
|
||||
model.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::ORIGINAL);
|
||||
}
|
||||
return states;
|
||||
}
|
||||
|
@ -8,25 +8,25 @@
|
||||
|
||||
namespace bayesnet {
|
||||
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
|
||||
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
checkInput(X_, y_);
|
||||
Xf = X_;
|
||||
y = y_;
|
||||
return commonFit(features_, className_, states_);
|
||||
return commonFit(features_, className_, states_, smoothing);
|
||||
}
|
||||
|
||||
SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
if (!torch::is_floating_point(dataset)) {
|
||||
throw std::runtime_error("Dataset must be a floating point tensor");
|
||||
}
|
||||
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
|
||||
y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
|
||||
return commonFit(features_, className_, states_);
|
||||
return commonFit(features_, className_, states_, smoothing);
|
||||
}
|
||||
|
||||
SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
features = features_;
|
||||
className = className_;
|
||||
@ -34,7 +34,7 @@ namespace bayesnet {
|
||||
states = fit_local_discretization(y);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
|
||||
SPODE::fit(dataset, features, className, states);
|
||||
SPODE::fit(dataset, features, className, states, smoothing);
|
||||
states = localDiscretizationProposal(states, model);
|
||||
return *this;
|
||||
}
|
||||
|
@ -14,10 +14,10 @@ namespace bayesnet {
|
||||
public:
|
||||
explicit SPODELd(int root);
|
||||
virtual ~SPODELd() = default;
|
||||
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states);
|
||||
std::vector<std::string> graph(const std::string& name = "SPODE") const override;
|
||||
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||
std::vector<std::string> graph(const std::string& name = "SPODELd") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
static inline std::string version() { return "0.0.1"; };
|
||||
};
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
namespace bayesnet {
|
||||
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
|
||||
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
checkInput(X_, y_);
|
||||
features = features_;
|
||||
@ -19,7 +19,7 @@ namespace bayesnet {
|
||||
states = fit_local_discretization(y);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
|
||||
TAN::fit(dataset, features, className, states);
|
||||
TAN::fit(dataset, features, className, states, smoothing);
|
||||
states = localDiscretizationProposal(states, model);
|
||||
return *this;
|
||||
|
||||
|
@ -15,10 +15,9 @@ namespace bayesnet {
|
||||
public:
|
||||
TANLd();
|
||||
virtual ~TANLd() = default;
|
||||
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
std::vector<std::string> graph(const std::string& name = "TAN") const override;
|
||||
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
std::vector<std::string> graph(const std::string& name = "TANLd") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
static inline std::string version() { return "0.0.1"; };
|
||||
};
|
||||
}
|
||||
#endif // !TANLD_H
|
@ -10,7 +10,7 @@ namespace bayesnet {
|
||||
AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
|
||||
{
|
||||
}
|
||||
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
checkInput(X_, y_);
|
||||
features = features_;
|
||||
@ -20,8 +20,9 @@ namespace bayesnet {
|
||||
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
|
||||
states = fit_local_discretization(y);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
|
||||
Ensemble::fit(dataset, features, className, states);
|
||||
// 1st we need to fit the model to build the normal AODE structure, Ensemble::fit
|
||||
// calls buildModel to initialize the base models
|
||||
Ensemble::fit(dataset, features, className, states, smoothing);
|
||||
return *this;
|
||||
|
||||
}
|
||||
@ -34,10 +35,10 @@ namespace bayesnet {
|
||||
n_models = models.size();
|
||||
significanceModels = std::vector<double>(n_models, 1.0);
|
||||
}
|
||||
void AODELd::trainModel(const torch::Tensor& weights)
|
||||
void AODELd::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
for (const auto& model : models) {
|
||||
model->fit(Xf, y, features, className, states);
|
||||
model->fit(Xf, y, features, className, states, smoothing);
|
||||
}
|
||||
}
|
||||
std::vector<std::string> AODELd::graph(const std::string& name) const
|
||||
|
@ -15,10 +15,10 @@ namespace bayesnet {
|
||||
public:
|
||||
AODELd(bool predict_voting = true);
|
||||
virtual ~AODELd() = default;
|
||||
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
|
||||
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing) override;
|
||||
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
|
||||
protected:
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
};
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ namespace bayesnet {
|
||||
BoostA2DE::BoostA2DE(bool predict_voting) : Boost(predict_voting)
|
||||
{
|
||||
}
|
||||
std::vector<int> BoostA2DE::initializeModels()
|
||||
std::vector<int> BoostA2DE::initializeModels(const Smoothing_t smoothing)
|
||||
{
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
std::vector<int> featuresSelected = featureSelection(weights_);
|
||||
@ -32,7 +32,7 @@ namespace bayesnet {
|
||||
for (int j = i + 1; j < featuresSelected.size(); j++) {
|
||||
auto parents = { featuresSelected[i], featuresSelected[j] };
|
||||
std::unique_ptr<Classifier> model = std::make_unique<SPnDE>(parents);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(1.0); // They will be updated later in trainModel
|
||||
n_models++;
|
||||
@ -41,7 +41,7 @@ namespace bayesnet {
|
||||
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||
return featuresSelected;
|
||||
}
|
||||
void BoostA2DE::trainModel(const torch::Tensor& weights)
|
||||
void BoostA2DE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
//
|
||||
// Logging setup
|
||||
@ -58,7 +58,10 @@ namespace bayesnet {
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels();
|
||||
featuresUsed = initializeModels(smoothing);
|
||||
if (featuresUsed.size() == 0) {
|
||||
return;
|
||||
}
|
||||
auto ypred = predict(X_train);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
// Update significance of the models
|
||||
@ -96,7 +99,7 @@ namespace bayesnet {
|
||||
pairSelection.erase(pairSelection.begin());
|
||||
std::unique_ptr<Classifier> model;
|
||||
model = std::make_unique<SPnDE>(std::vector<int>({ feature_pair.first, feature_pair.second }));
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
alpha_t = 0.0;
|
||||
if (!block_update) {
|
||||
auto ypred = model->predict(X_train);
|
||||
|
@ -17,9 +17,9 @@ namespace bayesnet {
|
||||
virtual ~BoostA2DE() = default;
|
||||
std::vector<std::string> graph(const std::string& title = "BoostA2DE") const override;
|
||||
protected:
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
private:
|
||||
std::vector<int> initializeModels();
|
||||
std::vector<int> initializeModels(const Smoothing_t smoothing);
|
||||
};
|
||||
}
|
||||
#endif
|
@ -16,13 +16,13 @@ namespace bayesnet {
|
||||
BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting)
|
||||
{
|
||||
}
|
||||
std::vector<int> BoostAODE::initializeModels()
|
||||
std::vector<int> BoostAODE::initializeModels(const Smoothing_t smoothing)
|
||||
{
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
std::vector<int> featuresSelected = featureSelection(weights_);
|
||||
for (const int& feature : featuresSelected) {
|
||||
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(1.0); // They will be updated later in trainModel
|
||||
n_models++;
|
||||
@ -30,7 +30,7 @@ namespace bayesnet {
|
||||
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||
return featuresSelected;
|
||||
}
|
||||
void BoostAODE::trainModel(const torch::Tensor& weights)
|
||||
void BoostAODE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
//
|
||||
// Logging setup
|
||||
@ -47,7 +47,7 @@ namespace bayesnet {
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels();
|
||||
featuresUsed = initializeModels(smoothing);
|
||||
auto ypred = predict(X_train);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
// Update significance of the models
|
||||
@ -89,7 +89,7 @@ namespace bayesnet {
|
||||
featureSelection.erase(featureSelection.begin());
|
||||
std::unique_ptr<Classifier> model;
|
||||
model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
alpha_t = 0.0;
|
||||
if (!block_update) {
|
||||
auto ypred = model->predict(X_train);
|
||||
|
@ -18,9 +18,9 @@ namespace bayesnet {
|
||||
virtual ~BoostAODE() = default;
|
||||
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
|
||||
protected:
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
private:
|
||||
std::vector<int> initializeModels();
|
||||
std::vector<int> initializeModels(const Smoothing_t smoothing);
|
||||
};
|
||||
}
|
||||
#endif
|
@ -3,22 +3,21 @@
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "Ensemble.h"
|
||||
#include "bayesnet/utils/CountingSemaphore.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting)
|
||||
{
|
||||
|
||||
};
|
||||
const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted";
|
||||
void Ensemble::trainModel(const torch::Tensor& weights)
|
||||
void Ensemble::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
n_models = models.size();
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
// fit with std::vectors
|
||||
models[i]->fit(dataset, features, className, states);
|
||||
models[i]->fit(dataset, features, className, states, smoothing);
|
||||
}
|
||||
}
|
||||
std::vector<int> Ensemble::compute_arg_max(std::vector<std::vector<double>>& X)
|
||||
@ -85,17 +84,9 @@ namespace bayesnet {
|
||||
{
|
||||
auto n_states = models[0]->getClassNumStates();
|
||||
torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
|
||||
auto threads{ std::vector<std::thread>() };
|
||||
std::mutex mtx;
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
threads.push_back(std::thread([&, i]() {
|
||||
auto ypredict = models[i]->predict_proba(X);
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
y_pred += ypredict * significanceModels[i];
|
||||
}));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
auto ypredict = models[i]->predict_proba(X);
|
||||
y_pred += ypredict * significanceModels[i];
|
||||
}
|
||||
auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
|
||||
y_pred /= sum;
|
||||
@ -105,23 +96,15 @@ namespace bayesnet {
|
||||
{
|
||||
auto n_states = models[0]->getClassNumStates();
|
||||
std::vector<std::vector<double>> y_pred(X[0].size(), std::vector<double>(n_states, 0.0));
|
||||
auto threads{ std::vector<std::thread>() };
|
||||
std::mutex mtx;
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
threads.push_back(std::thread([&, i]() {
|
||||
auto ypredict = models[i]->predict_proba(X);
|
||||
assert(ypredict.size() == y_pred.size());
|
||||
assert(ypredict[0].size() == y_pred[0].size());
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
// Multiply each prediction by the significance of the model and then add it to the final prediction
|
||||
for (auto j = 0; j < ypredict.size(); ++j) {
|
||||
std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(),
|
||||
[significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
|
||||
}
|
||||
}));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
auto ypredict = models[i]->predict_proba(X);
|
||||
assert(ypredict.size() == y_pred.size());
|
||||
assert(ypredict[0].size() == y_pred[0].size());
|
||||
// Multiply each prediction by the significance of the model and then add it to the final prediction
|
||||
for (auto j = 0; j < ypredict.size(); ++j) {
|
||||
std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(),
|
||||
[significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
|
||||
}
|
||||
}
|
||||
auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
|
||||
//Divide each element of the prediction by the sum of the significances
|
||||
@ -141,17 +124,9 @@ namespace bayesnet {
|
||||
{
|
||||
// Build a m x n_models tensor with the predictions of each model
|
||||
torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
|
||||
auto threads{ std::vector<std::thread>() };
|
||||
std::mutex mtx;
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
threads.push_back(std::thread([&, i]() {
|
||||
auto ypredict = models[i]->predict(X);
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
y_pred.index_put_({ "...", i }, ypredict);
|
||||
}));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
auto ypredict = models[i]->predict(X);
|
||||
y_pred.index_put_({ "...", i }, ypredict);
|
||||
}
|
||||
return voting(y_pred);
|
||||
}
|
||||
|
@ -46,7 +46,7 @@ namespace bayesnet {
|
||||
unsigned n_models;
|
||||
std::vector<std::unique_ptr<Classifier>> models;
|
||||
std::vector<double> significanceModels;
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
bool predict_voting;
|
||||
};
|
||||
}
|
||||
|
@ -5,20 +5,20 @@
|
||||
// ***************************************************************
|
||||
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include "Network.h"
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "bayesnet/utils/CountingSemaphore.h"
|
||||
#include <pthread.h>
|
||||
#include <fstream>
|
||||
namespace bayesnet {
|
||||
Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
|
||||
Network::Network() : fitted{ false }, classNumStates{ 0 }
|
||||
{
|
||||
}
|
||||
Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
|
||||
{
|
||||
|
||||
}
|
||||
Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
|
||||
maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples)
|
||||
Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
|
||||
fitted(other.fitted), samples(other.samples)
|
||||
{
|
||||
if (samples.defined())
|
||||
samples = samples.clone();
|
||||
@ -35,16 +35,15 @@ namespace bayesnet {
|
||||
nodes.clear();
|
||||
samples = torch::Tensor();
|
||||
}
|
||||
float Network::getMaxThreads() const
|
||||
{
|
||||
return maxThreads;
|
||||
}
|
||||
torch::Tensor& Network::getSamples()
|
||||
{
|
||||
return samples;
|
||||
}
|
||||
void Network::addNode(const std::string& name)
|
||||
{
|
||||
if (fitted) {
|
||||
throw std::invalid_argument("Cannot add node to a fitted network. Initialize first.");
|
||||
}
|
||||
if (name == "") {
|
||||
throw std::invalid_argument("Node name cannot be empty");
|
||||
}
|
||||
@ -94,12 +93,21 @@ namespace bayesnet {
|
||||
}
|
||||
void Network::addEdge(const std::string& parent, const std::string& child)
|
||||
{
|
||||
if (fitted) {
|
||||
throw std::invalid_argument("Cannot add edge to a fitted network. Initialize first.");
|
||||
}
|
||||
if (nodes.find(parent) == nodes.end()) {
|
||||
throw std::invalid_argument("Parent node " + parent + " does not exist");
|
||||
}
|
||||
if (nodes.find(child) == nodes.end()) {
|
||||
throw std::invalid_argument("Child node " + child + " does not exist");
|
||||
}
|
||||
// Check if the edge is already in the graph
|
||||
for (auto& node : nodes[parent]->getChildren()) {
|
||||
if (node->getName() == child) {
|
||||
throw std::invalid_argument("Edge " + parent + " -> " + child + " already exists");
|
||||
}
|
||||
}
|
||||
// Temporarily add edge to check for cycles
|
||||
nodes[parent]->addChild(nodes[child].get());
|
||||
nodes[child]->addParent(nodes[parent].get());
|
||||
@ -155,7 +163,7 @@ namespace bayesnet {
|
||||
classNumStates = nodes.at(className)->getNumStates();
|
||||
}
|
||||
// X comes in nxm, where n is the number of features and m the number of samples
|
||||
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
|
||||
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
|
||||
this->className = className;
|
||||
@ -164,17 +172,17 @@ namespace bayesnet {
|
||||
for (int i = 0; i < featureNames.size(); ++i) {
|
||||
auto row_feature = X.index({ i, "..." });
|
||||
}
|
||||
completeFit(states, weights);
|
||||
completeFit(states, weights, smoothing);
|
||||
}
|
||||
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
|
||||
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
|
||||
this->className = className;
|
||||
this->samples = samples;
|
||||
completeFit(states, weights);
|
||||
completeFit(states, weights, smoothing);
|
||||
}
|
||||
// input_data comes in nxm, where n is the number of features and m the number of samples
|
||||
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
|
||||
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
|
||||
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
|
||||
@ -185,17 +193,43 @@ namespace bayesnet {
|
||||
samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
|
||||
}
|
||||
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
|
||||
completeFit(states, weights);
|
||||
completeFit(states, weights, smoothing);
|
||||
}
|
||||
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
|
||||
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
setStates(states);
|
||||
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
|
||||
std::vector<std::thread> threads;
|
||||
auto& semaphore = CountingSemaphore::getInstance();
|
||||
const double n_samples = static_cast<double>(samples.size(1));
|
||||
auto worker = [&](std::pair<const std::string, std::unique_ptr<Node>>& node, int i) {
|
||||
std::string threadName = "FitWorker-" + std::to_string(i);
|
||||
#if defined(__linux__)
|
||||
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||
#else
|
||||
pthread_setname_np(threadName.c_str());
|
||||
#endif
|
||||
double numStates = static_cast<double>(node.second->getNumStates());
|
||||
double smoothing_factor;
|
||||
switch (smoothing) {
|
||||
case Smoothing_t::ORIGINAL:
|
||||
smoothing_factor = 1.0 / n_samples;
|
||||
break;
|
||||
case Smoothing_t::LAPLACE:
|
||||
smoothing_factor = 1.0;
|
||||
break;
|
||||
case Smoothing_t::CESTNIK:
|
||||
smoothing_factor = 1 / numStates;
|
||||
break;
|
||||
default:
|
||||
smoothing_factor = 0.0; // No smoothing
|
||||
}
|
||||
node.second->computeCPT(samples, features, smoothing_factor, weights);
|
||||
semaphore.release();
|
||||
};
|
||||
int i = 0;
|
||||
for (auto& node : nodes) {
|
||||
threads.emplace_back([this, &node, &weights]() {
|
||||
node.second->computeCPT(samples, features, laplaceSmoothing, weights);
|
||||
});
|
||||
semaphore.acquire();
|
||||
threads.emplace_back(worker, std::ref(node), i++);
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
@ -207,14 +241,38 @@ namespace bayesnet {
|
||||
if (!fitted) {
|
||||
throw std::logic_error("You must call fit() before calling predict()");
|
||||
}
|
||||
// Ensure the sample size is equal to the number of features
|
||||
if (samples.size(0) != features.size() - 1) {
|
||||
throw std::invalid_argument("(T) Sample size (" + std::to_string(samples.size(0)) +
|
||||
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
torch::Tensor result;
|
||||
std::vector<std::thread> threads;
|
||||
std::mutex mtx;
|
||||
auto& semaphore = CountingSemaphore::getInstance();
|
||||
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
|
||||
for (int i = 0; i < samples.size(1); ++i) {
|
||||
const torch::Tensor sample = samples.index({ "...", i });
|
||||
auto worker = [&](const torch::Tensor& sample, int i) {
|
||||
std::string threadName = "PredictWorker-" + std::to_string(i);
|
||||
#if defined(__linux__)
|
||||
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||
#else
|
||||
pthread_setname_np(threadName.c_str());
|
||||
#endif
|
||||
auto psample = predict_sample(sample);
|
||||
auto temp = torch::tensor(psample, torch::kFloat64);
|
||||
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64));
|
||||
result.index_put_({ i, "..." }, temp);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
result.index_put_({ i, "..." }, temp);
|
||||
}
|
||||
semaphore.release();
|
||||
};
|
||||
for (int i = 0; i < samples.size(1); ++i) {
|
||||
semaphore.acquire();
|
||||
const torch::Tensor sample = samples.index({ "...", i });
|
||||
threads.emplace_back(worker, sample, i);
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
if (proba)
|
||||
return result;
|
||||
@ -239,18 +297,38 @@ namespace bayesnet {
|
||||
if (!fitted) {
|
||||
throw std::logic_error("You must call fit() before calling predict()");
|
||||
}
|
||||
std::vector<int> predictions;
|
||||
// Ensure the sample size is equal to the number of features
|
||||
if (tsamples.size() != features.size() - 1) {
|
||||
throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
|
||||
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
std::vector<int> predictions(tsamples[0].size(), 0);
|
||||
std::vector<int> sample;
|
||||
std::vector<std::thread> threads;
|
||||
auto& semaphore = CountingSemaphore::getInstance();
|
||||
auto worker = [&](const std::vector<int>& sample, const int row, int& prediction) {
|
||||
std::string threadName = "(V)PWorker-" + std::to_string(row);
|
||||
#if defined(__linux__)
|
||||
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||
#else
|
||||
pthread_setname_np(threadName.c_str());
|
||||
#endif
|
||||
auto classProbabilities = predict_sample(sample);
|
||||
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
|
||||
int predictedClass = distance(classProbabilities.begin(), maxElem);
|
||||
prediction = predictedClass;
|
||||
semaphore.release();
|
||||
};
|
||||
for (int row = 0; row < tsamples[0].size(); ++row) {
|
||||
sample.clear();
|
||||
for (int col = 0; col < tsamples.size(); ++col) {
|
||||
sample.push_back(tsamples[col][row]);
|
||||
}
|
||||
std::vector<double> classProbabilities = predict_sample(sample);
|
||||
// Find the class with the maximum posterior probability
|
||||
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
|
||||
int predictedClass = distance(classProbabilities.begin(), maxElem);
|
||||
predictions.push_back(predictedClass);
|
||||
semaphore.acquire();
|
||||
threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
return predictions;
|
||||
}
|
||||
@ -261,14 +339,36 @@ namespace bayesnet {
|
||||
if (!fitted) {
|
||||
throw std::logic_error("You must call fit() before calling predict_proba()");
|
||||
}
|
||||
std::vector<std::vector<double>> predictions;
|
||||
// Ensure the sample size is equal to the number of features
|
||||
if (tsamples.size() != features.size() - 1) {
|
||||
throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
|
||||
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
std::vector<std::vector<double>> predictions(tsamples[0].size(), std::vector<double>(classNumStates, 0.0));
|
||||
std::vector<int> sample;
|
||||
std::vector<std::thread> threads;
|
||||
auto& semaphore = CountingSemaphore::getInstance();
|
||||
auto worker = [&](const std::vector<int>& sample, int row, std::vector<double>& predictions) {
|
||||
std::string threadName = "(V)PWorker-" + std::to_string(row);
|
||||
#if defined(__linux__)
|
||||
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||
#else
|
||||
pthread_setname_np(threadName.c_str());
|
||||
#endif
|
||||
std::vector<double> classProbabilities = predict_sample(sample);
|
||||
predictions = classProbabilities;
|
||||
semaphore.release();
|
||||
};
|
||||
for (int row = 0; row < tsamples[0].size(); ++row) {
|
||||
sample.clear();
|
||||
for (int col = 0; col < tsamples.size(); ++col) {
|
||||
sample.push_back(tsamples[col][row]);
|
||||
}
|
||||
predictions.push_back(predict_sample(sample));
|
||||
semaphore.acquire();
|
||||
threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
return predictions;
|
||||
}
|
||||
@ -286,11 +386,6 @@ namespace bayesnet {
|
||||
// Return 1xn std::vector of probabilities
|
||||
std::vector<double> Network::predict_sample(const std::vector<int>& sample)
|
||||
{
|
||||
// Ensure the sample size is equal to the number of features
|
||||
if (sample.size() != features.size() - 1) {
|
||||
throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) +
|
||||
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
std::map<std::string, int> evidence;
|
||||
for (int i = 0; i < sample.size(); ++i) {
|
||||
evidence[features[i]] = sample[i];
|
||||
@ -300,44 +395,26 @@ namespace bayesnet {
|
||||
// Return 1xn std::vector of probabilities
|
||||
std::vector<double> Network::predict_sample(const torch::Tensor& sample)
|
||||
{
|
||||
// Ensure the sample size is equal to the number of features
|
||||
if (sample.size(0) != features.size() - 1) {
|
||||
throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) +
|
||||
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
std::map<std::string, int> evidence;
|
||||
for (int i = 0; i < sample.size(0); ++i) {
|
||||
evidence[features[i]] = sample[i].item<int>();
|
||||
}
|
||||
return exactInference(evidence);
|
||||
}
|
||||
double Network::computeFactor(std::map<std::string, int>& completeEvidence)
|
||||
{
|
||||
double result = 1.0;
|
||||
for (auto& node : getNodes()) {
|
||||
result *= node.second->getFactorValue(completeEvidence);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
|
||||
{
|
||||
std::vector<double> result(classNumStates, 0.0);
|
||||
std::vector<std::thread> threads;
|
||||
std::mutex mtx;
|
||||
auto completeEvidence = std::map<std::string, int>(evidence);
|
||||
for (int i = 0; i < classNumStates; ++i) {
|
||||
threads.emplace_back([this, &result, &evidence, i, &mtx]() {
|
||||
auto completeEvidence = std::map<std::string, int>(evidence);
|
||||
completeEvidence[getClassName()] = i;
|
||||
double factor = computeFactor(completeEvidence);
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
result[i] = factor;
|
||||
});
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
completeEvidence[getClassName()] = i;
|
||||
double partial = 1.0;
|
||||
for (auto& node : getNodes()) {
|
||||
partial *= node.second->getFactorValue(completeEvidence);
|
||||
}
|
||||
result[i] = partial;
|
||||
}
|
||||
// Normalize result
|
||||
double sum = accumulate(result.begin(), result.end(), 0.0);
|
||||
double sum = std::accumulate(result.begin(), result.end(), 0.0);
|
||||
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
|
||||
return result;
|
||||
}
|
||||
|
@ -12,14 +12,18 @@
|
||||
#include "Node.h"
|
||||
|
||||
namespace bayesnet {
|
||||
enum class Smoothing_t {
|
||||
NONE = -1,
|
||||
ORIGINAL = 0,
|
||||
LAPLACE,
|
||||
CESTNIK
|
||||
};
|
||||
class Network {
|
||||
public:
|
||||
Network();
|
||||
explicit Network(float);
|
||||
explicit Network(const Network&);
|
||||
~Network() = default;
|
||||
torch::Tensor& getSamples();
|
||||
float getMaxThreads() const;
|
||||
void addNode(const std::string&);
|
||||
void addEdge(const std::string&, const std::string&);
|
||||
std::map<std::string, std::unique_ptr<Node>>& getNodes();
|
||||
@ -32,9 +36,9 @@ namespace bayesnet {
|
||||
/*
|
||||
Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
|
||||
*/
|
||||
void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
|
||||
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
|
||||
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
|
||||
void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||
std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
|
||||
torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
|
||||
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
|
||||
@ -50,19 +54,16 @@ namespace bayesnet {
|
||||
private:
|
||||
std::map<std::string, std::unique_ptr<Node>> nodes;
|
||||
bool fitted;
|
||||
float maxThreads = 0.95;
|
||||
int classNumStates;
|
||||
std::vector<std::string> features; // Including classname
|
||||
std::string className;
|
||||
double laplaceSmoothing;
|
||||
torch::Tensor samples; // n+1xm tensor used to fit the model
|
||||
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
|
||||
std::vector<double> predict_sample(const std::vector<int>&);
|
||||
std::vector<double> predict_sample(const torch::Tensor&);
|
||||
std::vector<double> exactInference(std::map<std::string, int>&);
|
||||
double computeFactor(std::map<std::string, int>&);
|
||||
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
|
||||
void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
|
||||
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
|
||||
void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
|
||||
void setStates(const std::map<std::string, std::vector<int>>&);
|
||||
};
|
||||
}
|
||||
|
@ -90,51 +90,54 @@ namespace bayesnet {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
|
||||
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
|
||||
{
|
||||
dimensions.clear();
|
||||
// Get dimensions of the CPT
|
||||
dimensions.push_back(numStates);
|
||||
transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
|
||||
// Create a tensor of zeros with the dimensions of the CPT
|
||||
cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
|
||||
cpTable = torch::zeros(dimensions, torch::kDouble) + smoothing;
|
||||
// Fill table with counts
|
||||
auto pos = find(features.begin(), features.end(), name);
|
||||
if (pos == features.end()) {
|
||||
throw std::logic_error("Feature " + name + " not found in dataset");
|
||||
}
|
||||
int name_index = pos - features.begin();
|
||||
c10::List<c10::optional<at::Tensor>> coordinates;
|
||||
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
|
||||
c10::List<c10::optional<at::Tensor>> coordinates;
|
||||
coordinates.push_back(dataset.index({ name_index, n_sample }));
|
||||
coordinates.clear();
|
||||
auto sample = dataset.index({ "...", n_sample });
|
||||
coordinates.push_back(sample[name_index]);
|
||||
for (auto parent : parents) {
|
||||
pos = find(features.begin(), features.end(), parent->getName());
|
||||
if (pos == features.end()) {
|
||||
throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
|
||||
}
|
||||
int parent_index = pos - features.begin();
|
||||
coordinates.push_back(dataset.index({ parent_index, n_sample }));
|
||||
coordinates.push_back(sample[parent_index]);
|
||||
}
|
||||
// Increment the count of the corresponding coordinate
|
||||
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item<double>());
|
||||
cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
|
||||
}
|
||||
// Normalize the counts
|
||||
// Divide each row by the sum of the row
|
||||
cpTable = cpTable / cpTable.sum(0);
|
||||
}
|
||||
float Node::getFactorValue(std::map<std::string, int>& evidence)
|
||||
double Node::getFactorValue(std::map<std::string, int>& evidence)
|
||||
{
|
||||
c10::List<c10::optional<at::Tensor>> coordinates;
|
||||
// following predetermined order of indices in the cpTable (see Node.h)
|
||||
coordinates.push_back(at::tensor(evidence[name]));
|
||||
transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
|
||||
return cpTable.index({ coordinates }).item<float>();
|
||||
return cpTable.index({ coordinates }).item<double>();
|
||||
}
|
||||
std::vector<std::string> Node::graph(const std::string& className)
|
||||
{
|
||||
auto output = std::vector<std::string>();
|
||||
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
|
||||
output.push_back(name + " [shape=circle" + suffix + "] \n");
|
||||
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); });
|
||||
output.push_back("\"" + name + "\" [shape=circle" + suffix + "] \n");
|
||||
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return "\"" + name + "\" -> \"" + child->getName() + "\""; });
|
||||
return output;
|
||||
}
|
||||
}
|
@ -23,12 +23,12 @@ namespace bayesnet {
|
||||
std::vector<Node*>& getParents();
|
||||
std::vector<Node*>& getChildren();
|
||||
torch::Tensor& getCPT();
|
||||
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
|
||||
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights);
|
||||
int getNumStates() const;
|
||||
void setNumStates(int);
|
||||
unsigned minFill();
|
||||
std::vector<std::string> graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format
|
||||
float getFactorValue(std::map<std::string, int>&);
|
||||
double getFactorValue(std::map<std::string, int>&);
|
||||
private:
|
||||
std::string name;
|
||||
std::vector<Node*> parents;
|
||||
|
46
bayesnet/utils/CountingSemaphore.h
Normal file
46
bayesnet/utils/CountingSemaphore.h
Normal file
@ -0,0 +1,46 @@
|
||||
#ifndef COUNTING_SEMAPHORE_H
|
||||
#define COUNTING_SEMAPHORE_H
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <algorithm>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <thread>
|
||||
|
||||
class CountingSemaphore {
|
||||
public:
|
||||
static CountingSemaphore& getInstance()
|
||||
{
|
||||
static CountingSemaphore instance;
|
||||
return instance;
|
||||
}
|
||||
// Delete copy constructor and assignment operator
|
||||
CountingSemaphore(const CountingSemaphore&) = delete;
|
||||
CountingSemaphore& operator=(const CountingSemaphore&) = delete;
|
||||
void acquire()
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mtx_);
|
||||
cv_.wait(lock, [this]() { return count_ > 0; });
|
||||
--count_;
|
||||
}
|
||||
void release()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
++count_;
|
||||
if (count_ <= max_count_) {
|
||||
cv_.notify_one();
|
||||
}
|
||||
}
|
||||
private:
|
||||
CountingSemaphore()
|
||||
: max_count_(std::max(1u, static_cast<uint>(0.95 * std::thread::hardware_concurrency()))),
|
||||
count_(max_count_)
|
||||
{
|
||||
}
|
||||
std::mutex mtx_;
|
||||
std::condition_variable cv_;
|
||||
const uint max_count_;
|
||||
uint count_;
|
||||
};
|
||||
#endif
|
@ -53,14 +53,14 @@ namespace bayesnet {
|
||||
}
|
||||
}
|
||||
|
||||
void insertElement(std::list<int>& variables, int variable)
|
||||
void MST::insertElement(std::list<int>& variables, int variable)
|
||||
{
|
||||
if (std::find(variables.begin(), variables.end(), variable) == variables.end()) {
|
||||
variables.push_front(variable);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
|
||||
std::vector<std::pair<int, int>> MST::reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
|
||||
{
|
||||
// Create the edges of a DAG from the MST
|
||||
// replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted
|
||||
|
@ -14,6 +14,8 @@ namespace bayesnet {
|
||||
public:
|
||||
MST() = default;
|
||||
MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
|
||||
void insertElement(std::list<int>& variables, int variable);
|
||||
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original);
|
||||
std::vector<std::pair<int, int>> maximumSpanningTree();
|
||||
private:
|
||||
torch::Tensor weights;
|
||||
|
@ -137,7 +137,7 @@
|
||||
|
||||
include(CMakeParseArguments)
|
||||
|
||||
option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE)
|
||||
option(CODE_COVERAGE_VERBOSE "Verbose information" TRUE)
|
||||
|
||||
# Check prereqs
|
||||
find_program( GCOV_PATH gcov )
|
||||
@ -160,7 +160,11 @@ foreach(LANG ${LANGUAGES})
|
||||
endif()
|
||||
elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU"
|
||||
AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang")
|
||||
message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
|
||||
if ("${LANG}" MATCHES "CUDA")
|
||||
message(STATUS "Ignoring CUDA")
|
||||
else()
|
||||
message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
|
@ -1,36 +1,16 @@
|
||||
@startuml
|
||||
title clang-uml class diagram model
|
||||
class "bayesnet::Metrics" as C_0000736965376885623323
|
||||
class C_0000736965376885623323 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Metrics() = default : void
|
||||
+Metrics(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
|
||||
+Metrics(const std::vector<std::vector<int>> & vsamples, const std::vector<int> & labels, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
|
||||
..
|
||||
+SelectKBestWeighted(const torch::Tensor & weights, bool ascending = false, unsigned int k = 0) : std::vector<int>
|
||||
+conditionalEdge(const torch::Tensor & weights) : torch::Tensor
|
||||
+conditionalEdgeWeights(std::vector<float> & weights) : std::vector<float>
|
||||
#doCombinations<T>(const std::vector<T> & source) : std::vector<std::pair<T, T> >
|
||||
#entropy(const torch::Tensor & feature, const torch::Tensor & weights) : double
|
||||
+getScoresKBest() const : std::vector<double>
|
||||
+maximumSpanningTree(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : std::vector<std::pair<int,int>>
|
||||
+mutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & weights) : double
|
||||
#pop_first<T>(std::vector<T> & v) : T
|
||||
__
|
||||
#className : std::string
|
||||
#features : std::vector<std::string>
|
||||
#samples : torch::Tensor
|
||||
}
|
||||
class "bayesnet::Node" as C_0001303524929067080934
|
||||
class C_0001303524929067080934 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::Node" as C_0010428199432536647474
|
||||
class C_0010428199432536647474 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Node(const std::string &) : void
|
||||
..
|
||||
+addChild(Node *) : void
|
||||
+addParent(Node *) : void
|
||||
+clear() : void
|
||||
+computeCPT(const torch::Tensor & dataset, const std::vector<std::string> & features, const double laplaceSmoothing, const torch::Tensor & weights) : void
|
||||
+computeCPT(const torch::Tensor & dataset, const std::vector<std::string> & features, const double smoothing, const torch::Tensor & weights) : void
|
||||
+getCPT() : torch::Tensor &
|
||||
+getChildren() : std::vector<Node *> &
|
||||
+getFactorValue(std::map<std::string,int> &) : float
|
||||
+getFactorValue(std::map<std::string,int> &) : double
|
||||
+getName() const : std::string
|
||||
+getNumStates() const : int
|
||||
+getParents() : std::vector<Node *> &
|
||||
@ -41,24 +21,29 @@ class C_0001303524929067080934 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+setNumStates(int) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::Network" as C_0001186707649890429575
|
||||
class C_0001186707649890429575 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
enum "bayesnet::Smoothing_t" as C_0013393078277439680282
|
||||
enum C_0013393078277439680282 {
|
||||
NONE
|
||||
ORIGINAL
|
||||
LAPLACE
|
||||
CESTNIK
|
||||
}
|
||||
class "bayesnet::Network" as C_0009493661199123436603
|
||||
class C_0009493661199123436603 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Network() : void
|
||||
+Network(float) : void
|
||||
+Network(const Network &) : void
|
||||
+~Network() = default : void
|
||||
..
|
||||
+addEdge(const std::string &, const std::string &) : void
|
||||
+addNode(const std::string &) : void
|
||||
+dump_cpt() const : std::string
|
||||
+fit(const torch::Tensor & samples, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
|
||||
+fit(const torch::Tensor & X, const torch::Tensor & y, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
|
||||
+fit(const std::vector<std::vector<int>> & input_data, const std::vector<int> & labels, const std::vector<double> & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
|
||||
+fit(const torch::Tensor & samples, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
|
||||
+fit(const torch::Tensor & X, const torch::Tensor & y, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
|
||||
+fit(const std::vector<std::vector<int>> & input_data, const std::vector<int> & labels, const std::vector<double> & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
|
||||
+getClassName() const : std::string
|
||||
+getClassNumStates() const : int
|
||||
+getEdges() const : std::vector<std::pair<std::string,std::string>>
|
||||
+getFeatures() const : std::vector<std::string>
|
||||
+getMaxThreads() const : float
|
||||
+getNodes() : std::map<std::string,std::unique_ptr<Node>> &
|
||||
+getNumEdges() const : int
|
||||
+getSamples() : torch::Tensor &
|
||||
@ -76,21 +61,21 @@ class C_0001186707649890429575 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+version() : std::string
|
||||
__
|
||||
}
|
||||
enum "bayesnet::status_t" as C_0000738420730783851375
|
||||
enum C_0000738420730783851375 {
|
||||
enum "bayesnet::status_t" as C_0005907365846270811004
|
||||
enum C_0005907365846270811004 {
|
||||
NORMAL
|
||||
WARNING
|
||||
ERROR
|
||||
}
|
||||
abstract "bayesnet::BaseClassifier" as C_0000327135989451974539
|
||||
abstract C_0000327135989451974539 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
abstract "bayesnet::BaseClassifier" as C_0002617087915615796317
|
||||
abstract C_0002617087915615796317 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+~BaseClassifier() = default : void
|
||||
..
|
||||
{abstract} +dump_cpt() const = 0 : std::string
|
||||
{abstract} +fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
|
||||
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
|
||||
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights) = 0 : BaseClassifier &
|
||||
{abstract} +fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
|
||||
{abstract} +fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
|
||||
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
|
||||
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights, const Smoothing_t smoothing) = 0 : BaseClassifier &
|
||||
{abstract} +fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
|
||||
{abstract} +getClassNumStates() const = 0 : int
|
||||
{abstract} +getNotes() const = 0 : std::vector<std::string>
|
||||
{abstract} +getNumberOfEdges() const = 0 : int
|
||||
@ -109,12 +94,35 @@ abstract C_0000327135989451974539 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
{abstract} +setHyperparameters(const nlohmann::json & hyperparameters) = 0 : void
|
||||
{abstract} +show() const = 0 : std::vector<std::string>
|
||||
{abstract} +topological_order() = 0 : std::vector<std::string>
|
||||
{abstract} #trainModel(const torch::Tensor & weights) = 0 : void
|
||||
{abstract} #trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) = 0 : void
|
||||
__
|
||||
#validHyperparameters : std::vector<std::string>
|
||||
}
|
||||
abstract "bayesnet::Classifier" as C_0002043996622900301644
|
||||
abstract C_0002043996622900301644 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::Metrics" as C_0005895723015084986588
|
||||
class C_0005895723015084986588 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Metrics() = default : void
|
||||
+Metrics(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
|
||||
+Metrics(const std::vector<std::vector<int>> & vsamples, const std::vector<int> & labels, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
|
||||
..
|
||||
+SelectKBestWeighted(const torch::Tensor & weights, bool ascending = false, unsigned int k = 0) : std::vector<int>
|
||||
+SelectKPairs(const torch::Tensor & weights, std::vector<int> & featuresExcluded, bool ascending = false, unsigned int k = 0) : std::vector<std::pair<int,int>>
|
||||
+conditionalEdge(const torch::Tensor & weights) : torch::Tensor
|
||||
+conditionalEntropy(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & labels, const torch::Tensor & weights) : double
|
||||
+conditionalMutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & labels, const torch::Tensor & weights) : double
|
||||
#doCombinations<T>(const std::vector<T> & source) : std::vector<std::pair<T, T> >
|
||||
+entropy(const torch::Tensor & feature, const torch::Tensor & weights) : double
|
||||
+getScoresKBest() const : std::vector<double>
|
||||
+getScoresKPairs() const : std::vector<std::pair<std::pair<int,int>,double>>
|
||||
+maximumSpanningTree(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : std::vector<std::pair<int,int>>
|
||||
+mutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & weights) : double
|
||||
#pop_first<T>(std::vector<T> & v) : T
|
||||
__
|
||||
#className : std::string
|
||||
#features : std::vector<std::string>
|
||||
#samples : torch::Tensor
|
||||
}
|
||||
abstract "bayesnet::Classifier" as C_0016351972983202413152
|
||||
abstract C_0016351972983202413152 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Classifier(Network model) : void
|
||||
+~Classifier() = default : void
|
||||
..
|
||||
@ -123,10 +131,10 @@ abstract C_0002043996622900301644 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
{abstract} #buildModel(const torch::Tensor & weights) = 0 : void
|
||||
#checkFitParameters() : void
|
||||
+dump_cpt() const : std::string
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
|
||||
+fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights) : Classifier &
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
|
||||
+fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights, const Smoothing_t smoothing) : Classifier &
|
||||
+getClassNumStates() const : int
|
||||
+getNotes() const : std::vector<std::string>
|
||||
+getNumberOfEdges() const : int
|
||||
@ -143,7 +151,7 @@ abstract C_0002043996622900301644 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
+show() const : std::vector<std::string>
|
||||
+topological_order() : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights) : void
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
#className : std::string
|
||||
#dataset : torch::Tensor
|
||||
@ -157,8 +165,8 @@ __
|
||||
#states : std::map<std::string,std::vector<int>>
|
||||
#status : status_t
|
||||
}
|
||||
class "bayesnet::KDB" as C_0001112865019015250005
|
||||
class C_0001112865019015250005 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::KDB" as C_0008902920152122000044
|
||||
class C_0008902920152122000044 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDB(int k, float theta = 0.03) : void
|
||||
+~KDB() = default : void
|
||||
..
|
||||
@ -167,8 +175,26 @@ class C_0001112865019015250005 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::TAN" as C_0001760994424884323017
|
||||
class C_0001760994424884323017 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::SPODE" as C_0004096182510460307610
|
||||
class C_0004096182510460307610 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODE(int root) : void
|
||||
+~SPODE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPnDE" as C_0016268916386101512883
|
||||
class C_0016268916386101512883 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPnDE(std::vector<int> parents) : void
|
||||
+~SPnDE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "SPnDE") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::TAN" as C_0014087955399074584137
|
||||
class C_0014087955399074584137 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+TAN() : void
|
||||
+~TAN() = default : void
|
||||
..
|
||||
@ -176,8 +202,8 @@ class C_0001760994424884323017 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+graph(const std::string & name = "TAN") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Proposal" as C_0002219995589162262979
|
||||
class C_0002219995589162262979 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::Proposal" as C_0017759964713298103839
|
||||
class C_0017759964713298103839 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Proposal(torch::Tensor & pDataset, std::vector<std::string> & features_, std::string & className_) : void
|
||||
+~Proposal() : void
|
||||
..
|
||||
@ -190,74 +216,42 @@ __
|
||||
#discretizers : map<std::string,mdlp::CPPFImdlp *>
|
||||
#y : torch::Tensor
|
||||
}
|
||||
class "bayesnet::TANLd" as C_0001668829096702037834
|
||||
class C_0001668829096702037834 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+TANLd() : void
|
||||
+~TANLd() = default : void
|
||||
class "bayesnet::KDBLd" as C_0002756018222998454702
|
||||
class C_0002756018222998454702 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDBLd(int k) : void
|
||||
+~KDBLd() = default : void
|
||||
..
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : TANLd &
|
||||
+graph(const std::string & name = "TAN") const : std::vector<std::string>
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : KDBLd &
|
||||
+graph(const std::string & name = "KDB") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
abstract "bayesnet::FeatureSelect" as C_0001695326193250580823
|
||||
abstract C_0001695326193250580823 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FeatureSelect(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~FeatureSelect() : void
|
||||
class "bayesnet::SPODELd" as C_0010957245114062042836
|
||||
class C_0010957245114062042836 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODELd(int root) : void
|
||||
+~SPODELd() = default : void
|
||||
..
|
||||
#computeMeritCFS() : double
|
||||
#computeSuFeatures(const int a, const int b) : double
|
||||
#computeSuLabels() : void
|
||||
{abstract} +fit() = 0 : void
|
||||
+getFeatures() const : std::vector<int>
|
||||
+getScores() const : std::vector<double>
|
||||
#initialize() : void
|
||||
#symmetricalUncertainty(int a, int b) : double
|
||||
__
|
||||
#fitted : bool
|
||||
#maxFeatures : int
|
||||
#selectedFeatures : std::vector<int>
|
||||
#selectedScores : std::vector<double>
|
||||
#suFeatures : std::map<std::pair<int,int>,double>
|
||||
#suLabels : std::vector<double>
|
||||
#weights : const torch::Tensor &
|
||||
}
|
||||
class "bayesnet::CFS" as C_0000011627355691342494
|
||||
class C_0000011627355691342494 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+CFS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~CFS() : void
|
||||
..
|
||||
+fit() : void
|
||||
+commonFit(const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+graph(const std::string & name = "SPODELd") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::FCBF" as C_0000144682015341746929
|
||||
class C_0000144682015341746929 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FCBF(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
|
||||
+~FCBF() : void
|
||||
class "bayesnet::TANLd" as C_0013350632773616302678
|
||||
class C_0013350632773616302678 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+TANLd() : void
|
||||
+~TANLd() = default : void
|
||||
..
|
||||
+fit() : void
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : TANLd &
|
||||
+graph(const std::string & name = "TANLd") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
__
|
||||
}
|
||||
class "bayesnet::IWSS" as C_0000008268514674428553
|
||||
class C_0000008268514674428553 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+IWSS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
|
||||
+~IWSS() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPODE" as C_0000512022813807538451
|
||||
class C_0000512022813807538451 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODE(int root) : void
|
||||
+~SPODE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Ensemble" as C_0001985241386355360576
|
||||
class C_0001985241386355360576 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::Ensemble" as C_0015881931090842884611
|
||||
class C_0015881931090842884611 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Ensemble(bool predict_voting = true) : void
|
||||
+~Ensemble() = default : void
|
||||
..
|
||||
@ -280,7 +274,7 @@ class C_0001985241386355360576 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+score(torch::Tensor & X, torch::Tensor & y) : float
|
||||
+show() const : std::vector<std::string>
|
||||
+topological_order() : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights) : void
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
#voting(torch::Tensor & votes) : torch::Tensor
|
||||
__
|
||||
#models : std::vector<std::unique_ptr<Classifier>>
|
||||
@ -288,41 +282,223 @@ __
|
||||
#predict_voting : bool
|
||||
#significanceModels : std::vector<double>
|
||||
}
|
||||
class "bayesnet::(anonymous_45089536)" as C_0001186398587753535158
|
||||
class C_0001186398587753535158 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::A2DE" as C_0001410789567057647859
|
||||
class C_0001410789567057647859 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+A2DE(bool predict_voting = false) : void
|
||||
+~A2DE() : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & title = "A2DE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::AODE" as C_0006288892608974306258
|
||||
class C_0006288892608974306258 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODE(bool predict_voting = false) : void
|
||||
+~AODE() : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & title = "AODE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
__
|
||||
}
|
||||
abstract "bayesnet::FeatureSelect" as C_0013562609546004646591
|
||||
abstract C_0013562609546004646591 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FeatureSelect(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~FeatureSelect() : void
|
||||
..
|
||||
#computeMeritCFS() : double
|
||||
#computeSuFeatures(const int a, const int b) : double
|
||||
#computeSuLabels() : void
|
||||
{abstract} +fit() = 0 : void
|
||||
+getFeatures() const : std::vector<int>
|
||||
+getScores() const : std::vector<double>
|
||||
#initialize() : void
|
||||
#symmetricalUncertainty(int a, int b) : double
|
||||
__
|
||||
#fitted : bool
|
||||
#maxFeatures : int
|
||||
#selectedFeatures : std::vector<int>
|
||||
#selectedScores : std::vector<double>
|
||||
#suFeatures : std::map<std::pair<int,int>,double>
|
||||
#suLabels : std::vector<double>
|
||||
#weights : const torch::Tensor &
|
||||
}
|
||||
class "bayesnet::(anonymous_60342586)" as C_0005584545181746538542
|
||||
class C_0005584545181746538542 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_45090163)" as C_0000602764946063116717
|
||||
class C_0000602764946063116717 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60343240)" as C_0016227156982041949444
|
||||
class C_0016227156982041949444 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::BoostAODE" as C_0000358471592399852382
|
||||
class C_0000358471592399852382 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::Boost" as C_0009819322948617116148
|
||||
class C_0009819322948617116148 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Boost(bool predict_voting = false) : void
|
||||
+~Boost() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
#featureSelection(torch::Tensor & weights_) : std::vector<int>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
#update_weights(torch::Tensor & ytrain, torch::Tensor & ypred, torch::Tensor & weights) : std::tuple<torch::Tensor &,double,bool>
|
||||
#update_weights_block(int k, torch::Tensor & ytrain, torch::Tensor & weights) : std::tuple<torch::Tensor &,double,bool>
|
||||
__
|
||||
#X_test : torch::Tensor
|
||||
#X_train : torch::Tensor
|
||||
#bisection : bool
|
||||
#block_update : bool
|
||||
#convergence : bool
|
||||
#convergence_best : bool
|
||||
#featureSelector : FeatureSelect *
|
||||
#maxTolerance : int
|
||||
#order_algorithm : std::string
|
||||
#selectFeatures : bool
|
||||
#select_features_algorithm : std::string
|
||||
#threshold : double
|
||||
#y_test : torch::Tensor
|
||||
#y_train : torch::Tensor
|
||||
}
|
||||
class "bayesnet::AODELd" as C_0003898187834670349177
|
||||
class C_0003898187834670349177 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODELd(bool predict_voting = true) : void
|
||||
+~AODELd() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+fit(torch::Tensor & X_, torch::Tensor & y_, const std::vector<std::string> & features_, const std::string & className_, std::map<std::string,std::vector<int>> & states_, const Smoothing_t smoothing) : AODELd &
|
||||
+graph(const std::string & name = "AODELd") const : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::(anonymous_60275628)" as C_0009086919615463763584
|
||||
class C_0009086919615463763584 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60276282)" as C_0015251985607563196159
|
||||
class C_0015251985607563196159 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::BoostA2DE" as C_0000272055465257861326
|
||||
class C_0000272055465257861326 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+BoostA2DE(bool predict_voting = false) : void
|
||||
+~BoostA2DE() = default : void
|
||||
..
|
||||
+graph(const std::string & title = "BoostA2DE") const : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::(anonymous_60275502)" as C_0016033655851510053155
|
||||
class C_0016033655851510053155 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60276156)" as C_0000379522761622473555
|
||||
class C_0000379522761622473555 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::BoostAODE" as C_0002867772739198819061
|
||||
class C_0002867772739198819061 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+BoostAODE(bool predict_voting = false) : void
|
||||
+~BoostAODE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & title = "BoostAODE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
#trainModel(const torch::Tensor & weights) : void
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::MST" as C_0000131858426172291700
|
||||
class C_0000131858426172291700 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::CFS" as C_0000093018845530739957
|
||||
class C_0000093018845530739957 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+CFS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~CFS() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::FCBF" as C_0001157456122733975432
|
||||
class C_0001157456122733975432 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FCBF(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
|
||||
+~FCBF() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::IWSS" as C_0000066148117395428429
|
||||
class C_0000066148117395428429 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+IWSS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
|
||||
+~IWSS() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::(anonymous_60730495)" as C_0004857727320042830573
|
||||
class C_0004857727320042830573 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60731150)" as C_0000076541533312623385
|
||||
class C_0000076541533312623385 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60653004)" as C_0001444063444142949758
|
||||
class C_0001444063444142949758 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60653658)" as C_0007139277546931322856
|
||||
class C_0007139277546931322856 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60731375)" as C_0010493853592456211189
|
||||
class C_0010493853592456211189 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60732030)" as C_0007011438637915849564
|
||||
class C_0007011438637915849564 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::MST" as C_0001054867409378333602
|
||||
class C_0001054867409378333602 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+MST() = default : void
|
||||
+MST(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : void
|
||||
..
|
||||
+insertElement(std::list<int> & variables, int variable) : void
|
||||
+maximumSpanningTree() : std::vector<std::pair<int,int>>
|
||||
+reorder(std::vector<std::pair<float,std::pair<int,int>>> T, int root_original) : std::vector<std::pair<int,int>>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Graph" as C_0001197041682001898467
|
||||
class C_0001197041682001898467 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::Graph" as C_0009576333456015187741
|
||||
class C_0009576333456015187741 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Graph(int V) : void
|
||||
..
|
||||
+addEdge(int u, int v, float wt) : void
|
||||
@ -332,81 +508,73 @@ class C_0001197041682001898467 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+union_set(int u, int v) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::KDBLd" as C_0000344502277874806837
|
||||
class C_0000344502277874806837 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDBLd(int k) : void
|
||||
+~KDBLd() = default : void
|
||||
..
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : KDBLd &
|
||||
+graph(const std::string & name = "KDB") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::AODE" as C_0000786111576121788282
|
||||
class C_0000786111576121788282 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODE(bool predict_voting = false) : void
|
||||
+~AODE() : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & title = "AODE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPODELd" as C_0001369655639257755354
|
||||
class C_0001369655639257755354 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODELd(int root) : void
|
||||
+~SPODELd() = default : void
|
||||
..
|
||||
+commonFit(const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
|
||||
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::AODELd" as C_0000487273479333793647
|
||||
class C_0000487273479333793647 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODELd(bool predict_voting = true) : void
|
||||
+~AODELd() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+fit(torch::Tensor & X_, torch::Tensor & y_, const std::vector<std::string> & features_, const std::string & className_, std::map<std::string,std::vector<int>> & states_) : AODELd &
|
||||
+graph(const std::string & name = "AODELd") const : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights) : void
|
||||
__
|
||||
}
|
||||
C_0001303524929067080934 --> C_0001303524929067080934 : -parents
|
||||
C_0001303524929067080934 --> C_0001303524929067080934 : -children
|
||||
C_0001186707649890429575 o-- C_0001303524929067080934 : -nodes
|
||||
C_0000327135989451974539 ..> C_0000738420730783851375
|
||||
C_0002043996622900301644 o-- C_0001186707649890429575 : #model
|
||||
C_0002043996622900301644 o-- C_0000736965376885623323 : #metrics
|
||||
C_0002043996622900301644 o-- C_0000738420730783851375 : #status
|
||||
C_0000327135989451974539 <|-- C_0002043996622900301644
|
||||
C_0002043996622900301644 <|-- C_0001112865019015250005
|
||||
C_0002043996622900301644 <|-- C_0001760994424884323017
|
||||
C_0002219995589162262979 ..> C_0001186707649890429575
|
||||
C_0001760994424884323017 <|-- C_0001668829096702037834
|
||||
C_0002219995589162262979 <|-- C_0001668829096702037834
|
||||
C_0000736965376885623323 <|-- C_0001695326193250580823
|
||||
C_0001695326193250580823 <|-- C_0000011627355691342494
|
||||
C_0001695326193250580823 <|-- C_0000144682015341746929
|
||||
C_0001695326193250580823 <|-- C_0000008268514674428553
|
||||
C_0002043996622900301644 <|-- C_0000512022813807538451
|
||||
C_0001985241386355360576 o-- C_0002043996622900301644 : #models
|
||||
C_0002043996622900301644 <|-- C_0001985241386355360576
|
||||
C_0000358471592399852382 --> C_0001695326193250580823 : -featureSelector
|
||||
C_0001985241386355360576 <|-- C_0000358471592399852382
|
||||
C_0001112865019015250005 <|-- C_0000344502277874806837
|
||||
C_0002219995589162262979 <|-- C_0000344502277874806837
|
||||
C_0001985241386355360576 <|-- C_0000786111576121788282
|
||||
C_0000512022813807538451 <|-- C_0001369655639257755354
|
||||
C_0002219995589162262979 <|-- C_0001369655639257755354
|
||||
C_0001985241386355360576 <|-- C_0000487273479333793647
|
||||
C_0002219995589162262979 <|-- C_0000487273479333793647
|
||||
C_0010428199432536647474 --> C_0010428199432536647474 : -parents
|
||||
C_0010428199432536647474 --> C_0010428199432536647474 : -children
|
||||
C_0009493661199123436603 ..> C_0013393078277439680282
|
||||
C_0009493661199123436603 o-- C_0010428199432536647474 : -nodes
|
||||
C_0002617087915615796317 ..> C_0013393078277439680282
|
||||
C_0002617087915615796317 ..> C_0005907365846270811004
|
||||
C_0016351972983202413152 ..> C_0013393078277439680282
|
||||
C_0016351972983202413152 o-- C_0009493661199123436603 : #model
|
||||
C_0016351972983202413152 o-- C_0005895723015084986588 : #metrics
|
||||
C_0016351972983202413152 o-- C_0005907365846270811004 : #status
|
||||
C_0002617087915615796317 <|-- C_0016351972983202413152
|
||||
|
||||
'Generated with clang-uml, version 0.5.1
|
||||
'LLVM version clang version 17.0.6 (Fedora 17.0.6-2.fc39)
|
||||
C_0016351972983202413152 <|-- C_0008902920152122000044
|
||||
|
||||
C_0016351972983202413152 <|-- C_0004096182510460307610
|
||||
|
||||
C_0016351972983202413152 <|-- C_0016268916386101512883
|
||||
|
||||
C_0016351972983202413152 <|-- C_0014087955399074584137
|
||||
|
||||
C_0017759964713298103839 ..> C_0009493661199123436603
|
||||
C_0002756018222998454702 ..> C_0013393078277439680282
|
||||
C_0008902920152122000044 <|-- C_0002756018222998454702
|
||||
|
||||
C_0017759964713298103839 <|-- C_0002756018222998454702
|
||||
|
||||
C_0010957245114062042836 ..> C_0013393078277439680282
|
||||
C_0004096182510460307610 <|-- C_0010957245114062042836
|
||||
|
||||
C_0017759964713298103839 <|-- C_0010957245114062042836
|
||||
|
||||
C_0013350632773616302678 ..> C_0013393078277439680282
|
||||
C_0014087955399074584137 <|-- C_0013350632773616302678
|
||||
|
||||
C_0017759964713298103839 <|-- C_0013350632773616302678
|
||||
|
||||
C_0015881931090842884611 ..> C_0013393078277439680282
|
||||
C_0015881931090842884611 o-- C_0016351972983202413152 : #models
|
||||
C_0016351972983202413152 <|-- C_0015881931090842884611
|
||||
|
||||
C_0015881931090842884611 <|-- C_0001410789567057647859
|
||||
|
||||
C_0015881931090842884611 <|-- C_0006288892608974306258
|
||||
|
||||
C_0005895723015084986588 <|-- C_0013562609546004646591
|
||||
|
||||
C_0009819322948617116148 --> C_0013562609546004646591 : #featureSelector
|
||||
C_0015881931090842884611 <|-- C_0009819322948617116148
|
||||
|
||||
C_0003898187834670349177 ..> C_0013393078277439680282
|
||||
C_0015881931090842884611 <|-- C_0003898187834670349177
|
||||
|
||||
C_0017759964713298103839 <|-- C_0003898187834670349177
|
||||
|
||||
C_0000272055465257861326 ..> C_0013393078277439680282
|
||||
C_0009819322948617116148 <|-- C_0000272055465257861326
|
||||
|
||||
C_0002867772739198819061 ..> C_0013393078277439680282
|
||||
C_0009819322948617116148 <|-- C_0002867772739198819061
|
||||
|
||||
C_0013562609546004646591 <|-- C_0000093018845530739957
|
||||
|
||||
C_0013562609546004646591 <|-- C_0001157456122733975432
|
||||
|
||||
C_0013562609546004646591 <|-- C_0000066148117395428429
|
||||
|
||||
|
||||
'Generated with clang-uml, version 0.5.5
|
||||
'LLVM version clang version 18.1.8 (Fedora 18.1.8-5.fc41)
|
||||
@enduml
|
||||
|
File diff suppressed because one or more lines are too long
Before Width: | Height: | Size: 139 KiB After Width: | Height: | Size: 196 KiB |
@ -1,128 +1,314 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<!-- Generated by graphviz version 8.1.0 (20230707.0739)
|
||||
<!-- Generated by graphviz version 12.1.0 (20240811.2233)
|
||||
-->
|
||||
<!-- Title: BayesNet Pages: 1 -->
|
||||
<svg width="1632pt" height="288pt"
|
||||
viewBox="0.00 0.00 1631.95 287.80" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 283.8)">
|
||||
<svg width="3725pt" height="432pt"
|
||||
viewBox="0.00 0.00 3724.84 431.80" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 427.8)">
|
||||
<title>BayesNet</title>
|
||||
<polygon fill="white" stroke="none" points="-4,4 -4,-283.8 1627.95,-283.8 1627.95,4 -4,4"/>
|
||||
<!-- node1 -->
|
||||
<polygon fill="white" stroke="none" points="-4,4 -4,-427.8 3720.84,-427.8 3720.84,4 -4,4"/>
|
||||
<!-- node0 -->
|
||||
<g id="node1" class="node">
|
||||
<title>node0</title>
|
||||
<polygon fill="none" stroke="black" points="1655.43,-398.35 1655.43,-413.26 1625.69,-423.8 1583.63,-423.8 1553.89,-413.26 1553.89,-398.35 1583.63,-387.8 1625.69,-387.8 1655.43,-398.35"/>
|
||||
<text text-anchor="middle" x="1604.66" y="-401.53" font-family="Times,serif" font-size="12.00">BayesNet</text>
|
||||
</g>
|
||||
<!-- node1 -->
|
||||
<g id="node2" class="node">
|
||||
<title>node1</title>
|
||||
<polygon fill="none" stroke="black" points="826.43,-254.35 826.43,-269.26 796.69,-279.8 754.63,-279.8 724.89,-269.26 724.89,-254.35 754.63,-243.8 796.69,-243.8 826.43,-254.35"/>
|
||||
<text text-anchor="middle" x="775.66" y="-257.53" font-family="Times,serif" font-size="12.00">BayesNet</text>
|
||||
<polygon fill="none" stroke="black" points="413.32,-257.8 372.39,-273.03 206.66,-279.8 40.93,-273.03 0,-257.8 114.69,-245.59 298.64,-245.59 413.32,-257.8"/>
|
||||
<text text-anchor="middle" x="206.66" y="-257.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libc10.so</text>
|
||||
</g>
|
||||
<!-- node0->node1 -->
|
||||
<g id="edge1" class="edge">
|
||||
<title>node0->node1</title>
|
||||
<path fill="none" stroke="black" d="M1553.59,-400.53C1451.65,-391.91 1215.69,-371.61 1017.66,-351.8 773.36,-327.37 488.07,-295.22 329.31,-277.01"/>
|
||||
<polygon fill="black" stroke="black" points="329.93,-273.56 319.6,-275.89 329.14,-280.51 329.93,-273.56"/>
|
||||
</g>
|
||||
<!-- node2 -->
|
||||
<g id="node2" class="node">
|
||||
<g id="node3" class="node">
|
||||
<title>node2</title>
|
||||
<polygon fill="none" stroke="black" points="413.32,-185.8 372.39,-201.03 206.66,-207.8 40.93,-201.03 0,-185.8 114.69,-173.59 298.64,-173.59 413.32,-185.8"/>
|
||||
<text text-anchor="middle" x="206.66" y="-185.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libc10.so</text>
|
||||
<polygon fill="none" stroke="black" points="894.21,-257.8 848.35,-273.03 662.66,-279.8 476.98,-273.03 431.12,-257.8 559.61,-245.59 765.71,-245.59 894.21,-257.8"/>
|
||||
<text text-anchor="middle" x="662.66" y="-257.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libc10_cuda.so</text>
|
||||
</g>
|
||||
<!-- node1->node2 -->
|
||||
<g id="edge1" class="edge">
|
||||
<title>node1->node2</title>
|
||||
<path fill="none" stroke="black" d="M724.41,-254.5C634.7,-243.46 447.04,-220.38 324.01,-205.24"/>
|
||||
<polygon fill="black" stroke="black" points="324.77,-201.69 314.42,-203.94 323.92,-208.63 324.77,-201.69"/>
|
||||
<!-- node0->node2 -->
|
||||
<g id="edge2" class="edge">
|
||||
<title>node0->node2</title>
|
||||
<path fill="none" stroke="black" d="M1555.34,-397.37C1408.12,-375.18 969.52,-309.06 767.13,-278.55"/>
|
||||
<polygon fill="black" stroke="black" points="767.81,-275.12 757.4,-277.09 766.77,-282.04 767.81,-275.12"/>
|
||||
</g>
|
||||
<!-- node3 -->
|
||||
<g id="node3" class="node">
|
||||
<g id="node4" class="node">
|
||||
<title>node3</title>
|
||||
<polygon fill="none" stroke="black" points="857.68,-185.8 815.49,-201.03 644.66,-207.8 473.84,-201.03 431.65,-185.8 549.86,-173.59 739.46,-173.59 857.68,-185.8"/>
|
||||
<text text-anchor="middle" x="644.66" y="-185.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libkineto.a</text>
|
||||
<polygon fill="none" stroke="black" points="1338.68,-257.8 1296.49,-273.03 1125.66,-279.8 954.84,-273.03 912.65,-257.8 1030.86,-245.59 1220.46,-245.59 1338.68,-257.8"/>
|
||||
<text text-anchor="middle" x="1125.66" y="-257.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libkineto.a</text>
|
||||
</g>
|
||||
<!-- node1->node3 -->
|
||||
<g id="edge2" class="edge">
|
||||
<title>node1->node3</title>
|
||||
<path fill="none" stroke="black" d="M747.56,-245.79C729.21,-235.98 704.97,-223.03 684.63,-212.16"/>
|
||||
<polygon fill="black" stroke="black" points="686.47,-208.64 676,-207.02 683.17,-214.82 686.47,-208.64"/>
|
||||
<!-- node0->node3 -->
|
||||
<g id="edge3" class="edge">
|
||||
<title>node0->node3</title>
|
||||
<path fill="none" stroke="black" d="M1566.68,-393.54C1484.46,-369.17 1289.3,-311.32 1188.44,-281.41"/>
|
||||
<polygon fill="black" stroke="black" points="1189.53,-278.09 1178.95,-278.6 1187.54,-284.8 1189.53,-278.09"/>
|
||||
</g>
|
||||
<!-- node4 -->
|
||||
<g id="node4" class="node">
|
||||
<title>node4</title>
|
||||
<polygon fill="none" stroke="black" points="939.33,-182.35 939.33,-197.26 920.78,-207.8 894.54,-207.8 875.99,-197.26 875.99,-182.35 894.54,-171.8 920.78,-171.8 939.33,-182.35"/>
|
||||
<text text-anchor="middle" x="907.66" y="-185.53" font-family="Times,serif" font-size="12.00">mdlp</text>
|
||||
</g>
|
||||
<!-- node1->node4 -->
|
||||
<g id="edge3" class="edge">
|
||||
<title>node1->node4</title>
|
||||
<path fill="none" stroke="black" d="M803.66,-245.96C824.66,-234.82 853.45,-219.56 875.41,-207.91"/>
|
||||
<polygon fill="black" stroke="black" points="876.78,-210.61 883.97,-202.84 873.5,-204.43 876.78,-210.61"/>
|
||||
</g>
|
||||
<!-- node9 -->
|
||||
<g id="node5" class="node">
|
||||
<title>node9</title>
|
||||
<polygon fill="none" stroke="black" points="1107.74,-195.37 1032.66,-207.8 957.58,-195.37 986.26,-175.24 1079.06,-175.24 1107.74,-195.37"/>
|
||||
<text text-anchor="middle" x="1032.66" y="-185.53" font-family="Times,serif" font-size="12.00">torch_library</text>
|
||||
<title>node4</title>
|
||||
<polygon fill="none" stroke="black" points="1552.26,-257.8 1532.93,-273.03 1454.66,-279.8 1376.4,-273.03 1357.07,-257.8 1411.23,-245.59 1498.1,-245.59 1552.26,-257.8"/>
|
||||
<text text-anchor="middle" x="1454.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/lib64/libcuda.so</text>
|
||||
</g>
|
||||
<!-- node1->node9 -->
|
||||
<!-- node0->node4 -->
|
||||
<g id="edge4" class="edge">
|
||||
<title>node1->node9</title>
|
||||
<path fill="none" stroke="black" d="M815.25,-250.02C860.25,-237.77 933.77,-217.74 982.68,-204.42"/>
|
||||
<polygon fill="black" stroke="black" points="983.3,-207.61 992.02,-201.6 981.46,-200.85 983.3,-207.61"/>
|
||||
</g>
|
||||
<!-- node10 -->
|
||||
<g id="node6" class="node">
|
||||
<title>node10</title>
|
||||
<polygon fill="none" stroke="black" points="1159.81,-113.8 1086.89,-129.03 791.66,-135.8 496.43,-129.03 423.52,-113.8 627.82,-101.59 955.5,-101.59 1159.81,-113.8"/>
|
||||
<text text-anchor="middle" x="791.66" y="-113.53" font-family="Times,serif" font-size="12.00">-Wl,--no-as-needed,"/home/rmontanana/Code/libtorch/lib/libtorch.so" -Wl,--as-needed</text>
|
||||
</g>
|
||||
<!-- node9->node10 -->
|
||||
<g id="edge5" class="edge">
|
||||
<title>node9->node10</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M985.62,-175.14C949.2,-164.56 898.31,-149.78 857.79,-138.01"/>
|
||||
<polygon fill="black" stroke="black" points="859.04,-134.44 848.46,-135.01 857.09,-141.16 859.04,-134.44"/>
|
||||
<title>node0->node4</title>
|
||||
<path fill="none" stroke="black" d="M1586.27,-387.39C1559.5,-362.05 1509.72,-314.92 1479.65,-286.46"/>
|
||||
<polygon fill="black" stroke="black" points="1482.13,-283.99 1472.46,-279.65 1477.31,-289.07 1482.13,-283.99"/>
|
||||
</g>
|
||||
<!-- node5 -->
|
||||
<g id="node7" class="node">
|
||||
<g id="node6" class="node">
|
||||
<title>node5</title>
|
||||
<polygon fill="none" stroke="black" points="1371.56,-123.37 1274.66,-135.8 1177.77,-123.37 1214.78,-103.24 1334.55,-103.24 1371.56,-123.37"/>
|
||||
<text text-anchor="middle" x="1274.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch_cpu_library</text>
|
||||
<polygon fill="none" stroke="black" points="1873.26,-257.8 1843.23,-273.03 1721.66,-279.8 1600.09,-273.03 1570.06,-257.8 1654.19,-245.59 1789.13,-245.59 1873.26,-257.8"/>
|
||||
<text text-anchor="middle" x="1721.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/local/cuda/lib64/libcudart.so</text>
|
||||
</g>
|
||||
<!-- node9->node5 -->
|
||||
<g id="edge6" class="edge">
|
||||
<title>node9->node5</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1079.61,-175.22C1120.66,-163.35 1180.2,-146.13 1222.68,-133.84"/>
|
||||
<polygon fill="black" stroke="black" points="1223.46,-136.97 1232.09,-130.83 1221.51,-130.24 1223.46,-136.97"/>
|
||||
<!-- node0->node5 -->
|
||||
<g id="edge5" class="edge">
|
||||
<title>node0->node5</title>
|
||||
<path fill="none" stroke="black" d="M1619.76,-387.77C1628.83,-377.46 1640.53,-363.98 1650.66,-351.8 1668.32,-330.59 1687.84,-306.03 1701.94,-288.1"/>
|
||||
<polygon fill="black" stroke="black" points="1704.43,-290.59 1707.84,-280.56 1698.92,-286.27 1704.43,-290.59"/>
|
||||
</g>
|
||||
<!-- node6 -->
|
||||
<g id="node8" class="node">
|
||||
<g id="node7" class="node">
|
||||
<title>node6</title>
|
||||
<polygon fill="none" stroke="black" points="1191.4,-27.9 1114.6,-43.12 803.66,-49.9 492.72,-43.12 415.93,-27.9 631.1,-15.68 976.22,-15.68 1191.4,-27.9"/>
|
||||
<text text-anchor="middle" x="803.66" y="-27.63" font-family="Times,serif" font-size="12.00">-Wl,--no-as-needed,"/home/rmontanana/Code/libtorch/lib/libtorch_cpu.so" -Wl,--as-needed</text>
|
||||
<polygon fill="none" stroke="black" points="2231.79,-257.8 2198.1,-273.03 2061.66,-279.8 1925.23,-273.03 1891.53,-257.8 1985.95,-245.59 2137.38,-245.59 2231.79,-257.8"/>
|
||||
<text text-anchor="middle" x="2061.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/local/cuda/lib64/libnvToolsExt.so</text>
|
||||
</g>
|
||||
<!-- node5->node6 -->
|
||||
<g id="edge7" class="edge">
|
||||
<title>node5->node6</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1210.16,-105.31C1130.55,-91.13 994.37,-66.87 901.77,-50.38"/>
|
||||
<polygon fill="black" stroke="black" points="902.44,-46.77 891.98,-48.46 901.22,-53.66 902.44,-46.77"/>
|
||||
<!-- node0->node6 -->
|
||||
<g id="edge6" class="edge">
|
||||
<title>node0->node6</title>
|
||||
<path fill="none" stroke="black" d="M1642.06,-393.18C1721.31,-368.56 1906.71,-310.95 2002.32,-281.24"/>
|
||||
<polygon fill="black" stroke="black" points="2003.28,-284.61 2011.79,-278.3 2001.21,-277.92 2003.28,-284.61"/>
|
||||
</g>
|
||||
<!-- node7 -->
|
||||
<g id="node9" class="node">
|
||||
<g id="node8" class="node">
|
||||
<title>node7</title>
|
||||
<polygon fill="none" stroke="black" points="1339.72,-37.46 1274.66,-49.9 1209.61,-37.46 1234.46,-17.34 1314.87,-17.34 1339.72,-37.46"/>
|
||||
<text text-anchor="middle" x="1274.66" y="-27.63" font-family="Times,serif" font-size="12.00">caffe2::mkl</text>
|
||||
<polygon fill="none" stroke="black" points="2541.44,-257.8 2512.56,-273.03 2395.66,-279.8 2278.76,-273.03 2249.89,-257.8 2330.79,-245.59 2460.54,-245.59 2541.44,-257.8"/>
|
||||
<text text-anchor="middle" x="2395.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/local/cuda/lib64/libnvrtc.so</text>
|
||||
</g>
|
||||
<!-- node5->node7 -->
|
||||
<g id="edge8" class="edge">
|
||||
<title>node5->node7</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1274.66,-102.95C1274.66,-91.56 1274.66,-75.07 1274.66,-60.95"/>
|
||||
<polygon fill="black" stroke="black" points="1278.16,-61.27 1274.66,-51.27 1271.16,-61.27 1278.16,-61.27"/>
|
||||
<!-- node0->node7 -->
|
||||
<g id="edge7" class="edge">
|
||||
<title>node0->node7</title>
|
||||
<path fill="none" stroke="black" d="M1651.19,-396.45C1780.36,-373.26 2144.76,-307.85 2311.05,-277.99"/>
|
||||
<polygon fill="black" stroke="black" points="2311.47,-281.47 2320.7,-276.26 2310.24,-274.58 2311.47,-281.47"/>
|
||||
</g>
|
||||
<!-- node8 -->
|
||||
<g id="node10" class="node">
|
||||
<g id="node9" class="node">
|
||||
<title>node8</title>
|
||||
<polygon fill="none" stroke="black" points="1623.95,-41.76 1490.66,-63.8 1357.37,-41.76 1408.28,-6.09 1573.04,-6.09 1623.95,-41.76"/>
|
||||
<text text-anchor="middle" x="1490.66" y="-34.75" font-family="Times,serif" font-size="12.00">dummy</text>
|
||||
<text text-anchor="middle" x="1490.66" y="-20.5" font-family="Times,serif" font-size="12.00">(protobuf::libprotobuf)</text>
|
||||
<polygon fill="none" stroke="black" points="1642.01,-326.35 1642.01,-341.26 1620.13,-351.8 1589.19,-351.8 1567.31,-341.26 1567.31,-326.35 1589.19,-315.8 1620.13,-315.8 1642.01,-326.35"/>
|
||||
<text text-anchor="middle" x="1604.66" y="-329.53" font-family="Times,serif" font-size="12.00">fimdlp</text>
|
||||
</g>
|
||||
<!-- node5->node8 -->
|
||||
<!-- node0->node8 -->
|
||||
<g id="edge8" class="edge">
|
||||
<title>node0->node8</title>
|
||||
<path fill="none" stroke="black" d="M1604.66,-387.5C1604.66,-380.21 1604.66,-371.53 1604.66,-363.34"/>
|
||||
<polygon fill="black" stroke="black" points="1608.16,-363.42 1604.66,-353.42 1601.16,-363.42 1608.16,-363.42"/>
|
||||
</g>
|
||||
<!-- node19 -->
|
||||
<g id="node10" class="node">
|
||||
<title>node19</title>
|
||||
<polygon fill="none" stroke="black" points="2709.74,-267.37 2634.66,-279.8 2559.58,-267.37 2588.26,-247.24 2681.06,-247.24 2709.74,-267.37"/>
|
||||
<text text-anchor="middle" x="2634.66" y="-257.53" font-family="Times,serif" font-size="12.00">torch_library</text>
|
||||
</g>
|
||||
<!-- node0->node19 -->
|
||||
<g id="edge29" class="edge">
|
||||
<title>node0->node19</title>
|
||||
<path fill="none" stroke="black" d="M1655.87,-399.32C1798.23,-383.79 2210.64,-336.94 2550.66,-279.8 2559.43,-278.33 2568.68,-276.62 2577.72,-274.86"/>
|
||||
<polygon fill="black" stroke="black" points="2578.38,-278.3 2587.5,-272.92 2577.01,-271.43 2578.38,-278.3"/>
|
||||
</g>
|
||||
<!-- node8->node1 -->
|
||||
<g id="edge9" class="edge">
|
||||
<title>node5->node8</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M1310.82,-102.76C1341.68,-90.77 1386.88,-73.21 1424.25,-58.7"/>
|
||||
<polygon fill="black" stroke="black" points="1425.01,-61.77 1433.06,-54.89 1422.47,-55.25 1425.01,-61.77"/>
|
||||
<title>node8->node1</title>
|
||||
<path fill="none" stroke="black" d="M1566.84,-331.58C1419.81,-326.72 872.06,-307.69 421.66,-279.8 401.07,-278.53 379.38,-277.02 358.03,-275.43"/>
|
||||
<polygon fill="black" stroke="black" points="358.3,-271.94 348.06,-274.67 357.77,-278.92 358.3,-271.94"/>
|
||||
</g>
|
||||
<!-- node8->node2 -->
|
||||
<g id="edge10" class="edge">
|
||||
<title>node8->node2</title>
|
||||
<path fill="none" stroke="black" d="M1566.86,-330C1445.11,-320.95 1057.97,-292.18 831.67,-275.36"/>
|
||||
<polygon fill="black" stroke="black" points="832.09,-271.89 821.86,-274.63 831.57,-278.87 832.09,-271.89"/>
|
||||
</g>
|
||||
<!-- node8->node3 -->
|
||||
<g id="edge11" class="edge">
|
||||
<title>node8->node3</title>
|
||||
<path fill="none" stroke="black" d="M1567.08,-327.31C1495.4,-316.84 1336.86,-293.67 1230.62,-278.14"/>
|
||||
<polygon fill="black" stroke="black" points="1231.44,-274.72 1221.04,-276.74 1230.42,-281.65 1231.44,-274.72"/>
|
||||
</g>
|
||||
<!-- node8->node4 -->
|
||||
<g id="edge12" class="edge">
|
||||
<title>node8->node4</title>
|
||||
<path fill="none" stroke="black" d="M1578.53,-320.61C1555.96,-310.08 1522.92,-294.66 1496.64,-282.4"/>
|
||||
<polygon fill="black" stroke="black" points="1498.12,-279.22 1487.58,-278.17 1495.16,-285.57 1498.12,-279.22"/>
|
||||
</g>
|
||||
<!-- node8->node5 -->
|
||||
<g id="edge13" class="edge">
|
||||
<title>node8->node5</title>
|
||||
<path fill="none" stroke="black" d="M1627.78,-318.97C1644.15,-309.18 1666.44,-295.84 1685.2,-284.62"/>
|
||||
<polygon fill="black" stroke="black" points="1686.83,-287.73 1693.61,-279.59 1683.23,-281.72 1686.83,-287.73"/>
|
||||
</g>
|
||||
<!-- node8->node6 -->
|
||||
<g id="edge14" class="edge">
|
||||
<title>node8->node6</title>
|
||||
<path fill="none" stroke="black" d="M1642.45,-327.02C1712.36,-316.31 1863.89,-293.1 1964.32,-277.71"/>
|
||||
<polygon fill="black" stroke="black" points="1964.84,-281.18 1974.2,-276.2 1963.78,-274.26 1964.84,-281.18"/>
|
||||
</g>
|
||||
<!-- node8->node7 -->
|
||||
<g id="edge15" class="edge">
|
||||
<title>node8->node7</title>
|
||||
<path fill="none" stroke="black" d="M1642.33,-330.01C1740.75,-322.64 2013.75,-301.7 2240.66,-279.8 2254.16,-278.5 2268.32,-277.06 2282.35,-275.58"/>
|
||||
<polygon fill="black" stroke="black" points="2282.49,-279.08 2292.06,-274.54 2281.75,-272.12 2282.49,-279.08"/>
|
||||
</g>
|
||||
<!-- node8->node19 -->
|
||||
<g id="edge16" class="edge">
|
||||
<title>node8->node19</title>
|
||||
<path fill="none" stroke="black" d="M1642.25,-332.63C1770.06,-331.64 2199.48,-324.94 2550.66,-279.8 2560.1,-278.59 2570.07,-276.92 2579.71,-275.1"/>
|
||||
<polygon fill="black" stroke="black" points="2580.21,-278.57 2589.34,-273.21 2578.86,-271.7 2580.21,-278.57"/>
|
||||
</g>
|
||||
<!-- node20 -->
|
||||
<g id="node11" class="node">
|
||||
<title>node20</title>
|
||||
<polygon fill="none" stroke="black" points="2606.81,-185.8 2533.89,-201.03 2238.66,-207.8 1943.43,-201.03 1870.52,-185.8 2074.82,-173.59 2402.5,-173.59 2606.81,-185.8"/>
|
||||
<text text-anchor="middle" x="2238.66" y="-185.53" font-family="Times,serif" font-size="12.00">-Wl,--no-as-needed,"/home/rmontanana/Code/libtorch/lib/libtorch.so" -Wl,--as-needed</text>
|
||||
</g>
|
||||
<!-- node19->node20 -->
|
||||
<g id="edge17" class="edge">
|
||||
<title>node19->node20</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2583.63,-250.21C2572.76,-248.03 2561.34,-245.79 2550.66,-243.8 2482.14,-231.05 2404.92,-217.93 2344.44,-207.93"/>
|
||||
<polygon fill="black" stroke="black" points="2345.28,-204.52 2334.84,-206.34 2344.14,-211.42 2345.28,-204.52"/>
|
||||
</g>
|
||||
<!-- node9 -->
|
||||
<g id="node12" class="node">
|
||||
<title>node9</title>
|
||||
<polygon fill="none" stroke="black" points="2542.56,-123.37 2445.66,-135.8 2348.77,-123.37 2385.78,-103.24 2505.55,-103.24 2542.56,-123.37"/>
|
||||
<text text-anchor="middle" x="2445.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch_cpu_library</text>
|
||||
</g>
|
||||
<!-- node19->node9 -->
|
||||
<g id="edge18" class="edge">
|
||||
<title>node19->node9</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2635.72,-246.84C2636.4,-227.49 2634.61,-192.58 2615.66,-171.8 2601.13,-155.87 2551.93,-141.56 2510.18,-131.84"/>
|
||||
<polygon fill="black" stroke="black" points="2511.2,-128.48 2500.67,-129.68 2509.65,-135.31 2511.2,-128.48"/>
|
||||
</g>
|
||||
<!-- node13 -->
|
||||
<g id="node16" class="node">
|
||||
<title>node13</title>
|
||||
<polygon fill="none" stroke="black" points="3056.45,-195.37 2953.66,-207.8 2850.87,-195.37 2890.13,-175.24 3017.19,-175.24 3056.45,-195.37"/>
|
||||
<text text-anchor="middle" x="2953.66" y="-185.53" font-family="Times,serif" font-size="12.00">torch_cuda_library</text>
|
||||
</g>
|
||||
<!-- node19->node13 -->
|
||||
<g id="edge22" class="edge">
|
||||
<title>node19->node13</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2685.21,-249.71C2741.11,-237.45 2831.21,-217.67 2891.42,-204.46"/>
|
||||
<polygon fill="black" stroke="black" points="2891.8,-207.96 2900.82,-202.4 2890.3,-201.13 2891.8,-207.96"/>
|
||||
</g>
|
||||
<!-- node10 -->
|
||||
<g id="node13" class="node">
|
||||
<title>node10</title>
|
||||
<polygon fill="none" stroke="black" points="2362.4,-27.9 2285.6,-43.12 1974.66,-49.9 1663.72,-43.12 1586.93,-27.9 1802.1,-15.68 2147.22,-15.68 2362.4,-27.9"/>
|
||||
<text text-anchor="middle" x="1974.66" y="-27.63" font-family="Times,serif" font-size="12.00">-Wl,--no-as-needed,"/home/rmontanana/Code/libtorch/lib/libtorch_cpu.so" -Wl,--as-needed</text>
|
||||
</g>
|
||||
<!-- node9->node10 -->
|
||||
<g id="edge19" class="edge">
|
||||
<title>node9->node10</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2381.16,-105.31C2301.63,-91.15 2165.65,-66.92 2073.05,-50.43"/>
|
||||
<polygon fill="black" stroke="black" points="2073.93,-47.03 2063.48,-48.72 2072.71,-53.92 2073.93,-47.03"/>
|
||||
</g>
|
||||
<!-- node11 -->
|
||||
<g id="node14" class="node">
|
||||
<title>node11</title>
|
||||
<polygon fill="none" stroke="black" points="2510.72,-37.46 2445.66,-49.9 2380.61,-37.46 2405.46,-17.34 2485.87,-17.34 2510.72,-37.46"/>
|
||||
<text text-anchor="middle" x="2445.66" y="-27.63" font-family="Times,serif" font-size="12.00">caffe2::mkl</text>
|
||||
</g>
|
||||
<!-- node9->node11 -->
|
||||
<g id="edge20" class="edge">
|
||||
<title>node9->node11</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2445.66,-102.95C2445.66,-91.68 2445.66,-75.4 2445.66,-61.37"/>
|
||||
<polygon fill="black" stroke="black" points="2449.16,-61.78 2445.66,-51.78 2442.16,-61.78 2449.16,-61.78"/>
|
||||
</g>
|
||||
<!-- node12 -->
|
||||
<g id="node15" class="node">
|
||||
<title>node12</title>
|
||||
<polygon fill="none" stroke="black" points="2794.95,-41.76 2661.66,-63.8 2528.37,-41.76 2579.28,-6.09 2744.04,-6.09 2794.95,-41.76"/>
|
||||
<text text-anchor="middle" x="2661.66" y="-34.75" font-family="Times,serif" font-size="12.00">dummy</text>
|
||||
<text text-anchor="middle" x="2661.66" y="-20.5" font-family="Times,serif" font-size="12.00">(protobuf::libprotobuf)</text>
|
||||
</g>
|
||||
<!-- node9->node12 -->
|
||||
<g id="edge21" class="edge">
|
||||
<title>node9->node12</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2481.82,-102.76C2512.55,-90.82 2557.5,-73.36 2594.77,-58.89"/>
|
||||
<polygon fill="black" stroke="black" points="2595.6,-62.32 2603.65,-55.44 2593.06,-55.79 2595.6,-62.32"/>
|
||||
</g>
|
||||
<!-- node13->node9 -->
|
||||
<g id="edge28" class="edge">
|
||||
<title>node13->node9</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2880.59,-179.79C2799.97,-169.71 2666.42,-152.57 2551.66,-135.8 2540.2,-134.13 2528.06,-132.27 2516.24,-130.41"/>
|
||||
<polygon fill="black" stroke="black" points="2516.96,-126.98 2506.54,-128.86 2515.87,-133.89 2516.96,-126.98"/>
|
||||
</g>
|
||||
<!-- node14 -->
|
||||
<g id="node17" class="node">
|
||||
<title>node14</title>
|
||||
<polygon fill="none" stroke="black" points="3346.69,-113.8 3268.85,-129.03 2953.66,-135.8 2638.48,-129.03 2560.63,-113.8 2778.75,-101.59 3128.58,-101.59 3346.69,-113.8"/>
|
||||
<text text-anchor="middle" x="2953.66" y="-113.53" font-family="Times,serif" font-size="12.00">-Wl,--no-as-needed,"/home/rmontanana/Code/libtorch/lib/libtorch_cuda.so" -Wl,--as-needed</text>
|
||||
</g>
|
||||
<!-- node13->node14 -->
|
||||
<g id="edge23" class="edge">
|
||||
<title>node13->node14</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2953.66,-174.97C2953.66,-167.13 2953.66,-157.01 2953.66,-147.53"/>
|
||||
<polygon fill="black" stroke="black" points="2957.16,-147.59 2953.66,-137.59 2950.16,-147.59 2957.16,-147.59"/>
|
||||
</g>
|
||||
<!-- node15 -->
|
||||
<g id="node18" class="node">
|
||||
<title>node15</title>
|
||||
<polygon fill="none" stroke="black" points="3514.74,-123.37 3439.66,-135.8 3364.58,-123.37 3393.26,-103.24 3486.06,-103.24 3514.74,-123.37"/>
|
||||
<text text-anchor="middle" x="3439.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch::cudart</text>
|
||||
</g>
|
||||
<!-- node13->node15 -->
|
||||
<g id="edge24" class="edge">
|
||||
<title>node13->node15</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3028.35,-180.51C3109.24,-171.17 3241.96,-154.78 3355.66,-135.8 3364.43,-134.34 3373.69,-132.63 3382.72,-130.88"/>
|
||||
<polygon fill="black" stroke="black" points="3383.38,-134.31 3392.51,-128.93 3382.02,-127.45 3383.38,-134.31"/>
|
||||
</g>
|
||||
<!-- node17 -->
|
||||
<g id="node20" class="node">
|
||||
<title>node17</title>
|
||||
<polygon fill="none" stroke="black" points="3716.84,-123.37 3624.66,-135.8 3532.48,-123.37 3567.69,-103.24 3681.63,-103.24 3716.84,-123.37"/>
|
||||
<text text-anchor="middle" x="3624.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch::nvtoolsext</text>
|
||||
</g>
|
||||
<!-- node13->node17 -->
|
||||
<g id="edge26" class="edge">
|
||||
<title>node13->node17</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3033.64,-183.25C3144.1,-175.14 3349.47,-158.53 3523.66,-135.8 3534.84,-134.35 3546.67,-132.57 3558.15,-130.72"/>
|
||||
<polygon fill="black" stroke="black" points="3558.68,-134.18 3567.98,-129.1 3557.54,-127.27 3558.68,-134.18"/>
|
||||
</g>
|
||||
<!-- node16 -->
|
||||
<g id="node19" class="node">
|
||||
<title>node16</title>
|
||||
<polygon fill="none" stroke="black" points="3510.78,-27.9 3496.7,-43.12 3439.66,-49.9 3382.63,-43.12 3368.54,-27.9 3408.01,-15.68 3471.31,-15.68 3510.78,-27.9"/>
|
||||
<text text-anchor="middle" x="3439.66" y="-27.63" font-family="Times,serif" font-size="12.00">CUDA::cudart</text>
|
||||
</g>
|
||||
<!-- node15->node16 -->
|
||||
<g id="edge25" class="edge">
|
||||
<title>node15->node16</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3439.66,-102.95C3439.66,-91.68 3439.66,-75.4 3439.66,-61.37"/>
|
||||
<polygon fill="black" stroke="black" points="3443.16,-61.78 3439.66,-51.78 3436.16,-61.78 3443.16,-61.78"/>
|
||||
</g>
|
||||
<!-- node18 -->
|
||||
<g id="node21" class="node">
|
||||
<title>node18</title>
|
||||
<polygon fill="none" stroke="black" points="3714.32,-27.9 3696.56,-43.12 3624.66,-49.9 3552.77,-43.12 3535.01,-27.9 3584.76,-15.68 3664.56,-15.68 3714.32,-27.9"/>
|
||||
<text text-anchor="middle" x="3624.66" y="-27.63" font-family="Times,serif" font-size="12.00">CUDA::nvToolsExt</text>
|
||||
</g>
|
||||
<!-- node17->node18 -->
|
||||
<g id="edge27" class="edge">
|
||||
<title>node17->node18</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3624.66,-102.95C3624.66,-91.68 3624.66,-75.4 3624.66,-61.37"/>
|
||||
<polygon fill="black" stroke="black" points="3628.16,-61.78 3624.66,-51.78 3621.16,-61.78 3628.16,-61.78"/>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 18 KiB |
2
lib/json
2
lib/json
@ -1 +1 @@
|
||||
Subproject commit 8c391e04fe4195d8be862c97f38cfe10e2a3472e
|
||||
Subproject commit 378e091795a70fced276cd882bd8a6a428668fe5
|
2
lib/mdlp
2
lib/mdlp
@ -1 +1 @@
|
||||
Subproject commit 236d1b2f8be185039493fe7fce04a83e02ed72e5
|
||||
Subproject commit 7d62d6af4a6ca944a3bbde0b61f651fd4b2d3f57
|
@ -5,15 +5,21 @@ project(bayesnet_sample)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
find_package(Torch REQUIRED)
|
||||
find_library(BayesNet NAMES BayesNet.a libBayesNet.a REQUIRED)
|
||||
find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a REQUIRED)
|
||||
find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet)
|
||||
find_library(FImdlp NAMES libfimdlp.a PATHS REQUIRED)
|
||||
|
||||
message(STATUS "FImdlp=${FImdlp}")
|
||||
message(STATUS "FImdlp_INCLUDE_DIRS=${FImdlp_INCLUDE_DIRS}")
|
||||
message(STATUS "BayesNet=${BayesNet}")
|
||||
message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
|
||||
|
||||
include_directories(
|
||||
lib/Files
|
||||
lib/mdlp
|
||||
../tests/lib/Files
|
||||
lib/json/include
|
||||
/usr/local/include
|
||||
${FImdlp_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
add_subdirectory(lib/mdlp)
|
||||
add_executable(bayesnet_sample sample.cc)
|
||||
target_link_libraries(bayesnet_sample mdlp "${TORCH_LIBRARIES}" "${BayesNet}")
|
||||
target_link_libraries(bayesnet_sample fimdlp "${TORCH_LIBRARIES}" "${BayesNet}")
|
@ -1,11 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(mdlp)
|
||||
|
||||
if (POLICY CMP0135)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
endif ()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
|
||||
|
@ -1,222 +0,0 @@
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include "CPPFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
|
||||
namespace mdlp {
|
||||
|
||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
|
||||
max_depth(max_depth_),
|
||||
proposed_cuts(proposed)
|
||||
{
|
||||
}
|
||||
|
||||
CPPFImdlp::CPPFImdlp() = default;
|
||||
|
||||
CPPFImdlp::~CPPFImdlp() = default;
|
||||
|
||||
size_t CPPFImdlp::compute_max_num_cut_points() const
|
||||
{
|
||||
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
||||
if (proposed_cuts == 0) {
|
||||
return numeric_limits<size_t>::max();
|
||||
}
|
||||
if (proposed_cuts < 0 || proposed_cuts > static_cast<float>(X.size())) {
|
||||
throw invalid_argument("wrong proposed num_cuts value");
|
||||
}
|
||||
if (proposed_cuts < 1)
|
||||
return static_cast<size_t>(round(static_cast<float>(X.size()) * proposed_cuts));
|
||||
return static_cast<size_t>(proposed_cuts);
|
||||
}
|
||||
|
||||
void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
X = X_;
|
||||
y = y_;
|
||||
num_cut_points = compute_max_num_cut_points();
|
||||
depth = 0;
|
||||
discretizedData.clear();
|
||||
cutPoints.clear();
|
||||
if (X.size() != y.size()) {
|
||||
throw invalid_argument("X and y must have the same size");
|
||||
}
|
||||
if (X.empty() || y.empty()) {
|
||||
throw invalid_argument("X and y must have at least one element");
|
||||
}
|
||||
if (min_length < 3) {
|
||||
throw invalid_argument("min_length must be greater than 2");
|
||||
}
|
||||
if (max_depth < 1) {
|
||||
throw invalid_argument("max_depth must be greater than 0");
|
||||
}
|
||||
indices = sortIndices(X_, y_);
|
||||
metrics.setData(y, indices);
|
||||
computeCutPoints(0, X.size(), 1);
|
||||
sort(cutPoints.begin(), cutPoints.end());
|
||||
if (num_cut_points > 0) {
|
||||
// Select the best (with lower entropy) cut points
|
||||
while (cutPoints.size() > num_cut_points) {
|
||||
resizeCutPoints();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
size_t n;
|
||||
size_t m;
|
||||
size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
|
||||
size_t idxNext = cut + 1 < end ? cut + 1 : cut;
|
||||
bool backWall; // true if duplicates reach beginning of the interval
|
||||
precision_t previous;
|
||||
precision_t actual;
|
||||
precision_t next;
|
||||
previous = X[indices[idxPrev]];
|
||||
actual = X[indices[cut]];
|
||||
next = X[indices[idxNext]];
|
||||
// definition 2 of the paper => X[t-1] < X[t]
|
||||
// get the first equal value of X in the interval
|
||||
while (idxPrev > start && actual == previous) {
|
||||
previous = X[indices[--idxPrev]];
|
||||
}
|
||||
backWall = idxPrev == start && actual == previous;
|
||||
// get the last equal value of X in the interval
|
||||
while (idxNext < end - 1 && actual == next) {
|
||||
next = X[indices[++idxNext]];
|
||||
}
|
||||
// # of duplicates before cutpoint
|
||||
n = cut - 1 - idxPrev;
|
||||
// # of duplicates after cutpoint
|
||||
m = idxNext - cut - 1;
|
||||
// Decide which values to use
|
||||
cut = cut + (backWall ? m + 1 : -n);
|
||||
actual = X[indices[cut]];
|
||||
return { (actual + previous) / 2, cut };
|
||||
}
|
||||
|
||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
|
||||
{
|
||||
size_t cut;
|
||||
pair<precision_t, size_t> result;
|
||||
// Check if the interval length and the depth are Ok
|
||||
if (end - start < min_length || depth_ > max_depth)
|
||||
return;
|
||||
depth = depth_ > depth ? depth_ : depth;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
result = valueCutPoint(start, cut, end);
|
||||
cut = result.second;
|
||||
cutPoints.push_back(result.first);
|
||||
computeCutPoints(start, cut, depth_ + 1);
|
||||
computeCutPoints(cut, end, depth_ + 1);
|
||||
}
|
||||
}
|
||||
|
||||
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||
{
|
||||
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||
E(A, TA; S) is minimal amongst all the candidate cut points. */
|
||||
size_t candidate = numeric_limits<size_t>::max();
|
||||
size_t elements = end - start;
|
||||
bool sameValues = true;
|
||||
precision_t entropy_left;
|
||||
precision_t entropy_right;
|
||||
precision_t minEntropy;
|
||||
// Check if all the values of the variable in the interval are the same
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
if (X[indices[idx]] != X[indices[start]]) {
|
||||
sameValues = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (sameValues)
|
||||
return candidate;
|
||||
minEntropy = metrics.entropy(start, end);
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
// Cutpoints are always on boundaries (definition 2)
|
||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||
continue;
|
||||
entropy_left = precision_t(idx - start) / static_cast<precision_t>(elements) * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / static_cast<precision_t>(elements) * metrics.entropy(idx, end);
|
||||
if (entropy_left + entropy_right < minEntropy) {
|
||||
minEntropy = entropy_left + entropy_right;
|
||||
candidate = idx;
|
||||
}
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
|
||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
int k;
|
||||
int k1;
|
||||
int k2;
|
||||
precision_t ig;
|
||||
precision_t delta;
|
||||
precision_t ent;
|
||||
precision_t ent1;
|
||||
precision_t ent2;
|
||||
auto N = precision_t(end - start);
|
||||
k = metrics.computeNumClasses(start, end);
|
||||
k1 = metrics.computeNumClasses(start, cut);
|
||||
k2 = metrics.computeNumClasses(cut, end);
|
||||
ent = metrics.entropy(start, end);
|
||||
ent1 = metrics.entropy(start, cut);
|
||||
ent2 = metrics.entropy(cut, end);
|
||||
ig = metrics.informationGain(start, cut, end);
|
||||
delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
indices_t idx(X_.size());
|
||||
iota(idx.begin(), idx.end(), 0);
|
||||
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
|
||||
if (X_[i1] == X_[i2])
|
||||
return y_[i1] < y_[i2];
|
||||
else
|
||||
return X_[i1] < X_[i2];
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
|
||||
void CPPFImdlp::resizeCutPoints()
|
||||
{
|
||||
//Compute entropy of each of the whole cutpoint set and discards the biggest value
|
||||
precision_t maxEntropy = 0;
|
||||
precision_t entropy;
|
||||
size_t maxEntropyIdx = 0;
|
||||
size_t begin = 0;
|
||||
size_t end;
|
||||
for (size_t idx = 0; idx < cutPoints.size(); idx++) {
|
||||
end = begin;
|
||||
while (X[indices[end]] < cutPoints[idx] && end < X.size())
|
||||
end++;
|
||||
entropy = metrics.entropy(begin, end);
|
||||
if (entropy > maxEntropy) {
|
||||
maxEntropy = entropy;
|
||||
maxEntropyIdx = idx;
|
||||
}
|
||||
begin = end;
|
||||
}
|
||||
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
||||
}
|
||||
labels_t& CPPFImdlp::transform(const samples_t& data)
|
||||
{
|
||||
discretizedData.clear();
|
||||
discretizedData.reserve(data.size());
|
||||
for (const precision_t& item : data) {
|
||||
auto upper = upper_bound(cutPoints.begin(), cutPoints.end(), item);
|
||||
discretizedData.push_back(upper - cutPoints.begin());
|
||||
}
|
||||
return discretizedData;
|
||||
}
|
||||
}
|
@ -1,51 +0,0 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
|
||||
namespace mdlp {
|
||||
class CPPFImdlp {
|
||||
protected:
|
||||
size_t min_length = 3;
|
||||
int depth = 0;
|
||||
int max_depth = numeric_limits<int>::max();
|
||||
float proposed_cuts = 0;
|
||||
indices_t indices = indices_t();
|
||||
samples_t X = samples_t();
|
||||
labels_t y = labels_t();
|
||||
Metrics metrics = Metrics(y, indices);
|
||||
cutPoints_t cutPoints;
|
||||
size_t num_cut_points = numeric_limits<size_t>::max();
|
||||
labels_t discretizedData = labels_t();
|
||||
|
||||
static indices_t sortIndices(samples_t&, labels_t&);
|
||||
|
||||
void computeCutPoints(size_t, size_t, int);
|
||||
void resizeCutPoints();
|
||||
bool mdlp(size_t, size_t, size_t);
|
||||
size_t getCandidate(size_t, size_t);
|
||||
size_t compute_max_num_cut_points() const;
|
||||
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
||||
|
||||
public:
|
||||
CPPFImdlp();
|
||||
CPPFImdlp(size_t, int, float);
|
||||
~CPPFImdlp();
|
||||
void fit(samples_t&, labels_t&);
|
||||
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
||||
labels_t& transform(const samples_t&);
|
||||
inline int get_depth() const { return depth; };
|
||||
static inline string version() { return "1.1.2"; };
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 Ricardo Montañana Gómez
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -1,78 +0,0 @@
|
||||
#include "Metrics.h"
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_),
|
||||
numClasses(computeNumClasses(0, indices.size()))
|
||||
{
|
||||
}
|
||||
|
||||
int Metrics::computeNumClasses(size_t start, size_t end)
|
||||
{
|
||||
set<int> nClasses;
|
||||
for (auto i = start; i < end; ++i) {
|
||||
nClasses.insert(y[indices[i]]);
|
||||
}
|
||||
return static_cast<int>(nClasses.size());
|
||||
}
|
||||
|
||||
void Metrics::setData(const labels_t& y_, const indices_t& indices_)
|
||||
{
|
||||
indices = indices_;
|
||||
y = y_;
|
||||
numClasses = computeNumClasses(0, indices.size());
|
||||
entropyCache.clear();
|
||||
igCache.clear();
|
||||
}
|
||||
|
||||
precision_t Metrics::entropy(size_t start, size_t end)
|
||||
{
|
||||
precision_t p;
|
||||
precision_t ventropy = 0;
|
||||
int nElements = 0;
|
||||
labels_t counts(numClasses + 1, 0);
|
||||
if (end - start < 2)
|
||||
return 0;
|
||||
if (entropyCache.find({ start, end }) != entropyCache.end()) {
|
||||
return entropyCache[{start, end}];
|
||||
}
|
||||
for (auto i = &indices[start]; i != &indices[end]; ++i) {
|
||||
counts[y[*i]]++;
|
||||
nElements++;
|
||||
}
|
||||
for (auto count : counts) {
|
||||
if (count > 0) {
|
||||
p = static_cast<precision_t>(count) / static_cast<precision_t>(nElements);
|
||||
ventropy -= p * log2(p);
|
||||
}
|
||||
}
|
||||
entropyCache[{start, end}] = ventropy;
|
||||
return ventropy;
|
||||
}
|
||||
|
||||
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
precision_t iGain;
|
||||
precision_t entropyInterval;
|
||||
precision_t entropyLeft;
|
||||
precision_t entropyRight;
|
||||
size_t nElementsLeft = cut - start;
|
||||
size_t nElementsRight = end - cut;
|
||||
size_t nElements = end - start;
|
||||
if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
|
||||
return igCache[make_tuple(start, cut, end)];
|
||||
}
|
||||
entropyInterval = entropy(start, end);
|
||||
entropyLeft = entropy(start, cut);
|
||||
entropyRight = entropy(cut, end);
|
||||
iGain = entropyInterval -
|
||||
(static_cast<precision_t>(nElementsLeft) * entropyLeft +
|
||||
static_cast<precision_t>(nElementsRight) * entropyRight) /
|
||||
static_cast<precision_t>(nElements);
|
||||
igCache[make_tuple(start, cut, end)] = iGain;
|
||||
return iGain;
|
||||
}
|
||||
|
||||
}
|
@ -1,28 +0,0 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CCMETRICS_H
|
||||
#define CCMETRICS_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
|
||||
namespace mdlp {
|
||||
class Metrics {
|
||||
protected:
|
||||
labels_t& y;
|
||||
indices_t& indices;
|
||||
int numClasses;
|
||||
cacheEnt_t entropyCache = cacheEnt_t();
|
||||
cacheIg_t igCache = cacheIg_t();
|
||||
public:
|
||||
Metrics(labels_t&, indices_t&);
|
||||
void setData(const labels_t&, const indices_t&);
|
||||
int computeNumClasses(size_t, size_t);
|
||||
precision_t entropy(size_t, size_t);
|
||||
precision_t informationGain(size_t, size_t, size_t);
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,41 +0,0 @@
|
||||
[![Build](https://github.com/rmontanana/mdlp/actions/workflows/build.yml/badge.svg)](https://github.com/rmontanana/mdlp/actions/workflows/build.yml)
|
||||
[![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_mdlp&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||
[![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_mdlp&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||
|
||||
# mdlp
|
||||
|
||||
Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf)
|
||||
|
||||
The implementation tries to mitigate the problem of different label values with the same value of the variable:
|
||||
|
||||
- Sorts the values of the variable using the label values as a tie-breaker
|
||||
- Once found a valid candidate for the split, it checks if the previous value is the same as actual one, and tries to get previous one, or next if the former is not possible.
|
||||
|
||||
Other features:
|
||||
|
||||
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
||||
- Intervals have to have more than two examples to be evaluated.
|
||||
|
||||
The algorithm returns the cut points for the variable.
|
||||
|
||||
## Sample
|
||||
|
||||
To run the sample, just execute the following commands:
|
||||
|
||||
```bash
|
||||
cd sample
|
||||
cmake -B build
|
||||
cd build
|
||||
make
|
||||
./sample -f iris -m 2
|
||||
./sample -h
|
||||
```
|
||||
|
||||
## Test
|
||||
|
||||
To run the tests and see coverage (llvm & gcovr have to be installed), execute the following commands:
|
||||
|
||||
```bash
|
||||
cd tests
|
||||
./test
|
||||
```
|
@ -1,24 +0,0 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
typedef float precision_t;
|
||||
typedef vector<precision_t> samples_t;
|
||||
typedef vector<int> labels_t;
|
||||
typedef vector<size_t> indices_t;
|
||||
typedef vector<precision_t> cutPoints_t;
|
||||
typedef map<pair<int, int>, precision_t> cacheEnt_t;
|
||||
typedef map<tuple<int, int, int>, precision_t> cacheIg_t;
|
||||
}
|
||||
#endif
|
@ -60,9 +60,9 @@ int main(int argc, char* argv[])
|
||||
auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
|
||||
std::cout << "Library version: " << clf.getVersion() << std::endl;
|
||||
tie(X, y, features, className, states) = loadDataset(file_name, true);
|
||||
clf.fit(X, y, features, className, states);
|
||||
clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE);
|
||||
auto score = clf.score(X, y);
|
||||
std::cout << "File: " << file_name << " score: " << score << std::endl;
|
||||
std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,7 @@ if(ENABLE_TESTING)
|
||||
include_directories(
|
||||
${BayesNet_SOURCE_DIR}/tests/lib/Files
|
||||
${BayesNet_SOURCE_DIR}/lib/folding
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
||||
${BayesNet_SOURCE_DIR}/lib/json/include
|
||||
${BayesNet_SOURCE_DIR}
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
@ -10,8 +10,8 @@ if(ENABLE_TESTING)
|
||||
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
|
||||
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc
|
||||
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestA2DE.cc
|
||||
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc ${BayesNet_SOURCES})
|
||||
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" mdlp PRIVATE Catch2::Catch2WithMain)
|
||||
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc ${BayesNet_SOURCES})
|
||||
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp PRIVATE Catch2::Catch2WithMain)
|
||||
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
|
||||
add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]")
|
||||
add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]")
|
||||
@ -24,4 +24,5 @@ if(ENABLE_TESTING)
|
||||
add_test(NAME Modules COMMAND TestBayesNet "[Modules]")
|
||||
add_test(NAME Network COMMAND TestBayesNet "[Network]")
|
||||
add_test(NAME Node COMMAND TestBayesNet "[Node]")
|
||||
add_test(NAME MST COMMAND TestBayesNet "[MST]")
|
||||
endif(ENABLE_TESTING)
|
||||
|
@ -16,7 +16,7 @@ TEST_CASE("Fit and Score", "[A2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::A2DE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.831776).epsilon(raw.epsilon));
|
||||
REQUIRE(clf.getNumberOfNodes() == 360);
|
||||
REQUIRE(clf.getNumberOfEdges() == 756);
|
||||
@ -30,20 +30,20 @@ TEST_CASE("Test score with predict_voting", "[A2DE]")
|
||||
{"predict_voting", true},
|
||||
};
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.82243).epsilon(raw.epsilon));
|
||||
hyperparameters["predict_voting"] = false;
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.83178).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Test graph", "[A2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::A2DE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto graph = clf.graph();
|
||||
REQUIRE(graph.size() == 78);
|
||||
REQUIRE(graph[0] == "digraph BayesNet {\nlabel=<BayesNet A2DE_0>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");
|
||||
REQUIRE(graph[1] == "class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n");
|
||||
REQUIRE(graph[1] == "\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n");
|
||||
}
|
||||
|
@ -18,38 +18,38 @@ TEST_CASE("Test Cannot build dataset with wrong data vector", "[Classifier]")
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
raw.yv.pop_back();
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
}
|
||||
TEST_CASE("Test Cannot build dataset with wrong data tensor", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto yshort = torch::zeros({ 149 }, torch::kInt32);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
}
|
||||
TEST_CASE("Invalid data type", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", false);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states), "dataset (X, y) must be of type Integer");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "dataset (X, y) must be of type Integer");
|
||||
}
|
||||
TEST_CASE("Invalid number of features", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto Xt = torch::cat({ raw.Xt, torch::zeros({ 1, 150 }, torch::kInt32) }, 0);
|
||||
REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states), "Classifier: X 5 and features 4 must have the same number of features");
|
||||
REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "Classifier: X 5 and features 4 must have the same number of features");
|
||||
}
|
||||
TEST_CASE("Invalid class name", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states), "class name not found in states");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), "class name not found in states");
|
||||
}
|
||||
TEST_CASE("Invalid feature name", "[Classifier]")
|
||||
{
|
||||
@ -57,8 +57,8 @@ TEST_CASE("Invalid feature name", "[Classifier]")
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto statest = raw.states;
|
||||
statest.erase("petallength");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest), "feature [petallength] not found in states");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), "feature [petallength] not found in states");
|
||||
}
|
||||
TEST_CASE("Invalid hyperparameter", "[Classifier]")
|
||||
{
|
||||
@ -71,7 +71,7 @@ TEST_CASE("Topological order", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states);
|
||||
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto order = model.topological_order();
|
||||
REQUIRE(order.size() == 4);
|
||||
REQUIRE(order[0] == "petallength");
|
||||
@ -83,9 +83,9 @@ TEST_CASE("Dump_cpt", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states);
|
||||
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto cpt = model.dump_cpt();
|
||||
REQUIRE(cpt.size() == 1713);
|
||||
REQUIRE(cpt.size() == 1718);
|
||||
}
|
||||
TEST_CASE("Not fitted model", "[Classifier]")
|
||||
{
|
||||
@ -111,7 +111,7 @@ TEST_CASE("KDB Graph", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::KDB(2);
|
||||
auto raw = RawDatasets("iris", true);
|
||||
model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto graph = model.graph();
|
||||
REQUIRE(graph.size() == 15);
|
||||
}
|
||||
@ -119,7 +119,7 @@ TEST_CASE("KDBLd Graph", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::KDBLd(2);
|
||||
auto raw = RawDatasets("iris", false);
|
||||
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states);
|
||||
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto graph = model.graph();
|
||||
REQUIRE(graph.size() == 15);
|
||||
}
|
@ -18,7 +18,7 @@ TEST_CASE("Topological Order", "[Ensemble]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto order = clf.topological_order();
|
||||
REQUIRE(order.size() == 0);
|
||||
}
|
||||
@ -26,7 +26,7 @@ TEST_CASE("Dump CPT", "[Ensemble]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto dump = clf.dump_cpt();
|
||||
REQUIRE(dump == "");
|
||||
}
|
||||
@ -34,7 +34,7 @@ TEST_CASE("Number of States", "[Ensemble]")
|
||||
{
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfStates() == 76);
|
||||
}
|
||||
TEST_CASE("Show", "[Ensemble]")
|
||||
@ -46,7 +46,7 @@ TEST_CASE("Show", "[Ensemble]")
|
||||
{"maxTolerance", 1},
|
||||
{"convergence", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
std::vector<std::string> expected = {
|
||||
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
|
||||
"petallength -> sepallength, sepalwidth, petalwidth, ",
|
||||
@ -78,16 +78,16 @@ TEST_CASE("Graph", "[Ensemble]")
|
||||
{
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto graph = clf.graph();
|
||||
REQUIRE(graph.size() == 56);
|
||||
auto clf2 = bayesnet::AODE();
|
||||
clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
graph = clf2.graph();
|
||||
REQUIRE(graph.size() == 56);
|
||||
raw = RawDatasets("glass", false);
|
||||
auto clf3 = bayesnet::AODELd();
|
||||
clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states);
|
||||
clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
graph = clf3.graph();
|
||||
REQUIRE(graph.size() == 261);
|
||||
}
|
||||
|
@ -20,20 +20,20 @@
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
const std::string ACTUAL_VERSION = "1.0.5.1";
|
||||
const std::string ACTUAL_VERSION = "1.0.6";
|
||||
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
{
|
||||
map <pair<std::string, std::string>, float> scores{
|
||||
// Diabetes
|
||||
{{"diabetes", "AODE"}, 0.82161}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
|
||||
{{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f},
|
||||
{{"diabetes", "AODELd"}, 0.8125f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.7890625f}, {{"diabetes", "TANLd"}, 0.803385437f}, {{"diabetes", "BoostAODE"}, 0.83984f},
|
||||
// Ecoli
|
||||
{{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
|
||||
{{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f},
|
||||
{{"ecoli", "AODELd"}, 0.875f}, {{"ecoli", "KDBLd"}, 0.880952358f}, {{"ecoli", "SPODELd"}, 0.839285731f}, {{"ecoli", "TANLd"}, 0.848214269f}, {{"ecoli", "BoostAODE"}, 0.89583f},
|
||||
// Glass
|
||||
{{"glass", "AODE"}, 0.79439}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
|
||||
{{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f},
|
||||
{{"glass", "AODELd"}, 0.799065411f}, {{"glass", "KDBLd"}, 0.82710278f}, {{"glass", "SPODELd"}, 0.780373812f}, {{"glass", "TANLd"}, 0.869158864f}, {{"glass", "BoostAODE"}, 0.84579f},
|
||||
// Iris
|
||||
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
|
||||
{{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f}
|
||||
@ -54,7 +54,7 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
auto clf = models[name];
|
||||
auto discretize = name.substr(name.length() - 2) != "Ld";
|
||||
auto raw = RawDatasets(file_name, discretize);
|
||||
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states);
|
||||
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf->score(raw.Xt, raw.yt);
|
||||
INFO("Classifier: " << name << " File: " << file_name);
|
||||
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
|
||||
@ -71,17 +71,17 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
TEST_CASE("Models features & Graph", "[Models]")
|
||||
{
|
||||
auto graph = std::vector<std::string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
|
||||
"class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
|
||||
"class -> sepallength", "class -> sepalwidth", "class -> petallength", "class -> petalwidth", "petallength [shape=circle] \n",
|
||||
"petallength -> sepallength", "petalwidth [shape=circle] \n", "sepallength [shape=circle] \n",
|
||||
"sepallength -> sepalwidth", "sepalwidth [shape=circle] \n", "sepalwidth -> petalwidth", "}\n"
|
||||
"\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
|
||||
"\"class\" -> \"sepallength\"", "\"class\" -> \"sepalwidth\"", "\"class\" -> \"petallength\"", "\"class\" -> \"petalwidth\"", "\"petallength\" [shape=circle] \n",
|
||||
"\"petallength\" -> \"sepallength\"", "\"petalwidth\" [shape=circle] \n", "\"sepallength\" [shape=circle] \n",
|
||||
"\"sepallength\" -> \"sepalwidth\"", "\"sepalwidth\" [shape=circle] \n", "\"sepalwidth\" -> \"petalwidth\"", "}\n"
|
||||
}
|
||||
);
|
||||
SECTION("Test TAN")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::TAN();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||
REQUIRE(clf.getNumberOfStates() == 19);
|
||||
@ -93,10 +93,10 @@ TEST_CASE("Models features & Graph", "[Models]")
|
||||
{
|
||||
auto clf = bayesnet::TANLd();
|
||||
auto raw = RawDatasets("iris", false);
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||
REQUIRE(clf.getNumberOfStates() == 19);
|
||||
REQUIRE(clf.getNumberOfStates() == 27);
|
||||
REQUIRE(clf.getClassNumStates() == 3);
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.graph("Test") == graph);
|
||||
@ -106,7 +106,7 @@ TEST_CASE("Get num features & num edges", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::KDB(2);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 8);
|
||||
}
|
||||
@ -166,7 +166,7 @@ TEST_CASE("Model predict_proba", "[Models]")
|
||||
SECTION("Test " + model + " predict_proba")
|
||||
{
|
||||
auto clf = models[model];
|
||||
clf->fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf->fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto y_pred_proba = clf->predict_proba(raw.Xv);
|
||||
auto yt_pred_proba = clf->predict_proba(raw.Xt);
|
||||
auto y_pred = clf->predict(raw.Xv);
|
||||
@ -203,7 +203,7 @@ TEST_CASE("AODE voting-proba", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::AODE(false);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_proba = clf.score(raw.Xv, raw.yv);
|
||||
auto pred_proba = clf.predict_proba(raw.Xv);
|
||||
clf.setHyperparameters({
|
||||
@ -222,9 +222,9 @@ TEST_CASE("SPODELd dataset", "[Models]")
|
||||
auto raw = RawDatasets("iris", false);
|
||||
auto clf = bayesnet::SPODELd(0);
|
||||
// raw.dataset.to(torch::kFloat32);
|
||||
clf.fit(raw.dataset, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf.score(raw.Xt, raw.yt);
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon));
|
||||
@ -233,13 +233,13 @@ TEST_CASE("KDB with hyperparameters", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::KDB(2);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
clf.setHyperparameters({
|
||||
{"k", 3},
|
||||
{"theta", 0.7},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto scoret = clf.score(raw.Xv, raw.yv);
|
||||
REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
|
||||
@ -248,7 +248,7 @@ TEST_CASE("Incorrect type of data for SPODELd", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::SPODELd(0);
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states), std::runtime_error);
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
|
||||
}
|
||||
TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
|
||||
{
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "bayesnet/network/Node.h"
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
|
||||
const double threshold = 1e-4;
|
||||
void buildModel(bayesnet::Network& net, const std::vector<std::string>& features, const std::string& className)
|
||||
{
|
||||
std::vector<pair<int, int>> network = { {0, 1}, {0, 2}, {1, 3} };
|
||||
@ -29,13 +30,11 @@ void buildModel(bayesnet::Network& net, const std::vector<std::string>& features
|
||||
net.addEdge(className, feature);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
{
|
||||
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto net = bayesnet::Network();
|
||||
double threshold = 1e-4;
|
||||
|
||||
SECTION("Test get features")
|
||||
{
|
||||
@ -115,9 +114,9 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
REQUIRE(children == children3);
|
||||
}
|
||||
// Fit networks
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states);
|
||||
net2.fit(raw.dataset, raw.weights, raw.features, raw.className, raw.states);
|
||||
net3.fit(raw.Xt, raw.yt, raw.weights, raw.features, raw.className, raw.states);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
net2.fit(raw.dataset, raw.weights, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
net3.fit(raw.Xt, raw.yt, raw.weights, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(net.getStates() == net2.getStates());
|
||||
REQUIRE(net.getStates() == net3.getStates());
|
||||
REQUIRE(net.getFeatures() == net2.getFeatures());
|
||||
@ -150,6 +149,7 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
}
|
||||
SECTION("Test show")
|
||||
{
|
||||
INFO("Test show");
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@ -163,6 +163,7 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
}
|
||||
SECTION("Test topological_sort")
|
||||
{
|
||||
INFO("Test topological sort");
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@ -176,6 +177,7 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
}
|
||||
SECTION("Test graph")
|
||||
{
|
||||
INFO("Test graph");
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@ -184,17 +186,18 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
auto str = net.graph("Test Graph");
|
||||
REQUIRE(str.size() == 7);
|
||||
REQUIRE(str[0] == "digraph BayesNet {\nlabel=<BayesNet Test Graph>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");
|
||||
REQUIRE(str[1] == "A [shape=circle] \n");
|
||||
REQUIRE(str[2] == "A -> B");
|
||||
REQUIRE(str[3] == "A -> C");
|
||||
REQUIRE(str[4] == "B [shape=circle] \n");
|
||||
REQUIRE(str[5] == "C [shape=circle] \n");
|
||||
REQUIRE(str[1] == "\"A\" [shape=circle] \n");
|
||||
REQUIRE(str[2] == "\"A\" -> \"B\"");
|
||||
REQUIRE(str[3] == "\"A\" -> \"C\"");
|
||||
REQUIRE(str[4] == "\"B\" [shape=circle] \n");
|
||||
REQUIRE(str[5] == "\"C\" [shape=circle] \n");
|
||||
REQUIRE(str[6] == "}\n");
|
||||
}
|
||||
SECTION("Test predict")
|
||||
{
|
||||
INFO("Test predict");
|
||||
buildModel(net, raw.features, raw.className);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
std::vector<int> y_test = { 2, 2, 0, 2, 1 };
|
||||
auto y_pred = net.predict(test);
|
||||
@ -202,8 +205,9 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
}
|
||||
SECTION("Test predict_proba")
|
||||
{
|
||||
INFO("Test predict_proba");
|
||||
buildModel(net, raw.features, raw.className);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
std::vector<std::vector<double>> y_test = {
|
||||
{0.450237, 0.0866621, 0.463101},
|
||||
@ -223,15 +227,17 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
}
|
||||
SECTION("Test score")
|
||||
{
|
||||
INFO("Test score");
|
||||
buildModel(net, raw.features, raw.className);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = net.score(raw.Xv, raw.yv);
|
||||
REQUIRE(score == Catch::Approx(0.97333333).margin(threshold));
|
||||
}
|
||||
SECTION("Copy constructor")
|
||||
{
|
||||
INFO("Test copy constructor");
|
||||
buildModel(net, raw.features, raw.className);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto net2 = bayesnet::Network(net);
|
||||
REQUIRE(net.getFeatures() == net2.getFeatures());
|
||||
REQUIRE(net.getEdges() == net2.getEdges());
|
||||
@ -251,8 +257,9 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
REQUIRE(node->getCPT().equal(node2->getCPT()));
|
||||
}
|
||||
}
|
||||
SECTION("Test oddities")
|
||||
SECTION("Network oddities")
|
||||
{
|
||||
INFO("Network oddities");
|
||||
buildModel(net, raw.features, raw.className);
|
||||
// predict without fitting
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
@ -268,27 +275,27 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
// predict with wrong data
|
||||
auto netx = bayesnet::Network();
|
||||
buildModel(netx, raw.features, raw.className);
|
||||
netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states);
|
||||
netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
std::vector<std::vector<int>> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} };
|
||||
auto test_tensor2 = bayesnet::vectorToTensor(test2, false);
|
||||
REQUIRE_THROWS_AS(netx.predict(test2), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test2), "Sample size (3) does not match the number of features (4)");
|
||||
REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "Sample size (3) does not match the number of features (4)");
|
||||
REQUIRE_THROWS_AS(netx.predict(test2), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test2), "(V) Sample size (3) does not match the number of features (4)");
|
||||
REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "(T) Sample size (3) does not match the number of features (4)");
|
||||
// fit with wrong data
|
||||
// Weights
|
||||
auto net2 = bayesnet::Network();
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
std::string invalid_weights = "Weights (0) must have the same number of elements as samples (150) in Network::fit";
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states), invalid_weights);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states, raw.smoothing), invalid_weights);
|
||||
// X & y
|
||||
std::string invalid_labels = "X and y must have the same number of samples in Network::fit (150 != 0)";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states), invalid_labels);
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), invalid_labels);
|
||||
// Features
|
||||
std::string invalid_features = "X and features must have the same number of features in Network::fit (4 != 0)";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states), invalid_features);
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states, raw.smoothing), invalid_features);
|
||||
// Different number of features
|
||||
auto net3 = bayesnet::Network();
|
||||
auto test2y = { 1, 2, 3, 4, 5 };
|
||||
@ -296,23 +303,23 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
auto features3 = raw.features;
|
||||
features3.pop_back();
|
||||
std::string invalid_features2 = "X and local features must have the same number of features in Network::fit (3 != 4)";
|
||||
REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states), invalid_features2);
|
||||
REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states, raw.smoothing), invalid_features2);
|
||||
// Uninitialized network
|
||||
std::string network_invalid = "The network has not been initialized. You must call addNode() before calling fit()";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), network_invalid);
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), network_invalid);
|
||||
// Classname
|
||||
std::string invalid_classname = "Class Name not found in Network::features";
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states), invalid_classname);
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), invalid_classname);
|
||||
// Invalid feature
|
||||
auto features2 = raw.features;
|
||||
features2.pop_back();
|
||||
features2.push_back("duck");
|
||||
std::string invalid_feature = "Feature duck not found in Network::features";
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states), invalid_feature);
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), invalid_feature);
|
||||
// Add twice the same node name to the network => Nothing should happen
|
||||
net.addNode("A");
|
||||
net.addNode("A");
|
||||
@ -320,8 +327,16 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
auto net4 = bayesnet::Network();
|
||||
buildModel(net4, raw.features, raw.className);
|
||||
std::string invalid_state = "Feature sepallength not found in states";
|
||||
REQUIRE_THROWS_AS(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>()), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>()), invalid_state);
|
||||
REQUIRE_THROWS_AS(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>(), raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>(), raw.smoothing), invalid_state);
|
||||
// Try to add node or edge to a fitted network
|
||||
auto net5 = bayesnet::Network();
|
||||
buildModel(net5, raw.features, raw.className);
|
||||
net5.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE_THROWS_AS(net5.addNode("A"), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net5.addNode("A"), "Cannot add node to a fitted network. Initialize first.");
|
||||
REQUIRE_THROWS_AS(net5.addEdge("A", "B"), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net5.addEdge("A", "B"), "Cannot add edge to a fitted network. Initialize first.");
|
||||
}
|
||||
|
||||
}
|
||||
@ -342,15 +357,6 @@ TEST_CASE("Cicle in Network", "[Network]")
|
||||
REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Adding this edge forms a cycle in the graph.");
|
||||
}
|
||||
TEST_CASE("Test max threads constructor", "[Network]")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
REQUIRE(net.getMaxThreads() == 0.95f);
|
||||
auto net2 = bayesnet::Network(4);
|
||||
REQUIRE(net2.getMaxThreads() == 4);
|
||||
auto net3 = bayesnet::Network(1.75);
|
||||
REQUIRE(net3.getMaxThreads() == 1.75);
|
||||
}
|
||||
TEST_CASE("Edges troubles", "[Network]")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
@ -360,19 +366,22 @@ TEST_CASE("Edges troubles", "[Network]")
|
||||
REQUIRE_THROWS_WITH(net.addEdge("A", "C"), "Child node C does not exist");
|
||||
REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Parent node C does not exist");
|
||||
net.addEdge("A", "B");
|
||||
REQUIRE_THROWS_AS(net.addEdge("A", "B"), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.addEdge("A", "B"), "Edge A -> B already exists");
|
||||
}
|
||||
TEST_CASE("Dump CPT", "[Network]")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
buildModel(net, raw.features, raw.className);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto res = net.dump_cpt();
|
||||
std::string expected = R"(* class: (3) : [3]
|
||||
0.3333
|
||||
0.3333
|
||||
0.3333
|
||||
[ CPUFloatType{3} ]
|
||||
[ CPUDoubleType{3} ]
|
||||
* petallength: (4) : [4, 3, 3]
|
||||
(1,.,.) =
|
||||
0.9388 0.1000 0.2000
|
||||
@ -393,7 +402,7 @@ TEST_CASE("Dump CPT", "[Network]")
|
||||
0.0204 0.1000 0.2000
|
||||
0.1250 0.0526 0.1667
|
||||
0.2000 0.0606 0.8235
|
||||
[ CPUFloatType{4,3,3} ]
|
||||
[ CPUDoubleType{4,3,3} ]
|
||||
* petalwidth: (3) : [3, 6, 3]
|
||||
(1,.,.) =
|
||||
0.5000 0.0417 0.0714
|
||||
@ -418,12 +427,12 @@ TEST_CASE("Dump CPT", "[Network]")
|
||||
0.1111 0.0909 0.8000
|
||||
0.0667 0.2000 0.8667
|
||||
0.0303 0.2500 0.7500
|
||||
[ CPUFloatType{3,6,3} ]
|
||||
[ CPUDoubleType{3,6,3} ]
|
||||
* sepallength: (3) : [3, 3]
|
||||
0.8679 0.1321 0.0377
|
||||
0.0943 0.3019 0.0566
|
||||
0.0377 0.5660 0.9057
|
||||
[ CPUFloatType{3,3} ]
|
||||
[ CPUDoubleType{3,3} ]
|
||||
* sepalwidth: (6) : [6, 3, 3]
|
||||
(1,.,.) =
|
||||
0.0392 0.5000 0.2857
|
||||
@ -454,8 +463,136 @@ TEST_CASE("Dump CPT", "[Network]")
|
||||
0.5098 0.0833 0.1429
|
||||
0.5000 0.0476 0.1250
|
||||
0.2857 0.0571 0.1132
|
||||
[ CPUFloatType{6,3,3} ]
|
||||
[ CPUDoubleType{6,3,3} ]
|
||||
)";
|
||||
REQUIRE(res == expected);
|
||||
}
|
||||
|
||||
TEST_CASE("Test Smoothing A", "[Network]")
|
||||
{
|
||||
/*
|
||||
Tomando m = 1 Pa = 0.5
|
||||
Si estoy calculando P(A | C), con C en{ 0,1,2 } y tengo :
|
||||
AC = { 11, 12, 11, 10, 10, 12, 10, 01, 00, 02 }
|
||||
Entonces:
|
||||
P(A = 1 | C = 0) = (3 + 1 / 2 * 1) / (4 + 1) = 3.5 / 5
|
||||
P(A = 0 | C = 0) = (1 + 1 / 2 * 1) / (4 + 1) = 1.5 / 5
|
||||
Donde m aquí es el número de veces de C = 0 que es la que condiciona y la a priori vuelve a ser sobre A que es sobre las que estaríamos calculando esas marginales.
|
||||
P(A = 1 | C = 1) = (2 + 1 / 2 * 1) / (3 + 1) = 2.5 / 4
|
||||
P(A = 0 | C = 1) = (1 + 1 / 2 * 1) / (3 + 1) = 1.5 / 4
|
||||
P(A = 1 | C = 2) = (2 + 1 / 2 * 1) / (3 + 1) = 2.5 / 5
|
||||
P(A = 0 | C = 2) = (1 + 1 / 2 * 1) / (3 + 1) = 1.5 / 5
|
||||
En realidad es parecido a Laplace, que en este caso p.e.con C = 0 sería
|
||||
P(A = 1 | C = 0) = (3 + 1) / (4 + 2) = 4 / 6
|
||||
P(A = 0 | C = 0) = (1 + 1) / (4 + 2) = 2 / 6
|
||||
*/
|
||||
auto net = bayesnet::Network();
|
||||
net.addNode("A");
|
||||
net.addNode("C");
|
||||
net.addEdge("C", "A");
|
||||
std::vector<int> C = { 1, 2, 1, 0, 0, 2, 0, 1, 0, 2 };
|
||||
std::vector<std::vector<int>> A = { { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } };
|
||||
std::map<std::string, std::vector<int>> states = { { "A", {0, 1} }, { "C", {0, 1, 2} } };
|
||||
auto weights = std::vector<double>(C.size(), 1);
|
||||
//
|
||||
// Laplace
|
||||
//
|
||||
net.fit(A, C, weights, { "A" }, "C", states, bayesnet::Smoothing_t::LAPLACE);
|
||||
auto cpt_c_laplace = net.getNodes().at("C")->getCPT();
|
||||
REQUIRE(cpt_c_laplace.size(0) == 3);
|
||||
auto laplace_c = std::vector<float>({ 0.3846, 0.3077, 0.3077 });
|
||||
for (int i = 0; i < laplace_c.size(); ++i) {
|
||||
REQUIRE(cpt_c_laplace.index({ i }).item<float>() == Catch::Approx(laplace_c[i]).margin(threshold));
|
||||
}
|
||||
auto cpt_a_laplace = net.getNodes().at("A")->getCPT();
|
||||
REQUIRE(cpt_a_laplace.size(0) == 2);
|
||||
REQUIRE(cpt_a_laplace.size(1) == 3);
|
||||
auto laplace_a = std::vector<std::vector<float>>({ {0.3333, 0.4000,0.4000}, {0.6667, 0.6000, 0.6000} });
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
REQUIRE(cpt_a_laplace.index({ i, j }).item<float>() == Catch::Approx(laplace_a[i][j]).margin(threshold));
|
||||
}
|
||||
}
|
||||
//
|
||||
// Cestnik
|
||||
//
|
||||
net.fit(A, C, weights, { "A" }, "C", states, bayesnet::Smoothing_t::CESTNIK);
|
||||
auto cpt_c_cestnik = net.getNodes().at("C")->getCPT();
|
||||
REQUIRE(cpt_c_cestnik.size(0) == 3);
|
||||
auto cestnik_c = std::vector<float>({ 0.3939, 0.3030, 0.3030 });
|
||||
for (int i = 0; i < laplace_c.size(); ++i) {
|
||||
REQUIRE(cpt_c_cestnik.index({ i }).item<float>() == Catch::Approx(cestnik_c[i]).margin(threshold));
|
||||
}
|
||||
auto cpt_a_cestnik = net.getNodes().at("A")->getCPT();
|
||||
REQUIRE(cpt_a_cestnik.size(0) == 2);
|
||||
REQUIRE(cpt_a_cestnik.size(1) == 3);
|
||||
auto cestnik_a = std::vector<std::vector<float>>({ {0.3000, 0.3750, 0.3750}, {0.7000, 0.6250, 0.6250} });
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
REQUIRE(cpt_a_cestnik.index({ i, j }).item<float>() == Catch::Approx(cestnik_a[i][j]).margin(threshold));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Test Smoothing B", "[Network]")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
net.addNode("X");
|
||||
net.addNode("Y");
|
||||
net.addNode("Z");
|
||||
net.addNode("C");
|
||||
net.addEdge("C", "X");
|
||||
net.addEdge("C", "Y");
|
||||
net.addEdge("C", "Z");
|
||||
net.addEdge("Y", "Z");
|
||||
std::vector<int> C = { 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1 };
|
||||
std::vector<std::vector<int>> Data = {
|
||||
{ 0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0},
|
||||
{ 1,2,0,2,2,2,1,0,0,1,1,1,0,1,2,1,0,2},
|
||||
{ 2,1,3,3,2,0,0,1,3,2,1,2,2,3,0,0,1,2}
|
||||
};
|
||||
std::map<std::string, std::vector<int>> states = {
|
||||
{ "X", {0, 1} },
|
||||
{ "Y", {0, 1, 2} },
|
||||
{ "Z", {0, 1, 2, 3} },
|
||||
{ "C", {0, 1} }
|
||||
};
|
||||
auto weights = std::vector<double>(C.size(), 1);
|
||||
// See https://www.overleaf.com/read/tfnhpfysfkfx#2d576c example for calculations
|
||||
INFO("Test Smoothing B - Laplace");
|
||||
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::LAPLACE);
|
||||
auto laplace_values = std::vector<std::vector<float>>({ {0.377418, 0.622582}, {0.217821, 0.782179} });
|
||||
auto laplace_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
|
||||
for (auto i = 0; i < 2; ++i) {
|
||||
for (auto j = 0; j < 2; ++j) {
|
||||
REQUIRE(laplace_score.at(i).at(j) == Catch::Approx(laplace_values.at(i).at(j)).margin(threshold));
|
||||
}
|
||||
}
|
||||
INFO("Test Smoothing B - Original");
|
||||
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::ORIGINAL);
|
||||
auto original_values = std::vector<std::vector<float>>({ {0.344769, 0.655231}, {0.0421263, 0.957874} });
|
||||
auto original_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
|
||||
for (auto i = 0; i < 2; ++i) {
|
||||
for (auto j = 0; j < 2; ++j) {
|
||||
REQUIRE(original_score.at(i).at(j) == Catch::Approx(original_values.at(i).at(j)).margin(threshold));
|
||||
}
|
||||
}
|
||||
INFO("Test Smoothing B - Cestnik");
|
||||
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::CESTNIK);
|
||||
auto cestnik_values = std::vector<std::vector<float>>({ {0.353422, 0.646578}, {0.12364, 0.87636} });
|
||||
auto cestnik_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
|
||||
for (auto i = 0; i < 2; ++i) {
|
||||
for (auto j = 0; j < 2; ++j) {
|
||||
REQUIRE(cestnik_score.at(i).at(j) == Catch::Approx(cestnik_values.at(i).at(j)).margin(threshold));
|
||||
}
|
||||
}
|
||||
INFO("Test Smoothing B - No smoothing");
|
||||
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::NONE);
|
||||
auto nosmooth_values = std::vector<std::vector<float>>({ {0.342465753, 0.65753424}, {0.0, 1.0} });
|
||||
auto nosmooth_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
|
||||
for (auto i = 0; i < 2; ++i) {
|
||||
for (auto j = 0; j < 2; ++j) {
|
||||
REQUIRE(nosmooth_score.at(i).at(j) == Catch::Approx(nosmooth_values.at(i).at(j)).margin(threshold));
|
||||
}
|
||||
}
|
||||
}
|
@ -62,15 +62,17 @@ TEST_CASE("Test Node computeCPT", "[Node]")
|
||||
// Create a vector with the names of the classes
|
||||
auto className = std::string("Class");
|
||||
// weights
|
||||
auto weights = torch::tensor({ 1.0, 1.0, 1.0, 1.0 });
|
||||
auto weights = torch::tensor({ 1.0, 1.0, 1.0, 1.0 }, torch::kDouble);
|
||||
std::vector<bayesnet::Node> nodes;
|
||||
for (int i = 0; i < features.size(); i++) {
|
||||
auto node = bayesnet::Node(features[i]);
|
||||
node.setNumStates(states[i]);
|
||||
nodes.push_back(node);
|
||||
}
|
||||
// Create node class with 2 states
|
||||
nodes.push_back(bayesnet::Node(className));
|
||||
nodes[features.size()].setNumStates(2);
|
||||
// The network is c->f1, f2, f3 y f1->f2, f3
|
||||
for (int i = 0; i < features.size(); i++) {
|
||||
// Add class node as parent of all feature nodes
|
||||
nodes[i].addParent(&nodes[features.size()]);
|
||||
|
@ -17,7 +17,7 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 342);
|
||||
REQUIRE(clf.getNumberOfEdges() == 684);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
@ -27,189 +27,192 @@ TEST_CASE("Build basic model", "[BoostA2DE]")
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
REQUIRE(score == Catch::Approx(0.919271).epsilon(raw.epsilon));
|
||||
}
|
||||
// TEST_CASE("Feature_select IWSS", "[BoostAODE]")
|
||||
// {
|
||||
// auto raw = RawDatasets("glass", true);
|
||||
// auto clf = bayesnet::BoostAODE();
|
||||
// clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
|
||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
// REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
// REQUIRE(clf.getNumberOfEdges() == 153);
|
||||
// REQUIRE(clf.getNotes().size() == 2);
|
||||
// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
|
||||
// REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
// }
|
||||
// TEST_CASE("Feature_select FCBF", "[BoostAODE]")
|
||||
// {
|
||||
// auto raw = RawDatasets("glass", true);
|
||||
// auto clf = bayesnet::BoostAODE();
|
||||
// clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
|
||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
// REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
// REQUIRE(clf.getNumberOfEdges() == 153);
|
||||
// REQUIRE(clf.getNotes().size() == 2);
|
||||
// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
|
||||
// REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
// }
|
||||
// TEST_CASE("Test used features in train note and score", "[BoostAODE]")
|
||||
// {
|
||||
// auto raw = RawDatasets("diabetes", true);
|
||||
// auto clf = bayesnet::BoostAODE(true);
|
||||
// clf.setHyperparameters({
|
||||
// {"order", "asc"},
|
||||
// {"convergence", true},
|
||||
// {"select_features","CFS"},
|
||||
// });
|
||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
// REQUIRE(clf.getNumberOfNodes() == 72);
|
||||
// REQUIRE(clf.getNumberOfEdges() == 120);
|
||||
// REQUIRE(clf.getNotes().size() == 2);
|
||||
// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
|
||||
// REQUIRE(clf.getNotes()[1] == "Number of models: 8");
|
||||
// auto score = clf.score(raw.Xv, raw.yv);
|
||||
// auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
// REQUIRE(score == Catch::Approx(0.809895813).epsilon(raw.epsilon));
|
||||
// REQUIRE(scoret == Catch::Approx(0.809895813).epsilon(raw.epsilon));
|
||||
// }
|
||||
// TEST_CASE("Voting vs proba", "[BoostAODE]")
|
||||
// {
|
||||
// auto raw = RawDatasets("iris", true);
|
||||
// auto clf = bayesnet::BoostAODE(false);
|
||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
// auto score_proba = clf.score(raw.Xv, raw.yv);
|
||||
// auto pred_proba = clf.predict_proba(raw.Xv);
|
||||
// clf.setHyperparameters({
|
||||
// {"predict_voting",true},
|
||||
// });
|
||||
// auto score_voting = clf.score(raw.Xv, raw.yv);
|
||||
// auto pred_voting = clf.predict_proba(raw.Xv);
|
||||
// REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
|
||||
// REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
|
||||
// REQUIRE(pred_voting[83][2] == Catch::Approx(1.0).epsilon(raw.epsilon));
|
||||
// REQUIRE(pred_proba[83][2] == Catch::Approx(0.86121525).epsilon(raw.epsilon));
|
||||
// REQUIRE(clf.dump_cpt() == "");
|
||||
// REQUIRE(clf.topological_order() == std::vector<std::string>());
|
||||
// }
|
||||
// TEST_CASE("Order asc, desc & random", "[BoostAODE]")
|
||||
// {
|
||||
// auto raw = RawDatasets("glass", true);
|
||||
// std::map<std::string, double> scores{
|
||||
// {"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
|
||||
// };
|
||||
// for (const std::string& order : { "asc", "desc", "rand" }) {
|
||||
// auto clf = bayesnet::BoostAODE();
|
||||
// clf.setHyperparameters({
|
||||
// {"order", order},
|
||||
// {"bisection", false},
|
||||
// {"maxTolerance", 1},
|
||||
// {"convergence", false},
|
||||
// });
|
||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
// auto score = clf.score(raw.Xv, raw.yv);
|
||||
// auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
// INFO("BoostAODE order: " + order);
|
||||
// REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
// REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
// }
|
||||
// }
|
||||
// TEST_CASE("Oddities", "[BoostAODE]")
|
||||
// {
|
||||
// auto clf = bayesnet::BoostAODE();
|
||||
// auto raw = RawDatasets("iris", true);
|
||||
// auto bad_hyper = nlohmann::json{
|
||||
// { { "order", "duck" } },
|
||||
// { { "select_features", "duck" } },
|
||||
// { { "maxTolerance", 0 } },
|
||||
// { { "maxTolerance", 5 } },
|
||||
// };
|
||||
// for (const auto& hyper : bad_hyper.items()) {
|
||||
// INFO("BoostAODE hyper: " + hyper.value().dump());
|
||||
// REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
||||
// }
|
||||
// REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0 } }), std::invalid_argument);
|
||||
// auto bad_hyper_fit = nlohmann::json{
|
||||
// { { "select_features","IWSS" }, { "threshold", -0.01 } },
|
||||
// { { "select_features","IWSS" }, { "threshold", 0.51 } },
|
||||
// { { "select_features","FCBF" }, { "threshold", 1e-8 } },
|
||||
// { { "select_features","FCBF" }, { "threshold", 1.01 } },
|
||||
// };
|
||||
// for (const auto& hyper : bad_hyper_fit.items()) {
|
||||
// INFO("BoostAODE hyper: " + hyper.value().dump());
|
||||
// clf.setHyperparameters(hyper.value());
|
||||
// REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::invalid_argument);
|
||||
// }
|
||||
// }
|
||||
|
||||
// TEST_CASE("Bisection Best", "[BoostAODE]")
|
||||
// {
|
||||
// auto clf = bayesnet::BoostAODE();
|
||||
// auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
|
||||
// clf.setHyperparameters({
|
||||
// {"bisection", true},
|
||||
// {"maxTolerance", 3},
|
||||
// {"convergence", true},
|
||||
// {"block_update", false},
|
||||
// {"convergence_best", false},
|
||||
// });
|
||||
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
|
||||
// REQUIRE(clf.getNumberOfNodes() == 210);
|
||||
// REQUIRE(clf.getNumberOfEdges() == 378);
|
||||
// REQUIRE(clf.getNotes().size() == 1);
|
||||
// REQUIRE(clf.getNotes().at(0) == "Number of models: 14");
|
||||
// auto score = clf.score(raw.X_test, raw.y_test);
|
||||
// auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
// REQUIRE(score == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
|
||||
// REQUIRE(scoret == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
|
||||
// }
|
||||
// TEST_CASE("Bisection Best vs Last", "[BoostAODE]")
|
||||
// {
|
||||
// auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false);
|
||||
// auto clf = bayesnet::BoostAODE(true);
|
||||
// auto hyperparameters = nlohmann::json{
|
||||
// {"bisection", true},
|
||||
// {"maxTolerance", 3},
|
||||
// {"convergence", true},
|
||||
// {"convergence_best", true},
|
||||
// };
|
||||
// clf.setHyperparameters(hyperparameters);
|
||||
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
|
||||
// auto score_best = clf.score(raw.X_test, raw.y_test);
|
||||
// REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon));
|
||||
// // Now we will set the hyperparameter to use the last accuracy
|
||||
// hyperparameters["convergence_best"] = false;
|
||||
// clf.setHyperparameters(hyperparameters);
|
||||
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
|
||||
// auto score_last = clf.score(raw.X_test, raw.y_test);
|
||||
// REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
|
||||
// }
|
||||
|
||||
// TEST_CASE("Block Update", "[BoostAODE]")
|
||||
// {
|
||||
// auto clf = bayesnet::BoostAODE();
|
||||
// auto raw = RawDatasets("mfeat-factors", true, 500);
|
||||
// clf.setHyperparameters({
|
||||
// {"bisection", true},
|
||||
// {"block_update", true},
|
||||
// {"maxTolerance", 3},
|
||||
// {"convergence", true},
|
||||
// });
|
||||
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
|
||||
// REQUIRE(clf.getNumberOfNodes() == 868);
|
||||
// REQUIRE(clf.getNumberOfEdges() == 1724);
|
||||
// REQUIRE(clf.getNotes().size() == 3);
|
||||
// REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
|
||||
// REQUIRE(clf.getNotes()[1] == "Used features in train: 19 of 216");
|
||||
// REQUIRE(clf.getNotes()[2] == "Number of models: 4");
|
||||
// auto score = clf.score(raw.X_test, raw.y_test);
|
||||
// auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
// REQUIRE(score == Catch::Approx(0.99f).epsilon(raw.epsilon));
|
||||
// REQUIRE(scoret == Catch::Approx(0.99f).epsilon(raw.epsilon));
|
||||
// //
|
||||
// // std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;
|
||||
// // std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;
|
||||
// // std::cout << "Notes size " << clf.getNotes().size() << std::endl;
|
||||
// // for (auto note : clf.getNotes()) {
|
||||
// // std::cout << note << std::endl;
|
||||
// // }
|
||||
// // std::cout << "Score " << score << std::endl;
|
||||
// }
|
||||
TEST_CASE("Feature_select IWSS", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 140);
|
||||
REQUIRE(clf.getNumberOfEdges() == 294);
|
||||
REQUIRE(clf.getNotes().size() == 4);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
|
||||
REQUIRE(clf.getNotes()[1] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[2] == "Pairs not used in train: 2");
|
||||
REQUIRE(clf.getNotes()[3] == "Number of models: 14");
|
||||
}
|
||||
TEST_CASE("Feature_select FCBF", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 110);
|
||||
REQUIRE(clf.getNumberOfEdges() == 231);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
|
||||
REQUIRE(clf.getNotes()[1] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[2] == "Pairs not used in train: 2");
|
||||
REQUIRE(clf.getNotes()[3] == "Number of models: 11");
|
||||
}
|
||||
TEST_CASE("Test used features in train note and score", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
auto clf = bayesnet::BoostA2DE(true);
|
||||
clf.setHyperparameters({
|
||||
{"order", "asc"},
|
||||
{"convergence", true},
|
||||
{"select_features","CFS"},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 144);
|
||||
REQUIRE(clf.getNumberOfEdges() == 288);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 16");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(0.856771).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.856771).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Voting vs proba", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::BoostA2DE(false);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_proba = clf.score(raw.Xv, raw.yv);
|
||||
auto pred_proba = clf.predict_proba(raw.Xv);
|
||||
clf.setHyperparameters({
|
||||
{"predict_voting",true},
|
||||
});
|
||||
auto score_voting = clf.score(raw.Xv, raw.yv);
|
||||
auto pred_voting = clf.predict_proba(raw.Xv);
|
||||
REQUIRE(score_proba == Catch::Approx(0.98).epsilon(raw.epsilon));
|
||||
REQUIRE(score_voting == Catch::Approx(0.946667).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_voting[83][2] == Catch::Approx(0.53508).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_proba[83][2] == Catch::Approx(0.48394).epsilon(raw.epsilon));
|
||||
REQUIRE(clf.dump_cpt() == "");
|
||||
REQUIRE(clf.topological_order() == std::vector<std::string>());
|
||||
}
|
||||
TEST_CASE("Order asc, desc & random", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
std::map<std::string, double> scores{
|
||||
{"asc", 0.752336f }, { "desc", 0.813084f }, { "rand", 0.850467 }
|
||||
};
|
||||
for (const std::string& order : { "asc", "desc", "rand" }) {
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.setHyperparameters({
|
||||
{"order", order},
|
||||
{"bisection", false},
|
||||
{"maxTolerance", 1},
|
||||
{"convergence", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
INFO("BoostA2DE order: " + order);
|
||||
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
TEST_CASE("Oddities2", "[BoostA2DE]")
|
||||
{
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto bad_hyper = nlohmann::json{
|
||||
{ { "order", "duck" } },
|
||||
{ { "select_features", "duck" } },
|
||||
{ { "maxTolerance", 0 } },
|
||||
{ { "maxTolerance", 5 } },
|
||||
};
|
||||
for (const auto& hyper : bad_hyper.items()) {
|
||||
INFO("BoostA2DE hyper: " + hyper.value().dump());
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
||||
}
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0 } }), std::invalid_argument);
|
||||
auto bad_hyper_fit = nlohmann::json{
|
||||
{ { "select_features","IWSS" }, { "threshold", -0.01 } },
|
||||
{ { "select_features","IWSS" }, { "threshold", 0.51 } },
|
||||
{ { "select_features","FCBF" }, { "threshold", 1e-8 } },
|
||||
{ { "select_features","FCBF" }, { "threshold", 1.01 } },
|
||||
};
|
||||
for (const auto& hyper : bad_hyper_fit.items()) {
|
||||
INFO("BoostA2DE hyper: " + hyper.value().dump());
|
||||
clf.setHyperparameters(hyper.value());
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
}
|
||||
}
|
||||
TEST_CASE("No features selected", "[BoostA2DE]")
|
||||
{
|
||||
// Check that the note "No features selected in initialization" is added
|
||||
//
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.setHyperparameters({ {"select_features","FCBF"}, {"threshold", 1 } });
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNotes().size() == 1);
|
||||
REQUIRE(clf.getNotes()[0] == "No features selected in initialization");
|
||||
}
|
||||
TEST_CASE("Bisection Best", "[BoostA2DE]")
|
||||
{
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
{"block_update", false},
|
||||
{"convergence_best", false},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 480);
|
||||
REQUIRE(clf.getNumberOfEdges() == 1152);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes().at(0) == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes().at(1) == "Pairs not used in train: 83");
|
||||
REQUIRE(clf.getNotes().at(2) == "Number of models: 32");
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(0.966667f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.966667f).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Block Update", "[BoostA2DE]")
|
||||
{
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
auto raw = RawDatasets("spambase", true, 500);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"block_update", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 58);
|
||||
REQUIRE(clf.getNumberOfEdges() == 165);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[1] == "Pairs not used in train: 1588");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 1");
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
//
|
||||
// std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;
|
||||
// std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;
|
||||
// std::cout << "Notes size " << clf.getNotes().size() << std::endl;
|
||||
// for (auto note : clf.getNotes()) {
|
||||
// std::cout << note << std::endl;
|
||||
// }
|
||||
// std::cout << "Score " << score << std::endl;
|
||||
}
|
||||
TEST_CASE("Test graph b2a2de", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto graph = clf.graph();
|
||||
REQUIRE(graph.size() == 26);
|
||||
REQUIRE(graph[0] == "digraph BayesNet {\nlabel=<BayesNet BoostA2DE_0>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");
|
||||
REQUIRE(graph[1] == "\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n");
|
||||
}
|
@ -18,7 +18,7 @@ TEST_CASE("Feature_select CFS", "[BoostAODE]")
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({ {"select_features", "CFS"} });
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 153);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
@ -30,7 +30,7 @@ TEST_CASE("Feature_select IWSS", "[BoostAODE]")
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 153);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
@ -42,7 +42,7 @@ TEST_CASE("Feature_select FCBF", "[BoostAODE]")
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 153);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
@ -58,7 +58,7 @@ TEST_CASE("Test used features in train note and score", "[BoostAODE]")
|
||||
{"convergence", true},
|
||||
{"select_features","CFS"},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 72);
|
||||
REQUIRE(clf.getNumberOfEdges() == 120);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
@ -73,7 +73,7 @@ TEST_CASE("Voting vs proba", "[BoostAODE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::BoostAODE(false);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_proba = clf.score(raw.Xv, raw.yv);
|
||||
auto pred_proba = clf.predict_proba(raw.Xv);
|
||||
clf.setHyperparameters({
|
||||
@ -102,7 +102,7 @@ TEST_CASE("Order asc, desc & random", "[BoostAODE]")
|
||||
{"maxTolerance", 1},
|
||||
{"convergence", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
INFO("BoostAODE order: " << order);
|
||||
@ -134,7 +134,7 @@ TEST_CASE("Oddities", "[BoostAODE]")
|
||||
for (const auto& hyper : bad_hyper_fit.items()) {
|
||||
INFO("BoostAODE hyper: " << hyper.value().dump());
|
||||
clf.setHyperparameters(hyper.value());
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states), std::invalid_argument);
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
}
|
||||
}
|
||||
|
||||
@ -149,7 +149,7 @@ TEST_CASE("Bisection Best", "[BoostAODE]")
|
||||
{"block_update", false},
|
||||
{"convergence_best", false},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 210);
|
||||
REQUIRE(clf.getNumberOfEdges() == 378);
|
||||
REQUIRE(clf.getNotes().size() == 1);
|
||||
@ -170,13 +170,13 @@ TEST_CASE("Bisection Best vs Last", "[BoostAODE]")
|
||||
{"convergence_best", true},
|
||||
};
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_best = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon));
|
||||
// Now we will set the hyperparameter to use the last accuracy
|
||||
hyperparameters["convergence_best"] = false;
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_last = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
|
||||
}
|
||||
@ -191,7 +191,7 @@ TEST_CASE("Block Update", "[BoostAODE]")
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 868);
|
||||
REQUIRE(clf.getNumberOfEdges() == 1724);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
|
72
tests/TestMST.cc
Normal file
72
tests/TestMST.cc
Normal file
@ -0,0 +1,72 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/utils/Mst.h"
|
||||
|
||||
|
||||
TEST_CASE("MST::insertElement tests", "[MST]")
|
||||
{
|
||||
bayesnet::MST mst({}, torch::tensor({}), 0);
|
||||
SECTION("Insert into an empty list")
|
||||
{
|
||||
std::list<int> variables;
|
||||
mst.insertElement(variables, 5);
|
||||
REQUIRE(variables == std::list<int>{5});
|
||||
}
|
||||
SECTION("Insert a non-duplicate element")
|
||||
{
|
||||
std::list<int> variables = { 1, 2, 3 };
|
||||
mst.insertElement(variables, 4);
|
||||
REQUIRE(variables == std::list<int>{4, 1, 2, 3});
|
||||
}
|
||||
SECTION("Insert a duplicate element")
|
||||
{
|
||||
std::list<int> variables = { 1, 2, 3 };
|
||||
mst.insertElement(variables, 2);
|
||||
REQUIRE(variables == std::list<int>{1, 2, 3});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("MST::reorder tests", "[MST]")
|
||||
{
|
||||
bayesnet::MST mst({}, torch::tensor({}), 0);
|
||||
SECTION("Reorder simple graph")
|
||||
{
|
||||
std::vector<std::pair<float, std::pair<int, int>>> T = { {2.0, {1, 2}}, {1.0, {0, 1}} };
|
||||
auto result = mst.reorder(T, 0);
|
||||
REQUIRE(result == std::vector<std::pair<int, int>>{{0, 1}, { 1, 2 }});
|
||||
}
|
||||
SECTION("Reorder with disconnected graph")
|
||||
{
|
||||
std::vector<std::pair<float, std::pair<int, int>>> T = { {2.0, {2, 3}}, {1.0, {0, 1}} };
|
||||
auto result = mst.reorder(T, 0);
|
||||
REQUIRE(result == std::vector<std::pair<int, int>>{{0, 1}, { 2, 3 }});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("MST::maximumSpanningTree tests", "[MST]")
|
||||
{
|
||||
std::vector<std::string> features = { "A", "B", "C" };
|
||||
auto weights = torch::tensor({
|
||||
{0.0, 1.0, 2.0},
|
||||
{1.0, 0.0, 3.0},
|
||||
{2.0, 3.0, 0.0}
|
||||
});
|
||||
bayesnet::MST mst(features, weights, 0);
|
||||
|
||||
SECTION("MST of a complete graph")
|
||||
{
|
||||
auto result = mst.maximumSpanningTree();
|
||||
REQUIRE(result.size() == 2); // Un MST para 3 nodos tiene 2 aristas
|
||||
}
|
||||
}
|
@ -16,10 +16,10 @@
|
||||
#include "TestUtils.h"
|
||||
|
||||
std::map<std::string, std::string> modules = {
|
||||
{ "mdlp", "1.1.2" },
|
||||
{ "mdlp", "2.0.1" },
|
||||
{ "Folding", "1.1.0" },
|
||||
{ "json", "3.11" },
|
||||
{ "ArffFiles", "1.0.0" }
|
||||
{ "ArffFiles", "1.1.0" }
|
||||
};
|
||||
|
||||
TEST_CASE("MDLP", "[Modules]")
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <ArffFiles.hpp>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <folding.hpp>
|
||||
#include <bayesnet/network/Network.h>
|
||||
|
||||
|
||||
class RawDatasets {
|
||||
@ -32,6 +33,7 @@ public:
|
||||
bool discretize;
|
||||
int num_samples = 0;
|
||||
bool shuffle = false;
|
||||
bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::ORIGINAL;
|
||||
private:
|
||||
std::string to_string()
|
||||
{
|
||||
|
4811
tests/data/spambase.arff
Executable file
4811
tests/data/spambase.arff
Executable file
File diff suppressed because it is too large
Load Diff
@ -1 +1 @@
|
||||
Subproject commit 40ac38011a2445e00df8a18048c67abaff16fa59
|
||||
Subproject commit a4329f5f9dfdb18ee3faa63bd5b665f2f253b8d2
|
@ -1 +1 @@
|
||||
Subproject commit 4e8d92bf02f7d1c8006a0e7a5ecabd8e62d98502
|
||||
Subproject commit 506276c59217429c93abd2fe9507c7f45eb81072
|
Loading…
Reference in New Issue
Block a user