Compare commits
38 Commits
alphablock
...
actions
Author | SHA1 | Date | |
---|---|---|---|
da05de6e15
|
|||
b0c2ff1aa8
|
|||
306d3a4b55
|
|||
bf08b0de89
|
|||
b976db53c6
|
|||
be39d2dedb
|
|||
4ca770d16b
|
|||
6bf3b939bc
|
|||
7076efc2a1 | |||
9ee388561f
|
|||
70c7d3dd3d
|
|||
400967b4e3
|
|||
c234308701
|
|||
4ded6f51eb
|
|||
b1d317d8f4
|
|||
7876d1a370
|
|||
3bdb14bd65
|
|||
71b05cc1a7
|
|||
a59689272d
|
|||
3d8be79b37
|
|||
619276a5ea
|
|||
e681099360
|
|||
5919fbfd34
|
|||
a26522e62f
|
|||
86cccb6c7b
|
|||
d1b235261e
|
|||
7a8e0391dc
|
|||
6cfbc482d8
|
|||
ca54f799ee
|
|||
06621ea361
|
|||
a70ac3e883
|
|||
b987dcbcc4
|
|||
81fd7df7f0
|
|||
dd98cf159d
|
|||
f658149977
|
|||
fb957ac3fe
|
|||
b90e558238
|
|||
64970cf7f7 |
10
.clang-format
Normal file
10
.clang-format
Normal file
@@ -0,0 +1,10 @@
|
||||
# .clang-format
|
||||
---
|
||||
BasedOnStyle: LLVM
|
||||
AccessModifierOffset: -4
|
||||
BreakBeforeBraces: Linux
|
||||
ColumnLimit: 0
|
||||
FixNamespaceComments: false
|
||||
IndentWidth: 4
|
||||
NamespaceIndentation: All
|
||||
TabWidth: 4
|
@@ -35,7 +35,8 @@ RUN rm -fr lcov-2.1
|
||||
|
||||
# Install Miniconda
|
||||
RUN mkdir -p /opt/conda
|
||||
RUN wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh" -O /opt/conda/miniconda.sh && \
|
||||
# RUN wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh" -O /opt/conda/miniconda.sh && \
|
||||
RUN wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" -O /opt/conda/miniconda.sh && \
|
||||
bash /opt/conda/miniconda.sh -b -p /opt/miniconda
|
||||
|
||||
# Add conda to PATH
|
||||
|
19
.gitea/workflows/action.yaml
Normal file
19
.gitea/workflows/action.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
name: Gitea Actions Demo
|
||||
run-name: ${{ github.actor }} is testing out Gitea Actions 🚀
|
||||
on: [push]
|
||||
jobs:
|
||||
Explore-GitHub-Actions:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
|
||||
- run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
|
||||
- run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
- run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
|
||||
- run: echo "🖥️ The workflow is now ready to test your code on the runner."
|
||||
- name: List files in the repository
|
||||
run: |
|
||||
ls ${{ github.workspace }}
|
||||
- run: echo "🍏 This job's status is ${{ job.status }}."
|
||||
|
19
.gitea/workflows/testing.yaml
Normal file
19
.gitea/workflows/testing.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
name: Gitea Actions Demo
|
||||
run-name: ${{ github.actor }} is testing out Gitea Actions 🚀
|
||||
on: [push]
|
||||
jobs:
|
||||
Explore-GitHub-Actions:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
|
||||
- run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
|
||||
- run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
- run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
|
||||
- run: echo "🖥️ The workflow is now ready to test your code on the runner."
|
||||
- name: List files in the repository
|
||||
run: |
|
||||
ls ${{ github.workspace }}
|
||||
- run: echo "🍏 This job's status is ${{ job.status }}."
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -44,4 +44,5 @@ docs/manual
|
||||
docs/man3
|
||||
docs/man
|
||||
docs/Doxyfile
|
||||
.cache
|
||||
|
||||
|
4
.vscode/launch.json
vendored
4
.vscode/launch.json
vendored
@@ -5,7 +5,7 @@
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "sample",
|
||||
"program": "${workspaceFolder}/build_release/sample/bayesnet_sample",
|
||||
"program": "${workspaceFolder}/sample/build/bayesnet_sample",
|
||||
"args": [
|
||||
"${workspaceFolder}/tests/data/glass.arff"
|
||||
]
|
||||
@@ -16,7 +16,7 @@
|
||||
"name": "test",
|
||||
"program": "${workspaceFolder}/build_Debug/tests/TestBayesNet",
|
||||
"args": [
|
||||
"No features selected"
|
||||
"[XBAODE]"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/build_Debug/tests"
|
||||
},
|
||||
|
25
CHANGELOG.md
25
CHANGELOG.md
@@ -7,11 +7,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Internal
|
||||
|
||||
- Add changes to .clang-format to ajust to vscode format style thanks to https://clang-format-configurator.site/
|
||||
|
||||
## [1.0.7] 2025-03-16
|
||||
|
||||
### Added
|
||||
|
||||
- Add a new hyperparameter to the BoostAODE class, *alphablock*, to control the way α is computed, with the last model or with the ensmble built so far. Default value is *false*.
|
||||
- Add a new hyperparameter to the SPODE class, *parent*, to set the root node of the model. If no value is set the root parameter of the constructor is used.
|
||||
- Add a new hyperparameter to the TAN class, *parent*, to set the root node of the model. If not set the first feature is used as root.
|
||||
- A new hyperparameter to the BoostAODE class, *alphablock*, to control the way α is computed, with the last model or with the ensmble built so far. Default value is *false*.
|
||||
- A new hyperparameter to the SPODE class, *parent*, to set the root node of the model. If no value is set the root parameter of the constructor is used.
|
||||
- A new hyperparameter to the TAN class, *parent*, to set the root node of the model. If not set the first feature is used as root.
|
||||
- A new model named XSPODE, an optimized for speed averaged one dependence estimator.
|
||||
- A new model named XSP2DE, an optimized for speed averaged two dependence estimator.
|
||||
- A new model named XBAODE, an optimized for speed BoostAODE model.
|
||||
- A new model named XBA2DE, an optimized for speed BoostA2DE model.
|
||||
|
||||
### Internal
|
||||
|
||||
- Optimize ComputeCPT method in the Node class.
|
||||
- Add methods getCount and getMaxCount to the CountingSemaphore class, returning the current count and the maximum count of threads respectively.
|
||||
|
||||
### Changed
|
||||
|
||||
- Hyperparameter *maxTolerance* in the BoostAODE class is now in [1, 6] range (it was in [1, 4] range before).
|
||||
|
||||
## [1.0.6] 2024-11-23
|
||||
|
||||
|
@@ -1,22 +1,18 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
cmake_minimum_required(VERSION 3.24)
|
||||
|
||||
project(BayesNet
|
||||
VERSION 1.0.6
|
||||
VERSION 1.0.7
|
||||
DESCRIPTION "Bayesian Network and basic classifiers Library."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
|
||||
LANGUAGES CXX
|
||||
)
|
||||
|
||||
if (CODE_COVERAGE AND NOT ENABLE_TESTING)
|
||||
MESSAGE(FATAL_ERROR "Code coverage requires testing enabled")
|
||||
message(FATAL_ERROR "Code coverage requires testing enabled")
|
||||
endif (CODE_COVERAGE AND NOT ENABLE_TESTING)
|
||||
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
if (POLICY CMP0135)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
endif ()
|
||||
|
||||
# Global CMake variables
|
||||
# ----------------------
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
@@ -24,7 +20,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -fno-elide-constructors")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
@@ -44,19 +40,28 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_P
|
||||
include(AddGitSubmodule)
|
||||
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
MESSAGE("Debug mode")
|
||||
message(STATUS "* Debug mode")
|
||||
enable_testing()
|
||||
set(ENABLE_TESTING ON)
|
||||
set(CODE_COVERAGE ON)
|
||||
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
|
||||
# Testing
|
||||
# -------
|
||||
if (ENABLE_TESTING)
|
||||
message(STATUS "Testing enabled")
|
||||
add_subdirectory(tests/lib/catch2)
|
||||
include(CTest)
|
||||
add_subdirectory(tests)
|
||||
endif (ENABLE_TESTING)
|
||||
if (CODE_COVERAGE)
|
||||
include(CodeCoverage)
|
||||
message(STATUS "Code coverage enabled")
|
||||
set(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
endif (CODE_COVERAGE)
|
||||
|
||||
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||
message(STATUS "Languages=${LANGUAGES}")
|
||||
if (CODE_COVERAGE)
|
||||
enable_testing()
|
||||
include(CodeCoverage)
|
||||
MESSAGE(STATUS "Code coverage enabled")
|
||||
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
endif (CODE_COVERAGE)
|
||||
|
||||
if (ENABLE_CLANG_TIDY)
|
||||
include(StaticAnalyzers) # clang-tidy
|
||||
@@ -74,14 +79,7 @@ add_git_submodule("lib/mdlp")
|
||||
add_subdirectory(config)
|
||||
add_subdirectory(bayesnet)
|
||||
|
||||
# Testing
|
||||
# -------
|
||||
if (ENABLE_TESTING)
|
||||
MESSAGE(STATUS "Testing enabled")
|
||||
add_subdirectory(tests/lib/catch2)
|
||||
include(CTest)
|
||||
add_subdirectory(tests)
|
||||
endif (ENABLE_TESTING)
|
||||
|
||||
|
||||
# Installation
|
||||
# ------------
|
||||
@@ -100,9 +98,7 @@ if (Doxygen_FOUND)
|
||||
set(doxyfile_in ${DOC_DIR}/Doxyfile.in)
|
||||
set(doxyfile ${DOC_DIR}/Doxyfile)
|
||||
configure_file(${doxyfile_in} ${doxyfile} @ONLY)
|
||||
doxygen_add_docs(doxygen
|
||||
WORKING_DIRECTORY ${DOC_DIR}
|
||||
CONFIG_FILE ${doxyfile})
|
||||
doxygen_add_docs(doxygen WORKING_DIRECTORY ${DOC_DIR} CONFIG_FILE ${doxyfile})
|
||||
else (Doxygen_FOUND)
|
||||
MESSAGE("* Doxygen not found")
|
||||
message(WARNING "* Doxygen not found")
|
||||
endif (Doxygen_FOUND)
|
||||
|
10
Makefile
10
Makefile
@@ -97,10 +97,18 @@ fname = "tests/data/iris.arff"
|
||||
sample: ## Build sample
|
||||
@echo ">>> Building Sample...";
|
||||
@if [ -d ./sample/build ]; then rm -rf ./sample/build; fi
|
||||
@cd sample && cmake -B build -S . && cmake --build build -t bayesnet_sample
|
||||
@cd sample && cmake -B build -S . -D CMAKE_BUILD_TYPE=Debug && cmake --build build -t bayesnet_sample
|
||||
sample/build/bayesnet_sample $(fname)
|
||||
@echo ">>> Done";
|
||||
|
||||
fname = "tests/data/iris.arff"
|
||||
sample2: ## Build sample2
|
||||
@echo ">>> Building Sample...";
|
||||
@if [ -d ./sample/build ]; then rm -rf ./sample/build; fi
|
||||
@cd sample && cmake -B build -S . -D CMAKE_BUILD_TYPE=Debug && cmake --build build -t bayesnet_sample_xspode
|
||||
sample/build/bayesnet_sample_xspode $(fname)
|
||||
@echo ">>> Done";
|
||||
|
||||
opt = ""
|
||||
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
|
||||
@echo ">>> Running BayesNet tests...";
|
||||
|
10
README.md
10
README.md
@@ -2,12 +2,12 @@
|
||||
|
||||

|
||||
[](<https://opensource.org/licenses/MIT>)
|
||||

|
||||

|
||||
[](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||

|
||||
[](html/index.html)
|
||||

|
||||
[](https://gitea.rmontanana.es/rmontanana/BayesNet)
|
||||
[](https://doi.org/10.5281/zenodo.14210344)
|
||||
|
||||
Bayesian Network Classifiers library
|
||||
@@ -76,8 +76,12 @@ make sample fname=tests/data/glass.arff
|
||||
|
||||
#### - [BoostAODE](docs/BoostAODE.md)
|
||||
|
||||
#### - XBAODE
|
||||
|
||||
#### - BoostA2DE
|
||||
|
||||
#### - XBA2DE
|
||||
|
||||
### With Local Discretization
|
||||
|
||||
#### - TANLd
|
||||
|
@@ -14,13 +14,13 @@ namespace bayesnet {
|
||||
enum status_t { NORMAL, WARNING, ERROR };
|
||||
class BaseClassifier {
|
||||
public:
|
||||
virtual ~BaseClassifier() = default;
|
||||
// X is nxm std::vector, y is nx1 std::vector
|
||||
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||
// X is nxm tensor, y is nx1 tensor
|
||||
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
|
||||
virtual ~BaseClassifier() = default;
|
||||
torch::Tensor virtual predict(torch::Tensor& X) = 0;
|
||||
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
|
||||
torch::Tensor virtual predict_proba(torch::Tensor& X) = 0;
|
||||
@@ -43,5 +43,7 @@ namespace bayesnet {
|
||||
protected:
|
||||
virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
|
||||
std::vector<std::string> validHyperparameters;
|
||||
std::vector<std::string> notes; // Used to store messages occurred during the fit process
|
||||
status_t status = NORMAL;
|
||||
};
|
||||
}
|
@@ -1,4 +1,5 @@
|
||||
include_directories(
|
||||
${BayesNet_SOURCE_DIR}/lib/log
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
||||
${BayesNet_SOURCE_DIR}/lib/folding
|
||||
${BayesNet_SOURCE_DIR}/lib/json/include
|
||||
|
@@ -10,7 +10,6 @@
|
||||
|
||||
namespace bayesnet {
|
||||
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
|
||||
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
|
||||
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
this->features = features;
|
||||
|
@@ -46,12 +46,11 @@ namespace bayesnet {
|
||||
std::string className;
|
||||
std::map<std::string, std::vector<int>> states;
|
||||
torch::Tensor dataset; // (n+1)xm tensor
|
||||
status_t status = NORMAL;
|
||||
std::vector<std::string> notes; // Used to store messages occurred during the fit process
|
||||
void checkFitParameters();
|
||||
virtual void buildModel(const torch::Tensor& weights) = 0;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
void buildDataset(torch::Tensor& y);
|
||||
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
|
||||
private:
|
||||
Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
|
||||
};
|
||||
|
@@ -3,7 +3,7 @@
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "KDB.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@@ -7,15 +7,14 @@
|
||||
#ifndef KDB_H
|
||||
#define KDB_H
|
||||
#include <torch/torch.h>
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "Classifier.h"
|
||||
namespace bayesnet {
|
||||
class KDB : public Classifier {
|
||||
private:
|
||||
int k;
|
||||
float theta;
|
||||
void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
|
||||
protected:
|
||||
void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
public:
|
||||
explicit KDB(int k, float theta = 0.03);
|
||||
|
575
bayesnet/classifiers/XSP2DE.cc
Normal file
575
bayesnet/classifiers/XSP2DE.cc
Normal file
@@ -0,0 +1,575 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "XSP2DE.h"
|
||||
#include <pthread.h> // for pthread_setname_np on linux
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include "bayesnet/utils/TensorUtils.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
// --------------------------------------
|
||||
// Constructor
|
||||
// --------------------------------------
|
||||
XSp2de::XSp2de(int spIndex1, int spIndex2)
|
||||
: superParent1_{ spIndex1 }
|
||||
, superParent2_{ spIndex2 }
|
||||
, nFeatures_{0}
|
||||
, statesClass_{0}
|
||||
, alpha_{1.0}
|
||||
, initializer_{1.0}
|
||||
, semaphore_{ CountingSemaphore::getInstance() }
|
||||
, Classifier(Network())
|
||||
{
|
||||
validHyperparameters = { "parent1", "parent2" };
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// setHyperparameters
|
||||
// --------------------------------------
|
||||
void XSp2de::setHyperparameters(const nlohmann::json &hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("parent1")) {
|
||||
superParent1_ = hyperparameters["parent1"];
|
||||
hyperparameters.erase("parent1");
|
||||
}
|
||||
if (hyperparameters.contains("parent2")) {
|
||||
superParent2_ = hyperparameters["parent2"];
|
||||
hyperparameters.erase("parent2");
|
||||
}
|
||||
// Hand off anything else to base Classifier
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// fitx
|
||||
// --------------------------------------
|
||||
void XSp2de::fitx(torch::Tensor & X, torch::Tensor & y,
|
||||
torch::Tensor & weights_, const Smoothing_t smoothing)
|
||||
{
|
||||
m = X.size(1); // number of samples
|
||||
n = X.size(0); // number of features
|
||||
dataset = X;
|
||||
|
||||
// Build the dataset in your environment if needed:
|
||||
buildDataset(y);
|
||||
|
||||
// Construct the data structures needed for counting
|
||||
buildModel(weights_);
|
||||
|
||||
// Accumulate counts & convert to probabilities
|
||||
trainModel(weights_, smoothing);
|
||||
fitted = true;
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// buildModel
|
||||
// --------------------------------------
|
||||
void XSp2de::buildModel(const torch::Tensor &weights)
|
||||
{
|
||||
nFeatures_ = n;
|
||||
|
||||
// Derive the number of states for each feature from the dataset
|
||||
// states_[f] = max value in dataset[f] + 1.
|
||||
states_.resize(nFeatures_);
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
// This is naive: we take max in feature f. You might adapt for real data.
|
||||
states_[f] = dataset[f].max().item<int>() + 1;
|
||||
}
|
||||
// Class states:
|
||||
statesClass_ = dataset[-1].max().item<int>() + 1;
|
||||
|
||||
// Initialize the class counts
|
||||
classCounts_.resize(statesClass_, 0.0);
|
||||
|
||||
// For sp1 -> p(sp1Val| c)
|
||||
sp1FeatureCounts_.resize(states_[superParent1_] * statesClass_, 0.0);
|
||||
|
||||
// For sp2 -> p(sp2Val| c)
|
||||
sp2FeatureCounts_.resize(states_[superParent2_] * statesClass_, 0.0);
|
||||
|
||||
// For child features, we store p(childVal | c, sp1Val, sp2Val).
|
||||
// childCounts_ will hold raw counts. We’ll gather them in one big vector.
|
||||
// We need an offset for each feature.
|
||||
childOffsets_.resize(nFeatures_, -1);
|
||||
|
||||
int totalSize = 0;
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
if (f == superParent1_ || f == superParent2_) {
|
||||
// skip the superparents
|
||||
childOffsets_[f] = -1;
|
||||
continue;
|
||||
}
|
||||
childOffsets_[f] = totalSize;
|
||||
// block size for a single child f: states_[f] * statesClass_
|
||||
// * states_[superParent1_]
|
||||
// * states_[superParent2_].
|
||||
totalSize += (states_[f] * statesClass_
|
||||
* states_[superParent1_]
|
||||
* states_[superParent2_]);
|
||||
}
|
||||
childCounts_.resize(totalSize, 0.0);
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// trainModel
|
||||
// --------------------------------------
|
||||
void XSp2de::trainModel(const torch::Tensor &weights,
|
||||
const bayesnet::Smoothing_t smoothing)
|
||||
{
|
||||
// Accumulate raw counts
|
||||
for (int i = 0; i < m; i++) {
|
||||
std::vector<int> instance(nFeatures_ + 1);
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
instance[f] = dataset[f][i].item<int>();
|
||||
}
|
||||
instance[nFeatures_] = dataset[-1][i].item<int>(); // class
|
||||
double w = weights[i].item<double>();
|
||||
addSample(instance, w);
|
||||
}
|
||||
|
||||
// Choose alpha based on smoothing:
|
||||
switch (smoothing) {
|
||||
case bayesnet::Smoothing_t::ORIGINAL:
|
||||
alpha_ = 1.0 / m;
|
||||
break;
|
||||
case bayesnet::Smoothing_t::LAPLACE:
|
||||
alpha_ = 1.0;
|
||||
break;
|
||||
default:
|
||||
alpha_ = 0.0; // no smoothing
|
||||
}
|
||||
|
||||
// Large initializer factor for numerical stability
|
||||
initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_);
|
||||
|
||||
// Convert raw counts to probabilities
|
||||
computeProbabilities();
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// addSample
|
||||
// --------------------------------------
|
||||
void XSp2de::addSample(const std::vector<int> &instance, double weight)
|
||||
{
|
||||
if (weight <= 0.0)
|
||||
return;
|
||||
|
||||
int c = instance.back();
|
||||
// increment classCounts
|
||||
classCounts_[c] += weight;
|
||||
|
||||
int sp1Val = instance[superParent1_];
|
||||
int sp2Val = instance[superParent2_];
|
||||
|
||||
// p(sp1|c)
|
||||
sp1FeatureCounts_[sp1Val * statesClass_ + c] += weight;
|
||||
|
||||
// p(sp2|c)
|
||||
sp2FeatureCounts_[sp2Val * statesClass_ + c] += weight;
|
||||
|
||||
// p(childVal| c, sp1Val, sp2Val)
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
if (f == superParent1_ || f == superParent2_)
|
||||
continue;
|
||||
|
||||
int childVal = instance[f];
|
||||
int offset = childOffsets_[f];
|
||||
// block layout:
|
||||
// offset + (sp1Val*(states_[sp2_]* states_[f]* statesClass_))
|
||||
// + (sp2Val*(states_[f]* statesClass_))
|
||||
// + childVal*(statesClass_)
|
||||
// + c
|
||||
int blockSizeSp2 = states_[superParent2_]
|
||||
* states_[f]
|
||||
* statesClass_;
|
||||
int blockSizeChild = states_[f] * statesClass_;
|
||||
|
||||
int idx = offset
|
||||
+ sp1Val*blockSizeSp2
|
||||
+ sp2Val*blockSizeChild
|
||||
+ childVal*statesClass_
|
||||
+ c;
|
||||
childCounts_[idx] += weight;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// computeProbabilities
|
||||
// --------------------------------------
|
||||
void XSp2de::computeProbabilities()
|
||||
{
|
||||
double totalCount = std::accumulate(classCounts_.begin(),
|
||||
classCounts_.end(), 0.0);
|
||||
|
||||
// classPriors_
|
||||
classPriors_.resize(statesClass_, 0.0);
|
||||
if (totalCount <= 0.0) {
|
||||
// fallback => uniform
|
||||
double unif = 1.0 / static_cast<double>(statesClass_);
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
classPriors_[c] = unif;
|
||||
}
|
||||
} else {
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
classPriors_[c] =
|
||||
(classCounts_[c] + alpha_)
|
||||
/ (totalCount + alpha_ * statesClass_);
|
||||
}
|
||||
}
|
||||
|
||||
// p(sp1Val| c)
|
||||
sp1FeatureProbs_.resize(sp1FeatureCounts_.size());
|
||||
int sp1Card = states_[superParent1_];
|
||||
for (int spVal = 0; spVal < sp1Card; spVal++) {
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
double denom = classCounts_[c] + alpha_ * sp1Card;
|
||||
double num = sp1FeatureCounts_[spVal * statesClass_ + c] + alpha_;
|
||||
sp1FeatureProbs_[spVal * statesClass_ + c] =
|
||||
(denom <= 0.0 ? 0.0 : num / denom);
|
||||
}
|
||||
}
|
||||
|
||||
// p(sp2Val| c)
|
||||
sp2FeatureProbs_.resize(sp2FeatureCounts_.size());
|
||||
int sp2Card = states_[superParent2_];
|
||||
for (int spVal = 0; spVal < sp2Card; spVal++) {
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
double denom = classCounts_[c] + alpha_ * sp2Card;
|
||||
double num = sp2FeatureCounts_[spVal * statesClass_ + c] + alpha_;
|
||||
sp2FeatureProbs_[spVal * statesClass_ + c] =
|
||||
(denom <= 0.0 ? 0.0 : num / denom);
|
||||
}
|
||||
}
|
||||
|
||||
// p(childVal| c, sp1Val, sp2Val)
|
||||
childProbs_.resize(childCounts_.size());
|
||||
int offset = 0;
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
if (f == superParent1_ || f == superParent2_)
|
||||
continue;
|
||||
|
||||
int fCard = states_[f];
|
||||
int sp1Card_ = states_[superParent1_];
|
||||
int sp2Card_ = states_[superParent2_];
|
||||
int childBlockSizeSp2 = sp2Card_ * fCard * statesClass_;
|
||||
int childBlockSizeF = fCard * statesClass_;
|
||||
|
||||
int blockSize = fCard * sp1Card_ * sp2Card_ * statesClass_;
|
||||
for (int sp1Val = 0; sp1Val < sp1Card_; sp1Val++) {
|
||||
for (int sp2Val = 0; sp2Val < sp2Card_; sp2Val++) {
|
||||
for (int childVal = 0; childVal < fCard; childVal++) {
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
// index in childCounts_
|
||||
int idx = offset
|
||||
+ sp1Val*childBlockSizeSp2
|
||||
+ sp2Val*childBlockSizeF
|
||||
+ childVal*statesClass_
|
||||
+ c;
|
||||
double num = childCounts_[idx] + alpha_;
|
||||
// denominator is the count of (sp1Val,sp2Val,c) plus alpha * fCard
|
||||
// We can find that by summing childVal dimension, but we already
|
||||
// have it in childCounts_[...] or we can re-check the superparent
|
||||
// counts if your approach is purely hierarchical.
|
||||
// Here we'll do it like the XSpode approach: sp1&sp2 are
|
||||
// conditionally independent given c, so denominators come from
|
||||
// summing the relevant block or we treat sp1,sp2 as "parents."
|
||||
// A simpler approach:
|
||||
double sumSp1Sp2C = 0.0;
|
||||
// sum over all childVal:
|
||||
for (int cv = 0; cv < fCard; cv++) {
|
||||
int idx2 = offset
|
||||
+ sp1Val*childBlockSizeSp2
|
||||
+ sp2Val*childBlockSizeF
|
||||
+ cv*statesClass_ + c;
|
||||
sumSp1Sp2C += childCounts_[idx2];
|
||||
}
|
||||
double denom = sumSp1Sp2C + alpha_ * fCard;
|
||||
childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
offset += blockSize;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// predict_proba (single instance)
|
||||
// --------------------------------------
|
||||
std::vector<double> XSp2de::predict_proba(const std::vector<int> &instance) const
|
||||
{
|
||||
if (!fitted) {
|
||||
throw std::logic_error(CLASSIFIER_NOT_FITTED);
|
||||
}
|
||||
std::vector<double> probs(statesClass_, 0.0);
|
||||
|
||||
int sp1Val = instance[superParent1_];
|
||||
int sp2Val = instance[superParent2_];
|
||||
|
||||
// Start with p(c) * p(sp1Val| c) * p(sp2Val| c)
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
double pC = classPriors_[c];
|
||||
double pSp1C = sp1FeatureProbs_[sp1Val * statesClass_ + c];
|
||||
double pSp2C = sp2FeatureProbs_[sp2Val * statesClass_ + c];
|
||||
probs[c] = pC * pSp1C * pSp2C * initializer_;
|
||||
}
|
||||
|
||||
// Multiply by each child feature f
|
||||
int offset = 0;
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
if (f == superParent1_ || f == superParent2_)
|
||||
continue;
|
||||
|
||||
int valF = instance[f];
|
||||
int fCard = states_[f];
|
||||
int sp1Card = states_[superParent1_];
|
||||
int sp2Card = states_[superParent2_];
|
||||
int blockSizeSp2 = sp2Card * fCard * statesClass_;
|
||||
int blockSizeF = fCard * statesClass_;
|
||||
|
||||
// base index for childProbs_ for this child and sp1Val, sp2Val
|
||||
int base = offset
|
||||
+ sp1Val*blockSizeSp2
|
||||
+ sp2Val*blockSizeF
|
||||
+ valF*statesClass_;
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
probs[c] *= childProbs_[base + c];
|
||||
}
|
||||
offset += (fCard * sp1Card * sp2Card * statesClass_);
|
||||
}
|
||||
|
||||
// Normalize
|
||||
normalize(probs);
|
||||
return probs;
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// predict_proba (batch)
|
||||
// --------------------------------------
|
||||
std::vector<std::vector<double>> XSp2de::predict_proba(std::vector<std::vector<int>> &test_data)
|
||||
{
|
||||
int test_size = test_data[0].size(); // each feature is test_data[f], size = #samples
|
||||
int sample_size = test_data.size(); // = nFeatures_
|
||||
std::vector<std::vector<double>> probabilities(
|
||||
test_size, std::vector<double>(statesClass_, 0.0));
|
||||
|
||||
// same concurrency approach
|
||||
int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
|
||||
std::vector<std::thread> threads;
|
||||
|
||||
auto worker = [&](const std::vector<std::vector<int>> &samples,
|
||||
int begin,
|
||||
int chunk,
|
||||
int sample_size,
|
||||
std::vector<std::vector<double>> &predictions) {
|
||||
std::string threadName =
|
||||
"XSp2de-" + std::to_string(begin) + "-" + std::to_string(chunk);
|
||||
#if defined(__linux__)
|
||||
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||
#else
|
||||
pthread_setname_np(threadName.c_str());
|
||||
#endif
|
||||
|
||||
std::vector<int> instance(sample_size);
|
||||
for (int sample = begin; sample < begin + chunk; ++sample) {
|
||||
for (int feature = 0; feature < sample_size; ++feature) {
|
||||
instance[feature] = samples[feature][sample];
|
||||
}
|
||||
predictions[sample] = predict_proba(instance);
|
||||
}
|
||||
semaphore_.release();
|
||||
};
|
||||
|
||||
for (int begin = 0; begin < test_size; begin += chunk_size) {
|
||||
int chunk = std::min(chunk_size, test_size - begin);
|
||||
semaphore_.acquire();
|
||||
threads.emplace_back(worker, test_data, begin, chunk, sample_size,
|
||||
std::ref(probabilities));
|
||||
}
|
||||
for (auto &th : threads) {
|
||||
th.join();
|
||||
}
|
||||
return probabilities;
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// predict (single instance)
|
||||
// --------------------------------------
|
||||
int XSp2de::predict(const std::vector<int> &instance) const
|
||||
{
|
||||
auto p = predict_proba(instance);
|
||||
return static_cast<int>(
|
||||
std::distance(p.begin(), std::max_element(p.begin(), p.end()))
|
||||
);
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// predict (batch of data)
|
||||
// --------------------------------------
|
||||
std::vector<int> XSp2de::predict(std::vector<std::vector<int>> &test_data)
|
||||
{
|
||||
auto probabilities = predict_proba(test_data);
|
||||
std::vector<int> predictions(probabilities.size(), 0);
|
||||
|
||||
for (size_t i = 0; i < probabilities.size(); i++) {
|
||||
predictions[i] = static_cast<int>(
|
||||
std::distance(probabilities[i].begin(),
|
||||
std::max_element(probabilities[i].begin(),
|
||||
probabilities[i].end()))
|
||||
);
|
||||
}
|
||||
return predictions;
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// predict (torch::Tensor version)
|
||||
// --------------------------------------
|
||||
torch::Tensor XSp2de::predict(torch::Tensor &X)
|
||||
{
|
||||
auto X_ = TensorUtils::to_matrix(X);
|
||||
auto result_v = predict(X_);
|
||||
return torch::tensor(result_v, torch::kInt32);
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// predict_proba (torch::Tensor version)
|
||||
// --------------------------------------
|
||||
torch::Tensor XSp2de::predict_proba(torch::Tensor &X)
|
||||
{
|
||||
auto X_ = TensorUtils::to_matrix(X);
|
||||
auto result_v = predict_proba(X_);
|
||||
int n_samples = X.size(1);
|
||||
torch::Tensor result =
|
||||
torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
|
||||
for (int i = 0; i < (int)result_v.size(); ++i) {
|
||||
result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// score (torch::Tensor version)
|
||||
// --------------------------------------
|
||||
float XSp2de::score(torch::Tensor &X, torch::Tensor &y)
|
||||
{
|
||||
torch::Tensor y_pred = predict(X);
|
||||
return (y_pred == y).sum().item<float>() / y.size(0);
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// score (vector version)
|
||||
// --------------------------------------
|
||||
float XSp2de::score(std::vector<std::vector<int>> &X, std::vector<int> &y)
|
||||
{
|
||||
auto y_pred = predict(X);
|
||||
int correct = 0;
|
||||
for (size_t i = 0; i < y_pred.size(); ++i) {
|
||||
if (y_pred[i] == y[i]) {
|
||||
correct++;
|
||||
}
|
||||
}
|
||||
return static_cast<float>(correct) / static_cast<float>(y_pred.size());
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// Utility: normalize
|
||||
// --------------------------------------
|
||||
void XSp2de::normalize(std::vector<double> &v) const
|
||||
{
|
||||
double sum = 0.0;
|
||||
for (auto &val : v) {
|
||||
sum += val;
|
||||
}
|
||||
if (sum > 0.0) {
|
||||
for (auto &val : v) {
|
||||
val /= sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// to_string
|
||||
// --------------------------------------
|
||||
std::string XSp2de::to_string() const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "----- XSp2de Model -----\n"
|
||||
<< "nFeatures_ = " << nFeatures_ << "\n"
|
||||
<< "superParent1_ = " << superParent1_ << "\n"
|
||||
<< "superParent2_ = " << superParent2_ << "\n"
|
||||
<< "statesClass_ = " << statesClass_ << "\n\n";
|
||||
|
||||
oss << "States: [";
|
||||
for (auto s : states_) oss << s << " ";
|
||||
oss << "]\n";
|
||||
|
||||
oss << "classCounts_:\n";
|
||||
for (auto v : classCounts_) oss << v << " ";
|
||||
oss << "\nclassPriors_:\n";
|
||||
for (auto v : classPriors_) oss << v << " ";
|
||||
oss << "\nsp1FeatureCounts_ (size=" << sp1FeatureCounts_.size() << ")\n";
|
||||
for (auto v : sp1FeatureCounts_) oss << v << " ";
|
||||
oss << "\nsp2FeatureCounts_ (size=" << sp2FeatureCounts_.size() << ")\n";
|
||||
for (auto v : sp2FeatureCounts_) oss << v << " ";
|
||||
oss << "\nchildCounts_ (size=" << childCounts_.size() << ")\n";
|
||||
for (auto v : childCounts_) oss << v << " ";
|
||||
|
||||
oss << "\nchildOffsets_:\n";
|
||||
for (auto c : childOffsets_) oss << c << " ";
|
||||
|
||||
oss << "\n----------------------------------------\n";
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// Some introspection about the graph
|
||||
// --------------------------------------
|
||||
int XSp2de::getNumberOfNodes() const
|
||||
{
|
||||
// nFeatures + 1 class node
|
||||
return nFeatures_ + 1;
|
||||
}
|
||||
|
||||
int XSp2de::getClassNumStates() const
|
||||
{
|
||||
return statesClass_;
|
||||
}
|
||||
|
||||
int XSp2de::getNFeatures() const
|
||||
{
|
||||
return nFeatures_;
|
||||
}
|
||||
|
||||
int XSp2de::getNumberOfStates() const
|
||||
{
|
||||
// purely an example. Possibly you want to sum up actual
|
||||
// cardinalities or something else.
|
||||
return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
|
||||
}
|
||||
|
||||
int XSp2de::getNumberOfEdges() const
|
||||
{
|
||||
// In an SPNDE with n=2, for each feature we have edges from class, sp1, sp2.
|
||||
// So that’s 3*(nFeatures_) edges, minus the ones for the superparents themselves,
|
||||
// plus the edges from class->superparent1, class->superparent2.
|
||||
// For a quick approximation:
|
||||
// - class->sp1, class->sp2 => 2 edges
|
||||
// - class->child => (nFeatures -2) edges
|
||||
// - sp1->child, sp2->child => 2*(nFeatures -2) edges
|
||||
// total = 2 + (nFeatures-2) + 2*(nFeatures-2) = 2 + 3*(nFeatures-2)
|
||||
// = 3nFeatures - 4 (just an example).
|
||||
// You can adapt to your liking:
|
||||
return 3 * nFeatures_ - 4;
|
||||
}
|
||||
|
||||
} // namespace bayesnet
|
||||
|
75
bayesnet/classifiers/XSP2DE.h
Normal file
75
bayesnet/classifiers/XSP2DE.h
Normal file
@@ -0,0 +1,75 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef XSP2DE_H
|
||||
#define XSP2DE_H
|
||||
|
||||
#include "Classifier.h"
|
||||
#include "bayesnet/utils/CountingSemaphore.h"
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
class XSp2de : public Classifier {
|
||||
public:
|
||||
XSp2de(int spIndex1, int spIndex2);
|
||||
void setHyperparameters(const nlohmann::json &hyperparameters_) override;
|
||||
void fitx(torch::Tensor &X, torch::Tensor &y, torch::Tensor &weights_, const Smoothing_t smoothing);
|
||||
std::vector<double> predict_proba(const std::vector<int> &instance) const;
|
||||
std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>> &test_data) override;
|
||||
int predict(const std::vector<int> &instance) const;
|
||||
std::vector<int> predict(std::vector<std::vector<int>> &test_data) override;
|
||||
torch::Tensor predict(torch::Tensor &X) override;
|
||||
torch::Tensor predict_proba(torch::Tensor &X) override;
|
||||
|
||||
float score(torch::Tensor &X, torch::Tensor &y) override;
|
||||
float score(std::vector<std::vector<int>> &X, std::vector<int> &y) override;
|
||||
std::string to_string() const;
|
||||
std::vector<std::string> graph(const std::string &title) const override {
|
||||
return std::vector<std::string>({title});
|
||||
}
|
||||
|
||||
int getNumberOfNodes() const override;
|
||||
int getNumberOfEdges() const override;
|
||||
int getNFeatures() const;
|
||||
int getClassNumStates() const override;
|
||||
int getNumberOfStates() const override;
|
||||
|
||||
protected:
|
||||
void buildModel(const torch::Tensor &weights) override;
|
||||
void trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing) override;
|
||||
|
||||
private:
|
||||
void addSample(const std::vector<int> &instance, double weight);
|
||||
void normalize(std::vector<double> &v) const;
|
||||
void computeProbabilities();
|
||||
|
||||
int superParent1_;
|
||||
int superParent2_;
|
||||
int nFeatures_;
|
||||
int statesClass_;
|
||||
double alpha_;
|
||||
double initializer_;
|
||||
|
||||
std::vector<int> states_;
|
||||
std::vector<double> classCounts_;
|
||||
std::vector<double> classPriors_;
|
||||
std::vector<double> sp1FeatureCounts_, sp1FeatureProbs_;
|
||||
std::vector<double> sp2FeatureCounts_, sp2FeatureProbs_;
|
||||
// childOffsets_[f] will be the offset into childCounts_ for feature f.
|
||||
// If f is either superParent1 or superParent2, childOffsets_[f] = -1
|
||||
std::vector<int> childOffsets_;
|
||||
// For each child f, we store p(x_f | c, sp1Val, sp2Val). We'll store the raw
|
||||
// counts in childCounts_, and the probabilities in childProbs_, with a
|
||||
// dimension block of size: states_[f]* statesClass_* states_[sp1]* states_[sp2].
|
||||
std::vector<double> childCounts_;
|
||||
std::vector<double> childProbs_;
|
||||
CountingSemaphore &semaphore_;
|
||||
};
|
||||
|
||||
} // namespace bayesnet
|
||||
#endif // XSP2DE_H
|
450
bayesnet/classifiers/XSPODE.cc
Normal file
450
bayesnet/classifiers/XSPODE.cc
Normal file
@@ -0,0 +1,450 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include "XSPODE.h"
|
||||
#include "bayesnet/utils/TensorUtils.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
// --------------------------------------
|
||||
// Constructor
|
||||
// --------------------------------------
|
||||
XSpode::XSpode(int spIndex)
|
||||
: superParent_{ spIndex }, nFeatures_{ 0 }, statesClass_{ 0 }, alpha_{ 1.0 },
|
||||
initializer_{ 1.0 }, semaphore_{ CountingSemaphore::getInstance() },
|
||||
Classifier(Network())
|
||||
{
|
||||
validHyperparameters = { "parent" };
|
||||
}
|
||||
|
||||
void XSpode::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("parent")) {
|
||||
superParent_ = hyperparameters["parent"];
|
||||
hyperparameters.erase("parent");
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
|
||||
void XSpode::fitx(torch::Tensor & X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
|
||||
{
|
||||
m = X.size(1);
|
||||
n = X.size(0);
|
||||
dataset = X;
|
||||
buildDataset(y);
|
||||
buildModel(weights_);
|
||||
trainModel(weights_, smoothing);
|
||||
fitted = true;
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// trainModel
|
||||
// --------------------------------------
|
||||
// Initialize storage needed for the super-parent and child features counts and
|
||||
// probs.
|
||||
// --------------------------------------
|
||||
void XSpode::buildModel(const torch::Tensor& weights)
|
||||
{
|
||||
int numInstances = m;
|
||||
nFeatures_ = n;
|
||||
|
||||
// Derive the number of states for each feature and for the class.
|
||||
// (This is just one approach; adapt to match your environment.)
|
||||
// Here, we assume the user also gave us the total #states per feature in e.g.
|
||||
// statesMap. We'll simply reconstruct the integer states_ array. The last
|
||||
// entry is statesClass_.
|
||||
states_.resize(nFeatures_);
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
// Suppose you look up in “statesMap” by the feature name, or read directly
|
||||
// from X. We'll assume states_[f] = max value in X[f] + 1.
|
||||
states_[f] = dataset[f].max().item<int>() + 1;
|
||||
}
|
||||
// For the class: states_.back() = max(y)+1
|
||||
statesClass_ = dataset[-1].max().item<int>() + 1;
|
||||
|
||||
// Initialize counts
|
||||
classCounts_.resize(statesClass_, 0.0);
|
||||
// p(x_sp = spVal | c)
|
||||
// We'll store these counts in spFeatureCounts_[spVal * statesClass_ + c].
|
||||
spFeatureCounts_.resize(states_[superParent_] * statesClass_, 0.0);
|
||||
|
||||
// For each child ≠ sp, we store p(childVal| c, spVal) in a separate block of
|
||||
// childCounts_. childCounts_ will be sized as sum_{child≠sp} (states_[child]
|
||||
// * statesClass_ * states_[sp]). We also need an offset for each child to
|
||||
// index into childCounts_.
|
||||
childOffsets_.resize(nFeatures_, -1);
|
||||
int totalSize = 0;
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
if (f == superParent_)
|
||||
continue; // skip sp
|
||||
childOffsets_[f] = totalSize;
|
||||
// block size for this child's counts: states_[f] * statesClass_ *
|
||||
// states_[superParent_]
|
||||
totalSize += (states_[f] * statesClass_ * states_[superParent_]);
|
||||
}
|
||||
childCounts_.resize(totalSize, 0.0);
|
||||
}
|
||||
// --------------------------------------
|
||||
// buildModel
|
||||
// --------------------------------------
|
||||
//
|
||||
// We only store conditional probabilities for:
|
||||
// p(x_sp| c) (the super-parent feature)
|
||||
// p(x_child| c, x_sp) for all child ≠ sp
|
||||
//
|
||||
// --------------------------------------
|
||||
void XSpode::trainModel(const torch::Tensor& weights,
|
||||
const bayesnet::Smoothing_t smoothing)
|
||||
{
|
||||
// Accumulate raw counts
|
||||
for (int i = 0; i < m; i++) {
|
||||
std::vector<int> instance(nFeatures_ + 1);
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
instance[f] = dataset[f][i].item<int>();
|
||||
}
|
||||
instance[nFeatures_] = dataset[-1][i].item<int>();
|
||||
addSample(instance, weights[i].item<double>());
|
||||
}
|
||||
switch (smoothing) {
|
||||
case bayesnet::Smoothing_t::ORIGINAL:
|
||||
alpha_ = 1.0 / m;
|
||||
break;
|
||||
case bayesnet::Smoothing_t::LAPLACE:
|
||||
alpha_ = 1.0;
|
||||
break;
|
||||
default:
|
||||
alpha_ = 0.0; // No smoothing
|
||||
}
|
||||
initializer_ = std::numeric_limits<double>::max() /
|
||||
(nFeatures_ * nFeatures_); // for numerical stability
|
||||
// Convert raw counts to probabilities
|
||||
computeProbabilities();
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// addSample
|
||||
// --------------------------------------
|
||||
//
|
||||
// instance has size nFeatures_ + 1, with the class at the end.
|
||||
// We add 1 to the appropriate counters for each (c, superParentVal, childVal).
|
||||
//
|
||||
void XSpode::addSample(const std::vector<int>& instance, double weight)
|
||||
{
|
||||
if (weight <= 0.0)
|
||||
return;
|
||||
|
||||
int c = instance.back();
|
||||
// (A) increment classCounts
|
||||
classCounts_[c] += weight;
|
||||
|
||||
// (B) increment super-parent counts => p(x_sp | c)
|
||||
int spVal = instance[superParent_];
|
||||
spFeatureCounts_[spVal * statesClass_ + c] += weight;
|
||||
|
||||
// (C) increment child counts => p(childVal | c, x_sp)
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
if (f == superParent_)
|
||||
continue;
|
||||
int childVal = instance[f];
|
||||
int offset = childOffsets_[f];
|
||||
// Compute index in childCounts_.
|
||||
// Layout: [ offset + (spVal * states_[f] + childVal) * statesClass_ + c ]
|
||||
int blockSize = states_[f] * statesClass_;
|
||||
int idx = offset + spVal * blockSize + childVal * statesClass_ + c;
|
||||
childCounts_[idx] += weight;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// computeProbabilities
|
||||
// --------------------------------------
|
||||
//
|
||||
// Once all samples are added in COUNTS mode, call this to:
|
||||
// p(c)
|
||||
// p(x_sp = spVal | c)
|
||||
// p(x_child = v | c, x_sp = s_sp)
|
||||
//
|
||||
// --------------------------------------
|
||||
void XSpode::computeProbabilities()
|
||||
{
|
||||
double totalCount =
|
||||
std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
|
||||
|
||||
// p(c) => classPriors_
|
||||
classPriors_.resize(statesClass_, 0.0);
|
||||
if (totalCount <= 0.0) {
|
||||
// fallback => uniform
|
||||
double unif = 1.0 / static_cast<double>(statesClass_);
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
classPriors_[c] = unif;
|
||||
}
|
||||
} else {
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
classPriors_[c] =
|
||||
(classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
|
||||
}
|
||||
}
|
||||
|
||||
// p(x_sp | c)
|
||||
spFeatureProbs_.resize(spFeatureCounts_.size());
|
||||
// denominator for spVal * statesClass_ + c is just classCounts_[c] + alpha_ *
|
||||
// (#states of sp)
|
||||
int spCard = states_[superParent_];
|
||||
for (int spVal = 0; spVal < spCard; spVal++) {
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
double denom = classCounts_[c] + alpha_ * spCard;
|
||||
double num = spFeatureCounts_[spVal * statesClass_ + c] + alpha_;
|
||||
spFeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
|
||||
}
|
||||
}
|
||||
|
||||
// p(x_child | c, x_sp)
|
||||
childProbs_.resize(childCounts_.size());
|
||||
for (int f = 0; f < nFeatures_; f++) {
|
||||
if (f == superParent_)
|
||||
continue;
|
||||
int offset = childOffsets_[f];
|
||||
int childCard = states_[f];
|
||||
|
||||
// For each spVal, c, childVal in childCounts_:
|
||||
for (int spVal = 0; spVal < spCard; spVal++) {
|
||||
for (int childVal = 0; childVal < childCard; childVal++) {
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
int idx = offset + spVal * (childCard * statesClass_) +
|
||||
childVal * statesClass_ + c;
|
||||
|
||||
double num = childCounts_[idx] + alpha_;
|
||||
// denominator = spFeatureCounts_[spVal * statesClass_ + c] + alpha_ *
|
||||
// (#states of child)
|
||||
double denom =
|
||||
spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * childCard;
|
||||
childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// predict_proba
|
||||
// --------------------------------------
|
||||
//
|
||||
// For a single instance x of dimension nFeatures_:
|
||||
// P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp).
|
||||
//
|
||||
// --------------------------------------
|
||||
std::vector<double> XSpode::predict_proba(const std::vector<int>& instance) const
|
||||
{
|
||||
if (!fitted) {
|
||||
throw std::logic_error(CLASSIFIER_NOT_FITTED);
|
||||
}
|
||||
std::vector<double> probs(statesClass_, 0.0);
|
||||
// Multiply p(c) × p(x_sp | c)
|
||||
int spVal = instance[superParent_];
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
double pc = classPriors_[c];
|
||||
double pSpC = spFeatureProbs_[spVal * statesClass_ + c];
|
||||
probs[c] = pc * pSpC * initializer_;
|
||||
}
|
||||
|
||||
// Multiply by each child’s probability p(x_child | c, x_sp)
|
||||
for (int feature = 0; feature < nFeatures_; feature++) {
|
||||
if (feature == superParent_)
|
||||
continue; // skip sp
|
||||
int sf = instance[feature];
|
||||
int offset = childOffsets_[feature];
|
||||
int childCard = states_[feature]; // not used directly, but for clarity
|
||||
// Index into childProbs_ = offset + spVal*(childCard*statesClass_) +
|
||||
// childVal*statesClass_ + c
|
||||
int base = offset + spVal * (childCard * statesClass_) + sf * statesClass_;
|
||||
for (int c = 0; c < statesClass_; c++) {
|
||||
probs[c] *= childProbs_[base + c];
|
||||
}
|
||||
}
|
||||
|
||||
// Normalize
|
||||
normalize(probs);
|
||||
return probs;
|
||||
}
|
||||
std::vector<std::vector<double>> XSpode::predict_proba(std::vector<std::vector<int>>& test_data)
|
||||
{
|
||||
int test_size = test_data[0].size();
|
||||
int sample_size = test_data.size();
|
||||
auto probabilities = std::vector<std::vector<double>>(
|
||||
test_size, std::vector<double>(statesClass_));
|
||||
|
||||
int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
|
||||
std::vector<std::thread> threads;
|
||||
auto worker = [&](const std::vector<std::vector<int>>& samples, int begin,
|
||||
int chunk, int sample_size,
|
||||
std::vector<std::vector<double>>& predictions) {
|
||||
std::string threadName =
|
||||
"(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
|
||||
#if defined(__linux__)
|
||||
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||
#else
|
||||
pthread_setname_np(threadName.c_str());
|
||||
#endif
|
||||
|
||||
std::vector<int> instance(sample_size);
|
||||
for (int sample = begin; sample < begin + chunk; ++sample) {
|
||||
for (int feature = 0; feature < sample_size; ++feature) {
|
||||
instance[feature] = samples[feature][sample];
|
||||
}
|
||||
predictions[sample] = predict_proba(instance);
|
||||
}
|
||||
semaphore_.release();
|
||||
};
|
||||
for (int begin = 0; begin < test_size; begin += chunk_size) {
|
||||
int chunk = std::min(chunk_size, test_size - begin);
|
||||
semaphore_.acquire();
|
||||
threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
return probabilities;
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// Utility: normalize
|
||||
// --------------------------------------
|
||||
void XSpode::normalize(std::vector<double>& v) const
|
||||
{
|
||||
double sum = 0.0;
|
||||
for (auto val : v) {
|
||||
sum += val;
|
||||
}
|
||||
if (sum <= 0.0) {
|
||||
return;
|
||||
}
|
||||
for (auto& val : v) {
|
||||
val /= sum;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------
|
||||
// representation of the model
|
||||
// --------------------------------------
|
||||
std::string XSpode::to_string() const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "----- XSpode Model -----" << std::endl
|
||||
<< "nFeatures_ = " << nFeatures_ << std::endl
|
||||
<< "superParent_ = " << superParent_ << std::endl
|
||||
<< "statesClass_ = " << statesClass_ << std::endl
|
||||
<< std::endl;
|
||||
|
||||
oss << "States: [";
|
||||
for (int s : states_)
|
||||
oss << s << " ";
|
||||
oss << "]" << std::endl;
|
||||
oss << "classCounts_: [";
|
||||
for (double c : classCounts_)
|
||||
oss << c << " ";
|
||||
oss << "]" << std::endl;
|
||||
oss << "classPriors_: [";
|
||||
for (double c : classPriors_)
|
||||
oss << c << " ";
|
||||
oss << "]" << std::endl;
|
||||
oss << "spFeatureCounts_: size = " << spFeatureCounts_.size() << std::endl
|
||||
<< "[";
|
||||
for (double c : spFeatureCounts_)
|
||||
oss << c << " ";
|
||||
oss << "]" << std::endl;
|
||||
oss << "spFeatureProbs_: size = " << spFeatureProbs_.size() << std::endl
|
||||
<< "[";
|
||||
for (double c : spFeatureProbs_)
|
||||
oss << c << " ";
|
||||
oss << "]" << std::endl;
|
||||
oss << "childCounts_: size = " << childCounts_.size() << std::endl << "[";
|
||||
for (double cc : childCounts_)
|
||||
oss << cc << " ";
|
||||
oss << "]" << std::endl;
|
||||
|
||||
for (double cp : childProbs_)
|
||||
oss << cp << " ";
|
||||
oss << "]" << std::endl;
|
||||
oss << "childOffsets_: [";
|
||||
for (int co : childOffsets_)
|
||||
oss << co << " ";
|
||||
oss << "]" << std::endl;
|
||||
oss << std::string(40,'-') << std::endl;
|
||||
return oss.str();
|
||||
}
|
||||
int XSpode::getNumberOfNodes() const { return nFeatures_ + 1; }
|
||||
int XSpode::getClassNumStates() const { return statesClass_; }
|
||||
int XSpode::getNFeatures() const { return nFeatures_; }
|
||||
int XSpode::getNumberOfStates() const
|
||||
{
|
||||
return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
|
||||
}
|
||||
int XSpode::getNumberOfEdges() const
|
||||
{
|
||||
return 2 * nFeatures_ + 1;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Predict overrides (classifier interface)
|
||||
// ------------------------------------------------------
|
||||
int XSpode::predict(const std::vector<int>& instance) const
|
||||
{
|
||||
auto p = predict_proba(instance);
|
||||
return static_cast<int>(std::distance(p.begin(), std::max_element(p.begin(), p.end())));
|
||||
}
|
||||
std::vector<int> XSpode::predict(std::vector<std::vector<int>>& test_data)
|
||||
{
|
||||
auto probabilities = predict_proba(test_data);
|
||||
std::vector<int> predictions(probabilities.size(), 0);
|
||||
|
||||
for (size_t i = 0; i < probabilities.size(); i++) {
|
||||
predictions[i] = std::distance(
|
||||
probabilities[i].begin(),
|
||||
std::max_element(probabilities[i].begin(), probabilities[i].end()));
|
||||
}
|
||||
return predictions;
|
||||
}
|
||||
torch::Tensor XSpode::predict(torch::Tensor& X)
|
||||
{
|
||||
auto X_ = TensorUtils::to_matrix(X);
|
||||
auto result_v = predict(X_);
|
||||
return torch::tensor(result_v, torch::kInt32);
|
||||
}
|
||||
torch::Tensor XSpode::predict_proba(torch::Tensor& X)
|
||||
{
|
||||
auto X_ = TensorUtils::to_matrix(X);
|
||||
auto result_v = predict_proba(X_);
|
||||
int n_samples = X.size(1);
|
||||
torch::Tensor result =
|
||||
torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
|
||||
for (int i = 0; i < result_v.size(); ++i) {
|
||||
result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
float XSpode::score(torch::Tensor& X, torch::Tensor& y)
|
||||
{
|
||||
torch::Tensor y_pred = predict(X);
|
||||
return (y_pred == y).sum().item<float>() / y.size(0);
|
||||
}
|
||||
float XSpode::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
|
||||
{
|
||||
auto y_pred = this->predict(X);
|
||||
int correct = 0;
|
||||
for (int i = 0; i < y_pred.size(); ++i) {
|
||||
if (y_pred[i] == y[i]) {
|
||||
correct++;
|
||||
}
|
||||
}
|
||||
return (double)correct / y_pred.size();
|
||||
}
|
||||
} // namespace bayesnet
|
76
bayesnet/classifiers/XSPODE.h
Normal file
76
bayesnet/classifiers/XSPODE.h
Normal file
@@ -0,0 +1,76 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef XSPODE_H
|
||||
#define XSPODE_H
|
||||
|
||||
#include <vector>
|
||||
#include <torch/torch.h>
|
||||
#include "Classifier.h"
|
||||
#include "bayesnet/utils/CountingSemaphore.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
class XSpode : public Classifier {
|
||||
public:
|
||||
explicit XSpode(int spIndex);
|
||||
std::vector<double> predict_proba(const std::vector<int>& instance) const;
|
||||
std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
|
||||
int predict(const std::vector<int>& instance) const;
|
||||
void normalize(std::vector<double>& v) const;
|
||||
std::string to_string() const;
|
||||
int getNFeatures() const;
|
||||
int getNumberOfNodes() const override;
|
||||
int getNumberOfEdges() const override;
|
||||
int getNumberOfStates() const override;
|
||||
int getClassNumStates() const override;
|
||||
std::vector<int>& getStates();
|
||||
std::vector<std::string> graph(const std::string& title) const override { return std::vector<std::string>({ title }); }
|
||||
void fitx(torch::Tensor& X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing);
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
||||
|
||||
//
|
||||
// Classifier interface
|
||||
//
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
std::vector<int> predict(std::vector<std::vector<int>>& X) override;
|
||||
torch::Tensor predict_proba(torch::Tensor& X) override;
|
||||
float score(torch::Tensor& X, torch::Tensor& y) override;
|
||||
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
|
||||
protected:
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
|
||||
private:
|
||||
void addSample(const std::vector<int>& instance, double weight);
|
||||
void computeProbabilities();
|
||||
int superParent_;
|
||||
int nFeatures_;
|
||||
int statesClass_;
|
||||
std::vector<int> states_; // [states_feat0, ..., states_feat(N-1)] (class not included in this array)
|
||||
|
||||
// Class counts
|
||||
std::vector<double> classCounts_; // [c], accumulative
|
||||
std::vector<double> classPriors_; // [c], after normalization
|
||||
|
||||
// For p(x_sp = spVal | c)
|
||||
std::vector<double> spFeatureCounts_; // [spVal * statesClass_ + c]
|
||||
std::vector<double> spFeatureProbs_; // same shape, after normalization
|
||||
|
||||
// For p(x_child = childVal | x_sp = spVal, c)
|
||||
// childCounts_ is big enough to hold all child features except sp:
|
||||
// For each child f, we store childOffsets_[f] as the start index, then
|
||||
// childVal, spVal, c => the data.
|
||||
std::vector<double> childCounts_;
|
||||
std::vector<double> childProbs_;
|
||||
std::vector<int> childOffsets_;
|
||||
|
||||
double alpha_ = 1.0;
|
||||
double initializer_; // for numerical stability
|
||||
CountingSemaphore& semaphore_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // XSPODE_H
|
@@ -3,26 +3,25 @@
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
#include <folding.hpp>
|
||||
#include "Boost.h"
|
||||
#include "bayesnet/feature_selection/CFS.h"
|
||||
#include "bayesnet/feature_selection/FCBF.h"
|
||||
#include "bayesnet/feature_selection/IWSS.h"
|
||||
#include "Boost.h"
|
||||
#include <folding.hpp>
|
||||
|
||||
namespace bayesnet {
|
||||
Boost::Boost(bool predict_voting) : Ensemble(predict_voting)
|
||||
{
|
||||
validHyperparameters = { "alpha_block", "order", "convergence", "convergence_best", "bisection", "threshold", "maxTolerance",
|
||||
"predict_voting", "select_features", "block_update" };
|
||||
Boost::Boost(bool predict_voting) : Ensemble(predict_voting) {
|
||||
validHyperparameters = {"alpha_block", "order", "convergence", "convergence_best", "bisection",
|
||||
"threshold", "maxTolerance", "predict_voting", "select_features", "block_update"};
|
||||
}
|
||||
void Boost::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
void Boost::setHyperparameters(const nlohmann::json &hyperparameters_) {
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("order")) {
|
||||
std::vector<std::string> algos = {Orders.ASC, Orders.DESC, Orders.RAND};
|
||||
order_algorithm = hyperparameters["order"];
|
||||
if (std::find(algos.begin(), algos.end(), order_algorithm) == algos.end()) {
|
||||
throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC + ", " + Orders.RAND + "]");
|
||||
throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC +
|
||||
", " + Orders.RAND + "]");
|
||||
}
|
||||
hyperparameters.erase("order");
|
||||
}
|
||||
@@ -48,8 +47,8 @@ namespace bayesnet {
|
||||
}
|
||||
if (hyperparameters.contains("maxTolerance")) {
|
||||
maxTolerance = hyperparameters["maxTolerance"];
|
||||
if (maxTolerance < 1 || maxTolerance > 4)
|
||||
throw std::invalid_argument("Invalid maxTolerance value, must be greater in [1, 4]");
|
||||
if (maxTolerance < 1 || maxTolerance > 6)
|
||||
throw std::invalid_argument("Invalid maxTolerance value, must be greater in [1, 6]");
|
||||
hyperparameters.erase("maxTolerance");
|
||||
}
|
||||
if (hyperparameters.contains("predict_voting")) {
|
||||
@@ -62,7 +61,8 @@ namespace bayesnet {
|
||||
selectFeatures = true;
|
||||
select_features_algorithm = selectedAlgorithm;
|
||||
if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
|
||||
throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " + SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
|
||||
throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " +
|
||||
SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
|
||||
}
|
||||
hyperparameters.erase("select_features");
|
||||
}
|
||||
@@ -78,8 +78,17 @@ namespace bayesnet {
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
void Boost::buildModel(const torch::Tensor& weights)
|
||||
{
|
||||
void Boost::add_model(std::unique_ptr<Classifier> model, double significance) {
|
||||
models.push_back(std::move(model));
|
||||
n_models++;
|
||||
significanceModels.push_back(significance);
|
||||
}
|
||||
void Boost::remove_last_model() {
|
||||
models.pop_back();
|
||||
significanceModels.pop_back();
|
||||
n_models--;
|
||||
}
|
||||
void Boost::buildModel(const torch::Tensor &weights) {
|
||||
// Models shall be built in trainModel
|
||||
models.clear();
|
||||
significanceModels.clear();
|
||||
@@ -109,8 +118,7 @@ namespace bayesnet {
|
||||
y_train = y_;
|
||||
}
|
||||
}
|
||||
std::vector<int> Boost::featureSelection(torch::Tensor& weights_)
|
||||
{
|
||||
std::vector<int> Boost::featureSelection(torch::Tensor &weights_) {
|
||||
int maxFeatures = 0;
|
||||
if (select_features_algorithm == SelectFeatures.CFS) {
|
||||
featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
|
||||
@@ -118,26 +126,30 @@ namespace bayesnet {
|
||||
if (threshold < 0 || threshold > 0.5) {
|
||||
throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.IWSS + " [0, 0.5]");
|
||||
}
|
||||
featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||
featureSelector =
|
||||
new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||
} else if (select_features_algorithm == SelectFeatures.FCBF) {
|
||||
if (threshold < 1e-7 || threshold > 1) {
|
||||
throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.FCBF + " [1e-7, 1]");
|
||||
}
|
||||
featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||
featureSelector =
|
||||
new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||
}
|
||||
featureSelector->fit();
|
||||
auto featuresUsed = featureSelector->getFeatures();
|
||||
delete featureSelector;
|
||||
return featuresUsed;
|
||||
}
|
||||
std::tuple<torch::Tensor&, double, bool> Boost::update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
|
||||
{
|
||||
std::tuple<torch::Tensor &, double, bool> Boost::update_weights(torch::Tensor &ytrain, torch::Tensor &ypred,
|
||||
torch::Tensor &weights) {
|
||||
bool terminate = false;
|
||||
double alpha_t = 0;
|
||||
auto mask_wrong = ypred != ytrain;
|
||||
auto mask_right = ypred == ytrain;
|
||||
auto masked_weights = weights * mask_wrong.to(weights.dtype());
|
||||
double epsilon_t = masked_weights.sum().item<double>();
|
||||
// std::cout << "epsilon_t: " << epsilon_t << " count wrong: " << mask_wrong.sum().item<int>() << " count right: "
|
||||
// << mask_right.sum().item<int>() << std::endl;
|
||||
if (epsilon_t > 0.5) {
|
||||
// Inverse the weights policy (plot ln(wt))
|
||||
// "In each round of AdaBoost, there is a sanity check to ensure that the current base
|
||||
@@ -157,8 +169,8 @@ namespace bayesnet {
|
||||
}
|
||||
return {weights, alpha_t, terminate};
|
||||
}
|
||||
std::tuple<torch::Tensor&, double, bool> Boost::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
|
||||
{
|
||||
std::tuple<torch::Tensor &, double, bool> Boost::update_weights_block(int k, torch::Tensor &ytrain,
|
||||
torch::Tensor &weights) {
|
||||
/* Update Block algorithm
|
||||
k = # of models in block
|
||||
n_models = # of models in ensemble to make predictions
|
||||
@@ -253,4 +265,4 @@ namespace bayesnet {
|
||||
n_models = n_models_bak;
|
||||
return {weights, alpha_t, terminate};
|
||||
}
|
||||
}
|
||||
} // namespace bayesnet
|
||||
|
@@ -27,22 +27,27 @@ namespace bayesnet {
|
||||
class Boost : public Ensemble {
|
||||
public:
|
||||
explicit Boost(bool predict_voting = false);
|
||||
virtual ~Boost() = default;
|
||||
virtual ~Boost() override = default;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
||||
protected:
|
||||
std::vector<int> featureSelection(torch::Tensor& weights_);
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights);
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights);
|
||||
void add_model(std::unique_ptr<Classifier> model, double significance);
|
||||
void remove_last_model();
|
||||
//
|
||||
// Attributes
|
||||
//
|
||||
torch::Tensor X_train, y_train, X_test, y_test;
|
||||
// Hyperparameters
|
||||
bool bisection = true; // if true, use bisection stratety to add k models at once to the ensemble
|
||||
int maxTolerance = 3;
|
||||
std::string order_algorithm; // order to process the KBest features asc, desc, rand
|
||||
std::string order_algorithm = Orders.DESC; // order to process the KBest features asc, desc, rand
|
||||
bool convergence = true; //if true, stop when the model does not improve
|
||||
bool convergence_best = false; // wether to keep the best accuracy to the moment or the last accuracy as prior accuracy
|
||||
bool selectFeatures = false; // if true, use feature selection
|
||||
std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
|
||||
std::string select_features_algorithm; // Selected feature selection algorithm
|
||||
FeatureSelect* featureSelector = nullptr;
|
||||
double threshold = -1;
|
||||
bool block_update = false; // if true, use block update algorithm, only meaningful if bisection is true
|
||||
|
@@ -4,14 +4,9 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <set>
|
||||
#include <functional>
|
||||
#include <limits.h>
|
||||
#include <tuple>
|
||||
#include <folding.hpp>
|
||||
#include "bayesnet/feature_selection/CFS.h"
|
||||
#include "bayesnet/feature_selection/FCBF.h"
|
||||
#include "bayesnet/feature_selection/IWSS.h"
|
||||
#include "BoostA2DE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@@ -4,14 +4,17 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "BoostAODE.h"
|
||||
#include "bayesnet/classifiers/SPODE.h"
|
||||
#include <limits.h>
|
||||
#include <loguru.cpp>
|
||||
#include <loguru.hpp>
|
||||
#include <random>
|
||||
#include <set>
|
||||
#include <functional>
|
||||
#include <limits.h>
|
||||
#include <tuple>
|
||||
#include "BoostAODE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
namespace bayesnet
|
||||
{
|
||||
|
||||
BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting)
|
||||
{
|
||||
@@ -46,14 +49,16 @@ namespace bayesnet {
|
||||
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
n_models = 0;
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels(smoothing);
|
||||
auto ypred = predict(X_train);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
// Update significance of the models
|
||||
for (int i = 0; i < n_models; ++i) {
|
||||
significanceModels[i] = alpha_t;
|
||||
significanceModels.push_back(alpha_t);
|
||||
}
|
||||
// VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models);
|
||||
if (finished) {
|
||||
return;
|
||||
}
|
||||
@@ -77,10 +82,8 @@ namespace bayesnet {
|
||||
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
|
||||
}
|
||||
// Remove used features
|
||||
featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x)
|
||||
{ return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
|
||||
end(featureSelection)
|
||||
);
|
||||
featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x) { return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed); }),
|
||||
end(featureSelection));
|
||||
int k = bisection ? pow(2, tolerance) : 1;
|
||||
int counter = 0; // The model counter of the current pack
|
||||
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
|
||||
@@ -120,7 +123,7 @@ namespace bayesnet {
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(alpha_t);
|
||||
n_models++;
|
||||
// VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
|
||||
// VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size());
|
||||
}
|
||||
if (block_update) {
|
||||
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
|
||||
@@ -163,7 +166,7 @@ namespace bayesnet {
|
||||
}
|
||||
} else {
|
||||
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
||||
// VLG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
||||
}
|
||||
}
|
||||
if (featuresUsed.size() != features.size()) {
|
||||
|
@@ -8,7 +8,6 @@
|
||||
#define BOOSTAODE_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "bayesnet/classifiers/SPODE.h"
|
||||
#include "Boost.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
@@ -4,7 +4,6 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
#include "Ensemble.h"
|
||||
#include "bayesnet/utils/CountingSemaphore.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
@@ -86,6 +85,7 @@ namespace bayesnet {
|
||||
torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
auto ypredict = models[i]->predict_proba(X);
|
||||
/*std::cout << "model " << i << " prediction: " << ypredict << " significance " << significanceModels[i] << std::endl;*/
|
||||
y_pred += ypredict * significanceModels[i];
|
||||
}
|
||||
auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
|
||||
|
@@ -33,9 +33,15 @@ namespace bayesnet {
|
||||
}
|
||||
std::string dump_cpt() const override
|
||||
{
|
||||
return "";
|
||||
std::string output;
|
||||
for (auto& model : models) {
|
||||
output += model->dump_cpt();
|
||||
output += std::string(80, '-') + "\n";
|
||||
}
|
||||
return output;
|
||||
}
|
||||
protected:
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
torch::Tensor predict_average_voting(torch::Tensor& X);
|
||||
std::vector<std::vector<double>> predict_average_voting(std::vector<std::vector<int>>& X);
|
||||
torch::Tensor predict_average_proba(torch::Tensor& X);
|
||||
@@ -43,10 +49,10 @@ namespace bayesnet {
|
||||
torch::Tensor compute_arg_max(torch::Tensor& X);
|
||||
std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X);
|
||||
torch::Tensor voting(torch::Tensor& votes);
|
||||
// Attributes
|
||||
unsigned n_models;
|
||||
std::vector<std::unique_ptr<Classifier>> models;
|
||||
std::vector<double> significanceModels;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
bool predict_voting;
|
||||
};
|
||||
}
|
||||
|
168
bayesnet/ensembles/XBA2DE.cc
Normal file
168
bayesnet/ensembles/XBA2DE.cc
Normal file
@@ -0,0 +1,168 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <folding.hpp>
|
||||
#include <limits.h>
|
||||
#include "XBA2DE.h"
|
||||
#include "bayesnet/classifiers/XSP2DE.h"
|
||||
#include "bayesnet/utils/TensorUtils.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
XBA2DE::XBA2DE(bool predict_voting) : Boost(predict_voting) {}
|
||||
std::vector<int> XBA2DE::initializeModels(const Smoothing_t smoothing) {
|
||||
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
|
||||
std::vector<int> featuresSelected = featureSelection(weights_);
|
||||
if (featuresSelected.size() < 2) {
|
||||
notes.push_back("No features selected in initialization");
|
||||
status = ERROR;
|
||||
return std::vector<int>();
|
||||
}
|
||||
for (int i = 0; i < featuresSelected.size() - 1; i++) {
|
||||
for (int j = i + 1; j < featuresSelected.size(); j++) {
|
||||
std::unique_ptr<Classifier> model = std::make_unique<XSp2de>(featuresSelected[i], featuresSelected[j]);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
add_model(std::move(model), 1.0);
|
||||
}
|
||||
}
|
||||
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " +
|
||||
std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||
return featuresSelected;
|
||||
}
|
||||
void XBA2DE::trainModel(const torch::Tensor &weights, const Smoothing_t smoothing) {
|
||||
//
|
||||
// Logging setup
|
||||
//
|
||||
// loguru::set_thread_name("XBA2DE");
|
||||
// loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
|
||||
// loguru::add_file("boostA2DE.log", loguru::Truncate, loguru::Verbosity_MAX);
|
||||
|
||||
// Algorithm based on the adaboost algorithm for classification
|
||||
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
|
||||
X_train_ = TensorUtils::to_matrix(X_train);
|
||||
y_train_ = TensorUtils::to_vector<int>(y_train);
|
||||
if (convergence) {
|
||||
X_test_ = TensorUtils::to_matrix(X_test);
|
||||
y_test_ = TensorUtils::to_vector<int>(y_test);
|
||||
}
|
||||
fitted = true;
|
||||
double alpha_t = 0;
|
||||
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels(smoothing);
|
||||
if (featuresUsed.size() == 0) {
|
||||
return;
|
||||
}
|
||||
auto ypred = predict(X_train);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
// Update significance of the models
|
||||
for (int i = 0; i < n_models; ++i) {
|
||||
significanceModels[i] = alpha_t;
|
||||
}
|
||||
if (finished) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
int numItemsPack = 0; // The counter of the models inserted in the current pack
|
||||
// Variables to control the accuracy finish condition
|
||||
double priorAccuracy = 0.0;
|
||||
double improvement = 1.0;
|
||||
double convergence_threshold = 1e-4;
|
||||
int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
|
||||
// Step 0: Set the finish condition
|
||||
// epsilon sub t > 0.5 => inverse the weights policy
|
||||
// validation error is not decreasing
|
||||
// run out of features
|
||||
bool ascending = order_algorithm == Orders.ASC;
|
||||
std::mt19937 g{173};
|
||||
std::vector<std::pair<int, int>> pairSelection;
|
||||
while (!finished) {
|
||||
// Step 1: Build ranking with mutual information
|
||||
pairSelection = metrics.SelectKPairs(weights_, featuresUsed, ascending, 0); // Get all the pairs sorted
|
||||
if (order_algorithm == Orders.RAND) {
|
||||
std::shuffle(pairSelection.begin(), pairSelection.end(), g);
|
||||
}
|
||||
int k = bisection ? pow(2, tolerance) : 1;
|
||||
int counter = 0; // The model counter of the current pack
|
||||
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
|
||||
while (counter++ < k && pairSelection.size() > 0) {
|
||||
auto feature_pair = pairSelection[0];
|
||||
pairSelection.erase(pairSelection.begin());
|
||||
std::unique_ptr<Classifier> model;
|
||||
model = std::make_unique<XSp2de>(feature_pair.first, feature_pair.second);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
alpha_t = 0.0;
|
||||
if (!block_update) {
|
||||
auto ypred = model->predict(X_train);
|
||||
// Step 3.1: Compute the classifier amout of say
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
}
|
||||
// Step 3.4: Store classifier and its accuracy to weigh its future vote
|
||||
numItemsPack++;
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(alpha_t);
|
||||
n_models++;
|
||||
// VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models,
|
||||
// featuresUsed.size());
|
||||
}
|
||||
if (block_update) {
|
||||
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
|
||||
}
|
||||
if (convergence && !finished) {
|
||||
auto y_val_predict = predict(X_test);
|
||||
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
|
||||
if (priorAccuracy == 0) {
|
||||
priorAccuracy = accuracy;
|
||||
} else {
|
||||
improvement = accuracy - priorAccuracy;
|
||||
}
|
||||
if (improvement < convergence_threshold) {
|
||||
// VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f
|
||||
// current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance++;
|
||||
} else {
|
||||
// VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f
|
||||
// prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance = 0; // Reset the counter if the model performs better
|
||||
numItemsPack = 0;
|
||||
}
|
||||
if (convergence_best) {
|
||||
// Keep the best accuracy until now as the prior accuracy
|
||||
priorAccuracy = std::max(accuracy, priorAccuracy);
|
||||
} else {
|
||||
// Keep the last accuray obtained as the prior accuracy
|
||||
priorAccuracy = accuracy;
|
||||
}
|
||||
}
|
||||
// VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(),
|
||||
// features.size());
|
||||
finished = finished || tolerance > maxTolerance || pairSelection.size() == 0;
|
||||
}
|
||||
if (tolerance > maxTolerance) {
|
||||
if (numItemsPack < n_models) {
|
||||
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
|
||||
for (int i = 0; i < numItemsPack; ++i) {
|
||||
significanceModels.pop_back();
|
||||
models.pop_back();
|
||||
n_models--;
|
||||
}
|
||||
} else {
|
||||
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d",
|
||||
// n_models, numItemsPack);
|
||||
}
|
||||
}
|
||||
if (pairSelection.size() > 0) {
|
||||
notes.push_back("Pairs not used in train: " + std::to_string(pairSelection.size()));
|
||||
status = WARNING;
|
||||
}
|
||||
notes.push_back("Number of models: " + std::to_string(n_models));
|
||||
}
|
||||
std::vector<std::string> XBA2DE::graph(const std::string &title) const { return Ensemble::graph(title); }
|
||||
} // namespace bayesnet
|
28
bayesnet/ensembles/XBA2DE.h
Normal file
28
bayesnet/ensembles/XBA2DE.h
Normal file
@@ -0,0 +1,28 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef XBA2DE_H
|
||||
#define XBA2DE_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "Boost.h"
|
||||
namespace bayesnet {
|
||||
class XBA2DE : public Boost {
|
||||
public:
|
||||
explicit XBA2DE(bool predict_voting = false);
|
||||
virtual ~XBA2DE() = default;
|
||||
std::vector<std::string> graph(const std::string& title = "XBA2DE") const override;
|
||||
std::string getVersion() override { return version; };
|
||||
protected:
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
private:
|
||||
std::vector<int> initializeModels(const Smoothing_t smoothing);
|
||||
std::vector<std::vector<int>> X_train_, X_test_;
|
||||
std::vector<int> y_train_, y_test_;
|
||||
std::string version = "0.9.7";
|
||||
};
|
||||
}
|
||||
#endif
|
184
bayesnet/ensembles/XBAODE.cc
Normal file
184
bayesnet/ensembles/XBAODE.cc
Normal file
@@ -0,0 +1,184 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
#include "XBAODE.h"
|
||||
#include "bayesnet/classifiers/XSPODE.h"
|
||||
#include "bayesnet/utils/TensorUtils.h"
|
||||
#include <limits.h>
|
||||
#include <random>
|
||||
#include <tuple>
|
||||
|
||||
namespace bayesnet
|
||||
{
|
||||
XBAODE::XBAODE() : Boost(false) {}
|
||||
std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing)
|
||||
{
|
||||
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
|
||||
std::vector<int> featuresSelected = featureSelection(weights_);
|
||||
for (const int &feature : featuresSelected) {
|
||||
std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
add_model(std::move(model), 1.0);
|
||||
}
|
||||
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " +
|
||||
std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||
return featuresSelected;
|
||||
}
|
||||
void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing)
|
||||
{
|
||||
X_train_ = TensorUtils::to_matrix(X_train);
|
||||
y_train_ = TensorUtils::to_vector<int>(y_train);
|
||||
if (convergence) {
|
||||
X_test_ = TensorUtils::to_matrix(X_test);
|
||||
y_test_ = TensorUtils::to_vector<int>(y_test);
|
||||
}
|
||||
fitted = true;
|
||||
double alpha_t;
|
||||
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
n_models = 0;
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels(smoothing);
|
||||
auto ypred = predict(X_train_);
|
||||
auto ypred_t = torch::tensor(ypred);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
|
||||
// Update significance of the models
|
||||
for (const int &feature : featuresUsed) {
|
||||
significanceModels.pop_back();
|
||||
}
|
||||
for (const int &feature : featuresUsed) {
|
||||
significanceModels.push_back(alpha_t);
|
||||
}
|
||||
// VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t,
|
||||
// n_models);
|
||||
if (finished) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
int numItemsPack = 0; // The counter of the models inserted in the current pack
|
||||
// Variables to control the accuracy finish condition
|
||||
double priorAccuracy = 0.0;
|
||||
double improvement = 1.0;
|
||||
double convergence_threshold = 1e-4;
|
||||
int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
|
||||
// Step 0: Set the finish condition
|
||||
// epsilon sub t > 0.5 => inverse the weights_ policy
|
||||
// validation error is not decreasing
|
||||
// run out of features
|
||||
bool ascending = order_algorithm == bayesnet::Orders.ASC;
|
||||
std::mt19937 g{173};
|
||||
while (!finished) {
|
||||
// Step 1: Build ranking with mutual information
|
||||
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
|
||||
if (order_algorithm == bayesnet::Orders.RAND) {
|
||||
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
|
||||
}
|
||||
// Remove used features
|
||||
featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(),
|
||||
[&](auto x) {
|
||||
return std::find(featuresUsed.begin(), featuresUsed.end(), x) !=
|
||||
featuresUsed.end();
|
||||
}),
|
||||
featureSelection.end());
|
||||
int k = bisection ? pow(2, tolerance) : 1;
|
||||
int counter = 0; // The model counter of the current pack
|
||||
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k,
|
||||
// featureSelection.size());
|
||||
while (counter++ < k && featureSelection.size() > 0) {
|
||||
auto feature = featureSelection[0];
|
||||
featureSelection.erase(featureSelection.begin());
|
||||
std::unique_ptr<Classifier> model;
|
||||
model = std::make_unique<XSpode>(feature);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
/*dynamic_cast<XSpode*>(model.get())->fitx(X_train, y_train, weights_,
|
||||
* smoothing); // using exclusive XSpode fit method*/
|
||||
// DEBUG
|
||||
/*std::cout << dynamic_cast<XSpode*>(model.get())->to_string() <<
|
||||
* std::endl;*/
|
||||
// DEBUG
|
||||
std::vector<int> ypred;
|
||||
if (alpha_block) {
|
||||
//
|
||||
// Compute the prediction with the current ensemble + model
|
||||
//
|
||||
// Add the model to the ensemble
|
||||
add_model(std::move(model), 1.0);
|
||||
// Compute the prediction
|
||||
ypred = predict(X_train_);
|
||||
model = std::move(models.back());
|
||||
// Remove the model from the ensemble
|
||||
remove_last_model();
|
||||
} else {
|
||||
ypred = model->predict(X_train_);
|
||||
}
|
||||
// Step 3.1: Compute the classifier amout of say
|
||||
auto ypred_t = torch::tensor(ypred);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
|
||||
// Step 3.4: Store classifier and its accuracy to weigh its future vote
|
||||
numItemsPack++;
|
||||
featuresUsed.push_back(feature);
|
||||
add_model(std::move(model), alpha_t);
|
||||
// VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d
|
||||
// featuresUsed: %zu", finished, numItemsPack, n_models,
|
||||
// featuresUsed.size());
|
||||
} // End of the pack
|
||||
if (convergence && !finished) {
|
||||
auto y_val_predict = predict(X_test);
|
||||
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
|
||||
if (priorAccuracy == 0) {
|
||||
priorAccuracy = accuracy;
|
||||
} else {
|
||||
improvement = accuracy - priorAccuracy;
|
||||
}
|
||||
if (improvement < convergence_threshold) {
|
||||
// VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d
|
||||
// numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
|
||||
// numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance++;
|
||||
} else {
|
||||
// VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d
|
||||
// numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
|
||||
// numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance = 0; // Reset the counter if the model performs better
|
||||
numItemsPack = 0;
|
||||
}
|
||||
if (convergence_best) {
|
||||
// Keep the best accuracy until now as the prior accuracy
|
||||
priorAccuracy = std::max(accuracy, priorAccuracy);
|
||||
} else {
|
||||
// Keep the last accuray obtained as the prior accuracy
|
||||
priorAccuracy = accuracy;
|
||||
}
|
||||
}
|
||||
// VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size:
|
||||
// %zu", tolerance, featuresUsed.size(), features.size());
|
||||
finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
|
||||
}
|
||||
if (tolerance > maxTolerance) {
|
||||
if (numItemsPack < n_models) {
|
||||
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated
|
||||
// of %d", numItemsPack, n_models);
|
||||
for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i) {
|
||||
remove_last_model();
|
||||
}
|
||||
// VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features
|
||||
// used.", n_models, featuresUsed.size());
|
||||
} else {
|
||||
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated
|
||||
// n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
||||
}
|
||||
}
|
||||
if (featuresUsed.size() != features.size()) {
|
||||
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " +
|
||||
std::to_string(features.size()));
|
||||
status = bayesnet::WARNING;
|
||||
}
|
||||
notes.push_back("Number of models: " + std::to_string(n_models));
|
||||
return;
|
||||
}
|
||||
} // namespace bayesnet
|
27
bayesnet/ensembles/XBAODE.h
Normal file
27
bayesnet/ensembles/XBAODE.h
Normal file
@@ -0,0 +1,27 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef XBAODE_H
|
||||
#define XBAODE_H
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include "Boost.h"
|
||||
|
||||
namespace bayesnet {
|
||||
class XBAODE : public Boost {
|
||||
public:
|
||||
XBAODE();
|
||||
std::string getVersion() override { return version; };
|
||||
protected:
|
||||
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
|
||||
private:
|
||||
std::vector<int> initializeModels(const Smoothing_t smoothing);
|
||||
std::vector<std::vector<int>> X_train_, X_test_;
|
||||
std::vector<int> y_train_, y_test_;
|
||||
std::string version = "0.9.7";
|
||||
};
|
||||
}
|
||||
#endif // XBAODE_H
|
@@ -10,14 +10,10 @@
|
||||
#include <vector>
|
||||
#include "bayesnet/config.h"
|
||||
#include "Node.h"
|
||||
#include "Smoothing.h"
|
||||
|
||||
namespace bayesnet {
|
||||
enum class Smoothing_t {
|
||||
NONE = -1,
|
||||
ORIGINAL = 0,
|
||||
LAPLACE,
|
||||
CESTNIK
|
||||
};
|
||||
|
||||
class Network {
|
||||
public:
|
||||
Network();
|
||||
|
@@ -93,36 +93,42 @@ namespace bayesnet {
|
||||
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
|
||||
{
|
||||
dimensions.clear();
|
||||
dimensions.reserve(parents.size() + 1);
|
||||
// Get dimensions of the CPT
|
||||
dimensions.push_back(numStates);
|
||||
transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
|
||||
// Create a tensor of zeros with the dimensions of the CPT
|
||||
cpTable = torch::zeros(dimensions, torch::kDouble) + smoothing;
|
||||
// Fill table with counts
|
||||
auto pos = find(features.begin(), features.end(), name);
|
||||
if (pos == features.end()) {
|
||||
throw std::logic_error("Feature " + name + " not found in dataset");
|
||||
for (const auto& parent : parents) {
|
||||
dimensions.push_back(parent->getNumStates());
|
||||
}
|
||||
//transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
|
||||
// Create a tensor initialized with smoothing
|
||||
cpTable = torch::full(dimensions, smoothing, torch::kDouble);
|
||||
// Create a map for quick feature index lookup
|
||||
std::unordered_map<std::string, int> featureIndexMap;
|
||||
for (size_t i = 0; i < features.size(); ++i) {
|
||||
featureIndexMap[features[i]] = i;
|
||||
}
|
||||
// Fill table with counts
|
||||
// Get the index of this node's feature
|
||||
int name_index = featureIndexMap[name];
|
||||
// Get parent indices in dataset
|
||||
std::vector<int> parent_indices;
|
||||
parent_indices.reserve(parents.size());
|
||||
for (const auto& parent : parents) {
|
||||
parent_indices.push_back(featureIndexMap[parent->getName()]);
|
||||
}
|
||||
int name_index = pos - features.begin();
|
||||
c10::List<c10::optional<at::Tensor>> coordinates;
|
||||
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
|
||||
coordinates.clear();
|
||||
auto sample = dataset.index({ "...", n_sample });
|
||||
coordinates.push_back(sample[name_index]);
|
||||
for (auto parent : parents) {
|
||||
pos = find(features.begin(), features.end(), parent->getName());
|
||||
if (pos == features.end()) {
|
||||
throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
|
||||
}
|
||||
int parent_index = pos - features.begin();
|
||||
coordinates.push_back(sample[parent_index]);
|
||||
for (size_t i = 0; i < parent_indices.size(); ++i) {
|
||||
coordinates.push_back(sample[parent_indices[i]]);
|
||||
}
|
||||
// Increment the count of the corresponding coordinate
|
||||
cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
|
||||
}
|
||||
// Normalize the counts
|
||||
// Divide each row by the sum of the row
|
||||
cpTable = cpTable / cpTable.sum(0);
|
||||
// Normalize the counts (dividing each row by the sum of the row)
|
||||
cpTable /= cpTable.sum(0, true);
|
||||
}
|
||||
double Node::getFactorValue(std::map<std::string, int>& evidence)
|
||||
{
|
||||
|
17
bayesnet/network/Smoothing.h
Normal file
17
bayesnet/network/Smoothing.h
Normal file
@@ -0,0 +1,17 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef SMOOTHING_H
|
||||
#define SMOOTHING_H
|
||||
namespace bayesnet {
|
||||
enum class Smoothing_t {
|
||||
NONE = -1,
|
||||
ORIGINAL = 0,
|
||||
LAPLACE,
|
||||
CESTNIK
|
||||
};
|
||||
}
|
||||
#endif // SMOOTHING_H
|
@@ -32,6 +32,14 @@ public:
|
||||
cv_.notify_one();
|
||||
}
|
||||
}
|
||||
uint getCount() const
|
||||
{
|
||||
return count_;
|
||||
}
|
||||
uint getMaxCount() const
|
||||
{
|
||||
return max_count_;
|
||||
}
|
||||
private:
|
||||
CountingSemaphore()
|
||||
: max_count_(std::max(1u, static_cast<uint>(0.95 * std::thread::hardware_concurrency()))),
|
||||
|
51
bayesnet/utils/TensorUtils.h
Normal file
51
bayesnet/utils/TensorUtils.h
Normal file
@@ -0,0 +1,51 @@
|
||||
#ifndef TENSORUTILS_H
|
||||
#define TENSORUTILS_H
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
namespace bayesnet {
|
||||
class TensorUtils {
|
||||
public:
|
||||
static std::vector<std::vector<int>> to_matrix(const torch::Tensor& X)
|
||||
{
|
||||
// Ensure tensor is contiguous in memory
|
||||
auto X_contig = X.contiguous();
|
||||
|
||||
// Access tensor data pointer directly
|
||||
auto data_ptr = X_contig.data_ptr<int>();
|
||||
|
||||
// IF you are using int64_t as the data type, use the following line
|
||||
//auto data_ptr = X_contig.data_ptr<int64_t>();
|
||||
//std::vector<std::vector<int64_t>> data(X.size(0), std::vector<int64_t>(X.size(1)));
|
||||
|
||||
// Prepare output container
|
||||
std::vector<std::vector<int>> data(X.size(0), std::vector<int>(X.size(1)));
|
||||
|
||||
// Fill the 2D vector in a single loop using pointer arithmetic
|
||||
int rows = X.size(0);
|
||||
int cols = X.size(1);
|
||||
for (int i = 0; i < rows; ++i) {
|
||||
std::copy(data_ptr + i * cols, data_ptr + (i + 1) * cols, data[i].begin());
|
||||
}
|
||||
return data;
|
||||
}
|
||||
template <typename T>
|
||||
static std::vector<T> to_vector(const torch::Tensor& y)
|
||||
{
|
||||
// Ensure the tensor is contiguous in memory
|
||||
auto y_contig = y.contiguous();
|
||||
|
||||
// Access data pointer
|
||||
auto data_ptr = y_contig.data_ptr<T>();
|
||||
|
||||
// Prepare output container
|
||||
std::vector<T> data(y.size(0));
|
||||
|
||||
// Copy data efficiently
|
||||
std::copy(data_ptr, data_ptr + y.size(0), data.begin());
|
||||
|
||||
return data;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // TENSORUTILS_H
|
@@ -96,6 +96,8 @@ abstract C_0002617087915615796317 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
{abstract} +topological_order() = 0 : std::vector<std::string>
|
||||
{abstract} #trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) = 0 : void
|
||||
__
|
||||
#notes : std::vector<std::string>
|
||||
#status : status_t
|
||||
#validHyperparameters : std::vector<std::string>
|
||||
}
|
||||
class "bayesnet::Metrics" as C_0005895723015084986588
|
||||
@@ -153,6 +155,7 @@ abstract C_0016351972983202413152 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+topological_order() : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
#CLASSIFIER_NOT_FITTED : const std::string
|
||||
#className : std::string
|
||||
#dataset : torch::Tensor
|
||||
#features : std::vector<std::string>
|
||||
@@ -161,20 +164,44 @@ __
|
||||
#metrics : Metrics
|
||||
#model : Network
|
||||
#n : unsigned int
|
||||
#notes : std::vector<std::string>
|
||||
#states : std::map<std::string,std::vector<int>>
|
||||
#status : status_t
|
||||
}
|
||||
class "bayesnet::Proposal" as C_0017759964713298103839
|
||||
class C_0017759964713298103839 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Proposal(torch::Tensor & pDataset, std::vector<std::string> & features_, std::string & className_) : void
|
||||
+~Proposal() : void
|
||||
..
|
||||
#checkInput(const torch::Tensor & X, const torch::Tensor & y) : void
|
||||
#fit_local_discretization(const torch::Tensor & y) : std::map<std::string,std::vector<int>>
|
||||
#localDiscretizationProposal(const std::map<std::string,std::vector<int>> & states, Network & model) : std::map<std::string,std::vector<int>>
|
||||
#prepareX(torch::Tensor & X) : torch::Tensor
|
||||
__
|
||||
#Xf : torch::Tensor
|
||||
#discretizers : map<std::string,mdlp::CPPFImdlp *>
|
||||
#y : torch::Tensor
|
||||
}
|
||||
class "bayesnet::KDB" as C_0008902920152122000044
|
||||
class C_0008902920152122000044 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDB(int k, float theta = 0.03) : void
|
||||
+~KDB() = default : void
|
||||
..
|
||||
#add_m_edges(int idx, std::vector<int> & S, torch::Tensor & weights) : void
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "KDB") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::KDBLd" as C_0002756018222998454702
|
||||
class C_0002756018222998454702 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDBLd(int k) : void
|
||||
+~KDBLd() = default : void
|
||||
..
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : KDBLd &
|
||||
+graph(const std::string & name = "KDB") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPODE" as C_0004096182510460307610
|
||||
class C_0004096182510460307610 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODE(int root) : void
|
||||
@@ -182,6 +209,20 @@ class C_0004096182510460307610 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPODELd" as C_0010957245114062042836
|
||||
class C_0010957245114062042836 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODELd(int root) : void
|
||||
+~SPODELd() = default : void
|
||||
..
|
||||
+commonFit(const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+graph(const std::string & name = "SPODELd") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPnDE" as C_0016268916386101512883
|
||||
@@ -200,44 +241,7 @@ class C_0014087955399074584137 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "TAN") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Proposal" as C_0017759964713298103839
|
||||
class C_0017759964713298103839 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Proposal(torch::Tensor & pDataset, std::vector<std::string> & features_, std::string & className_) : void
|
||||
+~Proposal() : void
|
||||
..
|
||||
#checkInput(const torch::Tensor & X, const torch::Tensor & y) : void
|
||||
#fit_local_discretization(const torch::Tensor & y) : std::map<std::string,std::vector<int>>
|
||||
#localDiscretizationProposal(const std::map<std::string,std::vector<int>> & states, Network & model) : std::map<std::string,std::vector<int>>
|
||||
#prepareX(torch::Tensor & X) : torch::Tensor
|
||||
__
|
||||
#Xf : torch::Tensor
|
||||
#discretizers : map<std::string,mdlp::CPPFImdlp *>
|
||||
#y : torch::Tensor
|
||||
}
|
||||
class "bayesnet::KDBLd" as C_0002756018222998454702
|
||||
class C_0002756018222998454702 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDBLd(int k) : void
|
||||
+~KDBLd() = default : void
|
||||
..
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : KDBLd &
|
||||
+graph(const std::string & name = "KDB") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPODELd" as C_0010957245114062042836
|
||||
class C_0010957245114062042836 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODELd(int root) : void
|
||||
+~SPODELd() = default : void
|
||||
..
|
||||
+commonFit(const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+graph(const std::string & name = "SPODELd") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::TANLd" as C_0013350632773616302678
|
||||
@@ -250,6 +254,64 @@ class C_0013350632773616302678 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
__
|
||||
}
|
||||
class "bayesnet::XSp2de" as C_0007640742442325463418
|
||||
class C_0007640742442325463418 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+XSp2de(int spIndex1, int spIndex2) : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+fitx(torch::Tensor & X, torch::Tensor & y, torch::Tensor & weights_, const Smoothing_t smoothing) : void
|
||||
+getClassNumStates() const : int
|
||||
+getNFeatures() const : int
|
||||
+getNumberOfEdges() const : int
|
||||
+getNumberOfNodes() const : int
|
||||
+getNumberOfStates() const : int
|
||||
+graph(const std::string & title) const : std::vector<std::string>
|
||||
+predict(const std::vector<int> & instance) const : int
|
||||
+predict(std::vector<std::vector<int>> & test_data) : std::vector<int>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
+predict_proba(const std::vector<int> & instance) const : std::vector<double>
|
||||
+predict_proba(std::vector<std::vector<int>> & test_data) : std::vector<std::vector<double>>
|
||||
+predict_proba(torch::Tensor & X) : torch::Tensor
|
||||
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
|
||||
+score(torch::Tensor & X, torch::Tensor & y) : float
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
+to_string() const : std::string
|
||||
#trainModel(const torch::Tensor & weights, const bayesnet::Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::XSpode" as C_0015654113248178830206
|
||||
class C_0015654113248178830206 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+XSpode(int spIndex) : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+fitx(torch::Tensor & X, torch::Tensor & y, torch::Tensor & weights_, const Smoothing_t smoothing) : void
|
||||
+getClassNumStates() const : int
|
||||
+getNFeatures() const : int
|
||||
+getNumberOfEdges() const : int
|
||||
+getNumberOfNodes() const : int
|
||||
+getNumberOfStates() const : int
|
||||
+getStates() : std::vector<int> &
|
||||
+graph(const std::string & title) const : std::vector<std::string>
|
||||
+normalize(std::vector<double> & v) const : void
|
||||
+predict(const std::vector<int> & instance) const : int
|
||||
+predict(std::vector<std::vector<int>> & X) : std::vector<int>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
+predict_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
|
||||
+predict_proba(torch::Tensor & X) : torch::Tensor
|
||||
+predict_proba(const std::vector<int> & instance) const : std::vector<double>
|
||||
+score(torch::Tensor & X, torch::Tensor & y) : float
|
||||
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
+to_string() const : std::string
|
||||
#trainModel(const torch::Tensor & weights, const bayesnet::Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::TensorUtils" as C_0010304804115474100819
|
||||
class C_0010304804115474100819 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
{static} +to_matrix(const torch::Tensor & X) : std::vector<std::vector<int>>
|
||||
{static} +to_vector<T>(const torch::Tensor & y) : std::vector<T>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Ensemble" as C_0015881931090842884611
|
||||
class C_0015881931090842884611 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Ensemble(bool predict_voting = true) : void
|
||||
@@ -302,6 +364,17 @@ class C_0006288892608974306258 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::AODELd" as C_0003898187834670349177
|
||||
class C_0003898187834670349177 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODELd(bool predict_voting = true) : void
|
||||
+~AODELd() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+fit(torch::Tensor & X_, torch::Tensor & y_, const std::vector<std::string> & features_, const std::string & className_, std::map<std::string,std::vector<int>> & states_, const Smoothing_t smoothing) : AODELd &
|
||||
+graph(const std::string & name = "AODELd") const : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
abstract "bayesnet::FeatureSelect" as C_0013562609546004646591
|
||||
abstract C_0013562609546004646591 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FeatureSelect(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
@@ -324,15 +397,15 @@ __
|
||||
#suLabels : std::vector<double>
|
||||
#weights : const torch::Tensor &
|
||||
}
|
||||
class "bayesnet::(anonymous_60342586)" as C_0005584545181746538542
|
||||
class C_0005584545181746538542 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60357672)" as C_0006397015156479549697
|
||||
class C_0006397015156479549697 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60343240)" as C_0016227156982041949444
|
||||
class C_0016227156982041949444 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60358326)" as C_0013066254331852347304
|
||||
class C_0013066254331852347304 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
@@ -343,14 +416,17 @@ class C_0009819322948617116148 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Boost(bool predict_voting = false) : void
|
||||
+~Boost() = default : void
|
||||
..
|
||||
#add_model(std::unique_ptr<Classifier> model, double significance) : void
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
#featureSelection(torch::Tensor & weights_) : std::vector<int>
|
||||
#remove_last_model() : void
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
#update_weights(torch::Tensor & ytrain, torch::Tensor & ypred, torch::Tensor & weights) : std::tuple<torch::Tensor &,double,bool>
|
||||
#update_weights_block(int k, torch::Tensor & ytrain, torch::Tensor & weights) : std::tuple<torch::Tensor &,double,bool>
|
||||
__
|
||||
#X_test : torch::Tensor
|
||||
#X_train : torch::Tensor
|
||||
#alpha_block : bool
|
||||
#bisection : bool
|
||||
#block_update : bool
|
||||
#convergence : bool
|
||||
@@ -364,31 +440,6 @@ __
|
||||
#y_test : torch::Tensor
|
||||
#y_train : torch::Tensor
|
||||
}
|
||||
class "bayesnet::AODELd" as C_0003898187834670349177
|
||||
class C_0003898187834670349177 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODELd(bool predict_voting = true) : void
|
||||
+~AODELd() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+fit(torch::Tensor & X_, torch::Tensor & y_, const std::vector<std::string> & features_, const std::string & className_, std::map<std::string,std::vector<int>> & states_, const Smoothing_t smoothing) : AODELd &
|
||||
+graph(const std::string & name = "AODELd") const : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::(anonymous_60275628)" as C_0009086919615463763584
|
||||
class C_0009086919615463763584 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60276282)" as C_0015251985607563196159
|
||||
class C_0015251985607563196159 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::BoostA2DE" as C_0000272055465257861326
|
||||
class C_0000272055465257861326 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+BoostA2DE(bool predict_voting = false) : void
|
||||
@@ -398,15 +449,15 @@ class C_0000272055465257861326 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::(anonymous_60275502)" as C_0016033655851510053155
|
||||
class C_0016033655851510053155 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60425028)" as C_0000461144706913711531
|
||||
class C_0000461144706913711531 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60276156)" as C_0000379522761622473555
|
||||
class C_0000379522761622473555 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60425682)" as C_0014849589915262463453
|
||||
class C_0014849589915262463453 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
@@ -421,6 +472,38 @@ class C_0002867772739198819061 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::XBA2DE" as C_0008480973840710001141
|
||||
class C_0008480973840710001141 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+XBA2DE(bool predict_voting = false) : void
|
||||
+~XBA2DE() = default : void
|
||||
..
|
||||
+getVersion() : std::string
|
||||
+graph(const std::string & title = "XBA2DE") const : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::(anonymous_60414016)" as C_0008746994658440620779
|
||||
class C_0008746994658440620779 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60414670)" as C_0008030559132212449356
|
||||
class C_0008030559132212449356 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::XBAODE" as C_0005198482342493966768
|
||||
class C_0005198482342493966768 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+XBAODE() : void
|
||||
..
|
||||
+getVersion() : std::string
|
||||
#trainModel(const torch::Tensor & weights, const bayesnet::Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::CFS" as C_0000093018845530739957
|
||||
class C_0000093018845530739957 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+CFS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
@@ -445,43 +528,43 @@ class C_0000066148117395428429 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::(anonymous_60730495)" as C_0004857727320042830573
|
||||
class C_0004857727320042830573 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60810808)" as C_0012002108046995621535
|
||||
class C_0012002108046995621535 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60731150)" as C_0000076541533312623385
|
||||
class C_0000076541533312623385 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60811462)" as C_0004735044229422764240
|
||||
class C_0004735044229422764240 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60653004)" as C_0001444063444142949758
|
||||
class C_0001444063444142949758 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60804220)" as C_0007082100550474633839
|
||||
class C_0007082100550474633839 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60653658)" as C_0007139277546931322856
|
||||
class C_0007139277546931322856 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60804874)" as C_0003669430095936529648
|
||||
class C_0003669430095936529648 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60731375)" as C_0010493853592456211189
|
||||
class C_0010493853592456211189 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60809706)" as C_0012336951062058157227
|
||||
class C_0012336951062058157227 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60732030)" as C_0007011438637915849564
|
||||
class C_0007011438637915849564 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60810360)" as C_0002435892998884329673
|
||||
class C_0002435892998884329673 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
@@ -513,37 +596,43 @@ C_0010428199432536647474 --> C_0010428199432536647474 : -children
|
||||
C_0009493661199123436603 ..> C_0013393078277439680282
|
||||
C_0009493661199123436603 o-- C_0010428199432536647474 : -nodes
|
||||
C_0002617087915615796317 ..> C_0013393078277439680282
|
||||
C_0002617087915615796317 ..> C_0005907365846270811004
|
||||
C_0002617087915615796317 o-- C_0005907365846270811004 : #status
|
||||
C_0016351972983202413152 ..> C_0013393078277439680282
|
||||
C_0016351972983202413152 ..> C_0005907365846270811004
|
||||
C_0016351972983202413152 o-- C_0009493661199123436603 : #model
|
||||
C_0016351972983202413152 o-- C_0005895723015084986588 : #metrics
|
||||
C_0016351972983202413152 o-- C_0005907365846270811004 : #status
|
||||
C_0002617087915615796317 <|-- C_0016351972983202413152
|
||||
|
||||
C_0017759964713298103839 ..> C_0009493661199123436603
|
||||
C_0016351972983202413152 <|-- C_0008902920152122000044
|
||||
|
||||
C_0016351972983202413152 <|-- C_0004096182510460307610
|
||||
|
||||
C_0016351972983202413152 <|-- C_0016268916386101512883
|
||||
|
||||
C_0016351972983202413152 <|-- C_0014087955399074584137
|
||||
|
||||
C_0017759964713298103839 ..> C_0009493661199123436603
|
||||
C_0002756018222998454702 ..> C_0013393078277439680282
|
||||
C_0008902920152122000044 <|-- C_0002756018222998454702
|
||||
|
||||
C_0017759964713298103839 <|-- C_0002756018222998454702
|
||||
|
||||
C_0016351972983202413152 <|-- C_0004096182510460307610
|
||||
|
||||
C_0010957245114062042836 ..> C_0013393078277439680282
|
||||
C_0004096182510460307610 <|-- C_0010957245114062042836
|
||||
|
||||
C_0017759964713298103839 <|-- C_0010957245114062042836
|
||||
|
||||
C_0016351972983202413152 <|-- C_0016268916386101512883
|
||||
|
||||
C_0016351972983202413152 <|-- C_0014087955399074584137
|
||||
|
||||
C_0013350632773616302678 ..> C_0013393078277439680282
|
||||
C_0014087955399074584137 <|-- C_0013350632773616302678
|
||||
|
||||
C_0017759964713298103839 <|-- C_0013350632773616302678
|
||||
|
||||
C_0007640742442325463418 ..> C_0013393078277439680282
|
||||
C_0016351972983202413152 <|-- C_0007640742442325463418
|
||||
|
||||
C_0015654113248178830206 ..> C_0013393078277439680282
|
||||
C_0016351972983202413152 <|-- C_0015654113248178830206
|
||||
|
||||
C_0015881931090842884611 ..> C_0013393078277439680282
|
||||
C_0015881931090842884611 o-- C_0016351972983202413152 : #models
|
||||
C_0016351972983202413152 <|-- C_0015881931090842884611
|
||||
@@ -552,22 +641,29 @@ C_0015881931090842884611 <|-- C_0001410789567057647859
|
||||
|
||||
C_0015881931090842884611 <|-- C_0006288892608974306258
|
||||
|
||||
C_0005895723015084986588 <|-- C_0013562609546004646591
|
||||
|
||||
C_0009819322948617116148 --> C_0013562609546004646591 : #featureSelector
|
||||
C_0015881931090842884611 <|-- C_0009819322948617116148
|
||||
|
||||
C_0003898187834670349177 ..> C_0013393078277439680282
|
||||
C_0015881931090842884611 <|-- C_0003898187834670349177
|
||||
|
||||
C_0017759964713298103839 <|-- C_0003898187834670349177
|
||||
|
||||
C_0005895723015084986588 <|-- C_0013562609546004646591
|
||||
|
||||
C_0009819322948617116148 ..> C_0016351972983202413152
|
||||
C_0009819322948617116148 --> C_0013562609546004646591 : #featureSelector
|
||||
C_0015881931090842884611 <|-- C_0009819322948617116148
|
||||
|
||||
C_0000272055465257861326 ..> C_0013393078277439680282
|
||||
C_0009819322948617116148 <|-- C_0000272055465257861326
|
||||
|
||||
C_0002867772739198819061 ..> C_0013393078277439680282
|
||||
C_0009819322948617116148 <|-- C_0002867772739198819061
|
||||
|
||||
C_0008480973840710001141 ..> C_0013393078277439680282
|
||||
C_0009819322948617116148 <|-- C_0008480973840710001141
|
||||
|
||||
C_0005198482342493966768 ..> C_0013393078277439680282
|
||||
C_0009819322948617116148 <|-- C_0005198482342493966768
|
||||
|
||||
C_0013562609546004646591 <|-- C_0000093018845530739957
|
||||
|
||||
C_0013562609546004646591 <|-- C_0001157456122733975432
|
||||
|
File diff suppressed because one or more lines are too long
Before Width: | Height: | Size: 196 KiB After Width: | Height: | Size: 229 KiB |
1
lib/catch2
Submodule
1
lib/catch2
Submodule
Submodule lib/catch2 added at 029fe3b460
@@ -18,8 +18,10 @@ include_directories(
|
||||
../tests/lib/Files
|
||||
lib/json/include
|
||||
/usr/local/include
|
||||
${FImdlp_INCLUDE_DIRS}
|
||||
/usr/local/include/fimdlp/
|
||||
)
|
||||
|
||||
add_executable(bayesnet_sample sample.cc)
|
||||
target_link_libraries(bayesnet_sample fimdlp "${TORCH_LIBRARIES}" "${BayesNet}")
|
||||
target_link_libraries(bayesnet_sample ${FImdlp} "${TORCH_LIBRARIES}" "${BayesNet}")
|
||||
add_executable(bayesnet_sample_xspode sample_xspode.cc)
|
||||
target_link_libraries(bayesnet_sample_xspode ${FImdlp} "${TORCH_LIBRARIES}" "${BayesNet}")
|
@@ -6,7 +6,7 @@
|
||||
|
||||
#include <ArffFiles.hpp>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <bayesnet/ensembles/BoostAODE.h>
|
||||
#include <bayesnet/ensembles/XBAODE.h>
|
||||
|
||||
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
|
||||
{
|
||||
@@ -57,10 +57,23 @@ int main(int argc, char* argv[])
|
||||
std::vector<std::string> features;
|
||||
std::string className;
|
||||
map<std::string, std::vector<int>> states;
|
||||
auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
|
||||
auto clf = bayesnet::XBAODE(); // false for not using voting in predict
|
||||
std::cout << "Library version: " << clf.getVersion() << std::endl;
|
||||
tie(X, y, features, className, states) = loadDataset(file_name, true);
|
||||
clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE);
|
||||
torch::Tensor weights = torch::full({ X.size(1) }, 15, torch::kDouble);
|
||||
torch::Tensor dataset;
|
||||
try {
|
||||
auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
|
||||
dataset = torch::cat({ X, yresized }, 0);
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::stringstream oss;
|
||||
oss << "* Error in X and y dimensions *\n";
|
||||
oss << "X dimensions: " << dataset.sizes() << "\n";
|
||||
oss << "y dimensions: " << y.sizes();
|
||||
throw std::runtime_error(oss.str());
|
||||
}
|
||||
clf.fit(dataset, features, className, states, weights, bayesnet::Smoothing_t::LAPLACE);
|
||||
auto score = clf.score(X, y);
|
||||
std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
|
||||
return 0;
|
||||
|
65
sample/sample_xspode.cc
Normal file
65
sample/sample_xspode.cc
Normal file
@@ -0,0 +1,65 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <ArffFiles.hpp>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <bayesnet/ensembles/BoostAODE.h>
|
||||
#include <bayesnet/classifiers/XSPODE.h>
|
||||
|
||||
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
|
||||
{
|
||||
std::vector<mdlp::labels_t> Xd;
|
||||
auto fimdlp = mdlp::CPPFImdlp();
|
||||
for (int i = 0; i < X.size(); i++) {
|
||||
fimdlp.fit(X[i], y);
|
||||
mdlp::labels_t& xd = fimdlp.transform(X[i]);
|
||||
Xd.push_back(xd);
|
||||
}
|
||||
return Xd;
|
||||
}
|
||||
tuple<std::vector<std::vector<int>>, std::vector<int>, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last)
|
||||
{
|
||||
auto handler = ArffFiles();
|
||||
handler.load(name, class_last);
|
||||
// Get Dataset X, y
|
||||
std::vector<mdlp::samples_t>& X = handler.getX();
|
||||
mdlp::labels_t y = handler.getY();
|
||||
// Get className & Features
|
||||
auto className = handler.getClassName();
|
||||
std::vector<std::string> features;
|
||||
auto attributes = handler.getAttributes();
|
||||
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
|
||||
torch::Tensor Xd;
|
||||
auto states = map<std::string, std::vector<int>>();
|
||||
auto Xr = discretizeDataset(X, y);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
|
||||
auto item = states.at(features[i]);
|
||||
iota(begin(item), end(item), 0);
|
||||
}
|
||||
states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
|
||||
iota(begin(states.at(className)), end(states.at(className)), 0);
|
||||
return { Xr, y, features, className, states };
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
std::cerr << "Usage: " << argv[0] << " <file_name>" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::string file_name = argv[1];
|
||||
bayesnet::BaseClassifier* clf = new bayesnet::XSpode(0);
|
||||
std::cout << "Library version: " << clf->getVersion() << std::endl;
|
||||
auto [X, y, features, className, states] = loadDataset(file_name, true);
|
||||
torch::Tensor weights = torch::full({ static_cast<long>(X[0].size()) }, 1.0 / X[0].size(), torch::kDouble);
|
||||
clf->fit(X, y, features, className, states, bayesnet::Smoothing_t::ORIGINAL);
|
||||
auto score = clf->score(X, y);
|
||||
std::cout << "File: " << file_name << " Model: XSpode(0) score: " << score << std::endl;
|
||||
delete clf;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -3,19 +3,24 @@ if(ENABLE_TESTING)
|
||||
${BayesNet_SOURCE_DIR}/tests/lib/Files
|
||||
${BayesNet_SOURCE_DIR}/lib/folding
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
||||
${BayesNet_SOURCE_DIR}/lib/log
|
||||
${BayesNet_SOURCE_DIR}/lib/json/include
|
||||
${BayesNet_SOURCE_DIR}
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
|
||||
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc
|
||||
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestA2DE.cc
|
||||
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc ${BayesNet_SOURCES})
|
||||
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestXSPnDE.cc TestXBA2DE.cc
|
||||
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestXBAODE.cc TestA2DE.cc
|
||||
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc TestXSPODE.cc ${BayesNet_SOURCES})
|
||||
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp PRIVATE Catch2::Catch2WithMain)
|
||||
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
|
||||
add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]")
|
||||
add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]")
|
||||
add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
|
||||
add_test(NAME XSPODE COMMAND TestBayesNet "[XSPODE]")
|
||||
add_test(NAME XSPnDE COMMAND TestBayesNet "[XSPnDE]")
|
||||
add_test(NAME XBAODE COMMAND TestBayesNet "[XBAODE]")
|
||||
add_test(NAME XBA2DE COMMAND TestBayesNet "[XBA2DE]")
|
||||
add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
|
||||
add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
|
||||
add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
|
||||
|
@@ -28,7 +28,7 @@ TEST_CASE("Dump CPT", "[Ensemble]")
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto dump = clf.dump_cpt();
|
||||
REQUIRE(dump == "");
|
||||
REQUIRE(dump.size() == 39916);
|
||||
}
|
||||
TEST_CASE("Number of States", "[Ensemble]")
|
||||
{
|
||||
|
@@ -4,48 +4,81 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/classifiers/KDB.h"
|
||||
#include "bayesnet/classifiers/TAN.h"
|
||||
#include "bayesnet/classifiers/SPODE.h"
|
||||
#include "bayesnet/classifiers/TANLd.h"
|
||||
#include "bayesnet/classifiers/KDBLd.h"
|
||||
#include "bayesnet/classifiers/SPODE.h"
|
||||
#include "bayesnet/classifiers/SPODELd.h"
|
||||
#include "bayesnet/classifiers/TAN.h"
|
||||
#include "bayesnet/classifiers/TANLd.h"
|
||||
#include "bayesnet/classifiers/XSPODE.h"
|
||||
#include "bayesnet/ensembles/AODE.h"
|
||||
#include "bayesnet/ensembles/AODELd.h"
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
const std::string ACTUAL_VERSION = "1.0.6";
|
||||
const std::string ACTUAL_VERSION = "1.0.7";
|
||||
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
{
|
||||
map <pair<std::string, std::string>, float> scores{
|
||||
// Diabetes
|
||||
{{"diabetes", "AODE"}, 0.82161}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
|
||||
{{"diabetes", "AODELd"}, 0.8125f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.7890625f}, {{"diabetes", "TANLd"}, 0.803385437f}, {{"diabetes", "BoostAODE"}, 0.83984f},
|
||||
map<pair<std::string, std::string>, float> scores{// Diabetes
|
||||
{{"diabetes", "AODE"}, 0.82161},
|
||||
{{"diabetes", "KDB"}, 0.852865},
|
||||
{{"diabetes", "XSPODE"}, 0.631510437f},
|
||||
{{"diabetes", "SPODE"}, 0.802083},
|
||||
{{"diabetes", "TAN"}, 0.821615},
|
||||
{{"diabetes", "AODELd"}, 0.8125f},
|
||||
{{"diabetes", "KDBLd"}, 0.80208f},
|
||||
{{"diabetes", "SPODELd"}, 0.7890625f},
|
||||
{{"diabetes", "TANLd"}, 0.803385437f},
|
||||
{{"diabetes", "BoostAODE"}, 0.83984f},
|
||||
// Ecoli
|
||||
{{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
|
||||
{{"ecoli", "AODELd"}, 0.875f}, {{"ecoli", "KDBLd"}, 0.880952358f}, {{"ecoli", "SPODELd"}, 0.839285731f}, {{"ecoli", "TANLd"}, 0.848214269f}, {{"ecoli", "BoostAODE"}, 0.89583f},
|
||||
{{"ecoli", "AODE"}, 0.889881},
|
||||
{{"ecoli", "KDB"}, 0.889881},
|
||||
{{"ecoli", "XSPODE"}, 0.696428597f},
|
||||
{{"ecoli", "SPODE"}, 0.880952},
|
||||
{{"ecoli", "TAN"}, 0.892857},
|
||||
{{"ecoli", "AODELd"}, 0.875f},
|
||||
{{"ecoli", "KDBLd"}, 0.880952358f},
|
||||
{{"ecoli", "SPODELd"}, 0.839285731f},
|
||||
{{"ecoli", "TANLd"}, 0.848214269f},
|
||||
{{"ecoli", "BoostAODE"}, 0.89583f},
|
||||
// Glass
|
||||
{{"glass", "AODE"}, 0.79439}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
|
||||
{{"glass", "AODELd"}, 0.799065411f}, {{"glass", "KDBLd"}, 0.82710278f}, {{"glass", "SPODELd"}, 0.780373812f}, {{"glass", "TANLd"}, 0.869158864f}, {{"glass", "BoostAODE"}, 0.84579f},
|
||||
{{"glass", "AODE"}, 0.79439},
|
||||
{{"glass", "KDB"}, 0.827103},
|
||||
{{"glass", "XSPODE"}, 0.775701},
|
||||
{{"glass", "SPODE"}, 0.775701},
|
||||
{{"glass", "TAN"}, 0.827103},
|
||||
{{"glass", "AODELd"}, 0.799065411f},
|
||||
{{"glass", "KDBLd"}, 0.82710278f},
|
||||
{{"glass", "SPODELd"}, 0.780373812f},
|
||||
{{"glass", "TANLd"}, 0.869158864f},
|
||||
{{"glass", "BoostAODE"}, 0.84579f},
|
||||
// Iris
|
||||
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
|
||||
{{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f}
|
||||
};
|
||||
std::map<std::string, bayesnet::BaseClassifier*> models{
|
||||
{"AODE", new bayesnet::AODE()}, {"AODELd", new bayesnet::AODELd()},
|
||||
{{"iris", "AODE"}, 0.973333},
|
||||
{{"iris", "KDB"}, 0.973333},
|
||||
{{"iris", "XSPODE"}, 0.853333354f},
|
||||
{{"iris", "SPODE"}, 0.973333},
|
||||
{{"iris", "TAN"}, 0.973333},
|
||||
{{"iris", "AODELd"}, 0.973333},
|
||||
{{"iris", "KDBLd"}, 0.973333},
|
||||
{{"iris", "SPODELd"}, 0.96f},
|
||||
{{"iris", "TANLd"}, 0.97333f},
|
||||
{{"iris", "BoostAODE"}, 0.98f} };
|
||||
std::map<std::string, bayesnet::BaseClassifier*> models{ {"AODE", new bayesnet::AODE()},
|
||||
{"AODELd", new bayesnet::AODELd()},
|
||||
{"BoostAODE", new bayesnet::BoostAODE()},
|
||||
{"KDB", new bayesnet::KDB(2)}, {"KDBLd", new bayesnet::KDBLd(2)},
|
||||
{"SPODE", new bayesnet::SPODE(1)}, {"SPODELd", new bayesnet::SPODELd(1)},
|
||||
{"TAN", new bayesnet::TAN()}, {"TANLd", new bayesnet::TANLd()}
|
||||
};
|
||||
std::string name = GENERATE("AODE", "AODELd", "KDB", "KDBLd", "SPODE", "SPODELd", "TAN", "TANLd");
|
||||
{"KDB", new bayesnet::KDB(2)},
|
||||
{"KDBLd", new bayesnet::KDBLd(2)},
|
||||
{"XSPODE", new bayesnet::XSpode(1)},
|
||||
{"SPODE", new bayesnet::SPODE(1)},
|
||||
{"SPODELd", new bayesnet::SPODELd(1)},
|
||||
{"TAN", new bayesnet::TAN()},
|
||||
{"TANLd", new bayesnet::TANLd()} };
|
||||
std::string name = GENERATE("AODE", "AODELd", "KDB", "KDBLd", "SPODE", "XSPODE", "SPODELd", "TAN", "TANLd");
|
||||
auto clf = models[name];
|
||||
|
||||
SECTION("Test " + name + " classifier")
|
||||
@@ -56,6 +89,8 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
auto raw = RawDatasets(file_name, discretize);
|
||||
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf->score(raw.Xt, raw.yt);
|
||||
// std::cout << "Classifier: " << name << " File: " << file_name << " Score: " << score << " expected = " <<
|
||||
// scores[{file_name, name}] << std::endl;
|
||||
INFO("Classifier: " << name << " File: " << file_name);
|
||||
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
|
||||
REQUIRE(clf->getStatus() == bayesnet::NORMAL);
|
||||
@@ -70,13 +105,13 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
}
|
||||
TEST_CASE("Models features & Graph", "[Models]")
|
||||
{
|
||||
auto graph = std::vector<std::string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
|
||||
auto graph = std::vector<std::string>(
|
||||
{ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
|
||||
"\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
|
||||
"\"class\" -> \"sepallength\"", "\"class\" -> \"sepalwidth\"", "\"class\" -> \"petallength\"", "\"class\" -> \"petalwidth\"", "\"petallength\" [shape=circle] \n",
|
||||
"\"petallength\" -> \"sepallength\"", "\"petalwidth\" [shape=circle] \n", "\"sepallength\" [shape=circle] \n",
|
||||
"\"sepallength\" -> \"sepalwidth\"", "\"sepalwidth\" [shape=circle] \n", "\"sepalwidth\" -> \"petalwidth\"", "}\n"
|
||||
}
|
||||
);
|
||||
"\"class\" -> \"sepallength\"", "\"class\" -> \"sepalwidth\"", "\"class\" -> \"petallength\"",
|
||||
"\"class\" -> \"petalwidth\"", "\"petallength\" [shape=circle] \n", "\"petallength\" -> \"sepallength\"",
|
||||
"\"petalwidth\" [shape=circle] \n", "\"sepallength\" [shape=circle] \n", "\"sepallength\" -> \"sepalwidth\"",
|
||||
"\"sepalwidth\" [shape=circle] \n", "\"sepalwidth\" -> \"petalwidth\"", "}\n" });
|
||||
SECTION("Test TAN")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
@@ -86,7 +121,9 @@ TEST_CASE("Models features & Graph", "[Models]")
|
||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||
REQUIRE(clf.getNumberOfStates() == 19);
|
||||
REQUIRE(clf.getClassNumStates() == 3);
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ",
|
||||
"petallength -> sepallength, ", "petalwidth -> ",
|
||||
"sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.graph("Test") == graph);
|
||||
}
|
||||
SECTION("Test TANLd")
|
||||
@@ -98,7 +135,9 @@ TEST_CASE("Models features & Graph", "[Models]")
|
||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||
REQUIRE(clf.getNumberOfStates() == 27);
|
||||
REQUIRE(clf.getClassNumStates() == 3);
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ",
|
||||
"petallength -> sepallength, ", "petalwidth -> ",
|
||||
"sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.graph("Test") == graph);
|
||||
}
|
||||
}
|
||||
@@ -114,8 +153,7 @@ TEST_CASE("Get num features & num edges", "[Models]")
|
||||
TEST_CASE("Model predict_proba", "[Models]")
|
||||
{
|
||||
std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
|
||||
auto res_prob_tan = std::vector<std::vector<double>>({
|
||||
{ 0.00375671, 0.994457, 0.00178621 },
|
||||
auto res_prob_tan = std::vector<std::vector<double>>({ {0.00375671, 0.994457, 0.00178621},
|
||||
{0.00137462, 0.992734, 0.00589123},
|
||||
{0.00137462, 0.992734, 0.00589123},
|
||||
{0.00137462, 0.992734, 0.00589123},
|
||||
@@ -123,10 +161,8 @@ TEST_CASE("Model predict_proba", "[Models]")
|
||||
{0.00494209, 0.0978534, 0.897205},
|
||||
{0.0054192, 0.974275, 0.0203054},
|
||||
{0.00433012, 0.985054, 0.0106159},
|
||||
{ 0.000860806, 0.996922, 0.00221698 }
|
||||
});
|
||||
auto res_prob_spode = std::vector<std::vector<double>>({
|
||||
{0.00419032, 0.994247, 0.00156265},
|
||||
{0.000860806, 0.996922, 0.00221698} });
|
||||
auto res_prob_spode = std::vector<std::vector<double>>({ {0.00419032, 0.994247, 0.00156265},
|
||||
{0.00172808, 0.993433, 0.00483862},
|
||||
{0.00172808, 0.993433, 0.00483862},
|
||||
{0.00172808, 0.993433, 0.00483862},
|
||||
@@ -134,10 +170,8 @@ TEST_CASE("Model predict_proba", "[Models]")
|
||||
{0.0120674, 0.357909, 0.630024},
|
||||
{0.00386239, 0.913919, 0.0822185},
|
||||
{0.0244389, 0.966447, 0.00911374},
|
||||
{0.003135, 0.991799, 0.0050661}
|
||||
});
|
||||
auto res_prob_baode = std::vector<std::vector<double>>({
|
||||
{0.0112349, 0.962274, 0.0264907},
|
||||
{0.003135, 0.991799, 0.0050661} });
|
||||
auto res_prob_baode = std::vector<std::vector<double>>({ {0.0112349, 0.962274, 0.0264907},
|
||||
{0.00371025, 0.950592, 0.0456973},
|
||||
{0.00371025, 0.950592, 0.0456973},
|
||||
{0.00371025, 0.950592, 0.0456973},
|
||||
@@ -145,21 +179,17 @@ TEST_CASE("Model predict_proba", "[Models]")
|
||||
{0.0252205, 0.113564, 0.861215},
|
||||
{0.0284828, 0.770524, 0.200993},
|
||||
{0.0213182, 0.857189, 0.121493},
|
||||
{0.00868436, 0.949494, 0.0418215}
|
||||
});
|
||||
auto res_prob_voting = std::vector<std::vector<double>>({
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 0, 1},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0},
|
||||
{0, 1, 0}
|
||||
});
|
||||
std::map<std::string, std::vector<std::vector<double>>> res_prob{ {"TAN", res_prob_tan}, {"SPODE", res_prob_spode} , {"BoostAODEproba", res_prob_baode }, {"BoostAODEvoting", res_prob_voting } };
|
||||
std::map<std::string, bayesnet::BaseClassifier*> models{ {"TAN", new bayesnet::TAN()}, {"SPODE", new bayesnet::SPODE(0)}, {"BoostAODEproba", new bayesnet::BoostAODE(false)}, {"BoostAODEvoting", new bayesnet::BoostAODE(true)} };
|
||||
{0.00868436, 0.949494, 0.0418215} });
|
||||
auto res_prob_voting = std::vector<std::vector<double>>(
|
||||
{ {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 0}, {0, 1, 0} });
|
||||
std::map<std::string, std::vector<std::vector<double>>> res_prob{ {"TAN", res_prob_tan},
|
||||
{"SPODE", res_prob_spode},
|
||||
{"BoostAODEproba", res_prob_baode},
|
||||
{"BoostAODEvoting", res_prob_voting} };
|
||||
std::map<std::string, bayesnet::BaseClassifier*> models{ {"TAN", new bayesnet::TAN()},
|
||||
{"SPODE", new bayesnet::SPODE(0)},
|
||||
{"BoostAODEproba", new bayesnet::BoostAODE(false)},
|
||||
{"BoostAODEvoting", new bayesnet::BoostAODE(true)} };
|
||||
int init_index = 78;
|
||||
auto raw = RawDatasets("iris", true);
|
||||
|
||||
@@ -192,7 +222,8 @@ TEST_CASE("Model predict_proba", "[Models]")
|
||||
REQUIRE(y_pred[i] == yt_pred[i].item<int>());
|
||||
for (int j = 0; j < 3; j++) {
|
||||
REQUIRE(res_prob[model][i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon));
|
||||
REQUIRE(res_prob[model][i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item<double>()).epsilon(raw.epsilon));
|
||||
REQUIRE(res_prob[model][i][j] ==
|
||||
Catch::Approx(yt_pred_proba[i + init_index][j].item<double>()).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
delete clf;
|
||||
@@ -293,10 +324,58 @@ TEST_CASE("TAN & SPODE with invalid hyperparameters", "[Models]")
|
||||
clf.setHyperparameters({
|
||||
{"parent", 5},
|
||||
});
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing),
|
||||
std::invalid_argument);
|
||||
auto clf2 = bayesnet::SPODE(0);
|
||||
clf2.setHyperparameters({
|
||||
{"parent", 5},
|
||||
});
|
||||
REQUIRE_THROWS_AS(clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_AS(clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing),
|
||||
std::invalid_argument);
|
||||
}
|
||||
TEST_CASE("Check proposal checkInput", "[Models]")
|
||||
{
|
||||
class testProposal : public bayesnet::Proposal {
|
||||
public:
|
||||
testProposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_)
|
||||
: Proposal(dataset_, features_, className_)
|
||||
{
|
||||
}
|
||||
void test_X_y(const torch::Tensor& X, const torch::Tensor& y) { checkInput(X, y); }
|
||||
};
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = testProposal(raw.dataset, raw.features, raw.className);
|
||||
torch::Tensor X = torch::randint(0, 3, { 10, 4 });
|
||||
torch::Tensor y = torch::rand({ 10 });
|
||||
INFO("Check X is not float");
|
||||
REQUIRE_THROWS_AS(clf.test_X_y(X, y), std::invalid_argument);
|
||||
X = torch::rand({ 10, 4 });
|
||||
INFO("Check y is not integer");
|
||||
REQUIRE_THROWS_AS(clf.test_X_y(X, y), std::invalid_argument);
|
||||
y = torch::randint(0, 3, { 10 });
|
||||
INFO("X and y are correct");
|
||||
REQUIRE_NOTHROW(clf.test_X_y(X, y));
|
||||
}
|
||||
TEST_CASE("Check KDB loop detection", "[Models]")
|
||||
{
|
||||
class testKDB : public bayesnet::KDB {
|
||||
public:
|
||||
testKDB() : KDB(2, 0) {}
|
||||
void test_add_m_edges(std::vector<std::string> features_, int idx, std::vector<int>& S, torch::Tensor& weights)
|
||||
{
|
||||
features = features_;
|
||||
add_m_edges(idx, S, weights);
|
||||
}
|
||||
};
|
||||
auto clf = testKDB();
|
||||
auto features = std::vector<std::string>{ "A", "B", "C" };
|
||||
int idx = 0;
|
||||
std::vector<int> S = { 0 };
|
||||
torch::Tensor weights = torch::tensor({
|
||||
{ 1.0, 10.0, 0.0 }, // row0 -> picks col1
|
||||
{ 0.0, 1.0, 10.0 }, // row1 -> picks col2
|
||||
{ 10.0, 0.0, 1.0 }, // row2 -> picks col0
|
||||
});
|
||||
REQUIRE_NOTHROW(clf.test_add_m_edges(features, 0, S, weights));
|
||||
REQUIRE_NOTHROW(clf.test_add_m_edges(features, 1, S, weights));
|
||||
}
|
@@ -110,14 +110,14 @@ TEST_CASE("Test Node computeCPT", "[Node]")
|
||||
// Oddities
|
||||
auto features_back = features;
|
||||
// Remove a parent from features
|
||||
features.pop_back();
|
||||
REQUIRE_THROWS_AS(nodes[0].computeCPT(dataset, features, 0.0, weights), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(nodes[0].computeCPT(dataset, features, 0.0, weights), "Feature parent Class not found in dataset");
|
||||
// features.pop_back();
|
||||
// REQUIRE_THROWS_AS(nodes[0].computeCPT(dataset, features, 0.0, weights), std::logic_error);
|
||||
// REQUIRE_THROWS_WITH(nodes[0].computeCPT(dataset, features, 0.0, weights), "Feature parent Class not found in dataset");
|
||||
// Remove a feature from features
|
||||
features = features_back;
|
||||
features.erase(features.begin());
|
||||
REQUIRE_THROWS_AS(nodes[0].computeCPT(dataset, features, 0.0, weights), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(nodes[0].computeCPT(dataset, features, 0.0, weights), "Feature F1 not found in dataset");
|
||||
// features = features_back;
|
||||
// features.erase(features.begin());
|
||||
// REQUIRE_THROWS_AS(nodes[0].computeCPT(dataset, features, 0.0, weights), std::logic_error);
|
||||
// REQUIRE_THROWS_WITH(nodes[0].computeCPT(dataset, features, 0.0, weights), "Feature F1 not found in dataset");
|
||||
}
|
||||
TEST_CASE("TEST MinFill method", "[Node]")
|
||||
{
|
||||
|
@@ -90,7 +90,7 @@ TEST_CASE("Voting vs proba", "[BoostA2DE]")
|
||||
REQUIRE(score_voting == Catch::Approx(0.946667).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_voting[83][2] == Catch::Approx(0.53508).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_proba[83][2] == Catch::Approx(0.48394).epsilon(raw.epsilon));
|
||||
REQUIRE(clf.dump_cpt() == "");
|
||||
REQUIRE(clf.dump_cpt().size() == 7742);
|
||||
REQUIRE(clf.topological_order() == std::vector<std::string>());
|
||||
}
|
||||
TEST_CASE("Order asc, desc & random", "[BoostA2DE]")
|
||||
@@ -123,7 +123,7 @@ TEST_CASE("Oddities2", "[BoostA2DE]")
|
||||
{ { "order", "duck" } },
|
||||
{ { "select_features", "duck" } },
|
||||
{ { "maxTolerance", 0 } },
|
||||
{ { "maxTolerance", 5 } },
|
||||
{ { "maxTolerance", 7 } },
|
||||
};
|
||||
for (const auto& hyper : bad_hyper.items()) {
|
||||
INFO("BoostA2DE hyper: " + hyper.value().dump());
|
||||
@@ -162,7 +162,7 @@ TEST_CASE("Bisection Best", "[BoostA2DE]")
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
{"block_update", false},
|
||||
{"convergence_best", false},
|
||||
{"convergence_best", true},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 480);
|
||||
|
@@ -4,17 +4,14 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
|
||||
|
||||
TEST_CASE("Feature_select CFS", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Feature_select CFS", "[BoostAODE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({{"select_features", "CFS"}});
|
||||
@@ -25,8 +22,7 @@ TEST_CASE("Feature_select CFS", "[BoostAODE]")
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
}
|
||||
TEST_CASE("Feature_select IWSS", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Feature_select IWSS", "[BoostAODE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({{"select_features", "IWSS"}, {"threshold", 0.5}});
|
||||
@@ -37,8 +33,7 @@ TEST_CASE("Feature_select IWSS", "[BoostAODE]")
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
}
|
||||
TEST_CASE("Feature_select FCBF", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Feature_select FCBF", "[BoostAODE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({{"select_features", "FCBF"}, {"threshold", 1e-7}});
|
||||
@@ -49,8 +44,7 @@ TEST_CASE("Feature_select FCBF", "[BoostAODE]")
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
}
|
||||
TEST_CASE("Test used features in train note and score", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Test used features in train note and score", "[BoostAODE]") {
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
auto clf = bayesnet::BoostAODE(true);
|
||||
clf.setHyperparameters({
|
||||
@@ -69,8 +63,7 @@ TEST_CASE("Test used features in train note and score", "[BoostAODE]")
|
||||
REQUIRE(score == Catch::Approx(0.809895813).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.809895813).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Voting vs proba", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Voting vs proba", "[BoostAODE]") {
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::BoostAODE(false);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
@@ -85,15 +78,12 @@ TEST_CASE("Voting vs proba", "[BoostAODE]")
|
||||
REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_voting[83][2] == Catch::Approx(1.0).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_proba[83][2] == Catch::Approx(0.86121525).epsilon(raw.epsilon));
|
||||
REQUIRE(clf.dump_cpt() == "");
|
||||
REQUIRE(clf.dump_cpt().size() == 7004);
|
||||
REQUIRE(clf.topological_order() == std::vector<std::string>());
|
||||
}
|
||||
TEST_CASE("Order asc, desc & random", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Order asc, desc & random", "[BoostAODE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
std::map<std::string, double> scores{
|
||||
{"asc", 0.83645f }, { "desc", 0.84579f }, { "rand", 0.84112 }
|
||||
};
|
||||
std::map<std::string, double> scores{{"asc", 0.83645f}, {"desc", 0.84579f}, {"rand", 0.84112}};
|
||||
for (const std::string &order : {"asc", "desc", "rand"}) {
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({
|
||||
@@ -110,15 +100,14 @@ TEST_CASE("Order asc, desc & random", "[BoostAODE]")
|
||||
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
TEST_CASE("Oddities", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Oddities", "[BoostAODE]") {
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto bad_hyper = nlohmann::json{
|
||||
{{"order", "duck"}},
|
||||
{{"select_features", "duck"}},
|
||||
{{"maxTolerance", 0}},
|
||||
{ { "maxTolerance", 5 } },
|
||||
{{"maxTolerance", 7}},
|
||||
};
|
||||
for (const auto &hyper : bad_hyper.items()) {
|
||||
INFO("BoostAODE hyper: " << hyper.value().dump());
|
||||
@@ -134,7 +123,8 @@ TEST_CASE("Oddities", "[BoostAODE]")
|
||||
for (const auto &hyper : bad_hyper_fit.items()) {
|
||||
INFO("BoostAODE hyper: " << hyper.value().dump());
|
||||
clf.setHyperparameters(hyper.value());
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing),
|
||||
std::invalid_argument);
|
||||
}
|
||||
|
||||
auto bad_hyper_fit2 = nlohmann::json{
|
||||
@@ -146,8 +136,7 @@ TEST_CASE("Oddities", "[BoostAODE]")
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
||||
}
|
||||
}
|
||||
TEST_CASE("Bisection Best", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Bisection Best", "[BoostAODE]") {
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
|
||||
clf.setHyperparameters({
|
||||
@@ -167,8 +156,7 @@ TEST_CASE("Bisection Best", "[BoostAODE]")
|
||||
REQUIRE(score == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Bisection Best vs Last", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Bisection Best vs Last", "[BoostAODE]") {
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false);
|
||||
auto clf = bayesnet::BoostAODE(true);
|
||||
auto hyperparameters = nlohmann::json{
|
||||
@@ -188,8 +176,7 @@ TEST_CASE("Bisection Best vs Last", "[BoostAODE]")
|
||||
auto score_last = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Block Update", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Block Update", "[BoostAODE]") {
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("mfeat-factors", true, 500);
|
||||
clf.setHyperparameters({
|
||||
@@ -218,8 +205,7 @@ TEST_CASE("Block Update", "[BoostAODE]")
|
||||
// }
|
||||
// std::cout << "Score " << score << std::endl;
|
||||
}
|
||||
TEST_CASE("Alphablock", "[BoostAODE]")
|
||||
{
|
||||
TEST_CASE("Alphablock", "[BoostAODE]") {
|
||||
auto clf_alpha = bayesnet::BoostAODE();
|
||||
auto clf_no_alpha = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
@@ -233,3 +219,4 @@ TEST_CASE("Alphablock", "[BoostAODE]")
|
||||
REQUIRE(score_alpha == Catch::Approx(0.720779f).epsilon(raw.epsilon));
|
||||
REQUIRE(score_no_alpha == Catch::Approx(0.733766f).epsilon(raw.epsilon));
|
||||
}
|
||||
|
||||
|
237
tests/TestXBA2DE.cc
Normal file
237
tests/TestXBA2DE.cc
Normal file
@@ -0,0 +1,237 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/ensembles/XBA2DE.h"
|
||||
|
||||
TEST_CASE("Normal test", "[XBA2DE]") {
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::XBA2DE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 8);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getVersion() == "0.9.7");
|
||||
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 13 models eliminated");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 1");
|
||||
REQUIRE(clf.getNumberOfStates() == 64);
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(1.0f));
|
||||
REQUIRE(clf.graph().size() == 1);
|
||||
}
|
||||
TEST_CASE("Feature_select CFS", "[XBA2DE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::XBA2DE();
|
||||
clf.setHyperparameters({{"select_features", "CFS"}});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 220);
|
||||
REQUIRE(clf.getNumberOfEdges() == 506);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 22");
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219));
|
||||
}
|
||||
TEST_CASE("Feature_select IWSS", "[XBA2DE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::XBA2DE();
|
||||
clf.setHyperparameters({{"select_features", "IWSS"}, {"threshold", 0.5}});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 220);
|
||||
REQUIRE(clf.getNumberOfEdges() == 506);
|
||||
REQUIRE(clf.getNotes().size() == 4);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
|
||||
REQUIRE(clf.getNotes()[1] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[2] == "Pairs not used in train: 2");
|
||||
REQUIRE(clf.getNotes()[3] == "Number of models: 22");
|
||||
REQUIRE(clf.getNumberOfStates() == 5346);
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.72093));
|
||||
}
|
||||
TEST_CASE("Feature_select FCBF", "[XBA2DE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::XBA2DE();
|
||||
clf.setHyperparameters({{"select_features", "FCBF"}, {"threshold", 1e-7}});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 290);
|
||||
REQUIRE(clf.getNumberOfEdges() == 667);
|
||||
REQUIRE(clf.getNumberOfStates() == 7047);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
|
||||
REQUIRE(clf.getNotes()[1] == "Pairs not used in train: 2");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 29");
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.744186));
|
||||
}
|
||||
TEST_CASE("Test used features in train note and score", "[XBA2DE]") {
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
auto clf = bayesnet::XBA2DE();
|
||||
clf.setHyperparameters({
|
||||
{"order", "asc"},
|
||||
{"convergence", true},
|
||||
{"select_features", "CFS"},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 144);
|
||||
REQUIRE(clf.getNumberOfEdges() == 320);
|
||||
REQUIRE(clf.getNumberOfStates() == 5504);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 16");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(0.850260437f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.850260437f).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Order asc, desc & random", "[XBA2DE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
std::map<std::string, double> scores{{"asc", 0.827103}, {"desc", 0.808411}, {"rand", 0.827103}};
|
||||
for (const std::string &order : {"asc", "desc", "rand"}) {
|
||||
auto clf = bayesnet::XBA2DE();
|
||||
clf.setHyperparameters({
|
||||
{"order", order},
|
||||
{"bisection", false},
|
||||
{"maxTolerance", 1},
|
||||
{"convergence", true},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
INFO("XBA2DE order: " << order);
|
||||
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
TEST_CASE("Oddities", "[XBA2DE]") {
|
||||
auto clf = bayesnet::XBA2DE();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto bad_hyper = nlohmann::json{
|
||||
{{"order", "duck"}},
|
||||
{{"select_features", "duck"}},
|
||||
{{"maxTolerance", 0}},
|
||||
{{"maxTolerance", 7}},
|
||||
};
|
||||
for (const auto &hyper : bad_hyper.items()) {
|
||||
INFO("XBA2DE hyper: " << hyper.value().dump());
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
||||
}
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters({{"maxTolerance", 0}}), std::invalid_argument);
|
||||
auto bad_hyper_fit = nlohmann::json{
|
||||
{{"select_features", "IWSS"}, {"threshold", -0.01}},
|
||||
{{"select_features", "IWSS"}, {"threshold", 0.51}},
|
||||
{{"select_features", "FCBF"}, {"threshold", 1e-8}},
|
||||
{{"select_features", "FCBF"}, {"threshold", 1.01}},
|
||||
};
|
||||
for (const auto &hyper : bad_hyper_fit.items()) {
|
||||
INFO("XBA2DE hyper: " << hyper.value().dump());
|
||||
clf.setHyperparameters(hyper.value());
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing),
|
||||
std::invalid_argument);
|
||||
}
|
||||
auto bad_hyper_fit2 = nlohmann::json{
|
||||
{{"alpha_block", true}, {"block_update", true}},
|
||||
{{"bisection", false}, {"block_update", true}},
|
||||
};
|
||||
for (const auto &hyper : bad_hyper_fit2.items()) {
|
||||
INFO("XBA2DE hyper: " << hyper.value().dump());
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
||||
}
|
||||
// Check not enough selected features
|
||||
raw.Xv.pop_back();
|
||||
raw.Xv.pop_back();
|
||||
raw.Xv.pop_back();
|
||||
raw.features.pop_back();
|
||||
raw.features.pop_back();
|
||||
raw.features.pop_back();
|
||||
clf.setHyperparameters({{"select_features", "CFS"}, {"alpha_block", false}, {"block_update", false}});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNotes().size() == 1);
|
||||
REQUIRE(clf.getNotes()[0] == "No features selected in initialization");
|
||||
}
|
||||
TEST_CASE("Bisection Best", "[XBA2DE]") {
|
||||
auto clf = bayesnet::XBA2DE();
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
{"convergence_best", false},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 330);
|
||||
REQUIRE(clf.getNumberOfEdges() == 836);
|
||||
REQUIRE(clf.getNumberOfStates() == 31108);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes().at(0) == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes().at(1) == "Pairs not used in train: 83");
|
||||
REQUIRE(clf.getNotes().at(2) == "Number of models: 22");
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(0.975).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.975).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Bisection Best vs Last", "[XBA2DE]") {
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false);
|
||||
auto clf = bayesnet::XBA2DE();
|
||||
auto hyperparameters = nlohmann::json{
|
||||
{"bisection", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
{"convergence_best", true},
|
||||
};
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_best = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_best == Catch::Approx(0.983333).epsilon(raw.epsilon));
|
||||
// Now we will set the hyperparameter to use the last accuracy
|
||||
hyperparameters["convergence_best"] = false;
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_last = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_last == Catch::Approx(0.99).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Block Update", "[XBA2DE]") {
|
||||
auto clf = bayesnet::XBA2DE();
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"block_update", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 120);
|
||||
REQUIRE(clf.getNumberOfEdges() == 304);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[1] == "Pairs not used in train: 83");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 8");
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(0.963333).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.963333).epsilon(raw.epsilon));
|
||||
/*std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;*/
|
||||
/*std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;*/
|
||||
/*std::cout << "Notes size " << clf.getNotes().size() << std::endl;*/
|
||||
/*for (auto note : clf.getNotes()) {*/
|
||||
/* std::cout << note << std::endl;*/
|
||||
/*}*/
|
||||
/*std::cout << "Score " << score << std::endl;*/
|
||||
}
|
||||
TEST_CASE("Alphablock", "[XBA2DE]") {
|
||||
auto clf_alpha = bayesnet::XBA2DE();
|
||||
auto clf_no_alpha = bayesnet::XBA2DE();
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
clf_alpha.setHyperparameters({
|
||||
{"alpha_block", true},
|
||||
});
|
||||
clf_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
clf_no_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_alpha = clf_alpha.score(raw.X_test, raw.y_test);
|
||||
auto score_no_alpha = clf_no_alpha.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_alpha == Catch::Approx(0.714286).epsilon(raw.epsilon));
|
||||
REQUIRE(score_no_alpha == Catch::Approx(0.714286).epsilon(raw.epsilon));
|
||||
}
|
216
tests/TestXBAODE.cc
Normal file
216
tests/TestXBAODE.cc
Normal file
@@ -0,0 +1,216 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/ensembles/XBAODE.h"
|
||||
|
||||
TEST_CASE("Normal test", "[XBAODE]") {
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::XBAODE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 20);
|
||||
REQUIRE(clf.getNumberOfEdges() == 36);
|
||||
REQUIRE(clf.getNotes().size() == 1);
|
||||
REQUIRE(clf.getVersion() == "0.9.7");
|
||||
REQUIRE(clf.getNotes()[0] == "Number of models: 4");
|
||||
REQUIRE(clf.getNumberOfStates() == 256);
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.933333));
|
||||
}
|
||||
TEST_CASE("Feature_select CFS", "[XBAODE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::XBAODE();
|
||||
clf.setHyperparameters({{"select_features", "CFS"}});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 171);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219));
|
||||
}
|
||||
TEST_CASE("Feature_select IWSS", "[XBAODE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::XBAODE();
|
||||
clf.setHyperparameters({{"select_features", "IWSS"}, {"threshold", 0.5}});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 171);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.697674394));
|
||||
}
|
||||
TEST_CASE("Feature_select FCBF", "[XBAODE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::XBAODE();
|
||||
clf.setHyperparameters({{"select_features", "FCBF"}, {"threshold", 1e-7}});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 171);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219));
|
||||
}
|
||||
TEST_CASE("Test used features in train note and score", "[XBAODE]") {
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
auto clf = bayesnet::XBAODE();
|
||||
clf.setHyperparameters({
|
||||
{"order", "asc"},
|
||||
{"convergence", true},
|
||||
{"select_features", "CFS"},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 72);
|
||||
REQUIRE(clf.getNumberOfEdges() == 136);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 8");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(0.819010437f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.819010437f).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Order asc, desc & random", "[XBAODE]") {
|
||||
auto raw = RawDatasets("glass", true);
|
||||
std::map<std::string, double> scores{{"asc", 0.83645f}, {"desc", 0.84579f}, {"rand", 0.84112}};
|
||||
for (const std::string &order : {"asc", "desc", "rand"}) {
|
||||
auto clf = bayesnet::XBAODE();
|
||||
clf.setHyperparameters({
|
||||
{"order", order},
|
||||
{"bisection", false},
|
||||
{"maxTolerance", 1},
|
||||
{"convergence", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
INFO("XBAODE order: " << order);
|
||||
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
TEST_CASE("Oddities", "[XBAODE]") {
|
||||
auto clf = bayesnet::XBAODE();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto bad_hyper = nlohmann::json{
|
||||
{{"order", "duck"}},
|
||||
{{"select_features", "duck"}},
|
||||
{{"maxTolerance", 0}},
|
||||
{{"maxTolerance", 7}},
|
||||
};
|
||||
for (const auto &hyper : bad_hyper.items()) {
|
||||
INFO("XBAODE hyper: " << hyper.value().dump());
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
||||
}
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters({{"maxTolerance", 0}}), std::invalid_argument);
|
||||
auto bad_hyper_fit = nlohmann::json{
|
||||
{{"select_features", "IWSS"}, {"threshold", -0.01}},
|
||||
{{"select_features", "IWSS"}, {"threshold", 0.51}},
|
||||
{{"select_features", "FCBF"}, {"threshold", 1e-8}},
|
||||
{{"select_features", "FCBF"}, {"threshold", 1.01}},
|
||||
};
|
||||
for (const auto &hyper : bad_hyper_fit.items()) {
|
||||
INFO("XBAODE hyper: " << hyper.value().dump());
|
||||
clf.setHyperparameters(hyper.value());
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing),
|
||||
std::invalid_argument);
|
||||
}
|
||||
auto bad_hyper_fit2 = nlohmann::json{
|
||||
{{"alpha_block", true}, {"block_update", true}},
|
||||
{{"bisection", false}, {"block_update", true}},
|
||||
};
|
||||
for (const auto &hyper : bad_hyper_fit2.items()) {
|
||||
INFO("XBAODE hyper: " << hyper.value().dump());
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
||||
}
|
||||
}
|
||||
TEST_CASE("Bisection Best", "[XBAODE]") {
|
||||
auto clf = bayesnet::XBAODE();
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
{"convergence_best", false},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 210);
|
||||
REQUIRE(clf.getNumberOfEdges() == 406);
|
||||
REQUIRE(clf.getNotes().size() == 1);
|
||||
REQUIRE(clf.getNotes().at(0) == "Number of models: 14");
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Bisection Best vs Last", "[XBAODE]") {
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false);
|
||||
auto clf = bayesnet::XBAODE();
|
||||
auto hyperparameters = nlohmann::json{
|
||||
{"bisection", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
{"convergence_best", true},
|
||||
};
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_best = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_best == Catch::Approx(0.973333359f).epsilon(raw.epsilon));
|
||||
// Now we will set the hyperparameter to use the last accuracy
|
||||
hyperparameters["convergence_best"] = false;
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_last = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Block Update", "[XBAODE]") {
|
||||
auto clf = bayesnet::XBAODE();
|
||||
auto raw = RawDatasets("mfeat-factors", true, 500);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"block_update", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 1085);
|
||||
REQUIRE(clf.getNumberOfEdges() == 2165);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[1] == "Used features in train: 20 of 216");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 5");
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
//
|
||||
// std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;
|
||||
// std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;
|
||||
// std::cout << "Notes size " << clf.getNotes().size() << std::endl;
|
||||
// for (auto note : clf.getNotes()) {
|
||||
// std::cout << note << std::endl;
|
||||
// }
|
||||
// std::cout << "Score " << score << std::endl;
|
||||
}
|
||||
TEST_CASE("Alphablock", "[XBAODE]") {
|
||||
auto clf_alpha = bayesnet::XBAODE();
|
||||
auto clf_no_alpha = bayesnet::XBAODE();
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
clf_alpha.setHyperparameters({
|
||||
{"alpha_block", true},
|
||||
});
|
||||
clf_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
clf_no_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_alpha = clf_alpha.score(raw.X_test, raw.y_test);
|
||||
auto score_no_alpha = clf_no_alpha.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_alpha == Catch::Approx(0.720779f).epsilon(raw.epsilon));
|
||||
REQUIRE(score_no_alpha == Catch::Approx(0.733766f).epsilon(raw.epsilon));
|
||||
}
|
126
tests/TestXSPODE.cc
Normal file
126
tests/TestXSPODE.cc
Normal file
@@ -0,0 +1,126 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include <stdexcept>
|
||||
#include "bayesnet/classifiers/XSPODE.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
TEST_CASE("fit vector test", "[XSPODE]") {
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto scores = std::vector<float>({0.966667, 0.9333333, 0.966667, 0.966667});
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
auto clf = bayesnet::XSpode(i);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states,
|
||||
raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 9);
|
||||
REQUIRE(clf.getNotes().size() == 0);
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
|
||||
}
|
||||
}
|
||||
TEST_CASE("fit dataset test", "[XSPODE]") {
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto scores = std::vector<float>({0.966667, 0.9333333, 0.966667, 0.966667});
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
auto clf = bayesnet::XSpode(i);
|
||||
clf.fit(raw.dataset, raw.features, raw.className, raw.states,
|
||||
raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 9);
|
||||
REQUIRE(clf.getNotes().size() == 0);
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
|
||||
}
|
||||
}
|
||||
TEST_CASE("tensors dataset predict & predict_proba", "[XSPODE]") {
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto scores = std::vector<float>({0.966667, 0.9333333, 0.966667, 0.966667});
|
||||
auto probs_expected = std::vector<std::vector<float>>({
|
||||
{0.999017, 0.000306908, 0.000676449},
|
||||
{0.99831, 0.00119304, 0.000497099},
|
||||
{0.998432, 0.00078416, 0.00078416},
|
||||
{0.998801, 0.000599438, 0.000599438}
|
||||
});
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
auto clf = bayesnet::XSpode(i);
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states,
|
||||
raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 9);
|
||||
REQUIRE(clf.getNotes().size() == 0);
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
|
||||
// Get the first 4 lines of X_test to do predict_proba
|
||||
auto X_reduced = raw.X_test.slice(1, 0, 4);
|
||||
auto proba = clf.predict_proba(X_reduced);
|
||||
for (int p = 0; p < 3; ++p) {
|
||||
REQUIRE(proba[0][p].item<double>() == Catch::Approx(probs_expected.at(i).at(p)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("mfeat-factors dataset test", "[XSPODE]") {
|
||||
auto raw = RawDatasets("mfeat-factors", true);
|
||||
auto scores = std::vector<float>({0.9825, 0.9775, 0.9775, 0.99});
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
auto clf = bayesnet::XSpode(i);
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 217);
|
||||
REQUIRE(clf.getNumberOfEdges() == 433);
|
||||
REQUIRE(clf.getNotes().size() == 0);
|
||||
REQUIRE(clf.getNumberOfStates() == 652320);
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
|
||||
}
|
||||
}
|
||||
TEST_CASE("Laplace predict", "[XSPODE]") {
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto scores = std::vector<float>({0.966666639, 1.0f, 0.933333337, 1.0f});
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
auto clf = bayesnet::XSpode(0);
|
||||
clf.setHyperparameters({ {"parent", i} });
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::LAPLACE);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 9);
|
||||
REQUIRE(clf.getNotes().size() == 0);
|
||||
REQUIRE(clf.getNumberOfStates() == 64);
|
||||
REQUIRE(clf.getNFeatures() == 4);
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
|
||||
}
|
||||
}
|
||||
TEST_CASE("Not fitted model predict", "[XSPODE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::XSpode(0);
|
||||
REQUIRE_THROWS_AS(clf.predict(std::vector<int>({1,2,3})), std::logic_error);
|
||||
}
|
||||
TEST_CASE("Test instance predict", "[XSPODE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::XSpode(0);
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::ORIGINAL);
|
||||
REQUIRE(clf.predict(std::vector<int>({1,2,3,4})) == 1);
|
||||
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.973333359f));
|
||||
// Cestnik is not defined in the classifier so it should imply alpha_ = 0
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::CESTNIK);
|
||||
REQUIRE(clf.predict(std::vector<int>({1,2,3,4})) == 0);
|
||||
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.973333359f));
|
||||
}
|
||||
TEST_CASE("Test to_string and fitx", "[XSPODE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::XSpode(0);
|
||||
auto weights = torch::full({raw.Xt.size(1)}, 1.0 / raw.Xt.size(1), torch::kFloat64);
|
||||
clf.fitx(raw.Xt, raw.yt, weights, bayesnet::Smoothing_t::ORIGINAL);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 9);
|
||||
REQUIRE(clf.getNotes().size() == 0);
|
||||
REQUIRE(clf.getNumberOfStates() == 64);
|
||||
REQUIRE(clf.getNFeatures() == 4);
|
||||
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.966666639f));
|
||||
REQUIRE(clf.to_string().size() == 1966);
|
||||
REQUIRE(clf.graph("Not yet implemented") == std::vector<std::string>({"Not yet implemented"}));
|
||||
}
|
141
tests/TestXSPnDE.cc
Normal file
141
tests/TestXSPnDE.cc
Normal file
@@ -0,0 +1,141 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include "bayesnet/classifiers/XSP2DE.h" // <-- your new 2-superparent classifier
|
||||
#include "TestUtils.h" // for RawDatasets, etc.
|
||||
|
||||
// Helper function to handle each (sp1, sp2) pair in tests
|
||||
static void check_spnde_pair(
|
||||
int sp1,
|
||||
int sp2,
|
||||
RawDatasets &raw,
|
||||
bool fitVector,
|
||||
bool fitTensor)
|
||||
{
|
||||
// Create our classifier
|
||||
bayesnet::XSp2de clf(sp1, sp2);
|
||||
|
||||
// Option A: fit with vector-based data
|
||||
if (fitVector) {
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
}
|
||||
// Option B: fit with the whole dataset in torch::Tensor form
|
||||
else if (fitTensor) {
|
||||
// your “tensor” version of fit
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
}
|
||||
// Option C: or you might do the “dataset” version:
|
||||
else {
|
||||
clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
}
|
||||
|
||||
// Basic checks
|
||||
REQUIRE(clf.getNumberOfNodes() == 5); // for iris: 4 features + 1 class
|
||||
REQUIRE(clf.getNumberOfEdges() == 8);
|
||||
REQUIRE(clf.getNotes().size() == 0);
|
||||
|
||||
// Evaluate on test set
|
||||
float sc = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(sc >= 0.93f);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// 1) Fit vector test
|
||||
// ------------------------------------------------------------
|
||||
TEST_CASE("fit vector test (XSP2DE)", "[XSP2DE]") {
|
||||
auto raw = RawDatasets("iris", true);
|
||||
|
||||
std::vector<std::pair<int,int>> parentPairs = {
|
||||
{0,1}, {2,3}
|
||||
};
|
||||
for (auto &p : parentPairs) {
|
||||
check_spnde_pair(p.first, p.second, raw, /*fitVector=*/true, /*fitTensor=*/false);
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// 2) Fit dataset test
|
||||
// ------------------------------------------------------------
|
||||
TEST_CASE("fit dataset test (XSP2DE)", "[XSP2DE]") {
|
||||
auto raw = RawDatasets("iris", true);
|
||||
|
||||
// Again test multiple pairs:
|
||||
std::vector<std::pair<int,int>> parentPairs = {
|
||||
{0,2}, {1,3}
|
||||
};
|
||||
for (auto &p : parentPairs) {
|
||||
check_spnde_pair(p.first, p.second, raw, /*fitVector=*/false, /*fitTensor=*/false);
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// 3) Tensors dataset predict & predict_proba
|
||||
// ------------------------------------------------------------
|
||||
TEST_CASE("tensors dataset predict & predict_proba (XSP2DE)", "[XSP2DE]") {
|
||||
auto raw = RawDatasets("iris", true);
|
||||
|
||||
std::vector<std::pair<int,int>> parentPairs = {
|
||||
{0,3}, {1,2}
|
||||
};
|
||||
|
||||
for (auto &p : parentPairs) {
|
||||
bayesnet::XSp2de clf(p.first, p.second);
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 8);
|
||||
REQUIRE(clf.getNotes().size() == 0);
|
||||
|
||||
// Check the score
|
||||
float sc = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(sc >= 0.90f);
|
||||
|
||||
auto X_reduced = raw.X_test.slice(1, 0, 3);
|
||||
auto proba = clf.predict_proba(X_reduced);
|
||||
}
|
||||
}
|
||||
TEST_CASE("Check hyperparameters", "[XSP2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
|
||||
auto clf = bayesnet::XSp2de(0, 1);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto clf2 = bayesnet::XSp2de(2, 3);
|
||||
clf2.setHyperparameters({{"parent1", 0}, {"parent2", 1}});
|
||||
clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.to_string() == clf2.to_string());
|
||||
}
|
||||
TEST_CASE("Check different smoothing", "[XSP2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
|
||||
auto clf = bayesnet::XSp2de(0, 1);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::ORIGINAL);
|
||||
auto clf2 = bayesnet::XSp2de(0, 1);
|
||||
clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::LAPLACE);
|
||||
auto clf3 = bayesnet::XSp2de(0, 1);
|
||||
clf3.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::NONE);
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto score2 = clf2.score(raw.X_test, raw.y_test);
|
||||
auto score3 = clf3.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(1.0).epsilon(raw.epsilon));
|
||||
REQUIRE(score2 == Catch::Approx(0.7333333).epsilon(raw.epsilon));
|
||||
REQUIRE(score3 == Catch::Approx(0.966667).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Check rest", "[XSP2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::XSp2de(0, 1);
|
||||
REQUIRE_THROWS_AS(clf.predict_proba(std::vector<int>({1,2,3,4})), std::logic_error);
|
||||
clf.fitx(raw.Xt, raw.yt, raw.weights, bayesnet::Smoothing_t::ORIGINAL);
|
||||
REQUIRE(clf.getNFeatures() == 4);
|
||||
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.973333359f).epsilon(raw.epsilon));
|
||||
REQUIRE(clf.predict({1,2,3,4}) == 1);
|
||||
|
||||
}
|
Reference in New Issue
Block a user