Compare commits

...

137 Commits

Author SHA1 Message Date
3615a1463c Fix some issues in FeatureSelect 2025-05-31 14:36:51 +02:00
5f95117dd4 Merge pull request 'Replace git submodule dependencies for vcpg dependencies' (#35) from vcpkg into main
Reviewed-on: #35
2025-04-27 20:55:03 +00:00
2f5bc10b8e Update sample project and README 2025-04-27 21:25:21 +02:00
257f519641 Fix update_coverage.py mistake in url 2025-04-27 18:41:34 +02:00
5c5ecef3cf Update vcpkg private repo baseline 2025-04-27 18:37:46 +02:00
d0ebe596f6 Fix json module version in test 2025-04-27 18:34:15 +02:00
670b93d0a1 Remove git modules and add vcpkg configuration 2025-04-27 18:33:23 +02:00
306d3a4b55 Reformat source 2025-03-22 10:31:54 +01:00
bf08b0de89 Change clang-format braces position 2025-03-17 18:02:21 +01:00
b976db53c6 Add models to README 2025-03-17 13:13:06 +01:00
be39d2dedb Add ulm class diagram & update .clang-format 2025-03-17 13:06:15 +01:00
4ca770d16b Update README.md & .clang-format 2025-03-17 12:14:57 +01:00
6bf3b939bc Add items to .clang-format 2025-03-17 11:39:33 +01:00
7076efc2a1 Merge pull request 'Optimize BoostAODE -> XBAODE' (#33) from WA2DE into main
Reviewed-on: #33
2025-03-16 17:58:10 +00:00
9ee388561f Update version, changelog, and Xsp2de clf name 2025-03-16 18:55:24 +01:00
70c7d3dd3d Add test to 99.1% 2025-03-14 18:55:29 +01:00
400967b4e3 Add tests to 90% coverage 2025-03-14 14:53:22 +01:00
c234308701 Add SPnDE n=2 2025-03-13 10:58:43 +01:00
4ded6f51eb TestXBAODE complete, fix XBAODE error in no convergence & 99% coverage 2025-03-13 01:28:48 +01:00
b1d317d8f4 Add format and launch config 2025-03-12 16:29:29 +01:00
7876d1a370 Add test 2025-03-12 16:27:19 +01:00
3bdb14bd65 Tests XSpode & XBAODE 2025-03-12 13:46:04 +01:00
71b05cc1a7 Begin XBAODE tests 2025-03-11 18:16:50 +01:00
a59689272d Fix tests 2025-03-11 01:09:37 +01:00
3d8be79b37 Fix XSpode 2025-03-10 22:18:50 +01:00
619276a5ea Update sample_xpode 2025-03-10 21:44:12 +01:00
e681099360 Add sample_xspode 2025-03-10 21:37:14 +01:00
5919fbfd34 Fix Xspode 2025-03-10 21:29:47 +01:00
a26522e62f Fix XSPode 2025-03-10 15:55:48 +01:00
86cccb6c7b Fix XSpode 2025-03-10 14:23:47 +01:00
d1b235261e Fix XSpode 2025-03-10 14:21:01 +01:00
7a8e0391dc continue fixing xspode 2025-03-10 12:18:10 +01:00
6cfbc482d8 change launch.json 2025-03-10 11:20:36 +01:00
ca54f799ee Fix XSpode predict 2025-03-10 11:18:04 +01:00
06621ea361 Add XBAODE & XSpode classifiers 2025-03-09 19:15:00 +01:00
a70ac3e883 Add namespace to Smoothing.h 2025-03-09 11:21:31 +01:00
b987dcbcc4 Refactor Smoothing type to its own file
Add log to boost
2025-03-08 14:04:08 +01:00
81fd7df7f0 Update CHANGELOG 2025-02-13 01:18:43 +01:00
dd98cf159d ComputeCPT Optimization 2025-02-13 01:17:37 +01:00
f658149977 Add dump_cpt to Ensemble 2025-02-12 20:55:35 +01:00
fb957ac3fe First implemented aproximation 2025-01-31 13:55:46 +01:00
b90e558238 Hyperparameter *maxTolerance* in the BoostAODE class is now in [1, 6] range (it was in [1, 4] range before) 2025-01-23 00:56:18 +01:00
64970cf7f7 Merge pull request 'alphablock' (#32) from alphablock into main
Reviewed-on: #32
Added

- Add a new hyperparameter to the BoostAODE class, alphablock, to control the way α is computed, with the last model or with the ensmble built so far. Default value is false.
- Add a new hyperparameter to the SPODE class, parent, to set the root node of the model. If no value is set the root parameter of the constructor is used.
- Add a new hyperparameter to the TAN class, parent, to set the root node of the model. If not set the first feature is used as root.
2025-01-22 11:48:09 +00:00
b571a4da4d Fix typo in CHANGELOG 2025-01-22 12:43:40 +01:00
8a9f329ff9 Remove typo in README 2024-12-18 14:29:12 +01:00
e2781ee525 Add parent hyperparameter to TAN & SPODE 2024-12-17 10:14:14 +01:00
56a2d3ead0 remove uneeded submodule 2024-12-14 20:27:07 +01:00
dc32a0fc47 Fix tests & update dependencies versions 2024-12-14 14:32:51 +01:00
3d6b4f0614 Implement the functionality of the hyperparameter alpha_block with test 2024-12-14 14:02:45 +01:00
18844c7da7 Add hyperparameter to ChangeLog and Boost class 2024-12-14 14:02:10 +01:00
43ceefd2c9 Fix comment in AODELd 2024-12-10 13:35:23 +01:00
e6501502d1 Update docs and help 2024-11-23 20:28:16 +01:00
d84adf6172 Add model to changelog 2024-11-23 19:13:54 +01:00
268a86cbe0 Actualiza Changelog 2024-11-23 19:11:00 +01:00
fc4c93b299 Fix Mst test 2024-11-23 19:07:35 +01:00
86f2bc44fc libmdlp (#31)
Add mdlp as library in lib/
Fix tests to reach 99.1% of coverage

Reviewed-on: #31
2024-11-23 17:22:41 +00:00
f0f3d9ad6e Fix CUDA and mdlp library issues 2024-11-20 21:02:56 +01:00
9a323cd7a3 Remove mdlp submodule 2024-11-20 20:15:49 +01:00
cb949ac7e5 Update dependecies versions 2024-09-29 13:17:44 +02:00
2c297ea15d Control optional doxygen dependency 2024-09-29 12:48:15 +02:00
4e4b6e67f4 Add env parallel variable to Makefile 2024-09-18 11:05:19 +02:00
82847774ee Update Dockerfile 2024-09-13 09:42:06 +02:00
d0955d9369 Merge pull request 'smoothing' (#30) from smoothing into main
Reviewed-on: #30
2024-09-12 20:28:33 +00:00
2d34eb8c89 Update Makefile to get parallel info from env 2024-08-31 12:43:39 +02:00
0159c397fa Update optimization flag in CMakeLists 2024-07-11 12:29:57 +02:00
0bbc8328a9 Change cpt table type to float 2024-07-08 13:27:55 +02:00
35ca862eca Don't allow add node nor add edge on fitted networks 2024-07-07 21:06:59 +02:00
26eb58b104 Forbids to insert the same edge twice 2024-07-04 18:52:41 +02:00
6fcc15d39a Upgrade mdlp library 2024-06-24 12:38:44 +02:00
9a14133be5 Add thread control to vectors predict 2024-06-23 13:02:40 +02:00
59c1cf5b3b Fix number of threads spawned 2024-06-21 19:56:35 +02:00
8e9090d283 Fix tests 2024-06-21 13:58:42 +02:00
02bcab01be Refactor CountingSemaphore as singleton 2024-06-21 09:30:24 +02:00
716748e18c Add Counting Semaphore class
Fix threading in Network
2024-06-20 10:36:09 +02:00
0b31780d39 Add Thread max spawning to Network 2024-06-18 23:18:24 +02:00
fa26aa80f7 Rename OLD_LAPLACE to ORIGINAL 2024-06-13 15:04:15 +02:00
3eb61905fb Upgrade ArffFiles Module version 2024-06-13 12:33:54 +02:00
ca0ae4dacf Refactor Cestnik smoothin factor assuming m=1 2024-06-13 09:11:47 +02:00
b34869cc61 Set smoothing as fit parameter 2024-06-11 11:40:45 +02:00
27a3e5a5e0 Implement 3 types of smoothing 2024-06-10 15:49:01 +02:00
684443a788 Implement Cestnik & Laplace smoothing 2024-06-09 17:19:38 +02:00
6d9badc33b Merge pull request 'BoostA2DE' (#29) from BoostA2DE into main
Reviewed-on: #29
2024-06-09 10:02:47 +00:00
015b1b0c0f Fix diagram size in manual 2024-05-28 11:43:39 +02:00
7bb8e4df01 Fix back to manual link 2024-05-23 18:59:08 +00:00
53710378de Fix manual generation and deploy 2024-05-23 17:34:48 +00:00
c833e9ba32 Remove coverage report from html folder and integrate in doc 2024-05-23 16:27:02 +02:00
f5cb46ee29 Add doc-install to Makefile 2024-05-22 12:09:58 +02:00
fa35681abe Add documentation link to readme 2024-05-22 11:39:33 +02:00
b0bd0e6eee Create doc target to build documentation 2024-05-22 11:10:21 +02:00
d43be27821 Remove manual and doc pages 2024-05-22 10:17:49 +02:00
a2853dd2e5 Add Doxygen to generate man and manual pages 2024-05-21 23:38:10 +02:00
0341bd5648 Refactor ArffFiles library as a git submodule only for tests 2024-05-21 11:50:19 +00:00
22b742f068 Convert ArffFile library to header only library 2024-05-21 10:11:33 +02:00
2584e8294d Force mutual information methods to be at least 0
There were cases where a tiny negative number was returned (less than -1e-7)
Fix mst glass test that is affected with this change
2024-05-17 11:15:45 +02:00
291ba0fb0e First functional BoostA2DE with its 1st test 2024-05-16 16:33:33 +02:00
80043d5181 First approach to BoostA2DE::trainModel 2024-05-16 14:32:59 +02:00
677ec5613d Add features used to selectKPairs 2024-05-16 14:18:45 +02:00
cccaa6e0af Complete selectKPairs method & test 2024-05-16 13:46:38 +02:00
2e3e0e0fc2 Add selectKParis method 2024-05-16 11:17:21 +02:00
8784a24898 Extract buildModel method to parent class in Boost 2024-05-15 20:00:44 +02:00
54496c68f1 Create Boost class as Boost<x> classifiers parent 2024-05-15 19:49:15 +02:00
1f236a70db Create BoostA2DE base class 2024-05-15 11:53:17 +02:00
ef3c74633c Conditional Entropy test 2024-05-15 11:28:09 +02:00
7efd95095c Merge pull request 'AnDE' (#28) from AnDE into main
Reviewed-on: #28
2024-05-15 09:16:12 +00:00
0e24135d46 Complete Conditional Mutual Information and test 2024-05-15 11:09:23 +02:00
521bfd2a8e Remove unoptimized implementation of conditionalEntropy 2024-05-15 01:24:27 +02:00
e2e0fb0c40 Implement Conditional Mutual Information 2024-05-15 00:48:02 +02:00
56b62a67cc Change BoostAODE tests results because folding upgrade 2024-05-12 20:23:05 +02:00
c0fc107abb Fix catch2 submodule config 2024-05-12 19:05:36 +02:00
d8c44b3b7c Add tests to check the correct version of the mdlp, folding and json libraries 2024-05-12 12:22:44 +02:00
6ab7cd2cbd Remove submodule catch from tests/lib 2024-05-12 11:05:53 +02:00
b578ea8a2d Remove module lib/catch2 2024-05-12 11:04:42 +02:00
9a752d15dc Change build cmake folder names to Debug & Release 2024-05-09 10:51:52 +02:00
4992685e94 Add devcontainer to repository
Fix update_coverage.py with lcov2.1 output
2024-05-08 06:42:19 +00:00
346b693c79 Update pdf coverage report 2024-05-06 18:28:15 +02:00
164c8bd90c Update changelog 2024-05-06 18:02:18 +02:00
ced29a2c2e Refactor coverage report generation
Add some tests to reach 99%
2024-05-06 17:56:00 +02:00
0ec53f405f Fix mistakes in feature selection in SPnDE
Complete the first A2DE test
Update version number
2024-05-05 11:14:01 +02:00
f806015b29 Implement SPnDE and A2DE 2024-05-05 01:35:17 +02:00
8115f25c06 Fix mispell mistake in doc 2024-05-02 10:53:15 +02:00
618a1e539c Return File Library to /lib as it is needed by Local Discretization (factorize) 2024-04-30 20:31:14 +02:00
7aeffba740 Add list of models to README 2024-04-30 18:59:38 +02:00
e79ea63afb Merge pull request 'convergence_best' (#27) from convergence_best into main
Add convergence_best as hyperparameter to allow to take the last or the best accuracy as the accuracy to compare to in convergence

Reviewed-on: #27
2024-04-30 16:22:08 +00:00
3c7382a93a Enhance tests coverage and report output 2024-04-30 14:00:24 +02:00
b4a222b100 Update gcovr configuration 2024-04-30 12:06:32 +02:00
23ef0cc5f7 Remove catch2 as submodule
Add link to pdf coverage report
2024-04-30 11:02:23 +02:00
793b2d3cd5 Refactor TestUtils to allow partial and shuffle dataset load 2024-04-30 02:11:14 +02:00
ae469b8146 Add hyperparameter convergence_best
move test libraries to test folder
2024-04-30 00:52:09 +02:00
f014928411 Update Makefile actions for coverage 2024-04-21 18:54:13 +02:00
c4b563a339 Add link to the coverage report in the README.md coverage label 2024-04-21 16:44:35 +02:00
49bb0582e6 Add Library Logo 2024-04-21 11:31:27 +02:00
b4c5261e01 Delete .github/workflows/main.yml 2024-04-20 17:54:56 +00:00
b956aa3873 Upgrade version number to 1.0.5
Fix dependency graph
Remove loguru library
2024-04-20 18:00:40 +02:00
1f06631f69 Add check dependencies in make diagrams endpoint 2024-04-19 19:47:37 +02:00
6dd589bd61 Add diagram changes to CHANGELOG 2024-04-19 18:29:43 +02:00
6475f10825 Add class and dependency diagrams 2024-04-19 14:33:00 +02:00
7d906b24d1 Merge pull request 'block_update' (#26) from block_update into main
Reviewed-on: #26
2024-04-15 10:26:50 +00:00
175 changed files with 16509 additions and 30181 deletions

10
.clang-format Normal file
View File

@@ -0,0 +1,10 @@
# .clang-format
---
BasedOnStyle: LLVM
AccessModifierOffset: -4
BreakBeforeBraces: Linux
ColumnLimit: 0
FixNamespaceComments: false
IndentWidth: 4
NamespaceIndentation: All
TabWidth: 4

39
.clang-uml Normal file
View File

@@ -0,0 +1,39 @@
compilation_database_dir: build_Debug
output_directory: diagrams
diagrams:
BayesNet:
type: class
glob:
- bayesnet/*.h
- bayesnet/classifiers/*.h
- bayesnet/classifiers/*.cc
- bayesnet/ensembles/*.h
- bayesnet/ensembles/*.cc
- bayesnet/feature_selection/*.h
- bayesnet/feature_selection/*.cc
- bayesnet/network/*.h
- bayesnet/network/*.cc
- bayesnet/utils/*.h
- bayesnet/utils/*.cc
include:
# Only include entities from the following namespaces
namespaces:
- bayesnet
exclude:
access:
- private
plantuml:
style:
# Apply this style to all classes in the diagram
class: "#aliceblue;line:blue;line.dotted;text:blue"
# Apply this style to all packages in the diagram
package: "#back:grey"
# Make all template instantiation relations point upwards and draw them
# as green and dotted lines
instantiation: "up[#green,dotted]"
cmd: "/usr/bin/plantuml -tsvg \"diagrams/{}.puml\""
before:
- 'title clang-uml class diagram model'
mermaid:
before:
- 'classDiagram'

57
.devcontainer/Dockerfile Normal file
View File

@@ -0,0 +1,57 @@
FROM mcr.microsoft.com/devcontainers/cpp:ubuntu22.04
ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.29.3"
# Optionally install the cmake for vcpkg
COPY ./reinstall-cmake.sh /tmp/
RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \
chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \
fi \
&& rm -f /tmp/reinstall-cmake.sh
# [Optional] Uncomment this section to install additional vcpkg ports.
# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install <your-port-name-here>"
# [Optional] Uncomment this section to install additional packages.
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends wget software-properties-common libdatetime-perl libcapture-tiny-perl libdatetime-format-dateparse-perl libgd-perl
# Add PPA for GCC 13
RUN add-apt-repository ppa:ubuntu-toolchain-r/test
RUN apt-get update
# Install GCC 13.1
RUN apt-get install -y gcc-13 g++-13 doxygen
# Install lcov 2.1
RUN wget --quiet https://github.com/linux-test-project/lcov/releases/download/v2.1/lcov-2.1.tar.gz && \
tar -xvf lcov-2.1.tar.gz && \
cd lcov-2.1 && \
make install
RUN rm lcov-2.1.tar.gz
RUN rm -fr lcov-2.1
# Install Miniconda
RUN mkdir -p /opt/conda
RUN wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh" -O /opt/conda/miniconda.sh && \
bash /opt/conda/miniconda.sh -b -p /opt/miniconda
# Add conda to PATH
ENV PATH=/opt/miniconda/bin:$PATH
# add CXX and CC to the environment with gcc 13
ENV CXX=/usr/bin/g++-13
ENV CC=/usr/bin/gcc-13
# link the last gcov version
RUN rm /usr/bin/gcov
RUN ln -s /usr/bin/gcov-13 /usr/bin/gcov
# change ownership of /opt/miniconda to vscode user
RUN chown -R vscode:vscode /opt/miniconda
USER vscode
RUN conda init
RUN conda install -y -c conda-forge yaml pytorch

View File

@@ -0,0 +1,37 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/cpp
{
"name": "C++",
"build": {
"dockerfile": "Dockerfile"
},
// "features": {
// "ghcr.io/devcontainers/features/conda:1": {}
// }
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "make release && make debug && echo 'Done!'",
// Configure tool-specific properties.
// "customizations": {},
"customizations": {
// Configure properties specific to VS Code.
"vscode": {
"settings": {},
"extensions": [
"ms-vscode.cpptools",
"ms-vscode.cpptools-extension-pack",
"ms-vscode.cpptools-themes",
"ms-vscode.cmake-tools",
"ms-azuretools.vscode-docker",
"jbenden.c-cpp-flylint",
"matepek.vscode-catch2-test-adapter",
"GitHub.copilot"
]
}
}
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}

View File

@@ -0,0 +1,59 @@
#!/usr/bin/env bash
#-------------------------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information.
#-------------------------------------------------------------------------------------------------------------
#
set -e
CMAKE_VERSION=${1:-"none"}
if [ "${CMAKE_VERSION}" = "none" ]; then
echo "No CMake version specified, skipping CMake reinstallation"
exit 0
fi
# Cleanup temporary directory and associated files when exiting the script.
# Remove the temporary download directory (if one was created) and exit with
# the status the script was already exiting with. Installed as the EXIT trap.
cleanup() {
# Capture the pending exit status first, before any command here overwrites $?.
EXIT_CODE=$?
# Disable errexit (enabled at the top of the script) so a failing cleanup
# command does not abort this handler.
set +e
# TMP_DIR is only assigned later in the script; it is empty if we exit earlier.
if [[ -n "${TMP_DIR}" ]]; then
echo "Executing cleanup of tmp files"
rm -Rf "${TMP_DIR}"
fi
# Propagate the original status rather than that of the cleanup commands.
exit $EXIT_CODE
}
trap cleanup EXIT
echo "Installing CMake..."
apt-get -y purge --auto-remove cmake
mkdir -p /opt/cmake
architecture=$(dpkg --print-architecture)
case "${architecture}" in
arm64)
ARCH=aarch64 ;;
amd64)
ARCH=x86_64 ;;
*)
echo "Unsupported architecture ${architecture}."
exit 1
;;
esac
CMAKE_BINARY_NAME="cmake-${CMAKE_VERSION}-linux-${ARCH}.sh"
CMAKE_CHECKSUM_NAME="cmake-${CMAKE_VERSION}-SHA-256.txt"
TMP_DIR=$(mktemp -d -t cmake-XXXXXXXXXX)
echo "${TMP_DIR}"
cd "${TMP_DIR}"
curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_BINARY_NAME}" -O
curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_CHECKSUM_NAME}" -O
sha256sum -c --ignore-missing "${CMAKE_CHECKSUM_NAME}"
sh "${TMP_DIR}/${CMAKE_BINARY_NAME}" --prefix=/opt/cmake --skip-license
ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
ln -s /opt/cmake/bin/ctest /usr/local/bin/ctest

12
.github/dependabot.yml vendored Normal file
View File

@@ -0,0 +1,12 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for more information:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
# https://containers.dev/guide/dependabot
version: 2
updates:
- package-ecosystem: "devcontainers"
directory: "/"
schedule:
interval: weekly

8
.gitignore vendored
View File

@@ -39,4 +39,10 @@ cmake-build*/**
puml/** puml/**
.vscode/settings.json .vscode/settings.json
sample/build sample/build
**/.DS_Store
docs/manual
docs/man3
docs/man
docs/Doxyfile
.cache
vcpkg_installed

20
.gitmodules vendored
View File

@@ -1,20 +0,0 @@
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"]
path = lib/catch2
main = v2.x
update = merge
url = https://github.com/catchorg/Catch2.git
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
master = master
update = merge
[submodule "lib/folding"]
path = lib/folding
url = https://github.com/rmontanana/folding
main = main
update = merge

View File

@@ -0,0 +1,4 @@
{
"sonarCloudOrganization": "rmontanana",
"projectKey": "rmontanana_BayesNet"
}

8
.vscode/launch.json vendored
View File

@@ -5,7 +5,7 @@
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "sample", "name": "sample",
"program": "${workspaceFolder}/build_release/sample/bayesnet_sample", "program": "${workspaceFolder}/sample/build/bayesnet_sample",
"args": [ "args": [
"${workspaceFolder}/tests/data/glass.arff" "${workspaceFolder}/tests/data/glass.arff"
] ]
@@ -14,11 +14,11 @@
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "test", "name": "test",
"program": "${workspaceFolder}/build_debug/tests/TestBayesNet", "program": "${workspaceFolder}/build_Debug/tests/TestBayesNet",
"args": [ "args": [
"Block Update" "[XBAODE]"
], ],
"cwd": "${workspaceFolder}/build_debug/tests" "cwd": "${workspaceFolder}/build_Debug/tests"
}, },
{ {
"name": "(gdb) Launch", "name": "(gdb) Launch",

View File

@@ -5,7 +5,86 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [unreleased] ## [Unreleased]
## [1.1.0] - 2025-04-27
### Internal
- Add changes to .clang-format to adjust to vscode format style thanks to <https://clang-format-configurator.site/>
- Remove all the dependencies as git submodules and add them as vcpkg dependencies.
- Fix the dependencies versions for this specific BayesNet version.
## [1.0.7] 2025-03-16
### Added
- A new hyperparameter to the BoostAODE class, *alphablock*, to control the way &alpha; is computed, with the last model or with the ensemble built so far. Default value is *false*.
- A new hyperparameter to the SPODE class, *parent*, to set the root node of the model. If no value is set the root parameter of the constructor is used.
- A new hyperparameter to the TAN class, *parent*, to set the root node of the model. If not set the first feature is used as root.
- A new model named XSPODE, an optimized for speed averaged one dependence estimator.
- A new model named XSP2DE, an optimized for speed averaged two dependence estimator.
- A new model named XBAODE, an optimized for speed BoostAODE model.
- A new model named XBA2DE, an optimized for speed BoostA2DE model.
### Internal
- Optimize ComputeCPT method in the Node class.
- Add methods getCount and getMaxCount to the CountingSemaphore class, returning the current count and the maximum count of threads respectively.
### Changed
- Hyperparameter *maxTolerance* in the BoostAODE class is now in [1, 6] range (it was in [1, 4] range before).
## [1.0.6] 2024-11-23
### Fixed
- Prevent existing edges from being added to the network in the `add_edge` method.
- Don't allow adding nodes or edges to already fitted networks.
- Number of threads spawned
- Network class tests
### Added
- Library logo generated with <https://openart.ai> to README.md
- Link to the coverage report in the README.md coverage label.
- *convergence_best* hyperparameter to the BoostAODE class, to control the way the prior accuracy is computed if convergence is set. Default value is *false*.
- SPnDE model.
- A2DE model.
- BoostA2DE model.
- A2DE & SPnDE tests.
- Add tests to reach 99% of coverage.
- Add tests to check the correct version of the mdlp, folding and json libraries.
- Library documentation generated with Doxygen.
- Link to documentation in the README.md.
- Three types of smoothing the Bayesian Network ORIGINAL, LAPLACE and CESTNIK.
### Internal
- Fixed doxygen optional dependency
- Add env parallel variable to Makefile
- Add CountingSemaphore class to manage the number of threads spawned.
- Ignore CUDA language in CMake CodeCoverage module.
- Update mdlp library as a git submodule.
- Create library ShuffleArffFile to limit the number of samples with a parameter and shuffle them.
- Refactor catch2 library location to test/lib
- Refactor loadDataset function in tests.
- Remove conditionalEdgeWeights method in BayesMetrics.
- Refactor Coverage Report generation.
- Add devcontainer to work on apple silicon.
- Change build cmake folder names to Debug & Release.
- Add a Makefile target (doc) to generate the documentation.
- Add a Makefile target (doc-install) to install the documentation.
### Libraries versions
- mdlp: 2.0.1
- Folding: 1.1.0
- json: 3.11
- ArffFiles: 1.1.0
## [1.0.5] 2024-04-20
### Added ### Added
@@ -16,6 +95,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Badges of coverage and code quality (codacy) in README.md. Coverage badge is updated with *make viewcoverage* - Badges of coverage and code quality (codacy) in README.md. Coverage badge is updated with *make viewcoverage*
- Tests to reach 97% of coverage. - Tests to reach 97% of coverage.
- Copyright header to source files. - Copyright header to source files.
- Diagrams to README.md: UML class diagram & dependency diagram
- Action to create diagrams to Makefile: *make diagrams*
### Changed ### Changed
@@ -23,6 +104,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- The worse model count in BoostAODE is reset to 0 every time a new model produces better accuracy, so the tolerance of the model is meant to be the number of **consecutive** models that produce worse accuracy. - The worse model count in BoostAODE is reset to 0 every time a new model produces better accuracy, so the tolerance of the model is meant to be the number of **consecutive** models that produce worse accuracy.
- Default hyperparameter values in BoostAODE: bisection is true, maxTolerance is 3, convergence is true - Default hyperparameter values in BoostAODE: bisection is true, maxTolerance is 3, convergence is true
### Removed
- The 'predict_single' hyperparameter from the BoostAODE class.
- The 'repeatSparent' hyperparameter from the BoostAODE class.
## [1.0.4] 2024-03-06 ## [1.0.4] 2024-03-06
### Added ### Added

View File

@@ -0,0 +1,5 @@
# Set the default graph title
set(GRAPHVIZ_GRAPH_NAME "BayesNet dependency graph")
set(GRAPHVIZ_SHARED_LIBS OFF)
set(GRAPHVIZ_STATIC_LIBS ON)

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20) cmake_minimum_required(VERSION 3.20)
project(BayesNet project(BayesNet
VERSION 1.0.4.1 VERSION 1.1.0
DESCRIPTION "Bayesian Network and basic classifiers Library." DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet" HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX LANGUAGES CXX
@@ -25,8 +25,12 @@ set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -O0 -g") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -fno-elide-constructors")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
endif()
# Options # Options
# ------- # -------
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF) option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
@@ -37,7 +41,6 @@ option(INSTALL_GTEST "Enable installation of googletest." OFF)
# CMakes modules # CMakes modules
# -------------- # --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
include(AddGitSubmodule)
if (CMAKE_BUILD_TYPE STREQUAL "Debug") if (CMAKE_BUILD_TYPE STREQUAL "Debug")
MESSAGE("Debug mode") MESSAGE("Debug mode")
@@ -45,11 +48,12 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CODE_COVERAGE ON) set(CODE_COVERAGE ON)
endif (CMAKE_BUILD_TYPE STREQUAL "Debug") endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
message(STATUS "Languages=${LANGUAGES}")
if (CODE_COVERAGE) if (CODE_COVERAGE)
enable_testing() enable_testing()
include(CodeCoverage) include(CodeCoverage)
MESSAGE("Code coverage enabled") MESSAGE(STATUS "Code coverage enabled")
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage") SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE) endif (CODE_COVERAGE)
@@ -59,21 +63,22 @@ endif (ENABLE_CLANG_TIDY)
# External libraries - dependencies of BayesNet # External libraries - dependencies of BayesNet
# --------------------------------------------- # ---------------------------------------------
# include(FetchContent)
add_git_submodule("lib/mdlp") find_package(Torch CONFIG REQUIRED)
add_git_submodule("lib/json") find_package(fimdlp CONFIG REQUIRED)
find_package(nlohmann_json CONFIG REQUIRED)
find_package(folding CONFIG REQUIRED)
# Subdirectories # Subdirectories
# -------------- # --------------
add_subdirectory(config) add_subdirectory(config)
add_subdirectory(lib/Files)
add_subdirectory(bayesnet) add_subdirectory(bayesnet)
# Testing # Testing
# ------- # -------
if (ENABLE_TESTING) if (ENABLE_TESTING)
MESSAGE("Testing enabled") MESSAGE(STATUS "Testing enabled")
add_git_submodule("lib/catch2") find_package(Catch2 CONFIG REQUIRED)
include(CTest) include(CTest)
add_subdirectory(tests) add_subdirectory(tests)
endif (ENABLE_TESTING) endif (ENABLE_TESTING)
@@ -86,3 +91,18 @@ install(TARGETS BayesNet
CONFIGURATIONS Release) CONFIGURATIONS Release)
install(DIRECTORY bayesnet/ DESTINATION include/bayesnet FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h") install(DIRECTORY bayesnet/ DESTINATION include/bayesnet FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h")
install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h DESTINATION include/bayesnet CONFIGURATIONS Release) install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h DESTINATION include/bayesnet CONFIGURATIONS Release)
# Documentation
# -------------
find_package(Doxygen)
if (Doxygen_FOUND)
set(DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs)
set(doxyfile_in ${DOC_DIR}/Doxyfile.in)
set(doxyfile ${DOC_DIR}/Doxyfile)
configure_file(${doxyfile_in} ${doxyfile} @ONLY)
doxygen_add_docs(doxygen
WORKING_DIRECTORY ${DOC_DIR}
CONFIG_FILE ${doxyfile})
else (Doxygen_FOUND)
MESSAGE("* Doxygen not found")
endif (Doxygen_FOUND)

143
Makefile
View File

@@ -1,12 +1,22 @@
SHELL := /bin/bash SHELL := /bin/bash
.DEFAULT_GOAL := help .DEFAULT_GOAL := help
.PHONY: viewcoverage coverage setup help install uninstall buildr buildd test clean debug release sample updatebadge .PHONY: viewcoverage coverage setup help install uninstall diagrams buildr buildd test clean debug release sample updatebadge doc doc-install init clean-test
f_release = build_release f_release = build_Release
f_debug = build_debug f_debug = build_Debug
f_diagrams = diagrams
app_targets = BayesNet app_targets = BayesNet
test_targets = TestBayesNet test_targets = TestBayesNet
n_procs = -j 16 clang-uml = clang-uml
plantuml = plantuml
lcov = lcov
genhtml = genhtml
dot = dot
docsrcdir = docs/manual
mansrcdir = docs/man3
mandestdir = /usr/local/share/man
sed_command_link = 's/e">LCOV -/e"><a href="https:\/\/rmontanana.github.io\/bayesnet">Back to manual<\/a> LCOV -/g'
sed_command_diagram = 's/Diagram"/Diagram" width="100%" height="100%" /g'
define ClearTests define ClearTests
@for t in $(test_targets); do \ @for t in $(test_targets); do \
@@ -31,19 +41,29 @@ setup: ## Install dependencies for tests and coverage
pip install gcovr; \ pip install gcovr; \
sudo dnf install lcov;\ sudo dnf install lcov;\
fi fi
@echo "* You should install plantuml & graphviz for the diagrams"
dependency: ## Create a dependency graph diagram of the project (build/dependency.png) diagrams: ## Create an UML class diagram & dependency of the project (diagrams/BayesNet.png)
@which $(plantuml) || (echo ">>> Please install plantuml"; exit 1)
@which $(dot) || (echo ">>> Please install graphviz"; exit 1)
@which $(clang-uml) || (echo ">>> Please install clang-uml"; exit 1)
@export PLANTUML_LIMIT_SIZE=16384
@echo ">>> Creating UML class diagram of the project...";
@$(clang-uml) -p
@cd $(f_diagrams); \
$(plantuml) -tsvg BayesNet.puml
@echo ">>> Creating dependency graph diagram of the project..."; @echo ">>> Creating dependency graph diagram of the project...";
$(MAKE) debug $(MAKE) debug
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png cd $(f_debug) && cmake .. --graphviz=dependency.dot
@$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg
buildd: ## Build the debug targets buildd: ## Build the debug targets
cmake --build $(f_debug) -t $(app_targets) $(n_procs) cmake --build $(f_debug) -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
buildr: ## Build the release targets buildr: ## Build the release targets
cmake --build $(f_release) -t $(app_targets) $(n_procs) cmake --build $(f_release) -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
clean: ## Clean the tests info clean-test: ## Clean the tests info
@echo ">>> Cleaning Debug BayesNet tests..."; @echo ">>> Cleaning Debug BayesNet tests...";
$(call ClearTests) $(call ClearTests)
@echo ">>> Done"; @echo ">>> Done";
@@ -59,33 +79,56 @@ install: ## Install library
@cmake --install $(f_release) --prefix $(prefix) @cmake --install $(f_release) --prefix $(prefix)
@echo ">>> Done"; @echo ">>> Done";
init: ## Initialize the project installing dependencies
@echo ">>> Installing dependencies"
@vcpkg install
@echo ">>> Done";
clean: ## Clean the project
@echo ">>> Cleaning the project..."
@if test -d build_Debug ; then echo "- Deleting build_Debug folder" ; rm -rf build_Debug; fi
@if test -d build_Release ; then echo "- Deleting build_Release folder" ; rm -rf build_Release; fi
@if test -f CMakeCache.txt ; then echo "- Deleting CMakeCache.txt"; rm -f CMakeCache.txt; fi
@if test -d vcpkg_installed ; then echo "- Deleting vcpkg_installed folder" ; rm -rf vcpkg_installed; fi
@$(MAKE) clean-test
@echo ">>> Done";
debug: ## Build a debug version of the project debug: ## Build a debug version of the project
@echo ">>> Building Debug BayesNet..."; @echo ">>> Building Debug BayesNet...";
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi @if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
@mkdir $(f_debug); @mkdir $(f_debug);
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON @cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake
@echo ">>> Done"; @echo ">>> Done";
release: ## Build a Release version of the project release: ## Build a Release version of the project
@echo ">>> Building Release BayesNet..."; @echo ">>> Building Release BayesNet...";
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi @if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
@mkdir $(f_release); @mkdir $(f_release);
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release @cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake
@echo ">>> Done"; @echo ">>> Done";
fname = "tests/data/iris.arff" fname = "tests/data/iris.arff"
sample: ## Build sample sample: ## Build sample
@echo ">>> Building Sample..."; @echo ">>> Building Sample...";
@if [ -d ./sample/build ]; then rm -rf ./sample/build; fi @if [ -d ./sample/build ]; then rm -rf ./sample/build; fi
@cd sample && cmake -B build -S . && cmake --build build -t bayesnet_sample @cd sample && cmake -B build -S . -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake && \
cmake --build build -t bayesnet_sample
sample/build/bayesnet_sample $(fname) sample/build/bayesnet_sample $(fname)
@echo ">>> Done"; @echo ">>> Done";
fname = "tests/data/iris.arff"
sample2: ## Build sample2
@echo ">>> Building Sample...";
@if [ -d ./sample/build ]; then rm -rf ./sample/build; fi
@cd sample && cmake -B build -S . -D CMAKE_BUILD_TYPE=Debug && cmake --build build -t bayesnet_sample_xspode
sample/build/bayesnet_sample_xspode $(fname)
@echo ">>> Done";
opt = "" opt = ""
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet & Platform tests..."; @echo ">>> Running BayesNet tests...";
@$(MAKE) clean @$(MAKE) clean-test
@cmake --build $(f_debug) -t $(test_targets) $(n_procs) @cmake --build $(f_debug) -t $(test_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
@for t in $(test_targets); do \ @for t in $(test_targets); do \
echo ">>> Running $$t...";\ echo ">>> Running $$t...";\
if [ -f $(f_debug)/tests/$$t ]; then \ if [ -f $(f_debug)/tests/$$t ]; then \
@@ -98,31 +141,71 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
coverage: ## Run tests and generate coverage report (build/index.html) coverage: ## Run tests and generate coverage report (build/index.html)
@echo ">>> Building tests with coverage..." @echo ">>> Building tests with coverage..."
@$(MAKE) test @which $(lcov) || (echo ">>> Please install lcov"; exit 1)
@gcovr $(f_debug)/tests @if [ ! -f $(f_debug)/tests/coverage.info ] ; then $(MAKE) test ; fi
@echo ">>> Done";
viewcoverage: ## Run tests, generate coverage report and upload it to codecov (build/index.html)
@echo ">>> Building tests with coverage..."
@$(MAKE) coverage
@echo ">>> Building report..." @echo ">>> Building report..."
@cd $(f_debug)/tests; \ @cd $(f_debug)/tests; \
lcov --directory . --capture --output-file coverage.info >/dev/null 2>&1; \ $(lcov) --directory CMakeFiles --capture --demangle-cpp --ignore-errors source,source --output-file coverage.info >/dev/null 2>&1; \
lcov --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \ $(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
lcov --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \ $(lcov) --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
lcov --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \ $(lcov) --remove coverage.info 'include/*' --output-file coverage.info >/dev/null 2>&1; \
lcov --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \ $(lcov) --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
lcov --remove coverage.info 'bayesnet/utils/loguru.*' --output-file coverage.info >/dev/null 2>&1; \ $(lcov) --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
genhtml coverage.info --output-directory coverage >/dev/null 2>&1; $(lcov) --remove coverage.info 'bayesnet/utils/loguru.*' --ignore-errors unused --output-file coverage.info >/dev/null 2>&1; \
$(lcov) --remove coverage.info '/opt/miniconda/*' --ignore-errors unused --output-file coverage.info >/dev/null 2>&1; \
$(lcov) --summary coverage.info
@$(MAKE) updatebadge @$(MAKE) updatebadge
@xdg-open $(f_debug)/tests/coverage/index.html || open $(f_debug)/tests/coverage/index.html 2>/dev/null @echo ">>> Done";
viewcoverage: ## View the html coverage report
@which $(genhtml) >/dev/null || (echo ">>> Please install lcov (genhtml not found)"; exit 1)
@if [ ! -d $(docsrcdir)/coverage ]; then mkdir -p $(docsrcdir)/coverage; fi
@if [ ! -f $(f_debug)/tests/coverage.info ]; then \
echo ">>> No coverage.info file found. Run make coverage first!"; \
exit 1; \
fi
@$(genhtml) $(f_debug)/tests/coverage.info --demangle-cpp --output-directory $(docsrcdir)/coverage --title "BayesNet Coverage Report" -s -k -f --legend >/dev/null 2>&1;
@xdg-open $(docsrcdir)/coverage/index.html || open $(docsrcdir)/coverage/index.html 2>/dev/null
@echo ">>> Done"; @echo ">>> Done";
updatebadge: ## Update the coverage badge in README.md updatebadge: ## Update the coverage badge in README.md
@which python || (echo ">>> Please install python"; exit 1)
@if [ ! -f $(f_debug)/tests/coverage.info ]; then \
echo ">>> No coverage.info file found. Run make coverage first!"; \
exit 1; \
fi
@echo ">>> Updating coverage badge..." @echo ">>> Updating coverage badge..."
@env python update_coverage.py $(f_debug)/tests @env python update_coverage.py $(f_debug)/tests
@echo ">>> Done"; @echo ">>> Done";
doc: ## Generate documentation
@echo ">>> Generating documentation..."
@cmake --build $(f_release) -t doxygen
@cp -rp diagrams $(docsrcdir)
@
@if [ "$(shell uname)" = "Darwin" ]; then \
sed -i "" $(sed_command_link) $(docsrcdir)/coverage/index.html ; \
sed -i "" $(sed_command_diagram) $(docsrcdir)/index.html ; \
else \
sed -i $(sed_command_link) $(docsrcdir)/coverage/index.html ; \
sed -i $(sed_command_diagram) $(docsrcdir)/index.html ; \
fi
@echo ">>> Done";
docdir = ""
doc-install: ## Install documentation
@echo ">>> Installing documentation..."
@if [ "$(docdir)" = "" ]; then \
echo "docdir parameter has to be set when calling doc-install, i.e. docdir=../bayesnet_help"; \
exit 1; \
fi
@if [ ! -d $(docdir) ]; then \
@$(MAKE) doc; \
fi
@cp -rp $(docsrcdir)/* $(docdir)
@sudo cp -rp $(mansrcdir) $(mandestdir)
@echo ">>> Done";
help: ## Show help message help: ## Show help message
@IFS=$$'\n' ; \ @IFS=$$'\n' ; \
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \ help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \

161
README.md
View File

@@ -1,39 +1,114 @@
# BayesNet # <img src="logo.png" alt="logo" width="50"/> BayesNet
![C++](https://img.shields.io/badge/c++-%2300599C.svg?style=flat&logo=c%2B%2B&logoColor=white) ![C++](https://img.shields.io/badge/c++-%2300599C.svg?style=flat&logo=c%2B%2B&logoColor=white)
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](<https://opensource.org/licenses/MIT>) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](<https://opensource.org/licenses/MIT>)
![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000) ![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea) [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
![Static Badge](https://img.shields.io/badge/Coverage-97,2%25-green) [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es&logo=gitea)
[![Coverage Badge](https://img.shields.io/badge/Coverage-99,1%25-green)](https://gitea.rmontanana.es/rmontanana/BayesNet)
[![DOI](https://zenodo.org/badge/667782806.svg)](https://doi.org/10.5281/zenodo.14210344)
Bayesian Network Classifiers using libtorch from scratch Bayesian Network Classifiers library
## Dependencies
The only external dependency is [libtorch](https://pytorch.org/cppdocs/installing.html) which can be installed with the following commands:
```bash
wget https://download.pytorch.org/libtorch/nightly/cpu/libtorch-shared-with-deps-latest.zip
unzip libtorch-shared-with-deps-latest.zips
```
## Setup ## Setup
### Using the vcpkg library
You can use the library with the vcpkg library manager. In your project you have to add the following files:
#### vcpkg.json
```json
{
"name": "sample-project",
"version-string": "0.1.0",
"dependencies": [
"bayesnet"
]
}
```
#### vcpkg-configuration.json
```json
{
"registries": [
{
"kind": "git",
"repository": "https://github.com/rmontanana/vcpkg-stash",
"baseline": "393efa4e74e053b6f02c4ab03738c8fe796b28e5",
"packages": [
"folding",
"bayesnet",
"arff-files",
"fimdlp",
"libtorch-bin"
]
}
],
"default-registry": {
"kind": "git",
"repository": "https://github.com/microsoft/vcpkg",
"baseline": "760bfd0c8d7c89ec640aec4df89418b7c2745605"
}
}
```
#### CMakeLists.txt
You have to include the following lines in your `CMakeLists.txt` file:
```cmake
find_package(bayesnet CONFIG REQUIRED)
add_executable(myapp main.cpp)
target_link_libraries(myapp PRIVATE bayesnet::bayesnet)
```
After that, you can use the `vcpkg` command to install the dependencies:
```bash
vcpkg install
```
**Note: In the `sample` folder you can find a sample application that uses the library. You can use it as a reference to create your own application.**
## Playing with the library
The dependencies are managed with [vcpkg](https://vcpkg.io/) and supported by a private vcpkg repository in [https://github.com/rmontanana/vcpkg-stash](https://github.com/rmontanana/vcpkg-stash).
### Getting the code
```bash
git clone https://github.com/doctorado-ml/bayesnet
```
Once you have the code, you can use the `make` command to build the project. The `Makefile` is used to manage the build process and it will automatically download and install the dependencies.
### Release ### Release
```bash ```bash
make release make init # Install dependencies
make buildr make release # Build the release version
sudo make install make buildr # compile and link the release version
``` ```
### Debug & Tests ### Debug & Tests
```bash ```bash
make debug make init # Install dependencies
make test make debug # Build the debug version
make coverage make test # Run the tests
```
### Coverage
```bash
make coverage # Run the tests with coverage
make viewcoverage # View the coverage report in the browser
``` ```
### Sample app ### Sample app
@@ -47,4 +122,48 @@ make sample fname=tests/data/glass.arff
## Models ## Models
### [BoostAODE](docs/BoostAODE.md) #### - TAN
#### - KDB
#### - SPODE
#### - SPnDE
#### - AODE
#### - A2DE
#### - [BoostAODE](docs/BoostAODE.md)
#### - XBAODE
#### - BoostA2DE
#### - XBA2DE
### With Local Discretization
#### - TANLd
#### - KDBLd
#### - SPODELd
#### - AODELd
## Documentation
### [Manual](https://rmontanana.github.io/bayesnet/)
### [Coverage report](https://rmontanana.github.io/bayesnet/coverage/index.html)
## Diagrams
### UML Class Diagram
![BayesNet UML Class Diagram](diagrams/BayesNet.svg)
### Dependency Diagram
![BayesNet Dependency Diagram](diagrams/dependency.svg)

View File

@@ -8,17 +8,19 @@
#include <vector> #include <vector>
#include <torch/torch.h> #include <torch/torch.h>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "bayesnet/network/Network.h"
namespace bayesnet { namespace bayesnet {
enum status_t { NORMAL, WARNING, ERROR }; enum status_t { NORMAL, WARNING, ERROR };
class BaseClassifier { class BaseClassifier {
public: public:
// X is nxm std::vector, y is nx1 std::vector
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
// X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) = 0;
virtual ~BaseClassifier() = default; virtual ~BaseClassifier() = default;
// X is nxm std::vector, y is nx1 std::vector
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
// X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
torch::Tensor virtual predict(torch::Tensor& X) = 0; torch::Tensor virtual predict(torch::Tensor& X) = 0;
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0; std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
torch::Tensor virtual predict_proba(torch::Tensor& X) = 0; torch::Tensor virtual predict_proba(torch::Tensor& X) = 0;
@@ -26,8 +28,8 @@ namespace bayesnet {
status_t virtual getStatus() const = 0; status_t virtual getStatus() const = 0;
float virtual score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0; float virtual score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0; float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes()const = 0; int virtual getNumberOfNodes() const = 0;
int virtual getNumberOfEdges()const = 0; int virtual getNumberOfEdges() const = 0;
int virtual getNumberOfStates() const = 0; int virtual getNumberOfStates() const = 0;
int virtual getClassNumStates() const = 0; int virtual getClassNumStates() const = 0;
std::vector<std::string> virtual show() const = 0; std::vector<std::string> virtual show() const = 0;
@@ -35,11 +37,13 @@ namespace bayesnet {
virtual std::string getVersion() = 0; virtual std::string getVersion() = 0;
std::vector<std::string> virtual topological_order() = 0; std::vector<std::string> virtual topological_order() = 0;
std::vector<std::string> virtual getNotes() const = 0; std::vector<std::string> virtual getNotes() const = 0;
std::string virtual dump_cpt()const = 0; std::string virtual dump_cpt() const = 0;
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0; virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; } std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
protected: protected:
virtual void trainModel(const torch::Tensor& weights) = 0; virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
std::vector<std::string> validHyperparameters; std::vector<std::string> validHyperparameters;
std::vector<std::string> notes; // Used to store messages that occurred during the fit process
status_t status = NORMAL;
}; };
} }

View File

@@ -1,6 +1,6 @@
include_directories( include_directories(
${BayesNet_SOURCE_DIR}/lib/mdlp ${BayesNet_SOURCE_DIR}/lib/log
${BayesNet_SOURCE_DIR}/lib/Files ${BayesNet_SOURCE_DIR}/lib/mdlp/src
${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/json/include ${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR} ${BayesNet_SOURCE_DIR}
@@ -10,4 +10,4 @@ include_directories(
file(GLOB_RECURSE Sources "*.cc") file(GLOB_RECURSE Sources "*.cc")
add_library(BayesNet ${Sources}) add_library(BayesNet ${Sources})
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}") target_link_libraries(BayesNet fimdlp "${TORCH_LIBRARIES}")

View File

@@ -10,8 +10,7 @@
namespace bayesnet { namespace bayesnet {
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {} Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted"; Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{ {
this->features = features; this->features = features;
this->className = className; this->className = className;
@@ -23,7 +22,7 @@ namespace bayesnet {
metrics = Metrics(dataset, features, className, n_classes); metrics = Metrics(dataset, features, className, n_classes);
model.initialize(); model.initialize();
buildModel(weights); buildModel(weights);
trainModel(weights); trainModel(weights, smoothing);
fitted = true; fitted = true;
return *this; return *this;
} }
@@ -41,20 +40,20 @@ namespace bayesnet {
throw std::runtime_error(oss.str()); throw std::runtime_error(oss.str());
} }
} }
void Classifier::trainModel(const torch::Tensor& weights) void Classifier::trainModel(const torch::Tensor& weights, Smoothing_t smoothing)
{ {
model.fit(dataset, weights, features, className, states); model.fit(dataset, weights, features, className, states, smoothing);
} }
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
dataset = X; dataset = X;
buildDataset(y); buildDataset(y);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights); return build(features, className, states, weights, smoothing);
} }
// X is nxm where n is the number of features and m the number of samples // X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32); dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
for (int i = 0; i < X.size(); ++i) { for (int i = 0; i < X.size(); ++i) {
@@ -63,18 +62,18 @@ namespace bayesnet {
auto ytmp = torch::tensor(y, torch::kInt32); auto ytmp = torch::tensor(y, torch::kInt32);
buildDataset(ytmp); buildDataset(ytmp);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights); return build(features, className, states, weights, smoothing);
} }
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
this->dataset = dataset; this->dataset = dataset;
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights); return build(features, className, states, weights, smoothing);
} }
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
this->dataset = dataset; this->dataset = dataset;
return build(features, className, states, weights); return build(features, className, states, weights, smoothing);
} }
void Classifier::checkFitParameters() void Classifier::checkFitParameters()
{ {

View File

@@ -8,7 +8,6 @@
#define CLASSIFIER_H #define CLASSIFIER_H
#include <torch/torch.h> #include <torch/torch.h>
#include "bayesnet/utils/BayesMetrics.h" #include "bayesnet/utils/BayesMetrics.h"
#include "bayesnet/network/Network.h"
#include "bayesnet/BaseClassifier.h" #include "bayesnet/BaseClassifier.h"
namespace bayesnet { namespace bayesnet {
@@ -16,10 +15,10 @@ namespace bayesnet {
public: public:
Classifier(Network model); Classifier(Network model);
virtual ~Classifier() = default; virtual ~Classifier() = default;
Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override; Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override; Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override; Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override; Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override;
void addNodes(); void addNodes();
int getNumberOfNodes() const override; int getNumberOfNodes() const override;
int getNumberOfEdges() const override; int getNumberOfEdges() const override;
@@ -47,14 +46,13 @@ namespace bayesnet {
std::string className; std::string className;
std::map<std::string, std::vector<int>> states; std::map<std::string, std::vector<int>> states;
torch::Tensor dataset; // (n+1)xm tensor torch::Tensor dataset; // (n+1)xm tensor
status_t status = NORMAL;
std::vector<std::string> notes; // Used to store messages that occurred during the fit process
void checkFitParameters(); void checkFitParameters();
virtual void buildModel(const torch::Tensor& weights) = 0; virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
void buildDataset(torch::Tensor& y); void buildDataset(torch::Tensor& y);
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
private: private:
Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights); Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
}; };
} }
#endif #endif

View File

@@ -3,7 +3,7 @@
// SPDX-FileType: SOURCE // SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// *************************************************************** // ***************************************************************
#include "bayesnet/utils/bayesnetUtils.h"
#include "KDB.h" #include "KDB.h"
namespace bayesnet { namespace bayesnet {

View File

@@ -7,15 +7,14 @@
#ifndef KDB_H #ifndef KDB_H
#define KDB_H #define KDB_H
#include <torch/torch.h> #include <torch/torch.h>
#include "bayesnet/utils/bayesnetUtils.h"
#include "Classifier.h" #include "Classifier.h"
namespace bayesnet { namespace bayesnet {
class KDB : public Classifier { class KDB : public Classifier {
private: private:
int k; int k;
float theta; float theta;
void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
protected: protected:
void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
public: public:
explicit KDB(int k, float theta = 0.03); explicit KDB(int k, float theta = 0.03);

View File

@@ -8,7 +8,7 @@
namespace bayesnet { namespace bayesnet {
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {} KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
checkInput(X_, y_); checkInput(X_, y_);
features = features_; features = features_;
@@ -19,7 +19,7 @@ namespace bayesnet {
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
KDB::fit(dataset, features, className, states); KDB::fit(dataset, features, className, states, smoothing);
states = localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
return *this; return *this;
} }

View File

@@ -15,7 +15,7 @@ namespace bayesnet {
public: public:
explicit KDBLd(int k); explicit KDBLd(int k);
virtual ~KDBLd() = default; virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
std::vector<std::string> graph(const std::string& name = "KDB") const override; std::vector<std::string> graph(const std::string& name = "KDB") const override;
torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; }; static inline std::string version() { return "0.0.1"; };

View File

@@ -4,7 +4,6 @@
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// *************************************************************** // ***************************************************************
#include <ArffFiles.h>
#include "Proposal.h" #include "Proposal.h"
namespace bayesnet { namespace bayesnet {
@@ -54,8 +53,7 @@ namespace bayesnet {
yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>()); yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
} }
} }
auto arff = ArffFiles(); auto yxv = factorize(yJoinParents);
auto yxv = arff.factorize(yJoinParents);
auto xvf_ptr = Xf.index({ index }).data_ptr<float>(); auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1)); auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
discretizers[feature]->fit(xvf, yxv); discretizers[feature]->fit(xvf, yxv);
@@ -72,7 +70,7 @@ namespace bayesnet {
states[pFeatures[index]] = xStates; states[pFeatures[index]] = xStates;
} }
const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble); const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
model.fit(pDataset, weights, pFeatures, pClassName, states); model.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::ORIGINAL);
} }
return states; return states;
} }
@@ -113,4 +111,19 @@ namespace bayesnet {
} }
return Xtd; return Xtd;
} }
// Encodes string labels as integers.
// Each distinct label receives a consecutive code in order of first
// appearance (the first label seen is 0, the next new one is 1, ...).
// Returns one code per element of labels_t, in input order.
std::vector<int> Proposal::factorize(const std::vector<std::string>& labels_t)
{
    std::vector<int> yy;
    yy.reserve(labels_t.size());
    std::map<std::string, int> labelMap;
    int nextCode = 0;
    for (const std::string& label : labels_t) {
        // Single lookup; only insert when the label has not been seen yet.
        auto it = labelMap.find(label);
        if (it == labelMap.end()) {
            it = labelMap.emplace(label, nextCode++).first;
        }
        yy.push_back(it->second);
    }
    return yy;
}
} }

View File

@@ -9,7 +9,7 @@
#include <string> #include <string>
#include <map> #include <map>
#include <torch/torch.h> #include <torch/torch.h>
#include <CPPFImdlp.h> #include <fimdlp/CPPFImdlp.h>
#include "bayesnet/network/Network.h" #include "bayesnet/network/Network.h"
#include "Classifier.h" #include "Classifier.h"
@@ -27,6 +27,7 @@ namespace bayesnet {
torch::Tensor y; // y discrete nx1 tensor torch::Tensor y; // y discrete nx1 tensor
map<std::string, mdlp::CPPFImdlp*> discretizers; map<std::string, mdlp::CPPFImdlp*> discretizers;
private: private:
std::vector<int> factorize(const std::vector<std::string>& labels_t);
torch::Tensor& pDataset; // (n+1)xm tensor torch::Tensor& pDataset; // (n+1)xm tensor
std::vector<std::string>& pFeatures; std::vector<std::string>& pFeatures;
std::string& pClassName; std::string& pClassName;

View File

@@ -8,14 +8,29 @@
namespace bayesnet { namespace bayesnet {
SPODE::SPODE(int root) : Classifier(Network()), root(root) {} SPODE::SPODE(int root) : Classifier(Network()), root(root)
{
validHyperparameters = { "parent" };
}
// Takes the optional "parent" entry out of the hyperparameters, stores it in
// root, and passes whatever is left to Classifier::setHyperparameters.
void SPODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
    nlohmann::json remaining(hyperparameters_);
    const bool hasParent = remaining.contains("parent");
    if (hasParent) {
        root = remaining.at("parent").get<int>();
        remaining.erase("parent");
    }
    Classifier::setHyperparameters(remaining);
}
void SPODE::buildModel(const torch::Tensor& weights) void SPODE::buildModel(const torch::Tensor& weights)
{ {
// 0. Add all nodes to the model // 0. Add all nodes to the model
addNodes(); addNodes();
// 1. Add edges from the class node to all other nodes // 1. Add edges from the class node to all other nodes
// 2. Add edges from the root node to all other nodes // 2. Add edges from the root node to all other nodes
if (root >= static_cast<int>(features.size())) {
throw std::invalid_argument("The parent node is not in the dataset");
}
for (int i = 0; i < static_cast<int>(features.size()); ++i) { for (int i = 0; i < static_cast<int>(features.size()); ++i) {
model.addEdge(className, features[i]); model.addEdge(className, features[i]);
if (i != root) { if (i != root) {

View File

@@ -10,14 +10,15 @@
namespace bayesnet { namespace bayesnet {
class SPODE : public Classifier { class SPODE : public Classifier {
private:
int root;
protected:
void buildModel(const torch::Tensor& weights) override;
public: public:
explicit SPODE(int root); explicit SPODE(int root);
virtual ~SPODE() = default; virtual ~SPODE() = default;
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
std::vector<std::string> graph(const std::string& name = "SPODE") const override; std::vector<std::string> graph(const std::string& name = "SPODE") const override;
protected:
void buildModel(const torch::Tensor& weights) override;
private:
int root;
}; };
} }
#endif #endif

View File

@@ -8,25 +8,25 @@
namespace bayesnet { namespace bayesnet {
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {} SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
checkInput(X_, y_); checkInput(X_, y_);
Xf = X_; Xf = X_;
y = y_; y = y_;
return commonFit(features_, className_, states_); return commonFit(features_, className_, states_, smoothing);
} }
SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
if (!torch::is_floating_point(dataset)) { if (!torch::is_floating_point(dataset)) {
throw std::runtime_error("Dataset must be a floating point tensor"); throw std::runtime_error("Dataset must be a floating point tensor");
} }
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone(); Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
y = dataset.index({ -1, "..." }).clone().to(torch::kInt32); y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
return commonFit(features_, className_, states_); return commonFit(features_, className_, states_, smoothing);
} }
SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
features = features_; features = features_;
className = className_; className = className_;
@@ -34,7 +34,7 @@ namespace bayesnet {
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(dataset, features, className, states); SPODE::fit(dataset, features, className, states, smoothing);
states = localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
return *this; return *this;
} }

View File

@@ -14,10 +14,10 @@ namespace bayesnet {
public: public:
explicit SPODELd(int root); explicit SPODELd(int root);
virtual ~SPODELd() = default; virtual ~SPODELd() = default;
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states); SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
std::vector<std::string> graph(const std::string& name = "SPODE") const override; std::vector<std::string> graph(const std::string& name = "SPODELd") const override;
torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; }; static inline std::string version() { return "0.0.1"; };
}; };

View File

@@ -0,0 +1,38 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include "SPnDE.h"
namespace bayesnet {
// Creates the classifier over an empty Network and stores the given list of
// parent feature indices (used by buildModel to index into features).
SPnDE::SPnDE(std::vector<int> parents) : Classifier(Network()), parents(parents) {}
// Builds the network structure: every feature that is not listed in parents
// gets an edge from the class node and one edge from each parent feature.
// The weights argument is not used here.
void SPnDE::buildModel(const torch::Tensor& weights)
{
    // All nodes must exist before edges can be added.
    addNodes();
    const int featureCount = static_cast<int>(features.size());
    for (int child = 0; child < featureCount; ++child) {
        const bool isParent = std::find(parents.begin(), parents.end(), child) != parents.end();
        if (isParent) {
            // NOTE(review): parent features receive no edge from the class
            // node in this method - confirm that this is intended.
            continue;
        }
        model.addEdge(className, features[child]);
        for (const int parentIdx : parents) {
            model.addEdge(features[parentIdx], features[child]);
        }
    }
}
// Returns the graph description produced by the underlying model for the
// given name.
std::vector<std::string> SPnDE::graph(const std::string& name) const
{
    std::vector<std::string> lines = model.graph(name);
    return lines;
}
}

View File

@@ -0,0 +1,26 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef SPnDE_H
#define SPnDE_H
#include <vector>
#include "Classifier.h"
namespace bayesnet {
// Classifier whose structure is driven by a list of parent feature indices:
// buildModel links every non-parent feature to the class node and to each
// parent feature.
class SPnDE : public Classifier {
public:
// parents: indices into the feature list of the features used as parents.
explicit SPnDE(std::vector<int> parents);
virtual ~SPnDE() = default;
// Returns the model's graph description; name defaults to "SPnDE".
std::vector<std::string> graph(const std::string& name = "SPnDE") const override;
protected:
// Adds the nodes and edges of the network (weights is not used by this class).
void buildModel(const torch::Tensor& weights) override;
private:
// Indices of the parent features, as passed to the constructor.
std::vector<int> parents;
};
}
#endif

View File

@@ -7,8 +7,20 @@
#include "TAN.h" #include "TAN.h"
namespace bayesnet { namespace bayesnet {
TAN::TAN() : Classifier(Network()) {} TAN::TAN() : Classifier(Network())
{
validHyperparameters = { "parent" };
}
// Takes the optional "parent" entry out of the hyperparameters, stores it in
// parent, and passes whatever is left to Classifier::setHyperparameters.
void TAN::setHyperparameters(const nlohmann::json& hyperparameters_)
{
    nlohmann::json remaining(hyperparameters_);
    const bool hasParent = remaining.contains("parent");
    if (hasParent) {
        parent = remaining.at("parent").get<int>();
        remaining.erase("parent");
    }
    Classifier::setHyperparameters(remaining);
}
void TAN::buildModel(const torch::Tensor& weights) void TAN::buildModel(const torch::Tensor& weights)
{ {
// 0. Add all nodes to the model // 0. Add all nodes to the model
@@ -23,7 +35,10 @@ namespace bayesnet {
mi.push_back({ i, mi_value }); mi.push_back({ i, mi_value });
} }
sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;}); sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;});
auto root = mi[mi.size() - 1].first; auto root = parent == -1 ? mi[mi.size() - 1].first : parent;
if (root >= static_cast<int>(features.size())) {
throw std::invalid_argument("The parent node is not in the dataset");
}
// 2. Compute mutual information between each feature and the class // 2. Compute mutual information between each feature and the class
auto weights_matrix = metrics.conditionalEdge(weights); auto weights_matrix = metrics.conditionalEdge(weights);
// 3. Compute the maximum spanning tree // 3. Compute the maximum spanning tree

View File

@@ -9,13 +9,15 @@
#include "Classifier.h" #include "Classifier.h"
namespace bayesnet { namespace bayesnet {
class TAN : public Classifier { class TAN : public Classifier {
private:
protected:
void buildModel(const torch::Tensor& weights) override;
public: public:
TAN(); TAN();
virtual ~TAN() = default; virtual ~TAN() = default;
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
std::vector<std::string> graph(const std::string& name = "TAN") const override; std::vector<std::string> graph(const std::string& name = "TAN") const override;
protected:
void buildModel(const torch::Tensor& weights) override;
private:
int parent = -1;
}; };
} }
#endif #endif

View File

@@ -8,7 +8,7 @@
namespace bayesnet { namespace bayesnet {
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {} TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
checkInput(X_, y_); checkInput(X_, y_);
features = features_; features = features_;
@@ -19,7 +19,7 @@ namespace bayesnet {
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
TAN::fit(dataset, features, className, states); TAN::fit(dataset, features, className, states, smoothing);
states = localDiscretizationProposal(states, model); states = localDiscretizationProposal(states, model);
return *this; return *this;

View File

@@ -15,10 +15,9 @@ namespace bayesnet {
public: public:
TANLd(); TANLd();
virtual ~TANLd() = default; virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override; TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
std::vector<std::string> graph(const std::string& name = "TAN") const override; std::vector<std::string> graph(const std::string& name = "TANLd") const override;
torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; };
}; };
} }
#endif // !TANLD_H #endif // !TANLD_H

View File

@@ -0,0 +1,575 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include "XSP2DE.h"
#include <pthread.h> // for pthread_setname_np on linux
#include <cassert>
#include <cmath>
#include <limits>
#include <stdexcept>
#include <iostream>
#include "bayesnet/utils/TensorUtils.h"
namespace bayesnet {
// --------------------------------------
// Constructor
// --------------------------------------
XSp2de::XSp2de(int spIndex1, int spIndex2)
: superParent1_{ spIndex1 }
, superParent2_{ spIndex2 }
, nFeatures_{0}
, statesClass_{0}
, alpha_{1.0}
, initializer_{1.0}
, semaphore_{ CountingSemaphore::getInstance() }
, Classifier(Network())
{
validHyperparameters = { "parent1", "parent2" };
}
// --------------------------------------
// setHyperparameters
// --------------------------------------
// Takes the optional "parent1" and "parent2" entries out of the
// hyperparameters, stores them as the two super-parent indices, and passes
// whatever is left to Classifier::setHyperparameters.
void XSp2de::setHyperparameters(const nlohmann::json &hyperparameters_)
{
    nlohmann::json remaining(hyperparameters_);
    // Moves one integer hyperparameter from the json object into target.
    const auto extractIndex = [&remaining](const std::string &key, int &target) {
        if (!remaining.contains(key)) {
            return;
        }
        target = remaining.at(key).get<int>();
        remaining.erase(key);
    };
    extractIndex("parent1", superParent1_);
    extractIndex("parent2", superParent2_);
    Classifier::setHyperparameters(remaining);
}
// --------------------------------------
// fitx
// --------------------------------------
// Fits the model from a feature tensor X, class tensor y and per-sample
// weights, using the requested smoothing.
// X is read as (features x samples): size(0) is taken as the number of
// features and size(1) as the number of samples.
// The steps below must run in this order: buildModel reads n and dataset,
// and trainModel reads the structures that buildModel sizes.
void XSp2de::fitx(torch::Tensor & X, torch::Tensor & y,
torch::Tensor & weights_, const Smoothing_t smoothing)
{
m = X.size(1); // number of samples
n = X.size(0); // number of features
dataset = X;
// Helper defined outside this file; presumably appends y to dataset as the
// class row (buildModel reads the class from dataset[-1]) - TODO confirm.
buildDataset(y);
// Size and zero the count tables
buildModel(weights_);
// Accumulate the weighted counts and convert them to probabilities
trainModel(weights_, smoothing);
fitted = true;
}
// --------------------------------------
// buildModel
// --------------------------------------
// Sizes and zeroes every count table used by trainModel.
// - states_[f] is derived as (max value of feature f in dataset) + 1.
// - statesClass_ is derived the same way from the last row of dataset.
// - childOffsets_[f] is the start of feature f's block inside childCounts_,
//   or -1 for the two super-parents, which have no block.
// The weights argument is not used here.
// Throws std::invalid_argument when a super-parent index is outside
// [0, nFeatures_).
void XSp2de::buildModel(const torch::Tensor &weights)
{
    nFeatures_ = n;
    // Validate before the indices are used to address states_ below;
    // an out-of-range index would otherwise read past the vector.
    const bool sp1Valid = superParent1_ >= 0 && superParent1_ < nFeatures_;
    const bool sp2Valid = superParent2_ >= 0 && superParent2_ < nFeatures_;
    if (!sp1Valid || !sp2Valid) {
        throw std::invalid_argument("The parent node is not in the dataset");
    }
    // assign() (not resize()) so that a second fit starts from clean tables:
    // resize() keeps the existing elements and counts would accumulate.
    states_.assign(static_cast<size_t>(nFeatures_), 0);
    for (int f = 0; f < nFeatures_; f++) {
        // Cardinality is inferred from the largest value seen in the data.
        states_[f] = dataset[f].max().item<int>() + 1;
    }
    // Class states:
    statesClass_ = dataset[-1].max().item<int>() + 1;
    // Class counts
    classCounts_.assign(static_cast<size_t>(statesClass_), 0.0);
    // Counts for p(sp1Val | c)
    sp1FeatureCounts_.assign(static_cast<size_t>(states_[superParent1_] * statesClass_), 0.0);
    // Counts for p(sp2Val | c)
    sp2FeatureCounts_.assign(static_cast<size_t>(states_[superParent2_] * statesClass_), 0.0);
    // Child features store counts for p(childVal | c, sp1Val, sp2Val) in one
    // flat vector; each child owns a contiguous block located by its offset.
    childOffsets_.assign(static_cast<size_t>(nFeatures_), -1);
    int totalSize = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_) {
            // Super-parents have no child block; their offset stays -1.
            continue;
        }
        childOffsets_[f] = totalSize;
        // Block size for child f:
        // states_[f] * statesClass_ * states_[sp1] * states_[sp2]
        totalSize += (states_[f] * statesClass_
                      * states_[superParent1_]
                      * states_[superParent2_]);
    }
    childCounts_.assign(static_cast<size_t>(totalSize), 0.0);
}
// --------------------------------------
// trainModel
// --------------------------------------
// Accumulates the weighted counts of every sample in dataset, selects the
// smoothing constant alpha_ and converts the counts into probabilities.
// - ORIGINAL smoothing uses alpha = 1 / m (m = number of samples).
// - LAPLACE smoothing uses alpha = 1.
// - Any other value disables smoothing (alpha = 0).
void XSp2de::trainModel(const torch::Tensor &weights,
                        const bayesnet::Smoothing_t smoothing)
{
    // Accumulate raw counts. The buffer is allocated once and fully
    // overwritten for every sample (features first, class last).
    std::vector<int> instance(nFeatures_ + 1);
    for (int i = 0; i < m; i++) {
        for (int f = 0; f < nFeatures_; f++) {
            instance[f] = dataset[f][i].item<int>();
        }
        instance[nFeatures_] = dataset[-1][i].item<int>(); // class
        const double w = weights[i].item<double>();
        addSample(instance, w);
    }
    // Choose alpha based on smoothing:
    switch (smoothing) {
        case bayesnet::Smoothing_t::ORIGINAL:
            alpha_ = 1.0 / m;
            break;
        case bayesnet::Smoothing_t::LAPLACE:
            alpha_ = 1.0;
            break;
        default:
            alpha_ = 0.0; // no smoothing
    }
    // Large scale factor applied to the initial product in predict_proba.
    // The square is computed in double so it cannot overflow int.
    const double featureCount = static_cast<double>(nFeatures_);
    initializer_ = std::numeric_limits<double>::max() / (featureCount * featureCount);
    // Convert raw counts to probabilities
    computeProbabilities();
}
// --------------------------------------
// addSample
// --------------------------------------
// Adds one weighted sample to the count tables.
// instance holds the feature values followed by the class value as its last
// element. Samples with a non-positive weight are ignored.
void XSp2de::addSample(const std::vector<int> &instance, double weight)
{
    if (!(weight > 0.0)) {
        // Equivalent to weight <= 0.0 for every ordered value.
        if (weight <= 0.0) {
            return;
        }
    }
    const int cls = instance.back();
    const int sp1Val = instance[superParent1_];
    const int sp2Val = instance[superParent2_];
    const int sp2Card = states_[superParent2_];

    // Class count and the two super-parent tables, laid out as [value][class].
    classCounts_[cls] += weight;
    sp1FeatureCounts_[sp1Val * statesClass_ + cls] += weight;
    sp2FeatureCounts_[sp2Val * statesClass_ + cls] += weight;

    // Child tables, laid out as [sp1Val][sp2Val][childVal][class] inside the
    // block that starts at childOffsets_[f].
    for (int f = 0; f < nFeatures_; ++f) {
        const bool isSuperParent = (f == superParent1_) || (f == superParent2_);
        if (isSuperParent) {
            continue;
        }
        const int fCard = states_[f];
        const int row = (sp1Val * sp2Card + sp2Val) * fCard + instance[f];
        childCounts_[childOffsets_[f] + row * statesClass_ + cls] += weight;
    }
}
// --------------------------------------
// computeProbabilities
// --------------------------------------
void XSp2de::computeProbabilities()
{
double totalCount = std::accumulate(classCounts_.begin(),
classCounts_.end(), 0.0);
// classPriors_
classPriors_.resize(statesClass_, 0.0);
if (totalCount <= 0.0) {
// fallback => uniform
double unif = 1.0 / static_cast<double>(statesClass_);
for (int c = 0; c < statesClass_; c++) {
classPriors_[c] = unif;
}
} else {
for (int c = 0; c < statesClass_; c++) {
classPriors_[c] =
(classCounts_[c] + alpha_)
/ (totalCount + alpha_ * statesClass_);
}
}
// p(sp1Val| c)
sp1FeatureProbs_.resize(sp1FeatureCounts_.size());
int sp1Card = states_[superParent1_];
for (int spVal = 0; spVal < sp1Card; spVal++) {
for (int c = 0; c < statesClass_; c++) {
double denom = classCounts_[c] + alpha_ * sp1Card;
double num = sp1FeatureCounts_[spVal * statesClass_ + c] + alpha_;
sp1FeatureProbs_[spVal * statesClass_ + c] =
(denom <= 0.0 ? 0.0 : num / denom);
}
}
// p(sp2Val| c)
sp2FeatureProbs_.resize(sp2FeatureCounts_.size());
int sp2Card = states_[superParent2_];
for (int spVal = 0; spVal < sp2Card; spVal++) {
for (int c = 0; c < statesClass_; c++) {
double denom = classCounts_[c] + alpha_ * sp2Card;
double num = sp2FeatureCounts_[spVal * statesClass_ + c] + alpha_;
sp2FeatureProbs_[spVal * statesClass_ + c] =
(denom <= 0.0 ? 0.0 : num / denom);
}
}
// p(childVal| c, sp1Val, sp2Val)
childProbs_.resize(childCounts_.size());
int offset = 0;
for (int f = 0; f < nFeatures_; f++) {
if (f == superParent1_ || f == superParent2_)
continue;
int fCard = states_[f];
int sp1Card_ = states_[superParent1_];
int sp2Card_ = states_[superParent2_];
int childBlockSizeSp2 = sp2Card_ * fCard * statesClass_;
int childBlockSizeF = fCard * statesClass_;
int blockSize = fCard * sp1Card_ * sp2Card_ * statesClass_;
for (int sp1Val = 0; sp1Val < sp1Card_; sp1Val++) {
for (int sp2Val = 0; sp2Val < sp2Card_; sp2Val++) {
for (int childVal = 0; childVal < fCard; childVal++) {
for (int c = 0; c < statesClass_; c++) {
// index in childCounts_
int idx = offset
+ sp1Val*childBlockSizeSp2
+ sp2Val*childBlockSizeF
+ childVal*statesClass_
+ c;
double num = childCounts_[idx] + alpha_;
// denominator is the count of (sp1Val,sp2Val,c) plus alpha * fCard
// We can find that by summing childVal dimension, but we already
// have it in childCounts_[...] or we can re-check the superparent
// counts if your approach is purely hierarchical.
// Here we'll do it like the XSpode approach: sp1&sp2 are
// conditionally independent given c, so denominators come from
// summing the relevant block or we treat sp1,sp2 as "parents."
// A simpler approach:
double sumSp1Sp2C = 0.0;
// sum over all childVal:
for (int cv = 0; cv < fCard; cv++) {
int idx2 = offset
+ sp1Val*childBlockSizeSp2
+ sp2Val*childBlockSizeF
+ cv*statesClass_ + c;
sumSp1Sp2C += childCounts_[idx2];
}
double denom = sumSp1Sp2C + alpha_ * fCard;
childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
}
}
}
}
offset += blockSize;
}
}
// --------------------------------------
// predict_proba (single instance)
// --------------------------------------
// Returns the normalized class probabilities for one instance (one value per
// feature, indexed by feature position).
// Throws std::logic_error when the model has not been fitted.
std::vector<double> XSp2de::predict_proba(const std::vector<int> &instance) const
{
    if (!fitted) {
        throw std::logic_error(CLASSIFIER_NOT_FITTED);
    }
    const int sp1Val = instance[superParent1_];
    const int sp2Val = instance[superParent2_];
    const int sp1Card = states_[superParent1_];
    const int sp2Card = states_[superParent2_];

    // Seed each class with p(c) * p(sp1Val | c) * p(sp2Val | c), scaled by
    // initializer_.
    std::vector<double> posterior(statesClass_, 0.0);
    for (int c = 0; c < statesClass_; ++c) {
        const double prior = classPriors_[c];
        const double likeSp1 = sp1FeatureProbs_[sp1Val * statesClass_ + c];
        const double likeSp2 = sp2FeatureProbs_[sp2Val * statesClass_ + c];
        posterior[c] = prior * likeSp1 * likeSp2 * initializer_;
    }

    // Multiply in p(childVal | c, sp1Val, sp2Val) for every child feature.
    int blockStart = 0; // start of the current child's block in childProbs_
    for (int f = 0; f < nFeatures_; ++f) {
        const bool isSuperParent = (f == superParent1_) || (f == superParent2_);
        if (isSuperParent) {
            continue;
        }
        const int fCard = states_[f];
        const int sliceSize = fCard * statesClass_;
        const int first = blockStart
                          + sp1Val * (sp2Card * sliceSize)
                          + sp2Val * sliceSize
                          + instance[f] * statesClass_;
        for (int c = 0; c < statesClass_; ++c) {
            posterior[c] *= childProbs_[first + c];
        }
        blockStart += fCard * sp1Card * sp2Card * statesClass_;
    }

    normalize(posterior);
    return posterior;
}
// --------------------------------------
// predict_proba (batch)
// --------------------------------------
std::vector<std::vector<double>> XSp2de::predict_proba(std::vector<std::vector<int>> &test_data)
{
int test_size = test_data[0].size(); // each feature is test_data[f], size = #samples
int sample_size = test_data.size(); // = nFeatures_
std::vector<std::vector<double>> probabilities(
test_size, std::vector<double>(statesClass_, 0.0));
// same concurrency approach
int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
std::vector<std::thread> threads;
auto worker = [&](const std::vector<std::vector<int>> &samples,
int begin,
int chunk,
int sample_size,
std::vector<std::vector<double>> &predictions) {
std::string threadName =
"XSp2de-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
std::vector<int> instance(sample_size);
for (int sample = begin; sample < begin + chunk; ++sample) {
for (int feature = 0; feature < sample_size; ++feature) {
instance[feature] = samples[feature][sample];
}
predictions[sample] = predict_proba(instance);
}
semaphore_.release();
};
for (int begin = 0; begin < test_size; begin += chunk_size) {
int chunk = std::min(chunk_size, test_size - begin);
semaphore_.acquire();
threads.emplace_back(worker, test_data, begin, chunk, sample_size,
std::ref(probabilities));
}
for (auto &th : threads) {
th.join();
}
return probabilities;
}
// --------------------------------------
// predict (single instance)
// --------------------------------------
// Returns the index of the most probable class for one instance
// (the first one when several classes tie).
int XSp2de::predict(const std::vector<int> &instance) const
{
    const std::vector<double> posterior = predict_proba(instance);
    const auto best = std::max_element(posterior.begin(), posterior.end());
    return static_cast<int>(std::distance(posterior.begin(), best));
}
// --------------------------------------
// predict (batch of data)
// --------------------------------------
// Returns, for every sample of the feature-major batch, the index of its most
// probable class (the first one when several classes tie).
std::vector<int> XSp2de::predict(std::vector<std::vector<int>> &test_data)
{
    const std::vector<std::vector<double>> batchProbs = predict_proba(test_data);
    std::vector<int> labels;
    labels.reserve(batchProbs.size());
    for (const std::vector<double> &row : batchProbs) {
        const auto best = std::max_element(row.begin(), row.end());
        labels.push_back(static_cast<int>(std::distance(row.begin(), best)));
    }
    return labels;
}
// --------------------------------------
// predict (torch::Tensor version)
// --------------------------------------
// Tensor overload: converts X to a matrix of ints, predicts with the vector
// overload and returns the labels as an Int32 tensor.
torch::Tensor XSp2de::predict(torch::Tensor &X)
{
    auto samples = TensorUtils::to_matrix(X);
    const std::vector<int> labels = predict(samples);
    return torch::tensor(labels, torch::kInt32);
}
// --------------------------------------
// predict_proba (torch::Tensor version)
// --------------------------------------
// Tensor overload: converts X to a matrix of ints, computes the per-sample
// class probabilities with the vector overload and returns them as a
// (X.size(1) x statesClass_) double tensor, one row per sample.
torch::Tensor XSp2de::predict_proba(torch::Tensor &X)
{
    auto samples = TensorUtils::to_matrix(X);
    const auto rows = predict_proba(samples);
    const int n_samples = X.size(1);
    torch::Tensor out = torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
    const int rowCount = (int)rows.size();
    for (int r = 0; r < rowCount; ++r) {
        out.index_put_({ r, "..." }, torch::tensor(rows[r]));
    }
    return out;
}
// --------------------------------------
// score (torch::Tensor version)
// --------------------------------------
// Accuracy on tensors: number of predictions equal to y divided by y.size(0).
float XSp2de::score(torch::Tensor &X, torch::Tensor &y)
{
    const torch::Tensor predicted = predict(X);
    const float hits = (predicted == y).sum().item<float>();
    return hits / y.size(0);
}
// --------------------------------------
// score (vector version)
// --------------------------------------
float XSp2de::score(std::vector<std::vector<int>> &X, std::vector<int> &y)
{
auto y_pred = predict(X);
int correct = 0;
for (size_t i = 0; i < y_pred.size(); ++i) {
if (y_pred[i] == y[i]) {
correct++;
}
}
return static_cast<float>(correct) / static_cast<float>(y_pred.size());
}
// --------------------------------------
// Utility: normalize
// --------------------------------------
// Scales v in place so that its elements sum to 1.
// v is left untouched when its sum is not strictly positive.
void XSp2de::normalize(std::vector<double> &v) const
{
    const double total = std::accumulate(v.begin(), v.end(), 0.0);
    if (!(total > 0.0)) {
        return;
    }
    for (double &entry : v) {
        entry /= total;
    }
}
// --------------------------------------
// to_string
// --------------------------------------
// Returns a multi-line text dump of the model: the scalar settings, the state
// cardinalities, the class counts and priors, the raw count tables and the
// child offsets.
std::string XSp2de::to_string() const
{
    std::ostringstream out;
    // Writes every element of a container followed by a single space.
    const auto dump = [&out](const auto &values) {
        for (const auto &value : values) {
            out << value << " ";
        }
    };

    out << "----- XSp2de Model -----\n";
    out << "nFeatures_ = " << nFeatures_ << "\n";
    out << "superParent1_ = " << superParent1_ << "\n";
    out << "superParent2_ = " << superParent2_ << "\n";
    out << "statesClass_ = " << statesClass_ << "\n\n";

    out << "States: [";
    dump(states_);
    out << "]\n";

    out << "classCounts_:\n";
    dump(classCounts_);
    out << "\nclassPriors_:\n";
    dump(classPriors_);
    out << "\nsp1FeatureCounts_ (size=" << sp1FeatureCounts_.size() << ")\n";
    dump(sp1FeatureCounts_);
    out << "\nsp2FeatureCounts_ (size=" << sp2FeatureCounts_.size() << ")\n";
    dump(sp2FeatureCounts_);
    out << "\nchildCounts_ (size=" << childCounts_.size() << ")\n";
    dump(childCounts_);
    out << "\nchildOffsets_:\n";
    dump(childOffsets_);
    out << "\n----------------------------------------\n";
    return out.str();
}
// --------------------------------------
// Some introspection about the graph
// --------------------------------------
// Returns the number of nodes of the model: one per feature plus the class node.
int XSp2de::getNumberOfNodes() const
{
// nFeatures + 1 class node
return nFeatures_ + 1;
}
// Returns statesClass_, the number of class states (0 until buildModel derives it).
int XSp2de::getClassNumStates() const
{
return statesClass_;
}
// Returns nFeatures_, the number of features (0 until buildModel sets it).
int XSp2de::getNFeatures() const
{
return nFeatures_;
}
// Returns the sum of all feature cardinalities multiplied by the number of
// features.
// NOTE(review): the original comment described this formula as "purely an
// example" - confirm it is the intended definition.
int XSp2de::getNumberOfStates() const
{
    int cardinalitySum = 0;
    for (const int card : states_) {
        cardinalitySum += card;
    }
    return cardinalitySum * nFeatures_;
}
// Returns the edge count of a structure with two super-parents:
//   class -> sp1, class -> sp2            : 2 edges
//   class -> child, sp1 -> child,
//   sp2 -> child for each of the
//   (nFeatures_ - 2) children             : 3 * (nFeatures_ - 2) edges
// which simplifies to 3 * nFeatures_ - 4.
int XSp2de::getNumberOfEdges() const
{
    const int children = nFeatures_ - 2;
    const int parentEdges = 2;
    return parentEdges + 3 * children;
}
} // namespace bayesnet

View File

@@ -0,0 +1,75 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef XSP2DE_H
#define XSP2DE_H
#include "Classifier.h"
#include "bayesnet/utils/CountingSemaphore.h"
#include <torch/torch.h>
#include <vector>
namespace bayesnet {
// Classifier built around two super-parent features (see the constructor's
// spIndex1 / spIndex2). Counts and probabilities are kept in flat std::vector
// buffers instead of a Network graph.
class XSp2de : public Classifier {
public:
// spIndex1 / spIndex2: presumably the feature indices of the two super-parents
// (constructor body is not visible here -- confirm in XSp2de.cc).
XSp2de(int spIndex1, int spIndex2);
void setHyperparameters(const nlohmann::json &hyperparameters_) override;
// Fit entry point working directly on tensors plus per-sample weights.
void fitx(torch::Tensor &X, torch::Tensor &y, torch::Tensor &weights_, const Smoothing_t smoothing);
// Single-instance and batch prediction overloads.
std::vector<double> predict_proba(const std::vector<int> &instance) const;
std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>> &test_data) override;
int predict(const std::vector<int> &instance) const;
std::vector<int> predict(std::vector<std::vector<int>> &test_data) override;
torch::Tensor predict(torch::Tensor &X) override;
torch::Tensor predict_proba(torch::Tensor &X) override;
float score(torch::Tensor &X, torch::Tensor &y) override;
float score(std::vector<std::vector<int>> &X, std::vector<int> &y) override;
// Text dump of the internal counters (for debugging).
std::string to_string() const;
// No graph is rendered for this model: the result contains only the title.
std::vector<std::string> graph(const std::string &title) const override {
return std::vector<std::string>({title});
}
// nFeatures_ + 1 (features plus the class node).
int getNumberOfNodes() const override;
// 3 * nFeatures_ - 4 (approximation documented in the implementation).
int getNumberOfEdges() const override;
int getNFeatures() const;
int getClassNumStates() const override;
// Sum of states_ multiplied by nFeatures_.
int getNumberOfStates() const override;
protected:
void buildModel(const torch::Tensor &weights) override;
void trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing) override;
private:
void addSample(const std::vector<int> &instance, double weight);
void normalize(std::vector<double> &v) const;
void computeProbabilities();
int superParent1_;
int superParent2_;
int nFeatures_;
int statesClass_;
// Smoothing constant and scaling factor; presumably used as in XSpode
// (alpha_ added to counts, initializer_ multiplied into the product) -- confirm.
double alpha_;
double initializer_;
// Cardinality of each feature.
std::vector<int> states_;
std::vector<double> classCounts_;
std::vector<double> classPriors_;
std::vector<double> sp1FeatureCounts_, sp1FeatureProbs_;
std::vector<double> sp2FeatureCounts_, sp2FeatureProbs_;
// childOffsets_[f] will be the offset into childCounts_ for feature f.
// If f is either superParent1 or superParent2, childOffsets_[f] = -1
std::vector<int> childOffsets_;
// For each child f, we store p(x_f | c, sp1Val, sp2Val). We'll store the raw
// counts in childCounts_, and the probabilities in childProbs_, with a
// dimension block of size: states_[f]* statesClass_* states_[sp1]* states_[sp2].
std::vector<double> childCounts_;
std::vector<double> childProbs_;
// Reference member: makes the class non-assignable and must be bound in the
// constructor's initializer list.
CountingSemaphore &semaphore_;
};
} // namespace bayesnet
#endif // XSP2DE_H

View File

@@ -0,0 +1,450 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <algorithm>
#include <cmath>
#include <limits>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include "XSPODE.h"
#include "bayesnet/utils/TensorUtils.h"
namespace bayesnet {
// --------------------------------------
// Constructor
// --------------------------------------
XSpode::XSpode(int spIndex)
: superParent_{ spIndex }, nFeatures_{ 0 }, statesClass_{ 0 }, alpha_{ 1.0 },
initializer_{ 1.0 }, semaphore_{ CountingSemaphore::getInstance() },
Classifier(Network())
{
validHyperparameters = { "parent" };
}
/// Consumes the optional "parent" entry (new super-parent index) and forwards
/// whatever is left to Classifier::setHyperparameters.
void XSpode::setHyperparameters(const nlohmann::json& hyperparameters_)
{
    static constexpr const char* kParentKey = "parent";
    nlohmann::json remaining = hyperparameters_; // work on a copy, the input is const
    if (remaining.contains(kParentKey)) {
        superParent_ = remaining.at(kParentKey);
        remaining.erase(kParentKey);
    }
    Classifier::setHyperparameters(remaining);
}
/// Fits the model from tensors.
///
/// @param X         data tensor; dimension 0 is read as the number of features
///                  and dimension 1 as the number of samples
/// @param y         class labels, appended to the dataset by buildDataset
/// @param weights_  per-sample weights used while counting
/// @param smoothing smoothing policy forwarded to trainModel
void XSpode::fitx(torch::Tensor & X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
{
    // Record the data shape and keep the feature tensor.
    n = X.size(0);
    m = X.size(1);
    dataset = X;
    // Append the labels, allocate the counters, then count and normalise.
    buildDataset(y);
    buildModel(weights_);
    trainModel(weights_, smoothing);
    fitted = true;
}
// --------------------------------------
// buildModel
// --------------------------------------
// Initialize storage needed for the super-parent and child features counts and
// probs.
// --------------------------------------
/// Allocates and zeroes every counter needed by the super-parent and the
/// child features.
///
/// Cardinalities are derived from the data: states_[f] = max(dataset[f]) + 1
/// and statesClass_ = max(dataset[-1]) + 1 (the class is the last row).
///
/// @param weights unused here; part of the Classifier interface
/// @throws std::invalid_argument if superParent_ is not a valid feature index
void XSpode::buildModel(const torch::Tensor& weights)
{
    nFeatures_ = n;
    // superParent_ is used as an index into states_ and into every instance,
    // so reject an out-of-range value instead of reading out of bounds.
    if (superParent_ < 0 || superParent_ >= nFeatures_) {
        throw std::invalid_argument("XSpode: super-parent index " + std::to_string(superParent_) +
            " is out of range [0, " + std::to_string(nFeatures_) + ")");
    }
    // Number of states of each feature.
    states_.resize(nFeatures_);
    for (int f = 0; f < nFeatures_; f++) {
        states_[f] = dataset[f].max().item<int>() + 1;
    }
    // Number of states of the class (last row of the dataset).
    statesClass_ = dataset[-1].max().item<int>() + 1;
    // assign() (not resize()) so that a second fit starts from zeroed counters;
    // resize() keeps the values already stored in the vector.
    classCounts_.assign(statesClass_, 0.0);
    // p(x_sp = spVal | c) counts live at spFeatureCounts_[spVal * statesClass_ + c].
    spFeatureCounts_.assign(states_[superParent_] * statesClass_, 0.0);
    // Every child f != sp owns a block of
    //   states_[f] * statesClass_ * states_[superParent_]
    // counters inside childCounts_; childOffsets_[f] is where that block starts
    // (-1 for the super-parent itself).
    childOffsets_.assign(nFeatures_, -1);
    int totalSize = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent_)
            continue; // the super-parent has no child block
        childOffsets_[f] = totalSize;
        totalSize += (states_[f] * statesClass_ * states_[superParent_]);
    }
    childCounts_.assign(totalSize, 0.0);
}
// --------------------------------------
// trainModel
// --------------------------------------
//
// We only store conditional probabilities for:
// p(x_sp| c) (the super-parent feature)
// p(x_child| c, x_sp) for all child ≠ sp
//
// --------------------------------------
/// Accumulates the weighted counts of every training sample, selects the
/// smoothing constant and converts the counts into probabilities.
///
/// @param weights   per-sample weights, indexed by sample
/// @param smoothing ORIGINAL -> alpha_ = 1/m, LAPLACE -> alpha_ = 1,
///                  anything else -> no smoothing (alpha_ = 0)
void XSpode::trainModel(const torch::Tensor& weights,
    const bayesnet::Smoothing_t smoothing)
{
    // Accumulate raw counts. The buffer is allocated once and fully
    // overwritten for every sample (features first, class last).
    std::vector<int> instance(nFeatures_ + 1);
    for (int i = 0; i < m; i++) {
        for (int f = 0; f < nFeatures_; f++) {
            instance[f] = dataset[f][i].item<int>();
        }
        instance[nFeatures_] = dataset[-1][i].item<int>();
        addSample(instance, weights[i].item<double>());
    }
    switch (smoothing) {
        case bayesnet::Smoothing_t::ORIGINAL:
            alpha_ = 1.0 / m;
            break;
        case bayesnet::Smoothing_t::LAPLACE:
            alpha_ = 1.0;
            break;
        default:
            alpha_ = 0.0; // No smoothing
    }
    // Scaling factor applied to the probability product for numerical
    // stability. The square is computed in double: an int product overflows
    // (undefined behaviour) once nFeatures_ exceeds 46340.
    const double featureCount = static_cast<double>(nFeatures_);
    initializer_ = std::numeric_limits<double>::max() / (featureCount * featureCount);
    // Convert raw counts to probabilities
    computeProbabilities();
}
// --------------------------------------
// addSample
// --------------------------------------
//
// instance has size nFeatures_ + 1, with the class at the end.
// We add the sample weight to the appropriate counters for each (c, superParentVal, childVal).
//
/// Adds one weighted sample to the class, super-parent and child counters.
///
/// @param instance feature values followed by the class value (last element)
/// @param weight   sample weight; samples with weight <= 0 are ignored
void XSpode::addSample(const std::vector<int>& instance, double weight)
{
    if (weight <= 0.0) {
        return;
    }
    const int classVal = instance.back();
    const int spVal = instance[superParent_];
    // class count and p(x_sp | c) count
    classCounts_[classVal] += weight;
    spFeatureCounts_[spVal * statesClass_ + classVal] += weight;
    // p(x_child | c, x_sp) counts, one block per child:
    //   childOffsets_[child] + spVal * (states_[child] * statesClass_)
    //                        + childVal * statesClass_ + c
    for (int child = 0; child < nFeatures_; ++child) {
        if (child != superParent_) {
            const int stride = states_[child] * statesClass_;
            const int position =
                childOffsets_[child] + spVal * stride + instance[child] * statesClass_ + classVal;
            childCounts_[position] += weight;
        }
    }
}
// --------------------------------------
// computeProbabilities
// --------------------------------------
//
// Once all samples are added in COUNTS mode, call this to compute:
// p(c)
// p(x_sp = spVal | c)
// p(x_child = v | c, x_sp = s_sp)
//
// --------------------------------------
/// Turns the accumulated counts into smoothed probabilities:
///   classPriors_     <- p(c)
///   spFeatureProbs_  <- p(x_sp | c)
///   childProbs_      <- p(x_child | c, x_sp)
/// A non-positive denominator yields probability 0; a non-positive total
/// class count yields uniform class priors.
void XSpode::computeProbabilities()
{
    // ---- p(c) ----
    double total = 0.0;
    for (const double count : classCounts_) {
        total += count;
    }
    classPriors_.resize(statesClass_, 0.0);
    if (total <= 0.0) {
        // nothing was counted: fall back to a uniform prior
        const double uniform = 1.0 / static_cast<double>(statesClass_);
        std::fill(classPriors_.begin(), classPriors_.end(), uniform);
    } else {
        const double priorDenom = total + alpha_ * statesClass_;
        for (int c = 0; c < statesClass_; ++c) {
            classPriors_[c] = (classCounts_[c] + alpha_) / priorDenom;
        }
    }
    // ---- p(x_sp | c) ----
    // entry [spVal * statesClass_ + c]; denominator = classCounts_[c] + alpha_ * |sp|
    const int spCard = states_[superParent_];
    spFeatureProbs_.resize(spFeatureCounts_.size());
    for (int spVal = 0; spVal < spCard; ++spVal) {
        for (int c = 0; c < statesClass_; ++c) {
            const int pos = spVal * statesClass_ + c;
            const double denom = classCounts_[c] + alpha_ * spCard;
            if (denom <= 0.0) {
                spFeatureProbs_[pos] = 0.0;
            } else {
                spFeatureProbs_[pos] = (spFeatureCounts_[pos] + alpha_) / denom;
            }
        }
    }
    // ---- p(x_child | c, x_sp) ----
    childProbs_.resize(childCounts_.size());
    for (int child = 0; child < nFeatures_; ++child) {
        if (child == superParent_) {
            continue;
        }
        const int childCard = states_[child];
        // The (spVal, childVal, c) loop nest visits the child's block in storage
        // order, so a running cursor is the same as recomputing the index.
        int cursor = childOffsets_[child];
        for (int spVal = 0; spVal < spCard; ++spVal) {
            for (int childVal = 0; childVal < childCard; ++childVal) {
                for (int c = 0; c < statesClass_; ++c, ++cursor) {
                    // denominator = count of (spVal, c) + alpha_ * |child|
                    const double denom = spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * childCard;
                    if (denom <= 0.0) {
                        childProbs_[cursor] = 0.0;
                    } else {
                        childProbs_[cursor] = (childCounts_[cursor] + alpha_) / denom;
                    }
                }
            }
        }
    }
}
// --------------------------------------
// predict_proba
// --------------------------------------
//
// For a single instance x of dimension nFeatures_:
// P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp).
//
// --------------------------------------
std::vector<double> XSpode::predict_proba(const std::vector<int>& instance) const
{
if (!fitted) {
throw std::logic_error(CLASSIFIER_NOT_FITTED);
}
std::vector<double> probs(statesClass_, 0.0);
// Multiply p(c) × p(x_sp | c)
int spVal = instance[superParent_];
for (int c = 0; c < statesClass_; c++) {
double pc = classPriors_[c];
double pSpC = spFeatureProbs_[spVal * statesClass_ + c];
probs[c] = pc * pSpC * initializer_;
}
// Multiply by each childs probability p(x_child | c, x_sp)
for (int feature = 0; feature < nFeatures_; feature++) {
if (feature == superParent_)
continue; // skip sp
int sf = instance[feature];
int offset = childOffsets_[feature];
int childCard = states_[feature]; // not used directly, but for clarity
// Index into childProbs_ = offset + spVal*(childCard*statesClass_) +
// childVal*statesClass_ + c
int base = offset + spVal * (childCard * statesClass_) + sf * statesClass_;
for (int c = 0; c < statesClass_; c++) {
probs[c] *= childProbs_[base + c];
}
}
// Normalize
normalize(probs);
return probs;
}
std::vector<std::vector<double>> XSpode::predict_proba(std::vector<std::vector<int>>& test_data)
{
int test_size = test_data[0].size();
int sample_size = test_data.size();
auto probabilities = std::vector<std::vector<double>>(
test_size, std::vector<double>(statesClass_));
int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
std::vector<std::thread> threads;
auto worker = [&](const std::vector<std::vector<int>>& samples, int begin,
int chunk, int sample_size,
std::vector<std::vector<double>>& predictions) {
std::string threadName =
"(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
std::vector<int> instance(sample_size);
for (int sample = begin; sample < begin + chunk; ++sample) {
for (int feature = 0; feature < sample_size; ++feature) {
instance[feature] = samples[feature][sample];
}
predictions[sample] = predict_proba(instance);
}
semaphore_.release();
};
for (int begin = 0; begin < test_size; begin += chunk_size) {
int chunk = std::min(chunk_size, test_size - begin);
semaphore_.acquire();
threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
}
for (auto& thread : threads) {
thread.join();
}
return probabilities;
}
// --------------------------------------
// Utility: normalize
// --------------------------------------
/// Scales `v` in place so that its elements add up to 1.
/// The vector is left untouched when its sum is not strictly positive.
void XSpode::normalize(std::vector<double>& v) const
{
    const double total = std::accumulate(v.begin(), v.end(), 0.0);
    if (total <= 0.0) {
        return;
    }
    for (double& element : v) {
        element /= total;
    }
}
// --------------------------------------
// representation of the model
// --------------------------------------
/// Human-readable dump of the model: dimensions, cardinalities, raw counts,
/// probabilities and child offsets.
///
/// Fix over the previous version: the childProbs_ values were written with no
/// label and no opening bracket, so the output had an anonymous list closed
/// by a stray "]". They now get the same "name: size = N" header as the other
/// sized vectors.
std::string XSpode::to_string() const
{
    std::ostringstream oss;
    // Writes every element followed by a blank, then the closing bracket.
    const auto dump = [&oss](const auto& values) {
        for (const auto& value : values)
            oss << value << " ";
        oss << "]" << std::endl;
    };
    oss << "----- XSpode Model -----" << std::endl
        << "nFeatures_ = " << nFeatures_ << std::endl
        << "superParent_ = " << superParent_ << std::endl
        << "statesClass_ = " << statesClass_ << std::endl
        << std::endl;
    oss << "States: [";
    dump(states_);
    oss << "classCounts_: [";
    dump(classCounts_);
    oss << "classPriors_: [";
    dump(classPriors_);
    oss << "spFeatureCounts_: size = " << spFeatureCounts_.size() << std::endl << "[";
    dump(spFeatureCounts_);
    oss << "spFeatureProbs_: size = " << spFeatureProbs_.size() << std::endl << "[";
    dump(spFeatureProbs_);
    oss << "childCounts_: size = " << childCounts_.size() << std::endl << "[";
    dump(childCounts_);
    oss << "childProbs_: size = " << childProbs_.size() << std::endl << "[";
    dump(childProbs_);
    oss << "childOffsets_: [";
    dump(childOffsets_);
    oss << std::string(40, '-') << std::endl;
    return oss.str();
}
/// Nodes of the model graph: every feature plus the class node.
int XSpode::getNumberOfNodes() const
{
    const int classNodes = 1;
    return classNodes + nFeatures_;
}
/// Cardinality of the class variable (max label + 1, set by buildModel).
int XSpode::getClassNumStates() const
{
    return statesClass_;
}
/// Number of features seen by the last buildModel call (0 before fitting).
int XSpode::getNFeatures() const
{
    return nFeatures_;
}
/// Sum of all feature cardinalities multiplied by the number of features.
int XSpode::getNumberOfStates() const
{
    int cardinalitySum = 0;
    for (const int cardinality : states_) {
        cardinalitySum += cardinality;
    }
    return cardinalitySum * nFeatures_;
}
/// Edge count reported for the model: 2 * nFeatures_ + 1.
/// NOTE(review): a graph with class -> every feature and super-parent -> every
/// other feature would have 2 * nFeatures_ - 1 edges; confirm the "+ 1" is the
/// intended convention before relying on this figure.
int XSpode::getNumberOfEdges() const
{
    const int edges = 2 * nFeatures_ + 1;
    return edges;
}
// ------------------------------------------------------
// Predict overrides (classifier interface)
// ------------------------------------------------------
/// Most probable class for a single instance (first one wins on ties).
int XSpode::predict(const std::vector<int>& instance) const
{
    const std::vector<double> posterior = predict_proba(instance);
    const auto best = std::max_element(posterior.begin(), posterior.end());
    return static_cast<int>(best - posterior.begin());
}
/// Most probable class for every sample of a feature-major matrix
/// (test_data[feature][sample]).
std::vector<int> XSpode::predict(std::vector<std::vector<int>>& test_data)
{
    const auto posteriors = predict_proba(test_data);
    std::vector<int> labels;
    labels.reserve(posteriors.size());
    for (const auto& row : posteriors) {
        // argmax of the row; the first maximum wins on ties
        const auto best = std::max_element(row.begin(), row.end());
        labels.push_back(static_cast<int>(best - row.begin()));
    }
    return labels;
}
/// Tensor front-end of predict(): converts X to a nested vector, predicts and
/// packs the labels into an Int32 tensor.
torch::Tensor XSpode::predict(torch::Tensor& X)
{
    auto samples = TensorUtils::to_matrix(X);
    const std::vector<int> labels = predict(samples);
    return torch::tensor(labels, torch::kInt32);
}
/// Tensor front-end of predict_proba(): returns a
/// (X.size(1), statesClass_) double tensor with one row per sample.
torch::Tensor XSpode::predict_proba(torch::Tensor& X)
{
    auto samples = TensorUtils::to_matrix(X);
    const auto posteriors = predict_proba(samples);
    const int n_samples = X.size(1);
    torch::Tensor result = torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
    // copy each probability vector into its row
    int row = 0;
    for (const auto& posterior : posteriors) {
        result.index_put_({ row, "..." }, torch::tensor(posterior));
        ++row;
    }
    return result;
}
/// Accuracy on (X, y): number of matching predictions divided by y.size(0).
float XSpode::score(torch::Tensor& X, torch::Tensor& y)
{
    const torch::Tensor predicted = predict(X);
    const float hits = (predicted == y).sum().item<float>();
    return hits / y.size(0);
}
/// Accuracy on (X, y): fraction of predictions equal to the matching label.
/// y is expected to hold at least one label per predicted sample.
float XSpode::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{
    const auto predicted = this->predict(X);
    const auto total = predicted.size();
    int hits = 0;
    for (std::size_t i = 0; i < total; ++i) {
        hits += (predicted[i] == y[i]) ? 1 : 0;
    }
    return static_cast<double>(hits) / total;
}
} // namespace bayesnet

View File

@@ -0,0 +1,76 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef XSPODE_H
#define XSPODE_H
#include <vector>
#include <torch/torch.h>
#include "Classifier.h"
#include "bayesnet/utils/CountingSemaphore.h"
namespace bayesnet {
// Classifier with a single super-parent feature:
//   P(c | x) ∝ p(c) · p(x_sp | c) · ∏_{child ≠ sp} p(x_child | c, x_sp)
// Counts and probabilities are stored in flat vectors rather than in a
// Network graph.
class XSpode : public Classifier {
public:
// spIndex: feature index of the super-parent (can be replaced through the
// "parent" hyperparameter).
explicit XSpode(int spIndex);
// Posterior for one instance; throws std::logic_error if not fitted.
std::vector<double> predict_proba(const std::vector<int>& instance) const;
// Batch posterior; X is indexed as X[feature][sample], one result row per sample.
std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
// Index of the most probable class for one instance.
int predict(const std::vector<int>& instance) const;
// Divides v by its sum in place; v is left unchanged when the sum is <= 0.
void normalize(std::vector<double>& v) const;
// Text dump of dimensions, counts, probabilities and offsets.
std::string to_string() const;
int getNFeatures() const;
// nFeatures_ + 1
int getNumberOfNodes() const override;
// 2 * nFeatures_ + 1
int getNumberOfEdges() const override;
// sum(states_) * nFeatures_
int getNumberOfStates() const override;
int getClassNumStates() const override;
// NOTE(review): no definition of getStates() is visible in XSPODE.cc -- confirm it exists.
std::vector<int>& getStates();
// No graph is rendered for this model: the result contains only the title.
std::vector<std::string> graph(const std::string& title) const override { return std::vector<std::string>({ title }); }
// Tensor-based fit: X.size(0) is taken as the number of features and
// X.size(1) as the number of samples.
void fitx(torch::Tensor& X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing);
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
//
// Classifier interface
//
torch::Tensor predict(torch::Tensor& X) override;
std::vector<int> predict(std::vector<std::vector<int>>& X) override;
torch::Tensor predict_proba(torch::Tensor& X) override;
float score(torch::Tensor& X, torch::Tensor& y) override;
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
protected:
// Derives the cardinalities from the dataset and sizes the counters.
void buildModel(const torch::Tensor& weights) override;
// Accumulates weighted counts, picks alpha_ and computes the probabilities.
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
private:
// instance holds the feature values followed by the class; weight <= 0 is ignored.
void addSample(const std::vector<int>& instance, double weight);
void computeProbabilities();
int superParent_;
int nFeatures_;
int statesClass_;
std::vector<int> states_; // [states_feat0, ..., states_feat(N-1)] (class not included in this array)
// Class counts
std::vector<double> classCounts_; // [c], accumulative
std::vector<double> classPriors_; // [c], after normalization
// For p(x_sp = spVal | c)
std::vector<double> spFeatureCounts_; // [spVal * statesClass_ + c]
std::vector<double> spFeatureProbs_; // same shape, after normalization
// For p(x_child = childVal | x_sp = spVal, c)
// childCounts_ is big enough to hold all child features except sp.
// For each child f, childOffsets_[f] is the start index of its block
// (-1 for the super-parent), and the entry for (spVal, childVal, c) is at
//   childOffsets_[f] + spVal * (states_[f] * statesClass_) + childVal * statesClass_ + c
std::vector<double> childCounts_;
std::vector<double> childProbs_;
std::vector<int> childOffsets_;
// Additive smoothing constant applied to every count.
double alpha_ = 1.0;
double initializer_; // for numerical stability
// Shared semaphore limiting the number of concurrent prediction threads.
CountingSemaphore& semaphore_;
};
}
#endif // XSPODE_H

View File

@@ -0,0 +1,40 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include "A2DE.h"
namespace bayesnet {
/// Builds an empty A2DE ensemble.
/// @param predict_voting forwarded to the Ensemble base class
A2DE::A2DE(bool predict_voting)
    : Ensemble(predict_voting)
{
    // "predict_voting" is the only hyperparameter handled by this class.
    validHyperparameters = { "predict_voting" };
}
/// Consumes the optional "predict_voting" entry and forwards whatever is left
/// to Classifier::setHyperparameters.
void A2DE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
    static constexpr const char* kVotingKey = "predict_voting";
    nlohmann::json remaining = hyperparameters_; // work on a copy, the input is const
    if (remaining.contains(kVotingKey)) {
        predict_voting = remaining.at(kVotingKey);
        remaining.erase(kVotingKey);
    }
    Classifier::setHyperparameters(remaining);
}
/// Creates one SPnDE model for every unordered pair of features (i, j), i < j,
/// and gives every model a significance of 1.0.
///
/// @param weights unused here; part of the Classifier interface
void A2DE::buildModel(const torch::Tensor& weights)
{
    models.clear();
    significanceModels.clear();
    // Work with a signed count: `features.size() - 1` is unsigned and wraps
    // around to a huge value when there are no features.
    const int n_features = static_cast<int>(features.size());
    for (int i = 0; i < n_features - 1; ++i) {
        for (int j = i + 1; j < n_features; ++j) {
            models.push_back(std::make_unique<SPnDE>(std::vector<int>({ i, j })));
        }
    }
    n_models = static_cast<unsigned>(models.size());
    significanceModels = std::vector<double>(n_models, 1.0);
}
/// Graph description of the ensemble; delegates entirely to Ensemble::graph.
std::vector<std::string> A2DE::graph(const std::string& title) const
{
    std::vector<std::string> lines = Ensemble::graph(title);
    return lines;
}
}

22
bayesnet/ensembles/A2DE.h Normal file
View File

@@ -0,0 +1,22 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef A2DE_H
#define A2DE_H
#include "bayesnet/classifiers/SPnDE.h"
#include "Ensemble.h"
namespace bayesnet {
// Ensemble holding one SPnDE model per unordered pair of features
// (see A2DE::buildModel).
class A2DE : public Ensemble {
public:
// predict_voting is forwarded to Ensemble; it can also be set through the
// "predict_voting" hyperparameter.
A2DE(bool predict_voting = false);
virtual ~A2DE() {};
void setHyperparameters(const nlohmann::json& hyperparameters) override;
// Delegates to Ensemble::graph.
std::vector<std::string> graph(const std::string& title = "A2DE") const override;
protected:
// Rebuilds `models` from scratch and sets every significance to 1.0.
void buildModel(const torch::Tensor& weights) override;
};
}
#endif

View File

@@ -10,7 +10,7 @@ namespace bayesnet {
AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className) AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
{ {
} }
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
{ {
checkInput(X_, y_); checkInput(X_, y_);
features = features_; features = features_;
@@ -20,8 +20,9 @@ namespace bayesnet {
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y); states = fit_local_discretization(y);
// We have discretized the input data // We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network // 1st we need to fit the model to build the normal AODE structure, Ensemble::fit
Ensemble::fit(dataset, features, className, states); // calls buildModel to initialize the base models
Ensemble::fit(dataset, features, className, states, smoothing);
return *this; return *this;
} }
@@ -34,10 +35,10 @@ namespace bayesnet {
n_models = models.size(); n_models = models.size();
significanceModels = std::vector<double>(n_models, 1.0); significanceModels = std::vector<double>(n_models, 1.0);
} }
void AODELd::trainModel(const torch::Tensor& weights) void AODELd::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
for (const auto& model : models) { for (const auto& model : models) {
model->fit(Xf, y, features, className, states); model->fit(Xf, y, features, className, states, smoothing);
} }
} }
std::vector<std::string> AODELd::graph(const std::string& name) const std::vector<std::string> AODELd::graph(const std::string& name) const

View File

@@ -15,10 +15,10 @@ namespace bayesnet {
public: public:
AODELd(bool predict_voting = true); AODELd(bool predict_voting = true);
virtual ~AODELd() = default; virtual ~AODELd() = default;
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override; AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing) override;
std::vector<std::string> graph(const std::string& name = "AODELd") const override; std::vector<std::string> graph(const std::string& name = "AODELd") const override;
protected: protected:
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
}; };
} }

268
bayesnet/ensembles/Boost.cc Normal file
View File

@@ -0,0 +1,268 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include "Boost.h"
#include "bayesnet/feature_selection/CFS.h"
#include "bayesnet/feature_selection/FCBF.h"
#include "bayesnet/feature_selection/IWSS.h"
#include <folding.hpp>
namespace bayesnet {
/// Base constructor for the boosting ensembles.
/// @param predict_voting forwarded to the Ensemble base class
Boost::Boost(bool predict_voting) : Ensemble(predict_voting) {
    // Hyperparameters understood by Boost::setHyperparameters.
    validHyperparameters = {
        "alpha_block",
        "order",
        "convergence",
        "convergence_best",
        "bisection",
        "threshold",
        "maxTolerance",
        "predict_voting",
        "select_features",
        "block_update"
    };
}
/// Reads the boosting hyperparameters from the JSON object, validates them and
/// forwards the entries it does not handle to Classifier::setHyperparameters.
///
/// @throws std::invalid_argument for an unknown "order" or "select_features"
///         value, a "maxTolerance" outside [1, 6], block_update combined with
///         alpha_block, or block_update without bisection
void Boost::setHyperparameters(const nlohmann::json &hyperparameters_) {
    nlohmann::json pending = hyperparameters_;
    // Copies pending[key] into `target` and removes the key, when present.
    const auto consume = [&pending](const char *key, auto &target) {
        if (!pending.contains(key)) {
            return;
        }
        target = pending[key];
        pending.erase(key);
    };

    if (pending.contains("order")) {
        const std::vector<std::string> validOrders = {Orders.ASC, Orders.DESC, Orders.RAND};
        order_algorithm = pending["order"];
        if (std::find(validOrders.begin(), validOrders.end(), order_algorithm) == validOrders.end()) {
            throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC +
                                        ", " + Orders.RAND + "]");
        }
        pending.erase("order");
    }
    consume("alpha_block", alpha_block);
    consume("convergence", convergence);
    consume("convergence_best", convergence_best);
    consume("bisection", bisection);
    consume("threshold", threshold);
    if (pending.contains("maxTolerance")) {
        maxTolerance = pending["maxTolerance"];
        const bool inRange = !(maxTolerance < 1 || maxTolerance > 6);
        if (!inRange) {
            throw std::invalid_argument("Invalid maxTolerance value, must be greater in [1, 6]");
        }
        pending.erase("maxTolerance");
    }
    consume("predict_voting", predict_voting);
    if (pending.contains("select_features")) {
        auto requested = pending["select_features"];
        const std::vector<std::string> validSelectors = {SelectFeatures.IWSS, SelectFeatures.CFS,
                                                         SelectFeatures.FCBF};
        selectFeatures = true;
        select_features_algorithm = requested;
        if (std::find(validSelectors.begin(), validSelectors.end(), requested) == validSelectors.end()) {
            throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " +
                                        SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
        }
        pending.erase("select_features");
    }
    consume("block_update", block_update);

    // Cross-parameter constraints.
    if (block_update && alpha_block) {
        throw std::invalid_argument("alpha_block and block_update cannot be true at the same time");
    }
    if (block_update && !bisection) {
        throw std::invalid_argument("block_update needs bisection to be true");
    }
    Classifier::setHyperparameters(pending);
}
/// Appends a trained model and its significance to the ensemble.
/// @param model        classifier whose ownership is transferred to the ensemble
/// @param significance weight of the model in the ensemble
void Boost::add_model(std::unique_ptr<Classifier> model, double significance) {
    models.emplace_back(std::move(model));
    ++n_models;
    significanceModels.emplace_back(significance);
}
/// Drops the most recently added model together with its significance.
/// The ensemble must not be empty.
void Boost::remove_last_model() {
    models.pop_back();
    significanceModels.pop_back();
    --n_models;
}
/// Resets the ensemble and prepares the training (and, with convergence
/// enabled, validation) tensors. The models themselves are created later, in
/// trainModel.
///
/// @param weights unused here; part of the Classifier interface
void Boost::buildModel(const torch::Tensor &weights) {
    models.clear();
    significanceModels.clear();
    n_models = 0;

    // Last row of the dataset holds the labels; every other row is a feature.
    auto y_ = dataset.index({-1, "..."});
    const auto featureRows = torch::indexing::Slice(0, dataset.size(0) - 1);

    if (!convergence) {
        // No validation split: train on all the data.
        X_train = dataset.index({featureRows, "..."});
        y_train = y_;
        return;
    }

    // Hold out the test part of fold 0 of a stratified 5-fold split (seed 271)
    // as the validation set.
    auto fold = folding::StratifiedKFold(5, y_, 271);
    auto [train, test] = fold.getFold(0);
    auto train_t = torch::tensor(train);
    auto test_t = torch::tensor(test);
    X_train = dataset.index({featureRows, train_t});
    y_train = dataset.index({-1, train_t});
    X_test = dataset.index({featureRows, test_t});
    y_test = dataset.index({-1, test_t});

    // From here on the dataset contains only the training samples.
    dataset = X_train;
    m = X_train.size(1);
    auto n_classes = states.at(className).size();
    buildDataset(y_train);
    metrics = Metrics(dataset, features, className, n_classes);
}
/// Runs the configured feature selection algorithm (CFS, IWSS or FCBF) on the
/// current dataset and returns the indices of the selected features.
///
/// The selector is created for this call only. It is destroyed, and the
/// member pointer reset, on every exit path, including when fit() or
/// getFeatures() throws.
///
/// @param weights_ sample weights handed to the selector
/// @throws std::invalid_argument if the threshold is outside the range accepted
///         by the algorithm, or if the algorithm name is not recognised
std::vector<int> Boost::featureSelection(torch::Tensor &weights_) {
    int maxFeatures = 0;
    featureSelector = nullptr;
    if (select_features_algorithm == SelectFeatures.CFS) {
        featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
    } else if (select_features_algorithm == SelectFeatures.IWSS) {
        if (threshold < 0 || threshold > 0.5) {
            throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.IWSS + " [0, 0.5]");
        }
        featureSelector =
            new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
    } else if (select_features_algorithm == SelectFeatures.FCBF) {
        if (threshold < 1e-7 || threshold > 1) {
            throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.FCBF + " [1e-7, 1]");
        }
        featureSelector =
            new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
    } else {
        // Without this branch the code below would dereference a null pointer.
        throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " +
                                    SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
    }
    std::vector<int> featuresUsed;
    try {
        featureSelector->fit();
        featuresUsed = featureSelector->getFeatures();
    } catch (...) {
        // Do not leak the selector when the selection fails.
        delete featureSelector;
        featureSelector = nullptr;
        throw;
    }
    delete featureSelector;
    featureSelector = nullptr; // never leave a dangling pointer in the member
    return featuresUsed;
}
// Computes the weighted error of the predictions and, unless the error is
// above 0.5, derives alpha_t and updates and renormalises the sample weights.
//
// ytrain  : true labels
// ypred   : predicted labels
// weights : sample weights, modified through the reference
// returns : {weights, alpha_t, terminate}; terminate is true (and alpha_t is 0,
//           weights untouched) when the weighted error exceeds 0.5
std::tuple<torch::Tensor &, double, bool> Boost::update_weights(torch::Tensor &ytrain, torch::Tensor &ypred,
torch::Tensor &weights) {
bool terminate = false;
double alpha_t = 0;
auto mask_wrong = ypred != ytrain;
auto mask_right = ypred == ytrain;
// epsilon_t = sum of the weights of the misclassified samples
auto masked_weights = weights * mask_wrong.to(weights.dtype());
double epsilon_t = masked_weights.sum().item<double>();
// std::cout << "epsilon_t: " << epsilon_t << " count wrong: " << mask_wrong.sum().item<int>() << " count right: "
// << mask_right.sum().item<int>() << std::endl;
if (epsilon_t > 0.5) {
// Inverse the weights policy (plot ln(wt))
// "In each round of AdaBoost, there is a sanity check to ensure that the current base
// learner is better than random guess" (Zhi-Hua Zhou, 2012)
terminate = true;
} else {
double wt = (1 - epsilon_t) / epsilon_t;
// A perfect learner (epsilon_t == 0) gets alpha_t = 1 instead of log(inf).
alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
// In-place add: misclassified weights become w * (1 + exp(alpha_t)).
// NOTE(review): textbook AdaBoost multiplies by exp(alpha_t) instead; confirm
// the additive form is intended before changing it (results depend on it).
weights += mask_wrong.to(weights.dtype()) * exp(alpha_t) * weights;
// Step 3.2.2: Update weights of right samples
// In-place add: correctly classified weights become w * (1 + exp(-alpha_t)).
weights += mask_right.to(weights.dtype()) * exp(-alpha_t) * weights;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights).item<double>();
weights = weights / totalWeights;
}
return {weights, alpha_t, terminate};
}
// Updates the sample weights using the prediction of only the last k models
// of the ensemble, then assigns the resulting alpha_t as the significance of
// those k models. The ensemble (models, significances, n_models) is restored
// to its original content before returning.
//
// k       : number of models in the block (the last k entries of `models`)
// ytrain  : not used in the body; the member y_train is used instead
// weights : sample weights, modified through the reference
// returns : {weights, alpha_t, terminate} as produced by update_weights
std::tuple<torch::Tensor &, double, bool> Boost::update_weights_block(int k, torch::Tensor &ytrain,
torch::Tensor &weights) {
/* Update Block algorithm
k = # of models in block
n_models = # of models in ensemble to make predictions
n_models_bak = # models saved
models = vector of models to make predictions
models_bak = models not used to make predictions
significances_bak = backup of significances vector
Case list
A) k = 1, n_models = 1 => n = 0 , n_models = n + k
B) k = 1, n_models = n + 1 => n_models = n + k
C) k > 1, n_models = k + 1 => n= 1, n_models = n + k
D) k > 1, n_models = k => n = 0, n_models = n + k
E) k > 1, n_models = k + n => n_models = n + k
A, D) n=0, k > 0, n_models == k
1. n_models_bak <- n_models
2. significances_bak <- significances
3. significances = vector(k, 1)
4. Don't move any classifiers out of models
5. n_models <- k
6. Make prediction, compute alpha, update weights
7. Don't restore any classifiers to models
8. significances <- significances_bak
9. Update last k significances
10. n_models <- n_models_bak
B, C, E) n > 0, k > 0, n_models == n + k
1. n_models_bak <- n_models
2. significances_bak <- significances
3. significances = vector(k, 1)
4. Move first n classifiers to models_bak
5. n_models <- k
6. Make prediction, compute alpha, update weights
7. Insert classifiers in models_bak to be the first n models
8. significances <- significances_bak
9. Update last k significances
10. n_models <- n_models_bak
*/
//
// Make predict with only the last k models
//
std::unique_ptr<Classifier> model;
std::vector<std::unique_ptr<Classifier>> models_bak;
// 1. n_models_bak <- n_models 2. significances_bak <- significances
auto significance_bak = significanceModels;
auto n_models_bak = n_models;
// 3. significances = vector(k, 1)
significanceModels = std::vector<double>(k, 1.0);
// 4. Move first n classifiers to models_bak
// backup the first n_models - k models (if n_models == k, don't backup any)
// Always takes the front element, so models_bak keeps the original order.
for (int i = 0; i < n_models - k; ++i) {
model = std::move(models[0]);
models.erase(models.begin());
models_bak.push_back(std::move(model));
}
assert(models.size() == k);
// 5. n_models <- k
n_models = k;
// 6. Make prediction, compute alpha, update weights
auto ypred = predict(X_train);
//
// Update weights
//
double alpha_t;
bool terminate;
std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
//
// Restore the models if needed
//
// 7. Insert classifiers in models_bak to be the first n models
// if n_models_bak == k, don't restore any, because none of them were moved
if (k != n_models_bak) {
// Insert in the same order as they were extracted
// (the backup is consumed from its end and each model is inserted at the front)
int bak_size = models_bak.size();
for (int i = 0; i < bak_size; ++i) {
model = std::move(models_bak[bak_size - 1 - i]);
models_bak.erase(models_bak.end() - 1);
models.insert(models.begin(), std::move(model));
}
}
// 8. significances <- significances_bak
significanceModels = significance_bak;
//
// Update the significance of the last k models
//
// 9. Update last k significances
for (int i = 0; i < k; ++i) {
significanceModels[n_models_bak - k + i] = alpha_t;
}
// 10. n_models <- n_models_bak
n_models = n_models_bak;
return {weights, alpha_t, terminate};
}
} // namespace bayesnet

View File

@@ -0,0 +1,57 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef BOOST_H
#define BOOST_H
#include <string>
#include <tuple>
#include <vector>
#include <nlohmann/json.hpp>
#include <torch/torch.h>
#include "Ensemble.h"
#include "bayesnet/feature_selection/FeatureSelect.h"
namespace bayesnet {
// Names of the supported feature selection algorithms.
// Declared const, so every translation unit including this header gets its
// own internal-linkage copy.
const struct {
    std::string CFS{ "CFS" };
    std::string FCBF{ "FCBF" };
    std::string IWSS{ "IWSS" };
} SelectFeatures;
// Names of the orders in which candidate features can be processed.
// Declared const, so every translation unit including this header gets its
// own internal-linkage copy.
const struct {
    std::string ASC{ "asc" };
    std::string DESC{ "desc" };
    std::string RAND{ "rand" };
} Orders;
class Boost : public Ensemble {
public:
explicit Boost(bool predict_voting = false);
virtual ~Boost() override = default;
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
protected:
std::vector<int> featureSelection(torch::Tensor& weights_);
void buildModel(const torch::Tensor& weights) override;
std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights);
std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights);
void add_model(std::unique_ptr<Classifier> model, double significance);
void remove_last_model();
//
// Attributes
//
torch::Tensor X_train, y_train, X_test, y_test;
// Hyperparameters
bool bisection = true; // if true, use bisection stratety to add k models at once to the ensemble
int maxTolerance = 3;
std::string order_algorithm = Orders.DESC; // order to process the KBest features asc, desc, rand
bool convergence = true; //if true, stop when the model does not improve
bool convergence_best = false; // wether to keep the best accuracy to the moment or the last accuracy as prior accuracy
bool selectFeatures = false; // if true, use feature selection
std::string select_features_algorithm; // Selected feature selection algorithm
FeatureSelect* featureSelector = nullptr;
double threshold = -1;
bool block_update = false; // if true, use block update algorithm, only meaningful if bisection is true
bool alpha_block = false; // if true, the alpha is computed with the ensemble built so far and the new model
};
}
#endif

View File

@@ -0,0 +1,165 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <limits.h>
#include <tuple>
#include <folding.hpp>
#include "BoostA2DE.h"
namespace bayesnet {
// Construct the ensemble; all the configuration is handled by the Boost base class.
BoostA2DE::BoostA2DE(bool predict_voting)
    : Boost(predict_voting)
{}
std::vector<int> BoostA2DE::initializeModels(const Smoothing_t smoothing)
{
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
std::vector<int> featuresSelected = featureSelection(weights_);
if (featuresSelected.size() < 2) {
notes.push_back("No features selected in initialization");
status = ERROR;
return std::vector<int>();
}
for (int i = 0; i < featuresSelected.size() - 1; i++) {
for (int j = i + 1; j < featuresSelected.size(); j++) {
auto parents = { featuresSelected[i], featuresSelected[j] };
std::unique_ptr<Classifier> model = std::make_unique<SPnDE>(parents);
model->fit(dataset, features, className, states, weights_, smoothing);
models.push_back(std::move(model));
significanceModels.push_back(1.0); // They will be updated later in trainModel
n_models++;
}
}
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
return featuresSelected;
}
// Train the ensemble with an AdaBoost style loop, based on the adaboost
// algorithm for classification as explained in Ensemble methods
// (Zhi-Hua Zhou, 2012).
//
// In every round a pack of up to k SPnDE models (k = 2^tolerance when
// bisection is enabled, 1 otherwise) is fitted on the next feature pairs and
// appended to the ensemble. The sample weights are updated either after each
// model or, when block_update is set, once per pack. The loop stops when
// update_weights reports termination, when the validation accuracy has not
// improved for more than maxTolerance rounds, or when the current pair list is
// exhausted.
//
// weights:   NOTE(review): not read here; training always starts from uniform
//            weights.
// smoothing: forwarded to the fit() of every model.
void BoostA2DE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{
    //
    // Logging setup
    //
    // loguru::set_thread_name("BoostA2DE");
    // loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
    // loguru::add_file("boostA2DE.log", loguru::Truncate, loguru::Verbosity_MAX);
    fitted = true;
    double alpha_t = 0;
    torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
    bool finished = false;
    std::vector<int> featuresUsed;
    if (selectFeatures) {
        featuresUsed = initializeModels(smoothing);
        if (featuresUsed.size() == 0) {
            // initializeModels already recorded the error
            return;
        }
        auto ypred = predict(X_train);
        std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
        // Update significance of the models
        for (int i = 0; i < n_models; ++i) {
            significanceModels[i] = alpha_t;
        }
        if (finished) {
            return;
        }
    }
    int numItemsPack = 0; // The counter of the models inserted since the last improvement
    // Variables to control the accuracy finish condition
    double priorAccuracy = 0.0;
    double improvement = 1.0;
    double convergence_threshold = 1e-4;
    int tolerance = 0; // number of times the improvement is lower than the convergence_threshold
    // Step 0: Set the finish condition
    // epsilon sub t > 0.5 => inverse the weights policy
    // validation error is not decreasing
    // run out of features
    bool ascending = order_algorithm == Orders.ASC;
    std::mt19937 g{ 173 };
    std::vector<std::pair<int, int>> pairSelection;
    while (!finished) {
        // Step 1: Build ranking with mutual information
        pairSelection = metrics.SelectKPairs(weights_, featuresUsed, ascending, 0); // Get all the pairs sorted
        if (order_algorithm == Orders.RAND) {
            std::shuffle(pairSelection.begin(), pairSelection.end(), g);
        }
        // Pack size. tolerance never exceeds maxTolerance inside the loop, so
        // an integer shift gives the exact power of two without going through
        // floating point.
        int k = bisection ? (1 << tolerance) : 1;
        int added = 0; // The number of models actually appended in the current pack
        // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
        while (added < k && pairSelection.size() > 0) {
            auto feature_pair = pairSelection[0];
            pairSelection.erase(pairSelection.begin());
            std::unique_ptr<Classifier> model;
            model = std::make_unique<SPnDE>(std::vector<int>({ feature_pair.first, feature_pair.second }));
            model->fit(dataset, features, className, states, weights_, smoothing);
            alpha_t = 0.0;
            if (!block_update) {
                auto ypred = model->predict(X_train);
                // Step 3.1: Compute the classifier amount of say
                std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
            }
            // Step 3.4: Store classifier and its accuracy to weigh its future vote
            numItemsPack++;
            models.push_back(std::move(model));
            significanceModels.push_back(alpha_t);
            n_models++;
            ++added;
            // VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
        }
        if (block_update && added > 0) {
            // The block is made of the models appended in this pack. When the
            // pair list ran out before k models were added, passing k would
            // pull older models into the block and overwrite their significances.
            std::tie(weights_, alpha_t, finished) = update_weights_block(added, y_train, weights_);
        }
        if (convergence && !finished) {
            auto y_val_predict = predict(X_test);
            double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
            if (priorAccuracy == 0) {
                priorAccuracy = accuracy;
            } else {
                improvement = accuracy - priorAccuracy;
            }
            if (improvement < convergence_threshold) {
                // VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance++;
            } else {
                // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance = 0; // Reset the counter if the model performs better
                numItemsPack = 0;
            }
            if (convergence_best) {
                // Keep the best accuracy until now as the prior accuracy
                priorAccuracy = std::max(accuracy, priorAccuracy);
            } else {
                // Keep the last accuracy obtained as the prior accuracy
                priorAccuracy = accuracy;
            }
        }
        // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
        finished = finished || tolerance > maxTolerance || pairSelection.size() == 0;
    }
    if (tolerance > maxTolerance) {
        // Drop the models added since the last improvement, unless that would
        // empty the ensemble
        if (numItemsPack < n_models) {
            notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
            for (int i = 0; i < numItemsPack; ++i) {
                significanceModels.pop_back();
                models.pop_back();
                n_models--;
            }
        } else {
            notes.push_back("Convergence threshold reached & 0 models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
        }
    }
    if (pairSelection.size() > 0) {
        notes.push_back("Pairs not used in train: " + std::to_string(pairSelection.size()));
        status = WARNING;
    }
    notes.push_back("Number of models: " + std::to_string(n_models));
}
// Return the graph description produced by the Ensemble implementation for
// the given title.
std::vector<std::string> BoostA2DE::graph(const std::string& title) const
{
    auto lines = Ensemble::graph(title);
    return lines;
}
}

View File

@@ -0,0 +1,25 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef BOOSTA2DE_H
#define BOOSTA2DE_H
#include <string>
#include <vector>
#include "bayesnet/classifiers/SPnDE.h"
#include "Boost.h"
namespace bayesnet {
class BoostA2DE : public Boost {
public:
explicit BoostA2DE(bool predict_voting = false);
virtual ~BoostA2DE() = default;
std::vector<std::string> graph(const std::string& title = "BoostA2DE") const override;
protected:
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
private:
std::vector<int> initializeModels(const Smoothing_t smoothing);
};
}
#endif

View File

@@ -4,276 +4,43 @@
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// *************************************************************** // ***************************************************************
#include <set>
#include <functional>
#include <limits.h>
#include <tuple>
#include <folding.hpp>
#include "bayesnet/feature_selection/CFS.h"
#include "bayesnet/feature_selection/FCBF.h"
#include "bayesnet/feature_selection/IWSS.h"
#include "BoostAODE.h" #include "BoostAODE.h"
#include "bayesnet/classifiers/SPODE.h"
#include "bayesnet/utils/loguru.cpp" #include <limits.h>
// #include <loguru.cpp>
// #include <loguru.hpp>
#include <random>
#include <set>
#include <tuple>
namespace bayesnet { namespace bayesnet {
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting) BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting)
{ {
validHyperparameters = {
"maxModels", "bisection", "order", "convergence", "threshold",
"select_features", "maxTolerance", "predict_voting", "block_update"
};
} }
void BoostAODE::buildModel(const torch::Tensor& weights) std::vector<int> BoostAODE::initializeModels(const Smoothing_t smoothing)
{ {
// Models shall be built in trainModel
models.clear();
significanceModels.clear();
n_models = 0;
// Prepare the validation dataset
auto y_ = dataset.index({ -1, "..." });
if (convergence) {
// Prepare train & validation sets from train data
auto fold = folding::StratifiedKFold(5, y_, 271);
auto [train, test] = fold.getFold(0);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
// Get train and validation sets
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
y_train = dataset.index({ -1, train_t });
X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
y_test = dataset.index({ -1, test_t });
dataset = X_train;
m = X_train.size(1);
auto n_classes = states.at(className).size();
// Build dataset with train data
buildDataset(y_train);
metrics = Metrics(dataset, features, className, n_classes);
} else {
// Use all data to train
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
y_train = y_;
}
}
// Read the hyperparameters handled by this class from the given json, remove
// them from a private copy and forward whatever is left to
// Classifier::setHyperparameters.
// Throws std::invalid_argument when "order", "maxTolerance" or
// "select_features" hold a value that is not accepted.
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
    // Membership test shared by the validations below
    const auto is_one_of = [](const std::string& value, const std::vector<std::string>& options) {
        return std::find(options.begin(), options.end(), value) != options.end();
    };
    // The argument is const, so the consumed keys are removed from a copy
    auto pending = hyperparameters_;
    if (pending.contains("order")) {
        order_algorithm = pending["order"];
        if (!is_one_of(order_algorithm, { Orders.ASC, Orders.DESC, Orders.RAND })) {
            throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC + ", " + Orders.RAND + "]");
        }
        pending.erase("order");
    }
    if (pending.contains("convergence")) {
        convergence = pending["convergence"];
        pending.erase("convergence");
    }
    if (pending.contains("bisection")) {
        bisection = pending["bisection"];
        pending.erase("bisection");
    }
    if (pending.contains("threshold")) {
        threshold = pending["threshold"];
        pending.erase("threshold");
    }
    if (pending.contains("maxTolerance")) {
        maxTolerance = pending["maxTolerance"];
        const bool in_range = maxTolerance >= 1 && maxTolerance <= 4;
        if (!in_range) {
            throw std::invalid_argument("Invalid maxTolerance value, must be greater in [1, 4]");
        }
        pending.erase("maxTolerance");
    }
    if (pending.contains("predict_voting")) {
        predict_voting = pending["predict_voting"];
        pending.erase("predict_voting");
    }
    if (pending.contains("select_features")) {
        // The flag and the name are stored before the name is validated
        selectFeatures = true;
        select_features_algorithm = pending["select_features"];
        if (!is_one_of(select_features_algorithm, { SelectFeatures.IWSS, SelectFeatures.CFS, SelectFeatures.FCBF })) {
            throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " + SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
        }
        pending.erase("select_features");
    }
    if (pending.contains("block_update")) {
        block_update = pending["block_update"];
        pending.erase("block_update");
    }
    Classifier::setHyperparameters(pending);
}
// Compute the amount of say (alpha_t) of a prediction and update the sample
// weights accordingly.
// epsilon_t is the sum of the weights of the wrongly predicted samples. When
// it is greater than 0.5 the weights are left untouched and terminate is
// returned as true: "In each round of AdaBoost, there is a sanity check to
// ensure that the current base learner is better than random guess"
// (Zhi-Hua Zhou, 2012).
// Returns {weights, alpha_t, terminate}.
std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
{
    auto misses = ypred != ytrain;
    auto hits = ypred == ytrain;
    const double epsilon_t = (weights * misses.to(weights.dtype())).sum().item<double>();
    double alpha_t = 0;
    bool terminate = epsilon_t > 0.5;
    if (!terminate) {
        // A perfect prediction gets a fixed alpha of 1
        if (epsilon_t == 0) {
            alpha_t = 1;
        } else {
            alpha_t = 0.5 * log((1 - epsilon_t) / epsilon_t);
        }
        // Step 3.2: Update weights for next classifier
        // Step 3.2.1: wrong samples
        weights += misses.to(weights.dtype()) * exp(alpha_t) * weights;
        // Step 3.2.2: right samples
        weights += hits.to(weights.dtype()) * exp(-alpha_t) * weights;
        // Step 3.3: Normalise the weights
        const double totalWeights = torch::sum(weights).item<double>();
        weights = weights / totalWeights;
    }
    return { weights, alpha_t, terminate };
}
std::tuple<torch::Tensor&, double, bool> BoostAODE::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
{
/* Update Block algorithm
k = # of models in block
n_models = # of models in ensemble to make predictions
n_models_bak = # models saved
models = vector of models to make predictions
models_bak = models not used to make predictions
significances_bak = backup of significances vector
Case list
A) k = 1, n_models = 1 => n = 0 , n_models = n + k
B) k = 1, n_models = n + 1 => n_models = n + k
C) k > 1, n_models = k + 1 => n= 1, n_models = n + k
D) k > 1, n_models = k => n = 0, n_models = n + k
E) k > 1, n_models = k + n => n_models = n + k
A, D) n=0, k > 0, n_models == k
1. n_models_bak <- n_models
2. significances_bak <- significances
3. significances = vector(k, 1)
4. Dont move any classifiers out of models
5. n_models <- k
6. Make prediction, compute alpha, update weights
7. Dont restore any classifiers to models
8. significances <- significances_bak
9. Update last k significances
10. n_models <- n_models_bak
B, C, E) n > 0, k > 0, n_models == n + k
1. n_models_bak <- n_models
2. significances_bak <- significances
3. significances = vector(k, 1)
4. Move first n classifiers to models_bak
5. n_models <- k
6. Make prediction, compute alpha, update weights
7. Insert classifiers in models_bak to be the first n models
8. significances <- significances_bak
9. Update last k significances
10. n_models <- n_models_bak
*/
//
// Make predict with only the last k models
//
std::unique_ptr<Classifier> model;
std::vector<std::unique_ptr<Classifier>> models_bak;
// 1. n_models_bak <- n_models 2. significances_bak <- significances
auto significance_bak = significanceModels;
auto n_models_bak = n_models;
// 3. significances = vector(k, 1)
significanceModels = std::vector<double>(k, 1.0);
// 4. Move first n classifiers to models_bak
// backup the first n_models - k models (if n_models == k, don't backup any)
VLOG_SCOPE_F(1, "upd_weights_block n_models=%d k=%d", n_models, k);
for (int i = 0; i < n_models - k; ++i) {
model = std::move(models[0]);
models.erase(models.begin());
models_bak.push_back(std::move(model));
}
assert(models.size() == k);
// 5. n_models <- k
n_models = k;
// 6. Make prediction, compute alpha, update weights
auto ypred = predict(X_train);
//
// Update weights
//
double alpha_t;
bool terminate;
std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
//
// Restore the models if needed
//
// 7. Insert classifiers in models_bak to be the first n models
// if n_models_bak == k, don't restore any, because none of them were moved
if (k != n_models_bak) {
// Insert in the same order as they were extracted
int bak_size = models_bak.size();
for (int i = 0; i < bak_size; ++i) {
model = std::move(models_bak[bak_size - 1 - i]);
models_bak.erase(models_bak.end() - 1);
models.insert(models.begin(), std::move(model));
}
}
// 8. significances <- significances_bak
significanceModels = significance_bak;
//
// Update the significance of the last k models
//
// 9. Update last k significances
for (int i = 0; i < k; ++i) {
significanceModels[n_models_bak - k + i] = alpha_t;
}
// 10. n_models <- n_models_bak
n_models = n_models_bak;
return { weights, alpha_t, terminate };
}
std::vector<int> BoostAODE::initializeModels()
{
std::vector<int> featuresUsed;
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
int maxFeatures = 0; std::vector<int> featuresSelected = featureSelection(weights_);
if (select_features_algorithm == SelectFeatures.CFS) { for (const int& feature : featuresSelected) {
featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
} else if (select_features_algorithm == SelectFeatures.IWSS) {
if (threshold < 0 || threshold >0.5) {
throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.IWSS + " [0, 0.5]");
}
featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
} else if (select_features_algorithm == SelectFeatures.FCBF) {
if (threshold < 1e-7 || threshold > 1) {
throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.FCBF + " [1e-7, 1]");
}
featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
}
featureSelector->fit();
auto cfsFeatures = featureSelector->getFeatures();
auto scores = featureSelector->getScores();
for (int i = 0; i < cfsFeatures.size(); ++i) {
LOG_F(INFO, "Feature: %d Score: %f", cfsFeatures[i], scores[i]);
}
for (const int& feature : cfsFeatures) {
featuresUsed.push_back(feature);
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature); std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_, smoothing);
models.push_back(std::move(model)); models.push_back(std::move(model));
significanceModels.push_back(1.0); // They will be updated later in trainModel significanceModels.push_back(1.0); // They will be updated later in trainModel
n_models++; n_models++;
} }
notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm); notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
delete featureSelector; return featuresSelected;
return featuresUsed;
} }
void BoostAODE::trainModel(const torch::Tensor& weights) void BoostAODE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
// //
// Logging setup // Logging setup
// //
loguru::set_thread_name("BoostAODE"); // loguru::set_thread_name("BoostAODE");
loguru::g_stderr_verbosity = loguru::Verbosity_OFF;; // loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX); // loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX);
// Algorithm based on the adaboost algorithm for classification // Algorithm based on the adaboost algorithm for classification
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012) // as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
fitted = true; fitted = true;
@@ -281,22 +48,19 @@ namespace bayesnet {
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64); torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
bool finished = false; bool finished = false;
std::vector<int> featuresUsed; std::vector<int> featuresUsed;
n_models = 0;
if (selectFeatures) { if (selectFeatures) {
featuresUsed = initializeModels(); featuresUsed = initializeModels(smoothing);
auto ypred = predict(X_train); auto ypred = predict(X_train);
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
// Update significance of the models // Update significance of the models
for (int i = 0; i < n_models; ++i) { for (int i = 0; i < n_models; ++i) {
significanceModels[i] = alpha_t; significanceModels.push_back(alpha_t);
} }
// VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models);
if (finished) { if (finished) {
return; return;
} }
LOG_F(INFO, "Initial models: %d", n_models);
LOG_F(INFO, "Significances: ");
for (int i = 0; i < n_models; ++i) {
LOG_F(INFO, "i=%d significance=%f", i, significanceModels[i]);
}
} }
int numItemsPack = 0; // The counter of the models inserted in the current pack int numItemsPack = 0; // The counter of the models inserted in the current pack
// Variables to control the accuracy finish condition // Variables to control the accuracy finish condition
@@ -313,33 +77,44 @@ namespace bayesnet {
while (!finished) { while (!finished) {
// Step 1: Build ranking with mutual information // Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
VLOG_SCOPE_F(1, "featureSelection.size: %zu featuresUsed.size: %zu", featureSelection.size(), featuresUsed.size());
if (order_algorithm == Orders.RAND) { if (order_algorithm == Orders.RAND) {
std::shuffle(featureSelection.begin(), featureSelection.end(), g); std::shuffle(featureSelection.begin(), featureSelection.end(), g);
} }
// Remove used features // Remove used features
featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x) featureSelection.erase(remove_if(begin(featureSelection), end(featureSelection), [&](auto x) { return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed); }),
{ return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}), end(featureSelection));
end(featureSelection) int k = bisection ? pow(2, tolerance) : 1;
);
int k = pow(2, tolerance);
int counter = 0; // The model counter of the current pack int counter = 0; // The model counter of the current pack
VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size()); // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
while (counter++ < k && featureSelection.size() > 0) { while (counter++ < k && featureSelection.size() > 0) {
auto feature = featureSelection[0]; auto feature = featureSelection[0];
featureSelection.erase(featureSelection.begin()); featureSelection.erase(featureSelection.begin());
std::unique_ptr<Classifier> model; std::unique_ptr<Classifier> model;
model = std::make_unique<SPODE>(feature); model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_); model->fit(dataset, features, className, states, weights_, smoothing);
alpha_t = 0.0; alpha_t = 0.0;
if (!block_update) { if (!block_update) {
auto ypred = model->predict(X_train); torch::Tensor ypred;
if (alpha_block) {
//
// Compute the prediction with the current ensemble + model
//
// Add the model to the ensemble
n_models++;
models.push_back(std::move(model));
significanceModels.push_back(1);
// Compute the prediction
ypred = predict(X_train);
// Remove the model from the ensemble
model = std::move(models.back());
models.pop_back();
significanceModels.pop_back();
n_models--;
} else {
ypred = model->predict(X_train);
}
// Step 3.1: Compute the classifier amout of say // Step 3.1: Compute the classifier amout of say
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_); std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
if (finished) {
VLOG_SCOPE_F(2, "** epsilon_t > 0.5 **");
break;
}
} }
// Step 3.4: Store classifier and its accuracy to weigh its future vote // Step 3.4: Store classifier and its accuracy to weigh its future vote
numItemsPack++; numItemsPack++;
@@ -347,7 +122,7 @@ namespace bayesnet {
models.push_back(std::move(model)); models.push_back(std::move(model));
significanceModels.push_back(alpha_t); significanceModels.push_back(alpha_t);
n_models++; n_models++;
VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size()); // VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size());
} }
if (block_update) { if (block_update) {
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_); std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
@@ -357,37 +132,40 @@ namespace bayesnet {
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0); double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
if (priorAccuracy == 0) { if (priorAccuracy == 0) {
priorAccuracy = accuracy; priorAccuracy = accuracy;
VLOG_SCOPE_F(3, "First accuracy: %f", priorAccuracy);
} else { } else {
improvement = accuracy - priorAccuracy; improvement = accuracy - priorAccuracy;
} }
if (improvement < convergence_threshold) { if (improvement < convergence_threshold) {
VLOG_SCOPE_F(3, "(improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy); // VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
tolerance++; tolerance++;
} else { } else {
VLOG_SCOPE_F(3, "*(improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy); // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
tolerance = 0; // Reset the counter if the model performs better tolerance = 0; // Reset the counter if the model performs better
numItemsPack = 0; numItemsPack = 0;
} }
if (convergence_best) {
// Keep the best accuracy until now as the prior accuracy // Keep the best accuracy until now as the prior accuracy
priorAccuracy = std::max(accuracy, priorAccuracy); priorAccuracy = std::max(accuracy, priorAccuracy);
// priorAccuracy = accuracy; } else {
// Keep the last accuray obtained as the prior accuracy
priorAccuracy = accuracy;
} }
VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size()); }
// VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size(); finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
} }
if (tolerance > maxTolerance) { if (tolerance > maxTolerance) {
if (numItemsPack < n_models) { if (numItemsPack < n_models) {
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated"); notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models); // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
for (int i = 0; i < numItemsPack; ++i) { for (int i = 0; i < numItemsPack; ++i) {
significanceModels.pop_back(); significanceModels.pop_back();
models.pop_back(); models.pop_back();
n_models--; n_models--;
} }
} else { } else {
VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
notes.push_back("Convergence threshold reached & 0 models eliminated"); notes.push_back("Convergence threshold reached & 0 models eliminated");
// VLG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
} }
} }
if (featuresUsed.size() != features.size()) { if (featuresUsed.size() != features.size()) {

View File

@@ -6,44 +6,20 @@
#ifndef BOOSTAODE_H #ifndef BOOSTAODE_H
#define BOOSTAODE_H #define BOOSTAODE_H
#include <map> #include <string>
#include "bayesnet/classifiers/SPODE.h" #include <vector>
#include "bayesnet/feature_selection/FeatureSelect.h" #include "Boost.h"
#include "Ensemble.h"
namespace bayesnet { namespace bayesnet {
struct { class BoostAODE : public Boost {
std::string CFS = "CFS";
std::string FCBF = "FCBF";
std::string IWSS = "IWSS";
}SelectFeatures;
struct {
std::string ASC = "asc";
std::string DESC = "desc";
std::string RAND = "rand";
}Orders;
class BoostAODE : public Ensemble {
public: public:
BoostAODE(bool predict_voting = false); explicit BoostAODE(bool predict_voting = false);
virtual ~BoostAODE() = default; virtual ~BoostAODE() = default;
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override; std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
protected: protected:
void buildModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
void trainModel(const torch::Tensor& weights) override;
private: private:
std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights); std::vector<int> initializeModels(const Smoothing_t smoothing);
std::vector<int> initializeModels();
torch::Tensor X_train, y_train, X_test, y_test;
// Hyperparameters
bool bisection = true; // if true, use bisection stratety to add k models at once to the ensemble
int maxTolerance = 3;
std::string order_algorithm; // order to process the KBest features asc, desc, rand
bool convergence = true; //if true, stop when the model does not improve
bool selectFeatures = false; // if true, use feature selection
std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
FeatureSelect* featureSelector = nullptr;
double threshold = -1;
bool block_update = false;
}; };
} }
#endif #endif

View File

@@ -3,22 +3,20 @@
// SPDX-FileType: SOURCE // SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// *************************************************************** // ***************************************************************
#include "Ensemble.h" #include "Ensemble.h"
namespace bayesnet { namespace bayesnet {
Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting) Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting)
{ {
}; };
const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted"; const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted";
void Ensemble::trainModel(const torch::Tensor& weights) void Ensemble::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
n_models = models.size(); n_models = models.size();
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
// fit with std::vectors // fit with std::vectors
models[i]->fit(dataset, features, className, states); models[i]->fit(dataset, features, className, states, smoothing);
} }
} }
std::vector<int> Ensemble::compute_arg_max(std::vector<std::vector<double>>& X) std::vector<int> Ensemble::compute_arg_max(std::vector<std::vector<double>>& X)
@@ -85,17 +83,10 @@ namespace bayesnet {
{ {
auto n_states = models[0]->getClassNumStates(); auto n_states = models[0]->getClassNumStates();
torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32); torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
auto threads{ std::vector<std::thread>() };
std::mutex mtx;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
threads.push_back(std::thread([&, i]() {
auto ypredict = models[i]->predict_proba(X); auto ypredict = models[i]->predict_proba(X);
std::lock_guard<std::mutex> lock(mtx); /*std::cout << "model " << i << " prediction: " << ypredict << " significance " << significanceModels[i] << std::endl;*/
y_pred += ypredict * significanceModels[i]; y_pred += ypredict * significanceModels[i];
}));
}
for (auto& thread : threads) {
thread.join();
} }
auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
y_pred /= sum; y_pred /= sum;
@@ -105,23 +96,15 @@ namespace bayesnet {
{ {
auto n_states = models[0]->getClassNumStates(); auto n_states = models[0]->getClassNumStates();
std::vector<std::vector<double>> y_pred(X[0].size(), std::vector<double>(n_states, 0.0)); std::vector<std::vector<double>> y_pred(X[0].size(), std::vector<double>(n_states, 0.0));
auto threads{ std::vector<std::thread>() };
std::mutex mtx;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
threads.push_back(std::thread([&, i]() {
auto ypredict = models[i]->predict_proba(X); auto ypredict = models[i]->predict_proba(X);
assert(ypredict.size() == y_pred.size()); assert(ypredict.size() == y_pred.size());
assert(ypredict[0].size() == y_pred[0].size()); assert(ypredict[0].size() == y_pred[0].size());
std::lock_guard<std::mutex> lock(mtx);
// Multiply each prediction by the significance of the model and then add it to the final prediction // Multiply each prediction by the significance of the model and then add it to the final prediction
for (auto j = 0; j < ypredict.size(); ++j) { for (auto j = 0; j < ypredict.size(); ++j) {
std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(), std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(),
[significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; }); [significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
} }
}));
}
for (auto& thread : threads) {
thread.join();
} }
auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
//Divide each element of the prediction by the sum of the significances //Divide each element of the prediction by the sum of the significances
@@ -141,17 +124,9 @@ namespace bayesnet {
{ {
// Build a m x n_models tensor with the predictions of each model // Build a m x n_models tensor with the predictions of each model
torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32); torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
auto threads{ std::vector<std::thread>() };
std::mutex mtx;
for (auto i = 0; i < n_models; ++i) { for (auto i = 0; i < n_models; ++i) {
threads.push_back(std::thread([&, i]() {
auto ypredict = models[i]->predict(X); auto ypredict = models[i]->predict(X);
std::lock_guard<std::mutex> lock(mtx);
y_pred.index_put_({ "...", i }, ypredict); y_pred.index_put_({ "...", i }, ypredict);
}));
}
for (auto& thread : threads) {
thread.join();
} }
return voting(y_pred); return voting(y_pred);
} }

View File

@@ -33,9 +33,15 @@ namespace bayesnet {
} }
std::string dump_cpt() const override std::string dump_cpt() const override
{ {
return ""; std::string output;
for (auto& model : models) {
output += model->dump_cpt();
output += std::string(80, '-') + "\n";
}
return output;
} }
protected: protected:
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
torch::Tensor predict_average_voting(torch::Tensor& X); torch::Tensor predict_average_voting(torch::Tensor& X);
std::vector<std::vector<double>> predict_average_voting(std::vector<std::vector<int>>& X); std::vector<std::vector<double>> predict_average_voting(std::vector<std::vector<int>>& X);
torch::Tensor predict_average_proba(torch::Tensor& X); torch::Tensor predict_average_proba(torch::Tensor& X);
@@ -43,10 +49,10 @@ namespace bayesnet {
torch::Tensor compute_arg_max(torch::Tensor& X); torch::Tensor compute_arg_max(torch::Tensor& X);
std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X); std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X);
torch::Tensor voting(torch::Tensor& votes); torch::Tensor voting(torch::Tensor& votes);
// Attributes
unsigned n_models; unsigned n_models;
std::vector<std::unique_ptr<Classifier>> models; std::vector<std::unique_ptr<Classifier>> models;
std::vector<double> significanceModels; std::vector<double> significanceModels;
void trainModel(const torch::Tensor& weights) override;
bool predict_voting; bool predict_voting;
}; };
} }

View File

@@ -0,0 +1,168 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <folding.hpp>
#include <limits.h>
#include "XBA2DE.h"
#include "bayesnet/classifiers/XSP2DE.h"
#include "bayesnet/utils/TensorUtils.h"
namespace bayesnet {
// Constructs the ensemble, forwarding the predict_voting flag unchanged to the Boost base class.
XBA2DE::XBA2DE(bool predict_voting)
    : Boost(predict_voting)
{
}
std::vector<int> XBA2DE::initializeModels(const Smoothing_t smoothing) {
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
std::vector<int> featuresSelected = featureSelection(weights_);
if (featuresSelected.size() < 2) {
notes.push_back("No features selected in initialization");
status = ERROR;
return std::vector<int>();
}
for (int i = 0; i < featuresSelected.size() - 1; i++) {
for (int j = i + 1; j < featuresSelected.size(); j++) {
std::unique_ptr<Classifier> model = std::make_unique<XSp2de>(featuresSelected[i], featuresSelected[j]);
model->fit(dataset, features, className, states, weights_, smoothing);
add_model(std::move(model), 1.0);
}
}
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " +
std::to_string(features.size()) + " with " + select_features_algorithm);
return featuresSelected;
}
// Trains the ensemble: XSp2de models are fitted on feature pairs, one pack at a
// time, and the sample weights are re-estimated through update_weights() /
// update_weights_block() after each model (or each pack when block_update is set).
//
// @param weights    not read in this body; training always starts from uniform
//                   sample weights (1/m) held in the local weights_.
// @param smoothing  smoothing strategy forwarded to every model's fit().
//
// On exit, notes receives a summary (models eliminated, pairs left unused,
// number of models) and status is set to WARNING when some pairs were not used.
void XBA2DE::trainModel(const torch::Tensor &weights, const Smoothing_t smoothing) {
//
// Logging setup
//
// loguru::set_thread_name("XBA2DE");
// loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
// loguru::add_file("boostA2DE.log", loguru::Truncate, loguru::Verbosity_MAX);
// Algorithm based on the adaboost algorithm for classification
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
// NOTE(review): the std::vector copies below are stored in members but are not read
// anywhere else in this function (predictions use the tensor versions) - confirm they are needed.
X_train_ = TensorUtils::to_matrix(X_train);
y_train_ = TensorUtils::to_vector<int>(y_train);
if (convergence) {
X_test_ = TensorUtils::to_matrix(X_test);
y_test_ = TensorUtils::to_vector<int>(y_test);
}
fitted = true;
double alpha_t = 0;
// Uniform initial sample weights
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
bool finished = false;
std::vector<int> featuresUsed;
if (selectFeatures) {
// Seed the ensemble with the pair models built from the selected features;
// an empty result means initialization failed, so training stops here.
featuresUsed = initializeModels(smoothing);
if (featuresUsed.size() == 0) {
return;
}
auto ypred = predict(X_train);
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
// Update significance of the models: every seeded model gets the same alpha_t
for (int i = 0; i < n_models; ++i) {
significanceModels[i] = alpha_t;
}
if (finished) {
return;
}
}
int numItemsPack = 0; // The counter of the models inserted in the current pack
// Variables to control the accuracy finish condition
double priorAccuracy = 0.0;
double improvement = 1.0;
double convergence_threshold = 1e-4;
int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
// Step 0: Set the finish condition
// epsilon sub t > 0.5 => inverse the weights policy
// validation error is not decreasing
// run out of features
bool ascending = order_algorithm == Orders.ASC;
std::mt19937 g{173}; // fixed seed, so the RAND ordering is reproducible
std::vector<std::pair<int, int>> pairSelection;
while (!finished) {
// Step 1: Build ranking with mutual information
// NOTE(review): the pair list is rebuilt on every iteration and featuresUsed is not
// extended inside this loop - verify that SelectKPairs does not hand back pairs that
// already have a model in the ensemble.
pairSelection = metrics.SelectKPairs(weights_, featuresUsed, ascending, 0); // Get all the pairs sorted
if (order_algorithm == Orders.RAND) {
std::shuffle(pairSelection.begin(), pairSelection.end(), g);
}
// Pack size: 2^tolerance models when bisection is enabled, otherwise a single model
int k = bisection ? pow(2, tolerance) : 1;
int counter = 0; // The model counter of the current pack
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
while (counter++ < k && pairSelection.size() > 0) {
// Take the best remaining pair off the front of the ranking
auto feature_pair = pairSelection[0];
pairSelection.erase(pairSelection.begin());
std::unique_ptr<Classifier> model;
model = std::make_unique<XSp2de>(feature_pair.first, feature_pair.second);
model->fit(dataset, features, className, states, weights_, smoothing);
// With block_update the model is stored with significance 0.0 here and the
// weights are updated once per pack after this loop.
alpha_t = 0.0;
if (!block_update) {
auto ypred = model->predict(X_train);
// Step 3.1: Compute the classifier amount of say
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
}
// Step 3.4: Store classifier and its accuracy to weigh its future vote
numItemsPack++;
models.push_back(std::move(model));
significanceModels.push_back(alpha_t);
n_models++;
// VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models,
// featuresUsed.size());
}
if (block_update) {
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
}
if (convergence && !finished) {
// Accuracy of the current ensemble on the X_test / y_test split
auto y_val_predict = predict(X_test);
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
// First evaluation: improvement keeps its initial value of 1.0, so it counts as an improvement
if (priorAccuracy == 0) {
priorAccuracy = accuracy;
} else {
improvement = accuracy - priorAccuracy;
}
if (improvement < convergence_threshold) {
// VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f
// current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
tolerance++;
} else {
// VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f
// prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
tolerance = 0; // Reset the counter if the model performs better
numItemsPack = 0;
}
if (convergence_best) {
// Keep the best accuracy until now as the prior accuracy
priorAccuracy = std::max(accuracy, priorAccuracy);
} else {
// Keep the last accuracy obtained as the prior accuracy
priorAccuracy = accuracy;
}
}
// VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(),
// features.size());
// Stop on an update_weights stop signal, exhausted tolerance, or no pairs left in this ranking
finished = finished || tolerance > maxTolerance || pairSelection.size() == 0;
}
if (tolerance > maxTolerance) {
// numItemsPack counts the models added since the last improvement; drop them,
// unless that would remove every model of the ensemble.
if (numItemsPack < n_models) {
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
// VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
for (int i = 0; i < numItemsPack; ++i) {
significanceModels.pop_back();
models.pop_back();
n_models--;
}
} else {
notes.push_back("Convergence threshold reached & 0 models eliminated");
// VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d",
// n_models, numItemsPack);
}
}
if (pairSelection.size() > 0) {
notes.push_back("Pairs not used in train: " + std::to_string(pairSelection.size()));
status = WARNING;
}
notes.push_back("Number of models: " + std::to_string(n_models));
}
// Returns the graph description of the ensemble; delegates to Ensemble::graph
// with the given title.
std::vector<std::string> XBA2DE::graph(const std::string &title) const
{
    return Ensemble::graph(title);
}
} // namespace bayesnet

View File

@@ -0,0 +1,28 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef XBA2DE_H
#define XBA2DE_H
#include <string>
#include <vector>
#include "Boost.h"
namespace bayesnet {
// Boost-derived ensemble classifier. Declares its own training routine and
// model initialization, and reports its own version string.
class XBA2DE : public Boost {
public:
    // predict_voting is forwarded to the Boost base class.
    explicit XBA2DE(bool predict_voting = false);
    virtual ~XBA2DE() = default;
    // Graph description of the ensemble, labelled with `title`.
    std::vector<std::string> graph(const std::string& title = "XBA2DE") const override;
    // Version string of this classifier.
    std::string getVersion() override { return version; }
protected:
    // Training entry point overridden from the base class.
    void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
private:
    // Builds the initial models; returns the selected feature indices.
    std::vector<int> initializeModels(const Smoothing_t smoothing);
    // std::vector copies of the train / test data
    std::vector<std::vector<int>> X_train_;
    std::vector<std::vector<int>> X_test_;
    std::vector<int> y_train_;
    std::vector<int> y_test_;
    std::string version = "0.9.7";
};
}
#endif

View File

@@ -0,0 +1,184 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include "XBAODE.h"
#include "bayesnet/classifiers/XSPODE.h"
#include "bayesnet/utils/TensorUtils.h"
#include <limits.h>
#include <random>
#include <tuple>
namespace bayesnet
{
// Default constructor: always passes `false` to the Boost base-class constructor.
XBAODE::XBAODE()
    : Boost(false)
{
}
std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing)
{
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
std::vector<int> featuresSelected = featureSelection(weights_);
for (const int &feature : featuresSelected) {
std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
model->fit(dataset, features, className, states, weights_, smoothing);
add_model(std::move(model), 1.0);
}
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " +
std::to_string(features.size()) + " with " + select_features_algorithm);
return featuresSelected;
}
// Trains the ensemble: XSpode models are fitted one feature at a time, in packs,
// and the sample weights are re-estimated through update_weights() after each model.
//
// @param weights    not read in this body; training always starts from uniform
//                   sample weights (1/m) held in the local weights_.
// @param smoothing  smoothing strategy forwarded to every model's fit().
//
// On exit, notes receives a summary (models eliminated, features used, number
// of models) and status is set to WARNING when not every feature was used.
void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing)
{
    // std::vector copies of the data; X_train_ feeds the vector-based predict() calls below
    X_train_ = TensorUtils::to_matrix(X_train);
    y_train_ = TensorUtils::to_vector<int>(y_train);
    if (convergence) {
        X_test_ = TensorUtils::to_matrix(X_test);
        y_test_ = TensorUtils::to_vector<int>(y_test);
    }
    fitted = true;
    double alpha_t = 0.0;
    // Uniform initial sample weights
    torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
    bool finished = false;
    std::vector<int> featuresUsed;
    n_models = 0;
    if (selectFeatures) {
        // NOTE(review): an empty selection is not handled here before predict() is called -
        // confirm featureSelection() cannot return an empty list.
        featuresUsed = initializeModels(smoothing);
        auto ypred = predict(X_train_);
        auto ypred_t = torch::tensor(ypred);
        std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
        // Update significance of the models: replace the provisional significance of
        // each seeded model with the alpha_t just computed.
        const std::size_t seeded = featuresUsed.size();
        for (std::size_t i = 0; i < seeded; ++i) {
            significanceModels.pop_back();
        }
        for (std::size_t i = 0; i < seeded; ++i) {
            significanceModels.push_back(alpha_t);
        }
        // VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t,
        // n_models);
        if (finished) {
            return;
        }
    }
    int numItemsPack = 0; // The counter of the models inserted in the current pack
    // Variables to control the accuracy finish condition
    double priorAccuracy = 0.0;
    double improvement = 1.0;
    double convergence_threshold = 1e-4;
    int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
    // Step 0: Set the finish condition
    // epsilon sub t > 0.5 => inverse the weights_ policy
    // validation error is not decreasing
    // run out of features
    bool ascending = order_algorithm == bayesnet::Orders.ASC;
    std::mt19937 g{173}; // fixed seed, so the RAND ordering is reproducible
    while (!finished) {
        // Step 1: Build ranking with mutual information
        auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
        if (order_algorithm == bayesnet::Orders.RAND) {
            std::shuffle(featureSelection.begin(), featureSelection.end(), g);
        }
        // Remove used features
        featureSelection.erase(std::remove_if(featureSelection.begin(), featureSelection.end(),
                                              [&](auto x) {
                                                  return std::find(featuresUsed.begin(), featuresUsed.end(), x) !=
                                                         featuresUsed.end();
                                              }),
                               featureSelection.end());
        // Pack size: 2^tolerance models when bisection is enabled, otherwise a single model
        int k = bisection ? pow(2, tolerance) : 1;
        int counter = 0; // The model counter of the current pack
        // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k,
        // featureSelection.size());
        while (counter++ < k && featureSelection.size() > 0) {
            // Take the best remaining feature off the front of the ranking
            auto feature = featureSelection[0];
            featureSelection.erase(featureSelection.begin());
            std::unique_ptr<Classifier> model;
            model = std::make_unique<XSpode>(feature);
            model->fit(dataset, features, className, states, weights_, smoothing);
            /*dynamic_cast<XSpode*>(model.get())->fitx(X_train, y_train, weights_,
             * smoothing); // using exclusive XSpode fit method*/
            std::vector<int> ypred;
            if (alpha_block) {
                //
                // Compute the prediction with the current ensemble + model
                //
                // Add the model to the ensemble
                add_model(std::move(model), 1.0);
                // Compute the prediction
                ypred = predict(X_train_);
                // Take the model back before it is removed from the ensemble
                model = std::move(models.back());
                // Remove the model from the ensemble
                remove_last_model();
            } else {
                ypred = model->predict(X_train_);
            }
            // Step 3.1: Compute the classifier amount of say
            auto ypred_t = torch::tensor(ypred);
            std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
            // Step 3.4: Store classifier and its accuracy to weigh its future vote
            numItemsPack++;
            featuresUsed.push_back(feature);
            add_model(std::move(model), alpha_t);
            // VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d
            // featuresUsed: %zu", finished, numItemsPack, n_models,
            // featuresUsed.size());
        } // End of the pack
        if (convergence && !finished) {
            // Accuracy of the current ensemble on the X_test / y_test split
            auto y_val_predict = predict(X_test);
            double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
            // First evaluation: improvement keeps its initial value of 1.0, so it counts as an improvement
            if (priorAccuracy == 0) {
                priorAccuracy = accuracy;
            } else {
                improvement = accuracy - priorAccuracy;
            }
            if (improvement < convergence_threshold) {
                // VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d
                // numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
                // numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance++;
            } else {
                // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d
                // numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
                // numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance = 0; // Reset the counter if the model performs better
                numItemsPack = 0;
            }
            if (convergence_best) {
                // Keep the best accuracy until now as the prior accuracy
                priorAccuracy = std::max(accuracy, priorAccuracy);
            } else {
                // Keep the last accuracy obtained as the prior accuracy
                priorAccuracy = accuracy;
            }
        }
        // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size:
        // %zu", tolerance, featuresUsed.size(), features.size());
        finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
    }
    if (tolerance > maxTolerance) {
        // numItemsPack counts the models added since the last improvement; drop them,
        // unless that would remove every model of the ensemble.
        if (numItemsPack < n_models) {
            notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated
            // of %d", numItemsPack, n_models);
            // Plain counted loop: the previous form compared a signed index against an
            // unsigned size expression, which can never terminate once the bound reaches 0.
            for (int i = 0; i < numItemsPack; ++i) {
                remove_last_model();
            }
            // VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features
            // used.", n_models, featuresUsed.size());
        } else {
            notes.push_back("Convergence threshold reached & 0 models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated
            // n_models=%d numItemsPack=%d", n_models, numItemsPack);
        }
    }
    if (featuresUsed.size() != features.size()) {
        notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " +
                        std::to_string(features.size()));
        status = bayesnet::WARNING;
    }
    notes.push_back("Number of models: " + std::to_string(n_models));
}
} // namespace bayesnet

View File

@@ -0,0 +1,27 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef XBAODE_H
#define XBAODE_H
#include <vector>
#include <cmath>
#include "Boost.h"
namespace bayesnet {
// Boost-derived ensemble classifier. Declares its own training routine and
// model initialization, and reports its own version string.
class XBAODE : public Boost {
public:
    XBAODE();
    // Version string of this classifier.
    std::string getVersion() override { return version; }
protected:
    // Training entry point overridden from the base class.
    void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
private:
    // Builds the initial models; returns the selected feature indices.
    std::vector<int> initializeModels(const Smoothing_t smoothing);
    // std::vector copies of the train / test data
    std::vector<std::vector<int>> X_train_;
    std::vector<std::vector<int>> X_test_;
    std::vector<int> y_train_;
    std::vector<int> y_test_;
    std::string version = "0.9.7";
};
}
#endif // XBAODE_H

View File

@@ -1,84 +1,141 @@
// *************************************************************** // **
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE // SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// *************************************************************** // **
#include <limits>
#include "bayesnet/utils/bayesnetUtils.h" #include "bayesnet/utils/bayesnetUtils.h"
#include "FeatureSelect.h" #include "FeatureSelect.h"
namespace bayesnet {
FeatureSelect::FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights)
namespace bayesnet {
using namespace torch::indexing; // for Ellipsis constant
//---------------------------------------------------------------------
// ctor
//---------------------------------------------------------------------
FeatureSelect::FeatureSelect(const torch::Tensor& samples,
const std::vector<std::string>& features,
const std::string& className,
int maxFeatures,
int classNumStates,
const torch::Tensor& weights)
: Metrics(samples, features, className, classNumStates),
maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures),
weights(weights)
{ {
} }
//---------------------------------------------------------------------
// public helpers
//---------------------------------------------------------------------
void FeatureSelect::initialize() void FeatureSelect::initialize()
{ {
selectedFeatures.clear(); selectedFeatures.clear();
selectedScores.clear(); selectedScores.clear();
suLabels.clear();
suFeatures.clear();
fitted = false;
} }
//---------------------------------------------------------------------
// Symmetrical Uncertainty (SU)
//---------------------------------------------------------------------
double FeatureSelect::symmetricalUncertainty(int a, int b) double FeatureSelect::symmetricalUncertainty(int a, int b)
{ {
/* /*
Compute symmetrical uncertainty. Normalize* information gain (mutual * Compute symmetrical uncertainty. Normalises the information gain
information) with the entropies of the features in order to compensate * (mutual information) with the entropies of the variables to compensate
the bias due to high cardinality features. *Range [0, 1] * the bias due to highcardinality features. Range: [0, 1]
(https://www.sciencedirect.com/science/article/pii/S0020025519303603) * See: https://www.sciencedirect.com/science/article/pii/S0020025519303603
*/ */
auto x = samples.index({ a, "..." });
auto y = samples.index({ b, "..." }); auto x = samples.index({ a, Ellipsis }); // row a => feature a
auto mu = mutualInformation(x, y, weights); auto y = (b >= 0) ? samples.index({ b, Ellipsis }) // row b (>=0) => feature b
auto hx = entropy(x, weights); : samples.index({ -1, Ellipsis }); // 1 treated as last row = labels
auto hy = entropy(y, weights);
return 2.0 * mu / (hx + hy); double mu = mutualInformation(x, y, weights);
double hx = entropy(x, weights);
double hy = entropy(y, weights);
const double denom = hx + hy;
if (denom == 0.0) return 0.0; // perfectly pure variables
return 2.0 * mu / denom;
} }
//---------------------------------------------------------------------
// SU featureclass
//---------------------------------------------------------------------
void FeatureSelect::computeSuLabels() void FeatureSelect::computeSuLabels()
{ {
// Compute Simmetrical Uncertainty between features and labels // Compute Symmetrical Uncertainty between each feature and the class labels
// https://en.wikipedia.org/wiki/Symmetric_uncertainty // https://en.wikipedia.org/wiki/Symmetric_uncertainty
for (int i = 0; i < features.size(); ++i) { const int classIdx = static_cast<int>(samples.size(0)) - 1; // labels in last row
suLabels.push_back(symmetricalUncertainty(i, -1)); suLabels.reserve(features.size());
for (int i = 0; i < static_cast<int>(features.size()); ++i) {
suLabels.emplace_back(symmetricalUncertainty(i, classIdx));
} }
} }
double FeatureSelect::computeSuFeatures(const int firstFeature, const int secondFeature)
//---------------------------------------------------------------------
// SU featurefeature with cache
//---------------------------------------------------------------------
double FeatureSelect::computeSuFeatures(int firstFeature, int secondFeature)
{ {
// Compute Simmetrical Uncertainty between features // Order the pair to exploit symmetry => only one entry in the map
// https://en.wikipedia.org/wiki/Symmetric_uncertainty auto ordered = std::minmax(firstFeature, secondFeature);
try { const std::pair<int, int> key{ ordered.first, ordered.second };
return suFeatures.at({ firstFeature, secondFeature });
} auto it = suFeatures.find(key);
catch (const std::out_of_range& e) { if (it != suFeatures.end()) return it->second;
double result = symmetricalUncertainty(firstFeature, secondFeature);
suFeatures[{firstFeature, secondFeature}] = result; double result = symmetricalUncertainty(key.first, key.second);
suFeatures[key] = result; // store once (symmetry handled by ordering)
return result; return result;
} }
}
//---------------------------------------------------------------------
// Correlationbased Feature Selection (CFS) merit
//---------------------------------------------------------------------
double FeatureSelect::computeMeritCFS() double FeatureSelect::computeMeritCFS()
{ {
double rcf = 0; const int n = static_cast<int>(selectedFeatures.size());
for (auto feature : selectedFeatures) { if (n == 0) return 0.0;
rcf += suLabels[feature];
} // average r_cf (featureclass)
double rff = 0; double rcf_sum = 0.0;
int n = selectedFeatures.size(); for (int f : selectedFeatures) rcf_sum += suLabels[f];
for (const auto& item : doCombinations(selectedFeatures)) { const double rcf_avg = rcf_sum / n;
rff += computeSuFeatures(item.first, item.second);
} // average r_ff (featurefeature)
return rcf / sqrt(n + (n * n - n) * rff); double rff_sum = 0.0;
const auto& pairs = doCombinations(selectedFeatures); // generates each unordered pair once
for (const auto& p : pairs) rff_sum += computeSuFeatures(p.first, p.second);
const double numPairs = n * (n - 1) * 0.5;
const double rff_avg = (numPairs > 0) ? rff_sum / numPairs : 0.0;
// Merit_S = k * r_cf / sqrt( k + k*(k1) * r_ff ) (Hall, 1999)
const double k = static_cast<double>(n);
return (k * rcf_avg) / std::sqrt(k + k * (k - 1) * rff_avg);
} }
//---------------------------------------------------------------------
// getters
//---------------------------------------------------------------------
std::vector<int> FeatureSelect::getFeatures() const std::vector<int> FeatureSelect::getFeatures() const
{ {
if (!fitted) { if (!fitted) throw std::runtime_error("FeatureSelect not fitted");
throw std::runtime_error("FeatureSelect not fitted");
}
return selectedFeatures; return selectedFeatures;
} }
std::vector<double> FeatureSelect::getScores() const std::vector<double> FeatureSelect::getScores() const
{ {
if (!fitted) { if (!fitted) throw std::runtime_error("FeatureSelect not fitted");
throw std::runtime_error("FeatureSelect not fitted");
}
return selectedScores; return selectedScores;
} }
}
} // namespace bayesnet

View File

@@ -5,20 +5,20 @@
// *************************************************************** // ***************************************************************
#include <thread> #include <thread>
#include <mutex>
#include <sstream> #include <sstream>
#include <numeric>
#include <algorithm>
#include "Network.h" #include "Network.h"
#include "bayesnet/utils/bayesnetUtils.h" #include "bayesnet/utils/bayesnetUtils.h"
#include "bayesnet/utils/CountingSemaphore.h"
#include <pthread.h>
#include <fstream>
namespace bayesnet { namespace bayesnet {
Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, laplaceSmoothing{ 0 } Network::Network() : fitted{ false }, classNumStates{ 0 }
{ {
} }
Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, laplaceSmoothing{ 0 } Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
{ fitted(other.fitted), samples(other.samples)
}
Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples)
{ {
if (samples.defined()) if (samples.defined())
samples = samples.clone(); samples = samples.clone();
@@ -35,16 +35,15 @@ namespace bayesnet {
nodes.clear(); nodes.clear();
samples = torch::Tensor(); samples = torch::Tensor();
} }
float Network::getMaxThreads() const
{
return maxThreads;
}
torch::Tensor& Network::getSamples() torch::Tensor& Network::getSamples()
{ {
return samples; return samples;
} }
void Network::addNode(const std::string& name) void Network::addNode(const std::string& name)
{ {
if (fitted) {
throw std::invalid_argument("Cannot add node to a fitted network. Initialize first.");
}
if (name == "") { if (name == "") {
throw std::invalid_argument("Node name cannot be empty"); throw std::invalid_argument("Node name cannot be empty");
} }
@@ -94,12 +93,21 @@ namespace bayesnet {
} }
void Network::addEdge(const std::string& parent, const std::string& child) void Network::addEdge(const std::string& parent, const std::string& child)
{ {
if (fitted) {
throw std::invalid_argument("Cannot add edge to a fitted network. Initialize first.");
}
if (nodes.find(parent) == nodes.end()) { if (nodes.find(parent) == nodes.end()) {
throw std::invalid_argument("Parent node " + parent + " does not exist"); throw std::invalid_argument("Parent node " + parent + " does not exist");
} }
if (nodes.find(child) == nodes.end()) { if (nodes.find(child) == nodes.end()) {
throw std::invalid_argument("Child node " + child + " does not exist"); throw std::invalid_argument("Child node " + child + " does not exist");
} }
// Check if the edge is already in the graph
for (auto& node : nodes[parent]->getChildren()) {
if (node->getName() == child) {
throw std::invalid_argument("Edge " + parent + " -> " + child + " already exists");
}
}
// Temporarily add edge to check for cycles // Temporarily add edge to check for cycles
nodes[parent]->addChild(nodes[child].get()); nodes[parent]->addChild(nodes[child].get());
nodes[child]->addParent(nodes[parent].get()); nodes[child]->addParent(nodes[parent].get());
@@ -155,7 +163,7 @@ namespace bayesnet {
classNumStates = nodes.at(className)->getNumStates(); classNumStates = nodes.at(className)->getNumStates();
} }
// X comes in nxm, where n is the number of features and m the number of samples // X comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states) void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights); checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
this->className = className; this->className = className;
@@ -164,17 +172,17 @@ namespace bayesnet {
for (int i = 0; i < featureNames.size(); ++i) { for (int i = 0; i < featureNames.size(); ++i) {
auto row_feature = X.index({ i, "..." }); auto row_feature = X.index({ i, "..." });
} }
completeFit(states, weights); completeFit(states, weights, smoothing);
} }
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states) void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights); checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className; this->className = className;
this->samples = samples; this->samples = samples;
completeFit(states, weights); completeFit(states, weights, smoothing);
} }
// input_data comes in nxm, where n is the number of features and m the number of samples // input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states) void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{ {
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64); const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights); checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
@@ -185,17 +193,43 @@ namespace bayesnet {
samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32)); samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
} }
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(states, weights); completeFit(states, weights, smoothing);
} }
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
{ {
setStates(states); setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
std::vector<std::thread> threads; std::vector<std::thread> threads;
auto& semaphore = CountingSemaphore::getInstance();
const double n_samples = static_cast<double>(samples.size(1));
auto worker = [&](std::pair<const std::string, std::unique_ptr<Node>>& node, int i) {
std::string threadName = "FitWorker-" + std::to_string(i);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
double numStates = static_cast<double>(node.second->getNumStates());
double smoothing_factor;
switch (smoothing) {
case Smoothing_t::ORIGINAL:
smoothing_factor = 1.0 / n_samples;
break;
case Smoothing_t::LAPLACE:
smoothing_factor = 1.0;
break;
case Smoothing_t::CESTNIK:
smoothing_factor = 1 / numStates;
break;
default:
smoothing_factor = 0.0; // No smoothing
}
node.second->computeCPT(samples, features, smoothing_factor, weights);
semaphore.release();
};
int i = 0;
for (auto& node : nodes) { for (auto& node : nodes) {
threads.emplace_back([this, &node, &weights]() { semaphore.acquire();
node.second->computeCPT(samples, features, laplaceSmoothing, weights); threads.emplace_back(worker, std::ref(node), i++);
});
} }
for (auto& thread : threads) { for (auto& thread : threads) {
thread.join(); thread.join();
@@ -207,15 +241,39 @@ namespace bayesnet {
if (!fitted) { if (!fitted) {
throw std::logic_error("You must call fit() before calling predict()"); throw std::logic_error("You must call fit() before calling predict()");
} }
// Ensure the sample size is equal to the number of features
if (samples.size(0) != features.size() - 1) {
throw std::invalid_argument("(T) Sample size (" + std::to_string(samples.size(0)) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
torch::Tensor result; torch::Tensor result;
std::vector<std::thread> threads;
std::mutex mtx;
auto& semaphore = CountingSemaphore::getInstance();
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64); result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
for (int i = 0; i < samples.size(1); ++i) { auto worker = [&](const torch::Tensor& sample, int i) {
const torch::Tensor sample = samples.index({ "...", i }); std::string threadName = "PredictWorker-" + std::to_string(i);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
auto psample = predict_sample(sample); auto psample = predict_sample(sample);
auto temp = torch::tensor(psample, torch::kFloat64); auto temp = torch::tensor(psample, torch::kFloat64);
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64)); {
std::lock_guard<std::mutex> lock(mtx);
result.index_put_({ i, "..." }, temp); result.index_put_({ i, "..." }, temp);
} }
semaphore.release();
};
for (int i = 0; i < samples.size(1); ++i) {
semaphore.acquire();
const torch::Tensor sample = samples.index({ "...", i });
threads.emplace_back(worker, sample, i);
}
for (auto& thread : threads) {
thread.join();
}
if (proba) if (proba)
return result; return result;
return result.argmax(1); return result.argmax(1);
@@ -239,18 +297,38 @@ namespace bayesnet {
if (!fitted) { if (!fitted) {
throw std::logic_error("You must call fit() before calling predict()"); throw std::logic_error("You must call fit() before calling predict()");
} }
std::vector<int> predictions; // Ensure the sample size is equal to the number of features
if (tsamples.size() != features.size() - 1) {
throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::vector<int> predictions(tsamples[0].size(), 0);
std::vector<int> sample; std::vector<int> sample;
std::vector<std::thread> threads;
auto& semaphore = CountingSemaphore::getInstance();
auto worker = [&](const std::vector<int>& sample, const int row, int& prediction) {
std::string threadName = "(V)PWorker-" + std::to_string(row);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
auto classProbabilities = predict_sample(sample);
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
int predictedClass = distance(classProbabilities.begin(), maxElem);
prediction = predictedClass;
semaphore.release();
};
for (int row = 0; row < tsamples[0].size(); ++row) { for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear(); sample.clear();
for (int col = 0; col < tsamples.size(); ++col) { for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]); sample.push_back(tsamples[col][row]);
} }
std::vector<double> classProbabilities = predict_sample(sample); semaphore.acquire();
// Find the class with the maximum posterior probability threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end()); }
int predictedClass = distance(classProbabilities.begin(), maxElem); for (auto& thread : threads) {
predictions.push_back(predictedClass); thread.join();
} }
return predictions; return predictions;
} }
@@ -261,14 +339,36 @@ namespace bayesnet {
if (!fitted) { if (!fitted) {
throw std::logic_error("You must call fit() before calling predict_proba()"); throw std::logic_error("You must call fit() before calling predict_proba()");
} }
std::vector<std::vector<double>> predictions; // Ensure the sample size is equal to the number of features
if (tsamples.size() != features.size() - 1) {
throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::vector<std::vector<double>> predictions(tsamples[0].size(), std::vector<double>(classNumStates, 0.0));
std::vector<int> sample; std::vector<int> sample;
std::vector<std::thread> threads;
auto& semaphore = CountingSemaphore::getInstance();
auto worker = [&](const std::vector<int>& sample, int row, std::vector<double>& predictions) {
std::string threadName = "(V)PWorker-" + std::to_string(row);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
std::vector<double> classProbabilities = predict_sample(sample);
predictions = classProbabilities;
semaphore.release();
};
for (int row = 0; row < tsamples[0].size(); ++row) { for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear(); sample.clear();
for (int col = 0; col < tsamples.size(); ++col) { for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]); sample.push_back(tsamples[col][row]);
} }
predictions.push_back(predict_sample(sample)); semaphore.acquire();
threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
}
for (auto& thread : threads) {
thread.join();
} }
return predictions; return predictions;
} }
@@ -286,11 +386,6 @@ namespace bayesnet {
// Return 1xn std::vector of probabilities // Return 1xn std::vector of probabilities
std::vector<double> Network::predict_sample(const std::vector<int>& sample) std::vector<double> Network::predict_sample(const std::vector<int>& sample)
{ {
// Ensure the sample size is equal to the number of features
if (sample.size() != features.size() - 1) {
throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::map<std::string, int> evidence; std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(); ++i) { for (int i = 0; i < sample.size(); ++i) {
evidence[features[i]] = sample[i]; evidence[features[i]] = sample[i];
@@ -300,44 +395,26 @@ namespace bayesnet {
// Return 1xn std::vector of probabilities // Return 1xn std::vector of probabilities
std::vector<double> Network::predict_sample(const torch::Tensor& sample) std::vector<double> Network::predict_sample(const torch::Tensor& sample)
{ {
// Ensure the sample size is equal to the number of features
if (sample.size(0) != features.size() - 1) {
throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::map<std::string, int> evidence; std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(0); ++i) { for (int i = 0; i < sample.size(0); ++i) {
evidence[features[i]] = sample[i].item<int>(); evidence[features[i]] = sample[i].item<int>();
} }
return exactInference(evidence); return exactInference(evidence);
} }
double Network::computeFactor(std::map<std::string, int>& completeEvidence)
{
double result = 1.0;
for (auto& node : getNodes()) {
result *= node.second->getFactorValue(completeEvidence);
}
return result;
}
std::vector<double> Network::exactInference(std::map<std::string, int>& evidence) std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
{ {
std::vector<double> result(classNumStates, 0.0); std::vector<double> result(classNumStates, 0.0);
std::vector<std::thread> threads;
std::mutex mtx;
for (int i = 0; i < classNumStates; ++i) {
threads.emplace_back([this, &result, &evidence, i, &mtx]() {
auto completeEvidence = std::map<std::string, int>(evidence); auto completeEvidence = std::map<std::string, int>(evidence);
for (int i = 0; i < classNumStates; ++i) {
completeEvidence[getClassName()] = i; completeEvidence[getClassName()] = i;
double factor = computeFactor(completeEvidence); double partial = 1.0;
std::lock_guard<std::mutex> lock(mtx); for (auto& node : getNodes()) {
result[i] = factor; partial *= node.second->getFactorValue(completeEvidence);
});
} }
for (auto& thread : threads) { result[i] = partial;
thread.join();
} }
// Normalize result // Normalize result
double sum = accumulate(result.begin(), result.end(), 0.0); double sum = std::accumulate(result.begin(), result.end(), 0.0);
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; }); transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result; return result;
} }
@@ -410,11 +487,7 @@ namespace bayesnet {
result.insert(it2, fatherName); result.insert(it2, fatherName);
ending = false; ending = false;
} }
} else {
throw std::logic_error("Error in topological sort because of node " + feature + " is not in result");
} }
} else {
throw std::logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
} }
} }
} }

View File

@@ -10,16 +10,16 @@
#include <vector> #include <vector>
#include "bayesnet/config.h" #include "bayesnet/config.h"
#include "Node.h" #include "Node.h"
#include "Smoothing.h"
namespace bayesnet { namespace bayesnet {
class Network { class Network {
public: public:
Network(); Network();
explicit Network(float);
explicit Network(const Network&); explicit Network(const Network&);
~Network() = default; ~Network() = default;
torch::Tensor& getSamples(); torch::Tensor& getSamples();
float getMaxThreads() const;
void addNode(const std::string&); void addNode(const std::string&);
void addEdge(const std::string&, const std::string&); void addEdge(const std::string&, const std::string&);
std::map<std::string, std::unique_ptr<Node>>& getNodes(); std::map<std::string, std::unique_ptr<Node>>& getNodes();
@@ -32,9 +32,9 @@ namespace bayesnet {
/* /*
Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on. Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
*/ */
void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states); void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states); void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states); void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba); torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
@@ -50,19 +50,16 @@ namespace bayesnet {
private: private:
std::map<std::string, std::unique_ptr<Node>> nodes; std::map<std::string, std::unique_ptr<Node>> nodes;
bool fitted; bool fitted;
float maxThreads = 0.95;
int classNumStates; int classNumStates;
std::vector<std::string> features; // Including classname std::vector<std::string> features; // Including classname
std::string className; std::string className;
double laplaceSmoothing;
torch::Tensor samples; // n+1xm tensor used to fit the model torch::Tensor samples; // n+1xm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&); bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
std::vector<double> predict_sample(const std::vector<int>&); std::vector<double> predict_sample(const std::vector<int>&);
std::vector<double> predict_sample(const torch::Tensor&); std::vector<double> predict_sample(const torch::Tensor&);
std::vector<double> exactInference(std::map<std::string, int>&); std::vector<double> exactInference(std::map<std::string, int>&);
double computeFactor(std::map<std::string, int>&); void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights); void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void setStates(const std::map<std::string, std::vector<int>>&); void setStates(const std::map<std::string, std::vector<int>>&);
}; };
} }

View File

@@ -9,7 +9,7 @@
namespace bayesnet { namespace bayesnet {
Node::Node(const std::string& name) Node::Node(const std::string& name)
: name(name), numStates(0), cpTable(torch::Tensor()), parents(std::vector<Node*>()), children(std::vector<Node*>()) : name(name)
{ {
} }
void Node::clear() void Node::clear()
@@ -90,52 +90,60 @@ namespace bayesnet {
} }
return result; return result;
} }
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights) void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
{ {
dimensions.clear(); dimensions.clear();
dimensions.reserve(parents.size() + 1);
// Get dimensions of the CPT // Get dimensions of the CPT
dimensions.push_back(numStates); dimensions.push_back(numStates);
transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); }); for (const auto& parent : parents) {
dimensions.push_back(parent->getNumStates());
// Create a tensor of zeros with the dimensions of the CPT }
cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing; //transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
// Create a tensor initialized with smoothing
cpTable = torch::full(dimensions, smoothing, torch::kDouble);
// Create a map for quick feature index lookup
std::unordered_map<std::string, int> featureIndexMap;
for (size_t i = 0; i < features.size(); ++i) {
featureIndexMap[features[i]] = i;
}
// Fill table with counts // Fill table with counts
auto pos = find(features.begin(), features.end(), name); // Get the index of this node's feature
if (pos == features.end()) { int name_index = featureIndexMap[name];
throw std::logic_error("Feature " + name + " not found in dataset"); // Get parent indices in dataset
std::vector<int> parent_indices;
parent_indices.reserve(parents.size());
for (const auto& parent : parents) {
parent_indices.push_back(featureIndexMap[parent->getName()]);
} }
int name_index = pos - features.begin();
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
c10::List<c10::optional<at::Tensor>> coordinates; c10::List<c10::optional<at::Tensor>> coordinates;
coordinates.push_back(dataset.index({ name_index, n_sample })); for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
for (auto parent : parents) { coordinates.clear();
pos = find(features.begin(), features.end(), parent->getName()); auto sample = dataset.index({ "...", n_sample });
if (pos == features.end()) { coordinates.push_back(sample[name_index]);
throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset"); for (size_t i = 0; i < parent_indices.size(); ++i) {
} coordinates.push_back(sample[parent_indices[i]]);
int parent_index = pos - features.begin();
coordinates.push_back(dataset.index({ parent_index, n_sample }));
} }
// Increment the count of the corresponding coordinate // Increment the count of the corresponding coordinate
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item<double>()); cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
} }
// Normalize the counts // Normalize the counts (dividing each row by the sum of the row)
cpTable = cpTable / cpTable.sum(0); cpTable /= cpTable.sum(0, true);
} }
float Node::getFactorValue(std::map<std::string, int>& evidence) double Node::getFactorValue(std::map<std::string, int>& evidence)
{ {
c10::List<c10::optional<at::Tensor>> coordinates; c10::List<c10::optional<at::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h) // following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(at::tensor(evidence[name])); coordinates.push_back(at::tensor(evidence[name]));
transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); }); transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
return cpTable.index({ coordinates }).item<float>(); return cpTable.index({ coordinates }).item<double>();
} }
std::vector<std::string> Node::graph(const std::string& className) std::vector<std::string> Node::graph(const std::string& className)
{ {
auto output = std::vector<std::string>(); auto output = std::vector<std::string>();
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : ""; auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
output.push_back(name + " [shape=circle" + suffix + "] \n"); output.push_back("\"" + name + "\" [shape=circle" + suffix + "] \n");
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); }); transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return "\"" + name + "\" -> \"" + child->getName() + "\""; });
return output; return output;
} }
} }

View File

@@ -12,14 +12,6 @@
#include <torch/torch.h> #include <torch/torch.h>
namespace bayesnet { namespace bayesnet {
class Node { class Node {
private:
std::string name;
std::vector<Node*> parents;
std::vector<Node*> children;
int numStates; // number of states of the variable
torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
std::vector<int64_t> dimensions; // dimensions of the cpTable
std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
public: public:
explicit Node(const std::string&); explicit Node(const std::string&);
void clear(); void clear();
@@ -31,12 +23,20 @@ namespace bayesnet {
std::vector<Node*>& getParents(); std::vector<Node*>& getParents();
std::vector<Node*>& getChildren(); std::vector<Node*>& getChildren();
torch::Tensor& getCPT(); torch::Tensor& getCPT();
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights); void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights);
int getNumStates() const; int getNumStates() const;
void setNumStates(int); void setNumStates(int);
unsigned minFill(); unsigned minFill();
std::vector<std::string> graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format std::vector<std::string> graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format
float getFactorValue(std::map<std::string, int>&); double getFactorValue(std::map<std::string, int>&);
private:
std::string name;
std::vector<Node*> parents;
std::vector<Node*> children;
int numStates = 0; // number of states of the variable
torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
std::vector<int64_t> dimensions; // dimensions of the cpTable
std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
}; };
} }
#endif #endif

View File

@@ -0,0 +1,17 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef SMOOTHING_H
#define SMOOTHING_H
namespace bayesnet {
    // Selects how conditional probability tables are smoothed during fitting.
    // Network::completeFit translates each mode into the smoothing value that
    // it hands to Node::computeCPT:
    //   NONE     -> 0.0 (no smoothing; handled by the switch's default branch)
    //   ORIGINAL -> 1.0 / number of samples
    //   LAPLACE  -> 1.0
    //   CESTNIK  -> 1.0 / number of states of the node
    // Enumerator values are spelled out explicitly so they stay stable if the
    // list is ever reordered.
    enum class Smoothing_t {
        NONE = -1,
        ORIGINAL = 0,
        LAPLACE = 1,
        CESTNIK = 2
    };
} // namespace bayesnet
#endif // SMOOTHING_H

View File

@@ -4,29 +4,79 @@
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// *************************************************************** // ***************************************************************
#include <map>
#include <unordered_map>
#include <tuple>
#include "Mst.h" #include "Mst.h"
#include "BayesMetrics.h" #include "BayesMetrics.h"
namespace bayesnet { namespace bayesnet {
//samples is n+1xm tensor used to fit the model //samples is n+1xm tensor used to fit the model
Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates) Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
: samples(samples) : samples(samples)
, features(features)
, className(className) , className(className)
, features(features)
, classNumStates(classNumStates) , classNumStates(classNumStates)
{ {
} }
//samples is n+1xm std::vector used to fit the model //samples is n+1xm std::vector used to fit the model
Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates) Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
: features(features) : samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
, className(className) , className(className)
, features(features)
, classNumStates(classNumStates) , classNumStates(classNumStates)
, samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
{ {
for (int i = 0; i < vsamples.size(); ++i) { for (int i = 0; i < vsamples.size(); ++i) {
samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32)); samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32));
} }
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
} }
// Scores every pair (i, j), i < j, of features that are not excluded with
// conditionalMutualInformation(feature i, feature j, class labels, weights), sorts the
// pairs by score and optionally keeps only the k highest-scoring ones.
//
// weights          - per-sample weights, forwarded unchanged to conditionalMutualInformation
// featuresExcluded - indices of features that must not take part in any pair
// ascending        - true: result ordered from lowest to highest score; false: highest first
// k                - number of pairs to keep; 0 (or k >= number of pairs) keeps all of them
//
// Returns the selected pairs in sorted order. As a side effect the members pairsKBest and
// scoresKPairs are overwritten with the selected pairs and their (pair, score) entries.
std::vector<std::pair<int, int>> Metrics::SelectKPairs(const torch::Tensor& weights, std::vector<int>& featuresExcluded, bool ascending, unsigned k)
{
    // Signed feature count: with an unsigned size, `n - 1` wraps around when there are
    // no features and the pair loop would run far out of bounds.
    const int n = static_cast<int>(features.size());
    scoresKPairs.clear();
    pairsKBest.clear();
    const auto isExcluded = [&featuresExcluded](int feature) {
        return std::find(featuresExcluded.begin(), featuresExcluded.end(), feature) != featuresExcluded.end();
    };
    // The last row of samples holds the class labels
    auto labels = samples.index({ -1, "..." });
    // Compute the score of every admissible pair
    for (int i = 0; i + 1 < n; ++i) {
        if (isExcluded(i)) {
            continue;
        }
        for (int j = i + 1; j < n; ++j) {
            if (isExcluded(j)) {
                continue;
            }
            auto value = conditionalMutualInformation(samples.index({ i, "..." }), samples.index({ j, "..." }), labels, weights);
            scoresKPairs.push_back({ std::make_pair(i, j), value });
        }
    }
    // Sort the (pair, score) entries by score
    if (ascending) {
        std::sort(scoresKPairs.begin(), scoresKPairs.end(), [](const auto& a, const auto& b)
            { return a.second < b.second; });
    } else {
        std::sort(scoresKPairs.begin(), scoresKPairs.end(), [](const auto& a, const auto& b)
            { return a.second > b.second; });
    }
    pairsKBest.reserve(scoresKPairs.size());
    for (const auto& [pairs, score] : scoresKPairs) {
        pairsKBest.push_back(pairs);
    }
    if (k != 0 && k < pairsKBest.size()) {
        if (ascending) {
            // In ascending order the best pairs sit at the tail: drop the leading
            // surplus with a single range erase instead of one erase per element.
            const auto surplus = static_cast<long long>(pairsKBest.size() - k);
            pairsKBest.erase(pairsKBest.begin(), pairsKBest.begin() + surplus);
            scoresKPairs.erase(scoresKPairs.begin(), scoresKPairs.begin() + surplus);
        } else {
            // In descending order the best pairs come first: just truncate
            pairsKBest.resize(k);
            scoresKPairs.resize(k);
        }
    }
    return pairsKBest;
}
std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k) std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
{ {
// Return the K Best features // Return the K Best features
@@ -66,7 +116,10 @@ namespace bayesnet {
{ {
return scoresKBest; return scoresKBest;
} }
// Returns a copy of the (feature pair, score) entries currently stored in scoresKPairs,
// i.e. the entries left there by the most recent SelectKPairs() call (empty if it was
// never called or selected nothing).
std::vector<std::pair<std::pair<int, int>, double>> Metrics::getScoresKPairs() const
{
return scoresKPairs;
}
torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights) torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
{ {
auto result = std::vector<double>(); auto result = std::vector<double>();
@@ -105,14 +158,8 @@ namespace bayesnet {
} }
return matrix; return matrix;
} }
// To use in Python // Measured in nats (natural logarithm (log) base e)
std::vector<float> Metrics::conditionalEdgeWeights(std::vector<float>& weights_) // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
{
const torch::Tensor weights = torch::tensor(weights_);
auto matrix = conditionalEdge(weights);
std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
return v;
}
double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights) double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
{ {
torch::Tensor counts = feature.bincount(weights); torch::Tensor counts = feature.bincount(weights);
@@ -151,10 +198,54 @@ namespace bayesnet {
} }
return entropyValue; return entropyValue;
} }
// I(X;Y) = H(Y) - H(Y|X) // H(X|Y,C) = sum_{y in Y, c in C} p(x,c) H(X|Y=y,C=c)
// Weighted conditional entropy measured in nats (std::log):
//     - sum over (a, c, b) of  w(a, c, b) / W  *  log( w(a, c, b) / w(a, c) )
// where a, c, b are the values of firstFeature, labels and secondFeature of a sample,
// w(.) is the summed weight of the samples showing that combination and W is the sum of
// all weights. Returns 0 when the weights add up to 0.
// The tensors are read through accessor<int, 1> (features, labels) and
// accessor<double, 1> (weights), so they are expected to be 1-D tensors of those types.
// NOTE(review): with this key layout the value is the entropy of secondFeature given
// (firstFeature, labels), whereas the formula comments next to the callers describe
// H(X|Y,C) with X = firstFeature — confirm which argument is meant to be conditioned.
double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
{
    // Every tensor has to carry exactly one entry per sample
    assert(firstFeature.size(0) == secondFeature.size(0) && firstFeature.size(0) == labels.size(0) && firstFeature.size(0) == weights.size(0));
    // Element-wise views over the four tensors
    auto firstValues = firstFeature.accessor<int, 1>();
    auto secondValues = secondFeature.accessor<int, 1>();
    auto labelValues = labels.accessor<int, 1>();
    auto sampleWeights = weights.accessor<double, 1>();
    // Summed weight per (first, label, second) triple and per (first, label) pair
    std::map<std::tuple<int, int, int>, double> tripleWeight;
    std::map<std::tuple<int, int>, double> pairWeight;
    const int numSamples = firstFeature.size(0);
    for (int s = 0; s < numSamples; ++s) {
        const double w = sampleWeights[s];
        tripleWeight[std::make_tuple(firstValues[s], labelValues[s], secondValues[s])] += w;
        pairWeight[std::make_tuple(firstValues[s], labelValues[s])] += w;
    }
    // Normalisation constant; without any weight there is nothing to measure
    const double weightSum = torch::sum(weights).item<double>();
    if (weightSum == 0) {
        return 0;
    }
    // Accumulate -p(triple) * log(p(second | first, label)) over the observed triples
    double result = 0.0;
    for (const auto& entry : tripleWeight) {
        const double tripleMass = entry.second;
        const auto pairKey = std::make_tuple(std::get<0>(entry.first), std::get<1>(entry.first));
        const double conditional = tripleMass / pairWeight.at(pairKey);
        if (conditional > 0) {
            result -= (tripleMass / weightSum) * std::log(conditional);
        }
    }
    return result;
}
// I(X;Y) = H(Y) - H(Y|X) ; I(X;Y) >= 0
double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights) double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
{ {
return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights); return std::max(entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights), 0.0);
}
// I(X;Y|C) = H(X|C) - H(X|Y,C) >= 0
double Metrics::conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
{
return std::max(conditionalEntropy(firstFeature, labels, weights) - conditionalEntropy(firstFeature, secondFeature, labels, weights), 0.0);
} }
/* /*
Compute the maximum spanning tree considering the weights as distances Compute the maximum spanning tree considering the weights as distances

View File

@@ -16,21 +16,26 @@ namespace bayesnet {
Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates); Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates); Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0); std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
std::vector<std::pair<int, int>> SelectKPairs(const torch::Tensor& weights, std::vector<int>& featuresExcluded, bool ascending = false, unsigned k = 0);
std::vector<double> getScoresKBest() const; std::vector<double> getScoresKBest() const;
std::vector<std::pair<std::pair<int, int>, double>> getScoresKPairs() const;
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights); double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python double conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
torch::Tensor conditionalEdge(const torch::Tensor& weights); torch::Tensor conditionalEdge(const torch::Tensor& weights);
std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root); std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
// Measured in nats (natural logarithm (log) base e)
// Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
protected: protected:
torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
std::string className; std::string className;
double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
std::vector<std::string> features; std::vector<std::string> features;
template <class T> template <class T>
std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source) std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
{ {
std::vector<std::pair<T, T>> result; std::vector<std::pair<T, T>> result;
for (int i = 0; i < source.size(); ++i) { for (int i = 0; i < source.size() - 1; ++i) {
T temp = source[i]; T temp = source[i];
for (int j = i + 1; j < source.size(); ++j) { for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] }); result.push_back({ temp, source[j] });
@@ -49,6 +54,8 @@ namespace bayesnet {
int classNumStates = 0; int classNumStates = 0;
std::vector<double> scoresKBest; std::vector<double> scoresKBest;
std::vector<int> featuresKBest; // sorted indices of the features std::vector<int> featuresKBest; // sorted indices of the features
std::vector<std::pair<int, int>> pairsKBest; // sorted indices of the pairs
std::vector<std::pair<std::pair<int, int>, double>> scoresKPairs;
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights); double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
}; };
} }

View File

@@ -0,0 +1,54 @@
#ifndef COUNTING_SEMAPHORE_H
#define COUNTING_SEMAPHORE_H
#include <mutex>
#include <condition_variable>
#include <algorithm>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <thread>
// Counting semaphore exposed as a single shared instance (see getInstance()).
// The number of permits is fixed at construction to 95% of
// std::thread::hardware_concurrency(), rounded down, but never less than 1.
// acquire() takes one permit, blocking while none is available; release() gives one back.
class CountingSemaphore {
public:
    // Returns the shared instance, which is created on first use.
    static CountingSemaphore& getInstance()
    {
        static CountingSemaphore instance;
        return instance;
    }
    // The semaphore is neither copyable nor assignable
    CountingSemaphore(const CountingSemaphore&) = delete;
    CountingSemaphore& operator=(const CountingSemaphore&) = delete;
    // Blocks until at least one permit is available, then takes it.
    void acquire()
    {
        std::unique_lock<std::mutex> lock(mtx_);
        cv_.wait(lock, [this]() { return count_ > 0; });
        --count_;
    }
    // Returns one permit and wakes one waiting thread.
    // NOTE(review): the counter is not capped, so calling release() more often than
    // acquire() lets the number of permits grow beyond getMaxCount().
    void release()
    {
        std::lock_guard<std::mutex> lock(mtx_);
        ++count_;
        // Above the maximum nobody can be waiting (the counter was already positive)
        if (count_ <= max_count_) {
            cv_.notify_one();
        }
    }
    // Number of permits currently available.
    // Read under the mutex: an unlocked read would race with acquire()/release().
    unsigned int getCount() const
    {
        std::lock_guard<std::mutex> lock(mtx_);
        return count_;
    }
    // Number of permits the semaphore was created with (immutable, no lock needed).
    unsigned int getMaxCount() const
    {
        return max_count_;
    }
private:
    // hardware_concurrency() may report 0 when it cannot be determined, hence the max().
    CountingSemaphore()
        : max_count_(std::max(1u, static_cast<unsigned int>(0.95 * std::thread::hardware_concurrency()))),
        count_(max_count_)
    {
    }
    mutable std::mutex mtx_; // mutable so that const getters can lock it
    std::condition_variable cv_;
    const unsigned int max_count_;
    unsigned int count_;
};
#endif

View File

@@ -53,14 +53,14 @@ namespace bayesnet {
} }
} }
void insertElement(std::list<int>& variables, int variable) void MST::insertElement(std::list<int>& variables, int variable)
{ {
if (std::find(variables.begin(), variables.end(), variable) == variables.end()) { if (std::find(variables.begin(), variables.end(), variable) == variables.end()) {
variables.push_front(variable); variables.push_front(variable);
} }
} }
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original) std::vector<std::pair<int, int>> MST::reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
{ {
// Create the edges of a DAG from the MST // Create the edges of a DAG from the MST
// replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted // replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted

View File

@@ -14,6 +14,8 @@ namespace bayesnet {
public: public:
MST() = default; MST() = default;
MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root); MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
void insertElement(std::list<int>& variables, int variable);
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original);
std::vector<std::pair<int, int>> maximumSpanningTree(); std::vector<std::pair<int, int>> maximumSpanningTree();
private: private:
torch::Tensor weights; torch::Tensor weights;

View File

@@ -0,0 +1,51 @@
#ifndef TENSORUTILS_H
#define TENSORUTILS_H
#include <torch/torch.h>
#include <vector>
namespace bayesnet {
// Stateless helpers that copy the contents of torch tensors into standard containers.
class TensorUtils {
public:
    // Copies a tensor into a vector of rows: the result has X.size(0) rows of X.size(1)
    // ints each, taken in row-major order from a contiguous copy of X.
    // The data is read through data_ptr<int>(), so X is expected to hold 32-bit ints;
    // for int64 tensors use data_ptr<int64_t>() and std::vector<int64_t> instead.
    // NOTE(review): only size(0) * size(1) elements are read, so X is presumably
    // always 2-D — confirm against the callers.
    static std::vector<std::vector<int>> to_matrix(const torch::Tensor& X)
    {
        // data_ptr() exposes raw storage, which is only row-major for a contiguous tensor
        const auto X_contig = X.contiguous();
        const int* row = X_contig.data_ptr<int>();
        // Keep the sizes in 64 bits: narrowing them to int and computing i * cols in int
        // would overflow for large tensors.
        const int64_t rows = X_contig.size(0);
        const int64_t cols = X_contig.size(1);
        std::vector<std::vector<int>> data;
        data.reserve(static_cast<std::size_t>(rows));
        // Build each row straight from its slice of the buffer, advancing one row at a time
        for (int64_t r = 0; r < rows; ++r, row += cols) {
            data.emplace_back(row, row + cols);
        }
        return data;
    }
    // Copies the first y.size(0) elements of a contiguous copy of y into a std::vector<T>.
    // T has to match the element type stored in the tensor, since the data is read
    // through data_ptr<T>().
    template <typename T>
    static std::vector<T> to_vector(const torch::Tensor& y)
    {
        // Work on a contiguous layout so the elements are adjacent in memory
        const auto y_contig = y.contiguous();
        const T* first = y_contig.data_ptr<T>();
        return std::vector<T>(first, first + y_contig.size(0));
    }
};
}
#endif // TENSORUTILS_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -137,7 +137,7 @@
include(CMakeParseArguments) include(CMakeParseArguments)
option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE) option(CODE_COVERAGE_VERBOSE "Verbose information" TRUE)
# Check prereqs # Check prereqs
find_program( GCOV_PATH gcov ) find_program( GCOV_PATH gcov )
@@ -160,8 +160,12 @@ foreach(LANG ${LANGUAGES})
endif() endif()
elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU" elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU"
AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang") AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang")
if ("${LANG}" MATCHES "CUDA")
message(STATUS "Ignoring CUDA")
else()
message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...") message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
endif() endif()
endif()
endforeach() endforeach()
set(COVERAGE_COMPILER_FLAGS "-g --coverage" set(COVERAGE_COMPILER_FLAGS "-g --coverage"

Binary file not shown.

676
diagrams/BayesNet.puml Normal file
View File

@@ -0,0 +1,676 @@
@startuml
title clang-uml class diagram model
class "bayesnet::Node" as C_0010428199432536647474
class C_0010428199432536647474 #aliceblue;line:blue;line.dotted;text:blue {
+Node(const std::string &) : void
..
+addChild(Node *) : void
+addParent(Node *) : void
+clear() : void
+computeCPT(const torch::Tensor & dataset, const std::vector<std::string> & features, const double smoothing, const torch::Tensor & weights) : void
+getCPT() : torch::Tensor &
+getChildren() : std::vector<Node *> &
+getFactorValue(std::map<std::string,int> &) : double
+getName() const : std::string
+getNumStates() const : int
+getParents() : std::vector<Node *> &
+graph(const std::string & clasName) : std::vector<std::string>
+minFill() : unsigned int
+removeChild(Node *) : void
+removeParent(Node *) : void
+setNumStates(int) : void
__
}
enum "bayesnet::Smoothing_t" as C_0013393078277439680282
enum C_0013393078277439680282 {
NONE
ORIGINAL
LAPLACE
CESTNIK
}
class "bayesnet::Network" as C_0009493661199123436603
class C_0009493661199123436603 #aliceblue;line:blue;line.dotted;text:blue {
+Network() : void
+Network(const Network &) : void
+~Network() = default : void
..
+addEdge(const std::string &, const std::string &) : void
+addNode(const std::string &) : void
+dump_cpt() const : std::string
+fit(const torch::Tensor & samples, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
+fit(const torch::Tensor & X, const torch::Tensor & y, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
+fit(const std::vector<std::vector<int>> & input_data, const std::vector<int> & labels, const std::vector<double> & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
+getClassName() const : std::string
+getClassNumStates() const : int
+getEdges() const : std::vector<std::pair<std::string,std::string>>
+getFeatures() const : std::vector<std::string>
+getNodes() : std::map<std::string,std::unique_ptr<Node>> &
+getNumEdges() const : int
+getSamples() : torch::Tensor &
+getStates() const : int
+graph(const std::string & title) const : std::vector<std::string>
+initialize() : void
+predict(const std::vector<std::vector<int>> &) : std::vector<int>
+predict(const torch::Tensor &) : torch::Tensor
+predict_proba(const std::vector<std::vector<int>> &) : std::vector<std::vector<double>>
+predict_proba(const torch::Tensor &) : torch::Tensor
+predict_tensor(const torch::Tensor & samples, const bool proba) : torch::Tensor
+score(const std::vector<std::vector<int>> &, const std::vector<int> &) : double
+show() const : std::vector<std::string>
+topological_sort() : std::vector<std::string>
+version() : std::string
__
}
enum "bayesnet::status_t" as C_0005907365846270811004
enum C_0005907365846270811004 {
NORMAL
WARNING
ERROR
}
abstract "bayesnet::BaseClassifier" as C_0002617087915615796317
abstract C_0002617087915615796317 #aliceblue;line:blue;line.dotted;text:blue {
+~BaseClassifier() = default : void
..
{abstract} +dump_cpt() const = 0 : std::string
{abstract} +fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights, const Smoothing_t smoothing) = 0 : BaseClassifier &
{abstract} +fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
{abstract} +getClassNumStates() const = 0 : int
{abstract} +getNotes() const = 0 : std::vector<std::string>
{abstract} +getNumberOfEdges() const = 0 : int
{abstract} +getNumberOfNodes() const = 0 : int
{abstract} +getNumberOfStates() const = 0 : int
{abstract} +getStatus() const = 0 : status_t
+getValidHyperparameters() : std::vector<std::string> &
{abstract} +getVersion() = 0 : std::string
{abstract} +graph(const std::string & title = "") const = 0 : std::vector<std::string>
{abstract} +predict(std::vector<std::vector<int>> & X) = 0 : std::vector<int>
{abstract} +predict(torch::Tensor & X) = 0 : torch::Tensor
{abstract} +predict_proba(std::vector<std::vector<int>> & X) = 0 : std::vector<std::vector<double>>
{abstract} +predict_proba(torch::Tensor & X) = 0 : torch::Tensor
{abstract} +score(std::vector<std::vector<int>> & X, std::vector<int> & y) = 0 : float
{abstract} +score(torch::Tensor & X, torch::Tensor & y) = 0 : float
{abstract} +setHyperparameters(const nlohmann::json & hyperparameters) = 0 : void
{abstract} +show() const = 0 : std::vector<std::string>
{abstract} +topological_order() = 0 : std::vector<std::string>
{abstract} #trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) = 0 : void
__
#notes : std::vector<std::string>
#status : status_t
#validHyperparameters : std::vector<std::string>
}
class "bayesnet::Metrics" as C_0005895723015084986588
class C_0005895723015084986588 #aliceblue;line:blue;line.dotted;text:blue {
+Metrics() = default : void
+Metrics(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
+Metrics(const std::vector<std::vector<int>> & vsamples, const std::vector<int> & labels, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
..
+SelectKBestWeighted(const torch::Tensor & weights, bool ascending = false, unsigned int k = 0) : std::vector<int>
+SelectKPairs(const torch::Tensor & weights, std::vector<int> & featuresExcluded, bool ascending = false, unsigned int k = 0) : std::vector<std::pair<int,int>>
+conditionalEdge(const torch::Tensor & weights) : torch::Tensor
+conditionalEntropy(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & labels, const torch::Tensor & weights) : double
+conditionalMutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & labels, const torch::Tensor & weights) : double
#doCombinations<T>(const std::vector<T> & source) : std::vector<std::pair<T, T> >
+entropy(const torch::Tensor & feature, const torch::Tensor & weights) : double
+getScoresKBest() const : std::vector<double>
+getScoresKPairs() const : std::vector<std::pair<std::pair<int,int>,double>>
+maximumSpanningTree(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : std::vector<std::pair<int,int>>
+mutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & weights) : double
#pop_first<T>(std::vector<T> & v) : T
__
#className : std::string
#features : std::vector<std::string>
#samples : torch::Tensor
}
abstract "bayesnet::Classifier" as C_0016351972983202413152
abstract C_0016351972983202413152 #aliceblue;line:blue;line.dotted;text:blue {
+Classifier(Network model) : void
+~Classifier() = default : void
..
+addNodes() : void
#buildDataset(torch::Tensor & y) : void
{abstract} #buildModel(const torch::Tensor & weights) = 0 : void
#checkFitParameters() : void
+dump_cpt() const : std::string
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
+fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights, const Smoothing_t smoothing) : Classifier &
+getClassNumStates() const : int
+getNotes() const : std::vector<std::string>
+getNumberOfEdges() const : int
+getNumberOfNodes() const : int
+getNumberOfStates() const : int
+getStatus() const : status_t
+getVersion() : std::string
+predict(std::vector<std::vector<int>> & X) : std::vector<int>
+predict(torch::Tensor & X) : torch::Tensor
+predict_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
+predict_proba(torch::Tensor & X) : torch::Tensor
+score(torch::Tensor & X, torch::Tensor & y) : float
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
+setHyperparameters(const nlohmann::json & hyperparameters) : void
+show() const : std::vector<std::string>
+topological_order() : std::vector<std::string>
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
__
#CLASSIFIER_NOT_FITTED : const std::string
#className : std::string
#dataset : torch::Tensor
#features : std::vector<std::string>
#fitted : bool
#m : unsigned int
#metrics : Metrics
#model : Network
#n : unsigned int
#states : std::map<std::string,std::vector<int>>
}
class "bayesnet::Proposal" as C_0017759964713298103839
class C_0017759964713298103839 #aliceblue;line:blue;line.dotted;text:blue {
+Proposal(torch::Tensor & pDataset, std::vector<std::string> & features_, std::string & className_) : void
+~Proposal() : void
..
#checkInput(const torch::Tensor & X, const torch::Tensor & y) : void
#fit_local_discretization(const torch::Tensor & y) : std::map<std::string,std::vector<int>>
#localDiscretizationProposal(const std::map<std::string,std::vector<int>> & states, Network & model) : std::map<std::string,std::vector<int>>
#prepareX(torch::Tensor & X) : torch::Tensor
__
#Xf : torch::Tensor
#discretizers : map<std::string,mdlp::CPPFImdlp *>
#y : torch::Tensor
}
class "bayesnet::KDB" as C_0008902920152122000044
class C_0008902920152122000044 #aliceblue;line:blue;line.dotted;text:blue {
+KDB(int k, float theta = 0.03) : void
+~KDB() = default : void
..
#add_m_edges(int idx, std::vector<int> & S, torch::Tensor & weights) : void
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & name = "KDB") const : std::vector<std::string>
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
__
}
class "bayesnet::KDBLd" as C_0002756018222998454702
class C_0002756018222998454702 #aliceblue;line:blue;line.dotted;text:blue {
+KDBLd(int k) : void
+~KDBLd() = default : void
..
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : KDBLd &
+graph(const std::string & name = "KDB") const : std::vector<std::string>
+predict(torch::Tensor & X) : torch::Tensor
{static} +version() : std::string
__
}
class "bayesnet::SPODE" as C_0004096182510460307610
class C_0004096182510460307610 #aliceblue;line:blue;line.dotted;text:blue {
+SPODE(int root) : void
+~SPODE() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
__
}
class "bayesnet::SPODELd" as C_0010957245114062042836
class C_0010957245114062042836 #aliceblue;line:blue;line.dotted;text:blue {
+SPODELd(int root) : void
+~SPODELd() = default : void
..
+commonFit(const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
+graph(const std::string & name = "SPODELd") const : std::vector<std::string>
+predict(torch::Tensor & X) : torch::Tensor
{static} +version() : std::string
__
}
class "bayesnet::SPnDE" as C_0016268916386101512883
class C_0016268916386101512883 #aliceblue;line:blue;line.dotted;text:blue {
+SPnDE(std::vector<int> parents) : void
+~SPnDE() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & name = "SPnDE") const : std::vector<std::string>
__
}
class "bayesnet::TAN" as C_0014087955399074584137
class C_0014087955399074584137 #aliceblue;line:blue;line.dotted;text:blue {
+TAN() : void
+~TAN() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & name = "TAN") const : std::vector<std::string>
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
__
}
class "bayesnet::TANLd" as C_0013350632773616302678
class C_0013350632773616302678 #aliceblue;line:blue;line.dotted;text:blue {
+TANLd() : void
+~TANLd() = default : void
..
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : TANLd &
+graph(const std::string & name = "TANLd") const : std::vector<std::string>
+predict(torch::Tensor & X) : torch::Tensor
__
}
class "bayesnet::XSp2de" as C_0007640742442325463418
class C_0007640742442325463418 #aliceblue;line:blue;line.dotted;text:blue {
+XSp2de(int spIndex1, int spIndex2) : void
..
#buildModel(const torch::Tensor & weights) : void
+fitx(torch::Tensor & X, torch::Tensor & y, torch::Tensor & weights_, const Smoothing_t smoothing) : void
+getClassNumStates() const : int
+getNFeatures() const : int
+getNumberOfEdges() const : int
+getNumberOfNodes() const : int
+getNumberOfStates() const : int
+graph(const std::string & title) const : std::vector<std::string>
+predict(const std::vector<int> & instance) const : int
+predict(std::vector<std::vector<int>> & test_data) : std::vector<int>
+predict(torch::Tensor & X) : torch::Tensor
+predict_proba(const std::vector<int> & instance) const : std::vector<double>
+predict_proba(std::vector<std::vector<int>> & test_data) : std::vector<std::vector<double>>
+predict_proba(torch::Tensor & X) : torch::Tensor
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
+score(torch::Tensor & X, torch::Tensor & y) : float
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
+to_string() const : std::string
#trainModel(const torch::Tensor & weights, const bayesnet::Smoothing_t smoothing) : void
__
}
class "bayesnet::XSpode" as C_0015654113248178830206
class C_0015654113248178830206 #aliceblue;line:blue;line.dotted;text:blue {
+XSpode(int spIndex) : void
..
#buildModel(const torch::Tensor & weights) : void
+fitx(torch::Tensor & X, torch::Tensor & y, torch::Tensor & weights_, const Smoothing_t smoothing) : void
+getClassNumStates() const : int
+getNFeatures() const : int
+getNumberOfEdges() const : int
+getNumberOfNodes() const : int
+getNumberOfStates() const : int
+getStates() : std::vector<int> &
+graph(const std::string & title) const : std::vector<std::string>
+normalize(std::vector<double> & v) const : void
+predict(const std::vector<int> & instance) const : int
+predict(std::vector<std::vector<int>> & X) : std::vector<int>
+predict(torch::Tensor & X) : torch::Tensor
+predict_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
+predict_proba(torch::Tensor & X) : torch::Tensor
+predict_proba(const std::vector<int> & instance) const : std::vector<double>
+score(torch::Tensor & X, torch::Tensor & y) : float
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
+to_string() const : std::string
#trainModel(const torch::Tensor & weights, const bayesnet::Smoothing_t smoothing) : void
__
}
class "bayesnet::TensorUtils" as C_0010304804115474100819
class C_0010304804115474100819 #aliceblue;line:blue;line.dotted;text:blue {
{static} +to_matrix(const torch::Tensor & X) : std::vector<std::vector<int>>
{static} +to_vector<T>(const torch::Tensor & y) : std::vector<T>
__
}
class "bayesnet::Ensemble" as C_0015881931090842884611
class C_0015881931090842884611 #aliceblue;line:blue;line.dotted;text:blue {
+Ensemble(bool predict_voting = true) : void
+~Ensemble() = default : void
..
#compute_arg_max(std::vector<std::vector<double>> & X) : std::vector<int>
#compute_arg_max(torch::Tensor & X) : torch::Tensor
+dump_cpt() const : std::string
+getNumberOfEdges() const : int
+getNumberOfNodes() const : int
+getNumberOfStates() const : int
+graph(const std::string & title) const : std::vector<std::string>
+predict(std::vector<std::vector<int>> & X) : std::vector<int>
+predict(torch::Tensor & X) : torch::Tensor
#predict_average_proba(torch::Tensor & X) : torch::Tensor
#predict_average_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
#predict_average_voting(torch::Tensor & X) : torch::Tensor
#predict_average_voting(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
+predict_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
+predict_proba(torch::Tensor & X) : torch::Tensor
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
+score(torch::Tensor & X, torch::Tensor & y) : float
+show() const : std::vector<std::string>
+topological_order() : std::vector<std::string>
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
#voting(torch::Tensor & votes) : torch::Tensor
__
#models : std::vector<std::unique_ptr<Classifier>>
#n_models : unsigned int
#predict_voting : bool
#significanceModels : std::vector<double>
}
class "bayesnet::A2DE" as C_0001410789567057647859
class C_0001410789567057647859 #aliceblue;line:blue;line.dotted;text:blue {
+A2DE(bool predict_voting = false) : void
+~A2DE() : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & title = "A2DE") const : std::vector<std::string>
+setHyperparameters(const nlohmann::json & hyperparameters) : void
__
}
class "bayesnet::AODE" as C_0006288892608974306258
class C_0006288892608974306258 #aliceblue;line:blue;line.dotted;text:blue {
+AODE(bool predict_voting = false) : void
+~AODE() : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & title = "AODE") const : std::vector<std::string>
+setHyperparameters(const nlohmann::json & hyperparameters) : void
__
}
class "bayesnet::AODELd" as C_0003898187834670349177
class C_0003898187834670349177 #aliceblue;line:blue;line.dotted;text:blue {
+AODELd(bool predict_voting = true) : void
+~AODELd() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+fit(torch::Tensor & X_, torch::Tensor & y_, const std::vector<std::string> & features_, const std::string & className_, std::map<std::string,std::vector<int>> & states_, const Smoothing_t smoothing) : AODELd &
+graph(const std::string & name = "AODELd") const : std::vector<std::string>
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
__
}
abstract "bayesnet::FeatureSelect" as C_0013562609546004646591
abstract C_0013562609546004646591 #aliceblue;line:blue;line.dotted;text:blue {
+FeatureSelect(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
+~FeatureSelect() : void
..
#computeMeritCFS() : double
#computeSuFeatures(const int a, const int b) : double
#computeSuLabels() : void
{abstract} +fit() = 0 : void
+getFeatures() const : std::vector<int>
+getScores() const : std::vector<double>
#initialize() : void
#symmetricalUncertainty(int a, int b) : double
__
#fitted : bool
#maxFeatures : int
#selectedFeatures : std::vector<int>
#selectedScores : std::vector<double>
#suFeatures : std::map<std::pair<int,int>,double>
#suLabels : std::vector<double>
#weights : const torch::Tensor &
}
class "bayesnet::(anonymous_60357672)" as C_0006397015156479549697
class C_0006397015156479549697 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60358326)" as C_0013066254331852347304
class C_0013066254331852347304 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::Boost" as C_0009819322948617116148
class C_0009819322948617116148 #aliceblue;line:blue;line.dotted;text:blue {
+Boost(bool predict_voting = false) : void
+~Boost() = default : void
..
#add_model(std::unique_ptr<Classifier> model, double significance) : void
#buildModel(const torch::Tensor & weights) : void
#featureSelection(torch::Tensor & weights_) : std::vector<int>
#remove_last_model() : void
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
#update_weights(torch::Tensor & ytrain, torch::Tensor & ypred, torch::Tensor & weights) : std::tuple<torch::Tensor &,double,bool>
#update_weights_block(int k, torch::Tensor & ytrain, torch::Tensor & weights) : std::tuple<torch::Tensor &,double,bool>
__
#X_test : torch::Tensor
#X_train : torch::Tensor
#alpha_block : bool
#bisection : bool
#block_update : bool
#convergence : bool
#convergence_best : bool
#featureSelector : FeatureSelect *
#maxTolerance : int
#order_algorithm : std::string
#selectFeatures : bool
#select_features_algorithm : std::string
#threshold : double
#y_test : torch::Tensor
#y_train : torch::Tensor
}
class "bayesnet::BoostA2DE" as C_0000272055465257861326
class C_0000272055465257861326 #aliceblue;line:blue;line.dotted;text:blue {
+BoostA2DE(bool predict_voting = false) : void
+~BoostA2DE() = default : void
..
+graph(const std::string & title = "BoostA2DE") const : std::vector<std::string>
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
__
}
class "bayesnet::(anonymous_60425028)" as C_0000461144706913711531
class C_0000461144706913711531 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60425682)" as C_0014849589915262463453
class C_0014849589915262463453 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::BoostAODE" as C_0002867772739198819061
class C_0002867772739198819061 #aliceblue;line:blue;line.dotted;text:blue {
+BoostAODE(bool predict_voting = false) : void
+~BoostAODE() = default : void
..
+graph(const std::string & title = "BoostAODE") const : std::vector<std::string>
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
__
}
class "bayesnet::XBA2DE" as C_0008480973840710001141
class C_0008480973840710001141 #aliceblue;line:blue;line.dotted;text:blue {
+XBA2DE(bool predict_voting = false) : void
+~XBA2DE() = default : void
..
+getVersion() : std::string
+graph(const std::string & title = "XBA2DE") const : std::vector<std::string>
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
__
}
class "bayesnet::(anonymous_60414016)" as C_0008746994658440620779
class C_0008746994658440620779 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60414670)" as C_0008030559132212449356
class C_0008030559132212449356 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::XBAODE" as C_0005198482342493966768
class C_0005198482342493966768 #aliceblue;line:blue;line.dotted;text:blue {
+XBAODE() : void
..
+getVersion() : std::string
#trainModel(const torch::Tensor & weights, const bayesnet::Smoothing_t smoothing) : void
__
}
class "bayesnet::CFS" as C_0000093018845530739957
class C_0000093018845530739957 #aliceblue;line:blue;line.dotted;text:blue {
+CFS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
+~CFS() : void
..
+fit() : void
__
}
class "bayesnet::FCBF" as C_0001157456122733975432
class C_0001157456122733975432 #aliceblue;line:blue;line.dotted;text:blue {
+FCBF(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
+~FCBF() : void
..
+fit() : void
__
}
class "bayesnet::IWSS" as C_0000066148117395428429
class C_0000066148117395428429 #aliceblue;line:blue;line.dotted;text:blue {
+IWSS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
+~IWSS() : void
..
+fit() : void
__
}
class "bayesnet::(anonymous_60810808)" as C_0012002108046995621535
class C_0012002108046995621535 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60811462)" as C_0004735044229422764240
class C_0004735044229422764240 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::(anonymous_60804220)" as C_0007082100550474633839
class C_0007082100550474633839 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60804874)" as C_0003669430095936529648
class C_0003669430095936529648 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::(anonymous_60809706)" as C_0012336951062058157227
class C_0012336951062058157227 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60810360)" as C_0002435892998884329673
class C_0002435892998884329673 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::MST" as C_0001054867409378333602
class C_0001054867409378333602 #aliceblue;line:blue;line.dotted;text:blue {
+MST() = default : void
+MST(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : void
..
+insertElement(std::list<int> & variables, int variable) : void
+maximumSpanningTree() : std::vector<std::pair<int,int>>
+reorder(std::vector<std::pair<float,std::pair<int,int>>> T, int root_original) : std::vector<std::pair<int,int>>
__
}
class "bayesnet::Graph" as C_0009576333456015187741
class C_0009576333456015187741 #aliceblue;line:blue;line.dotted;text:blue {
+Graph(int V) : void
..
+addEdge(int u, int v, float wt) : void
+find_set(int i) : int
+get_mst() : std::vector<std::pair<float,std::pair<int,int>>>
+kruskal_algorithm() : void
+union_set(int u, int v) : void
__
}
C_0010428199432536647474 --> C_0010428199432536647474 : -parents
C_0010428199432536647474 --> C_0010428199432536647474 : -children
C_0009493661199123436603 ..> C_0013393078277439680282
C_0009493661199123436603 o-- C_0010428199432536647474 : -nodes
C_0002617087915615796317 ..> C_0013393078277439680282
C_0002617087915615796317 o-- C_0005907365846270811004 : #status
C_0016351972983202413152 ..> C_0013393078277439680282
C_0016351972983202413152 ..> C_0005907365846270811004
C_0016351972983202413152 o-- C_0009493661199123436603 : #model
C_0016351972983202413152 o-- C_0005895723015084986588 : #metrics
C_0002617087915615796317 <|-- C_0016351972983202413152
C_0017759964713298103839 ..> C_0009493661199123436603
C_0016351972983202413152 <|-- C_0008902920152122000044
C_0002756018222998454702 ..> C_0013393078277439680282
C_0008902920152122000044 <|-- C_0002756018222998454702
C_0017759964713298103839 <|-- C_0002756018222998454702
C_0016351972983202413152 <|-- C_0004096182510460307610
C_0010957245114062042836 ..> C_0013393078277439680282
C_0004096182510460307610 <|-- C_0010957245114062042836
C_0017759964713298103839 <|-- C_0010957245114062042836
C_0016351972983202413152 <|-- C_0016268916386101512883
C_0016351972983202413152 <|-- C_0014087955399074584137
C_0013350632773616302678 ..> C_0013393078277439680282
C_0014087955399074584137 <|-- C_0013350632773616302678
C_0017759964713298103839 <|-- C_0013350632773616302678
C_0007640742442325463418 ..> C_0013393078277439680282
C_0016351972983202413152 <|-- C_0007640742442325463418
C_0015654113248178830206 ..> C_0013393078277439680282
C_0016351972983202413152 <|-- C_0015654113248178830206
C_0015881931090842884611 ..> C_0013393078277439680282
C_0015881931090842884611 o-- C_0016351972983202413152 : #models
C_0016351972983202413152 <|-- C_0015881931090842884611
C_0015881931090842884611 <|-- C_0001410789567057647859
C_0015881931090842884611 <|-- C_0006288892608974306258
C_0003898187834670349177 ..> C_0013393078277439680282
C_0015881931090842884611 <|-- C_0003898187834670349177
C_0017759964713298103839 <|-- C_0003898187834670349177
C_0005895723015084986588 <|-- C_0013562609546004646591
C_0009819322948617116148 ..> C_0016351972983202413152
C_0009819322948617116148 --> C_0013562609546004646591 : #featureSelector
C_0015881931090842884611 <|-- C_0009819322948617116148
C_0000272055465257861326 ..> C_0013393078277439680282
C_0009819322948617116148 <|-- C_0000272055465257861326
C_0002867772739198819061 ..> C_0013393078277439680282
C_0009819322948617116148 <|-- C_0002867772739198819061
C_0008480973840710001141 ..> C_0013393078277439680282
C_0009819322948617116148 <|-- C_0008480973840710001141
C_0005198482342493966768 ..> C_0013393078277439680282
C_0009819322948617116148 <|-- C_0005198482342493966768
C_0013562609546004646591 <|-- C_0000093018845530739957
C_0013562609546004646591 <|-- C_0001157456122733975432
C_0013562609546004646591 <|-- C_0000066148117395428429
'Generated with clang-uml, version 0.5.5
'LLVM version clang version 18.1.8 (Fedora 18.1.8-5.fc41)
@enduml

1
diagrams/BayesNet.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 229 KiB

314
diagrams/dependency.svg Normal file
View File

@@ -0,0 +1,314 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 12.1.0 (20240811.2233)
-->
<!-- Title: BayesNet Pages: 1 -->
<svg width="3725pt" height="432pt"
viewBox="0.00 0.00 3724.84 431.80" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 427.8)">
<title>BayesNet</title>
<polygon fill="white" stroke="none" points="-4,4 -4,-427.8 3720.84,-427.8 3720.84,4 -4,4"/>
<!-- node0 -->
<g id="node1" class="node">
<title>node0</title>
<polygon fill="none" stroke="black" points="1655.43,-398.35 1655.43,-413.26 1625.69,-423.8 1583.63,-423.8 1553.89,-413.26 1553.89,-398.35 1583.63,-387.8 1625.69,-387.8 1655.43,-398.35"/>
<text text-anchor="middle" x="1604.66" y="-401.53" font-family="Times,serif" font-size="12.00">BayesNet</text>
</g>
<!-- node1 -->
<g id="node2" class="node">
<title>node1</title>
<polygon fill="none" stroke="black" points="413.32,-257.8 372.39,-273.03 206.66,-279.8 40.93,-273.03 0,-257.8 114.69,-245.59 298.64,-245.59 413.32,-257.8"/>
<text text-anchor="middle" x="206.66" y="-257.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libc10.so</text>
</g>
<!-- node0&#45;&gt;node1 -->
<g id="edge1" class="edge">
<title>node0&#45;&gt;node1</title>
<path fill="none" stroke="black" d="M1553.59,-400.53C1451.65,-391.91 1215.69,-371.61 1017.66,-351.8 773.36,-327.37 488.07,-295.22 329.31,-277.01"/>
<polygon fill="black" stroke="black" points="329.93,-273.56 319.6,-275.89 329.14,-280.51 329.93,-273.56"/>
</g>
<!-- node2 -->
<g id="node3" class="node">
<title>node2</title>
<polygon fill="none" stroke="black" points="894.21,-257.8 848.35,-273.03 662.66,-279.8 476.98,-273.03 431.12,-257.8 559.61,-245.59 765.71,-245.59 894.21,-257.8"/>
<text text-anchor="middle" x="662.66" y="-257.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libc10_cuda.so</text>
</g>
<!-- node0&#45;&gt;node2 -->
<g id="edge2" class="edge">
<title>node0&#45;&gt;node2</title>
<path fill="none" stroke="black" d="M1555.34,-397.37C1408.12,-375.18 969.52,-309.06 767.13,-278.55"/>
<polygon fill="black" stroke="black" points="767.81,-275.12 757.4,-277.09 766.77,-282.04 767.81,-275.12"/>
</g>
<!-- node3 -->
<g id="node4" class="node">
<title>node3</title>
<polygon fill="none" stroke="black" points="1338.68,-257.8 1296.49,-273.03 1125.66,-279.8 954.84,-273.03 912.65,-257.8 1030.86,-245.59 1220.46,-245.59 1338.68,-257.8"/>
<text text-anchor="middle" x="1125.66" y="-257.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libkineto.a</text>
</g>
<!-- node0&#45;&gt;node3 -->
<g id="edge3" class="edge">
<title>node0&#45;&gt;node3</title>
<path fill="none" stroke="black" d="M1566.68,-393.54C1484.46,-369.17 1289.3,-311.32 1188.44,-281.41"/>
<polygon fill="black" stroke="black" points="1189.53,-278.09 1178.95,-278.6 1187.54,-284.8 1189.53,-278.09"/>
</g>
<!-- node4 -->
<g id="node5" class="node">
<title>node4</title>
<polygon fill="none" stroke="black" points="1552.26,-257.8 1532.93,-273.03 1454.66,-279.8 1376.4,-273.03 1357.07,-257.8 1411.23,-245.59 1498.1,-245.59 1552.26,-257.8"/>
<text text-anchor="middle" x="1454.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/lib64/libcuda.so</text>
</g>
<!-- node0&#45;&gt;node4 -->
<g id="edge4" class="edge">
<title>node0&#45;&gt;node4</title>
<path fill="none" stroke="black" d="M1586.27,-387.39C1559.5,-362.05 1509.72,-314.92 1479.65,-286.46"/>
<polygon fill="black" stroke="black" points="1482.13,-283.99 1472.46,-279.65 1477.31,-289.07 1482.13,-283.99"/>
</g>
<!-- node5 -->
<g id="node6" class="node">
<title>node5</title>
<polygon fill="none" stroke="black" points="1873.26,-257.8 1843.23,-273.03 1721.66,-279.8 1600.09,-273.03 1570.06,-257.8 1654.19,-245.59 1789.13,-245.59 1873.26,-257.8"/>
<text text-anchor="middle" x="1721.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/local/cuda/lib64/libcudart.so</text>
</g>
<!-- node0&#45;&gt;node5 -->
<g id="edge5" class="edge">
<title>node0&#45;&gt;node5</title>
<path fill="none" stroke="black" d="M1619.76,-387.77C1628.83,-377.46 1640.53,-363.98 1650.66,-351.8 1668.32,-330.59 1687.84,-306.03 1701.94,-288.1"/>
<polygon fill="black" stroke="black" points="1704.43,-290.59 1707.84,-280.56 1698.92,-286.27 1704.43,-290.59"/>
</g>
<!-- node6 -->
<g id="node7" class="node">
<title>node6</title>
<polygon fill="none" stroke="black" points="2231.79,-257.8 2198.1,-273.03 2061.66,-279.8 1925.23,-273.03 1891.53,-257.8 1985.95,-245.59 2137.38,-245.59 2231.79,-257.8"/>
<text text-anchor="middle" x="2061.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/local/cuda/lib64/libnvToolsExt.so</text>
</g>
<!-- node0&#45;&gt;node6 -->
<g id="edge6" class="edge">
<title>node0&#45;&gt;node6</title>
<path fill="none" stroke="black" d="M1642.06,-393.18C1721.31,-368.56 1906.71,-310.95 2002.32,-281.24"/>
<polygon fill="black" stroke="black" points="2003.28,-284.61 2011.79,-278.3 2001.21,-277.92 2003.28,-284.61"/>
</g>
<!-- node7 -->
<g id="node8" class="node">
<title>node7</title>
<polygon fill="none" stroke="black" points="2541.44,-257.8 2512.56,-273.03 2395.66,-279.8 2278.76,-273.03 2249.89,-257.8 2330.79,-245.59 2460.54,-245.59 2541.44,-257.8"/>
<text text-anchor="middle" x="2395.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/local/cuda/lib64/libnvrtc.so</text>
</g>
<!-- node0&#45;&gt;node7 -->
<g id="edge7" class="edge">
<title>node0&#45;&gt;node7</title>
<path fill="none" stroke="black" d="M1651.19,-396.45C1780.36,-373.26 2144.76,-307.85 2311.05,-277.99"/>
<polygon fill="black" stroke="black" points="2311.47,-281.47 2320.7,-276.26 2310.24,-274.58 2311.47,-281.47"/>
</g>
<!-- node8 -->
<g id="node9" class="node">
<title>node8</title>
<polygon fill="none" stroke="black" points="1642.01,-326.35 1642.01,-341.26 1620.13,-351.8 1589.19,-351.8 1567.31,-341.26 1567.31,-326.35 1589.19,-315.8 1620.13,-315.8 1642.01,-326.35"/>
<text text-anchor="middle" x="1604.66" y="-329.53" font-family="Times,serif" font-size="12.00">fimdlp</text>
</g>
<!-- node0&#45;&gt;node8 -->
<g id="edge8" class="edge">
<title>node0&#45;&gt;node8</title>
<path fill="none" stroke="black" d="M1604.66,-387.5C1604.66,-380.21 1604.66,-371.53 1604.66,-363.34"/>
<polygon fill="black" stroke="black" points="1608.16,-363.42 1604.66,-353.42 1601.16,-363.42 1608.16,-363.42"/>
</g>
<!-- node19 -->
<g id="node10" class="node">
<title>node19</title>
<polygon fill="none" stroke="black" points="2709.74,-267.37 2634.66,-279.8 2559.58,-267.37 2588.26,-247.24 2681.06,-247.24 2709.74,-267.37"/>
<text text-anchor="middle" x="2634.66" y="-257.53" font-family="Times,serif" font-size="12.00">torch_library</text>
</g>
<!-- node0&#45;&gt;node19 -->
<g id="edge29" class="edge">
<title>node0&#45;&gt;node19</title>
<path fill="none" stroke="black" d="M1655.87,-399.32C1798.23,-383.79 2210.64,-336.94 2550.66,-279.8 2559.43,-278.33 2568.68,-276.62 2577.72,-274.86"/>
<polygon fill="black" stroke="black" points="2578.38,-278.3 2587.5,-272.92 2577.01,-271.43 2578.38,-278.3"/>
</g>
<!-- node8&#45;&gt;node1 -->
<g id="edge9" class="edge">
<title>node8&#45;&gt;node1</title>
<path fill="none" stroke="black" d="M1566.84,-331.58C1419.81,-326.72 872.06,-307.69 421.66,-279.8 401.07,-278.53 379.38,-277.02 358.03,-275.43"/>
<polygon fill="black" stroke="black" points="358.3,-271.94 348.06,-274.67 357.77,-278.92 358.3,-271.94"/>
</g>
<!-- node8&#45;&gt;node2 -->
<g id="edge10" class="edge">
<title>node8&#45;&gt;node2</title>
<path fill="none" stroke="black" d="M1566.86,-330C1445.11,-320.95 1057.97,-292.18 831.67,-275.36"/>
<polygon fill="black" stroke="black" points="832.09,-271.89 821.86,-274.63 831.57,-278.87 832.09,-271.89"/>
</g>
<!-- node8&#45;&gt;node3 -->
<g id="edge11" class="edge">
<title>node8&#45;&gt;node3</title>
<path fill="none" stroke="black" d="M1567.08,-327.31C1495.4,-316.84 1336.86,-293.67 1230.62,-278.14"/>
<polygon fill="black" stroke="black" points="1231.44,-274.72 1221.04,-276.74 1230.42,-281.65 1231.44,-274.72"/>
</g>
<!-- node8&#45;&gt;node4 -->
<g id="edge12" class="edge">
<title>node8&#45;&gt;node4</title>
<path fill="none" stroke="black" d="M1578.53,-320.61C1555.96,-310.08 1522.92,-294.66 1496.64,-282.4"/>
<polygon fill="black" stroke="black" points="1498.12,-279.22 1487.58,-278.17 1495.16,-285.57 1498.12,-279.22"/>
</g>
<!-- node8&#45;&gt;node5 -->
<g id="edge13" class="edge">
<title>node8&#45;&gt;node5</title>
<path fill="none" stroke="black" d="M1627.78,-318.97C1644.15,-309.18 1666.44,-295.84 1685.2,-284.62"/>
<polygon fill="black" stroke="black" points="1686.83,-287.73 1693.61,-279.59 1683.23,-281.72 1686.83,-287.73"/>
</g>
<!-- node8&#45;&gt;node6 -->
<g id="edge14" class="edge">
<title>node8&#45;&gt;node6</title>
<path fill="none" stroke="black" d="M1642.45,-327.02C1712.36,-316.31 1863.89,-293.1 1964.32,-277.71"/>
<polygon fill="black" stroke="black" points="1964.84,-281.18 1974.2,-276.2 1963.78,-274.26 1964.84,-281.18"/>
</g>
<!-- node8&#45;&gt;node7 -->
<g id="edge15" class="edge">
<title>node8&#45;&gt;node7</title>
<path fill="none" stroke="black" d="M1642.33,-330.01C1740.75,-322.64 2013.75,-301.7 2240.66,-279.8 2254.16,-278.5 2268.32,-277.06 2282.35,-275.58"/>
<polygon fill="black" stroke="black" points="2282.49,-279.08 2292.06,-274.54 2281.75,-272.12 2282.49,-279.08"/>
</g>
<!-- node8&#45;&gt;node19 -->
<g id="edge16" class="edge">
<title>node8&#45;&gt;node19</title>
<path fill="none" stroke="black" d="M1642.25,-332.63C1770.06,-331.64 2199.48,-324.94 2550.66,-279.8 2560.1,-278.59 2570.07,-276.92 2579.71,-275.1"/>
<polygon fill="black" stroke="black" points="2580.21,-278.57 2589.34,-273.21 2578.86,-271.7 2580.21,-278.57"/>
</g>
<!-- node20 -->
<g id="node11" class="node">
<title>node20</title>
<polygon fill="none" stroke="black" points="2606.81,-185.8 2533.89,-201.03 2238.66,-207.8 1943.43,-201.03 1870.52,-185.8 2074.82,-173.59 2402.5,-173.59 2606.81,-185.8"/>
<text text-anchor="middle" x="2238.66" y="-185.53" font-family="Times,serif" font-size="12.00">&#45;Wl,&#45;&#45;no&#45;as&#45;needed,&quot;/home/rmontanana/Code/libtorch/lib/libtorch.so&quot; &#45;Wl,&#45;&#45;as&#45;needed</text>
</g>
<!-- node19&#45;&gt;node20 -->
<g id="edge17" class="edge">
<title>node19&#45;&gt;node20</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2583.63,-250.21C2572.76,-248.03 2561.34,-245.79 2550.66,-243.8 2482.14,-231.05 2404.92,-217.93 2344.44,-207.93"/>
<polygon fill="black" stroke="black" points="2345.28,-204.52 2334.84,-206.34 2344.14,-211.42 2345.28,-204.52"/>
</g>
<!-- node9 -->
<g id="node12" class="node">
<title>node9</title>
<polygon fill="none" stroke="black" points="2542.56,-123.37 2445.66,-135.8 2348.77,-123.37 2385.78,-103.24 2505.55,-103.24 2542.56,-123.37"/>
<text text-anchor="middle" x="2445.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch_cpu_library</text>
</g>
<!-- node19&#45;&gt;node9 -->
<g id="edge18" class="edge">
<title>node19&#45;&gt;node9</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2635.72,-246.84C2636.4,-227.49 2634.61,-192.58 2615.66,-171.8 2601.13,-155.87 2551.93,-141.56 2510.18,-131.84"/>
<polygon fill="black" stroke="black" points="2511.2,-128.48 2500.67,-129.68 2509.65,-135.31 2511.2,-128.48"/>
</g>
<!-- node13 -->
<g id="node16" class="node">
<title>node13</title>
<polygon fill="none" stroke="black" points="3056.45,-195.37 2953.66,-207.8 2850.87,-195.37 2890.13,-175.24 3017.19,-175.24 3056.45,-195.37"/>
<text text-anchor="middle" x="2953.66" y="-185.53" font-family="Times,serif" font-size="12.00">torch_cuda_library</text>
</g>
<!-- node19&#45;&gt;node13 -->
<g id="edge22" class="edge">
<title>node19&#45;&gt;node13</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2685.21,-249.71C2741.11,-237.45 2831.21,-217.67 2891.42,-204.46"/>
<polygon fill="black" stroke="black" points="2891.8,-207.96 2900.82,-202.4 2890.3,-201.13 2891.8,-207.96"/>
</g>
<!-- node10 -->
<g id="node13" class="node">
<title>node10</title>
<polygon fill="none" stroke="black" points="2362.4,-27.9 2285.6,-43.12 1974.66,-49.9 1663.72,-43.12 1586.93,-27.9 1802.1,-15.68 2147.22,-15.68 2362.4,-27.9"/>
<text text-anchor="middle" x="1974.66" y="-27.63" font-family="Times,serif" font-size="12.00">&#45;Wl,&#45;&#45;no&#45;as&#45;needed,&quot;/home/rmontanana/Code/libtorch/lib/libtorch_cpu.so&quot; &#45;Wl,&#45;&#45;as&#45;needed</text>
</g>
<!-- node9&#45;&gt;node10 -->
<g id="edge19" class="edge">
<title>node9&#45;&gt;node10</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2381.16,-105.31C2301.63,-91.15 2165.65,-66.92 2073.05,-50.43"/>
<polygon fill="black" stroke="black" points="2073.93,-47.03 2063.48,-48.72 2072.71,-53.92 2073.93,-47.03"/>
</g>
<!-- node11 -->
<g id="node14" class="node">
<title>node11</title>
<polygon fill="none" stroke="black" points="2510.72,-37.46 2445.66,-49.9 2380.61,-37.46 2405.46,-17.34 2485.87,-17.34 2510.72,-37.46"/>
<text text-anchor="middle" x="2445.66" y="-27.63" font-family="Times,serif" font-size="12.00">caffe2::mkl</text>
</g>
<!-- node9&#45;&gt;node11 -->
<g id="edge20" class="edge">
<title>node9&#45;&gt;node11</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2445.66,-102.95C2445.66,-91.68 2445.66,-75.4 2445.66,-61.37"/>
<polygon fill="black" stroke="black" points="2449.16,-61.78 2445.66,-51.78 2442.16,-61.78 2449.16,-61.78"/>
</g>
<!-- node12 -->
<g id="node15" class="node">
<title>node12</title>
<polygon fill="none" stroke="black" points="2794.95,-41.76 2661.66,-63.8 2528.37,-41.76 2579.28,-6.09 2744.04,-6.09 2794.95,-41.76"/>
<text text-anchor="middle" x="2661.66" y="-34.75" font-family="Times,serif" font-size="12.00">dummy</text>
<text text-anchor="middle" x="2661.66" y="-20.5" font-family="Times,serif" font-size="12.00">(protobuf::libprotobuf)</text>
</g>
<!-- node9&#45;&gt;node12 -->
<g id="edge21" class="edge">
<title>node9&#45;&gt;node12</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2481.82,-102.76C2512.55,-90.82 2557.5,-73.36 2594.77,-58.89"/>
<polygon fill="black" stroke="black" points="2595.6,-62.32 2603.65,-55.44 2593.06,-55.79 2595.6,-62.32"/>
</g>
<!-- node13&#45;&gt;node9 -->
<g id="edge28" class="edge">
<title>node13&#45;&gt;node9</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2880.59,-179.79C2799.97,-169.71 2666.42,-152.57 2551.66,-135.8 2540.2,-134.13 2528.06,-132.27 2516.24,-130.41"/>
<polygon fill="black" stroke="black" points="2516.96,-126.98 2506.54,-128.86 2515.87,-133.89 2516.96,-126.98"/>
</g>
<!-- node14 -->
<g id="node17" class="node">
<title>node14</title>
<polygon fill="none" stroke="black" points="3346.69,-113.8 3268.85,-129.03 2953.66,-135.8 2638.48,-129.03 2560.63,-113.8 2778.75,-101.59 3128.58,-101.59 3346.69,-113.8"/>
<text text-anchor="middle" x="2953.66" y="-113.53" font-family="Times,serif" font-size="12.00">&#45;Wl,&#45;&#45;no&#45;as&#45;needed,&quot;/home/rmontanana/Code/libtorch/lib/libtorch_cuda.so&quot; &#45;Wl,&#45;&#45;as&#45;needed</text>
</g>
<!-- node13&#45;&gt;node14 -->
<g id="edge23" class="edge">
<title>node13&#45;&gt;node14</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2953.66,-174.97C2953.66,-167.13 2953.66,-157.01 2953.66,-147.53"/>
<polygon fill="black" stroke="black" points="2957.16,-147.59 2953.66,-137.59 2950.16,-147.59 2957.16,-147.59"/>
</g>
<!-- node15 -->
<g id="node18" class="node">
<title>node15</title>
<polygon fill="none" stroke="black" points="3514.74,-123.37 3439.66,-135.8 3364.58,-123.37 3393.26,-103.24 3486.06,-103.24 3514.74,-123.37"/>
<text text-anchor="middle" x="3439.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch::cudart</text>
</g>
<!-- node13&#45;&gt;node15 -->
<g id="edge24" class="edge">
<title>node13&#45;&gt;node15</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3028.35,-180.51C3109.24,-171.17 3241.96,-154.78 3355.66,-135.8 3364.43,-134.34 3373.69,-132.63 3382.72,-130.88"/>
<polygon fill="black" stroke="black" points="3383.38,-134.31 3392.51,-128.93 3382.02,-127.45 3383.38,-134.31"/>
</g>
<!-- node17 -->
<g id="node20" class="node">
<title>node17</title>
<polygon fill="none" stroke="black" points="3716.84,-123.37 3624.66,-135.8 3532.48,-123.37 3567.69,-103.24 3681.63,-103.24 3716.84,-123.37"/>
<text text-anchor="middle" x="3624.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch::nvtoolsext</text>
</g>
<!-- node13&#45;&gt;node17 -->
<g id="edge26" class="edge">
<title>node13&#45;&gt;node17</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3033.64,-183.25C3144.1,-175.14 3349.47,-158.53 3523.66,-135.8 3534.84,-134.35 3546.67,-132.57 3558.15,-130.72"/>
<polygon fill="black" stroke="black" points="3558.68,-134.18 3567.98,-129.1 3557.54,-127.27 3558.68,-134.18"/>
</g>
<!-- node16 -->
<g id="node19" class="node">
<title>node16</title>
<polygon fill="none" stroke="black" points="3510.78,-27.9 3496.7,-43.12 3439.66,-49.9 3382.63,-43.12 3368.54,-27.9 3408.01,-15.68 3471.31,-15.68 3510.78,-27.9"/>
<text text-anchor="middle" x="3439.66" y="-27.63" font-family="Times,serif" font-size="12.00">CUDA::cudart</text>
</g>
<!-- node15&#45;&gt;node16 -->
<g id="edge25" class="edge">
<title>node15&#45;&gt;node16</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3439.66,-102.95C3439.66,-91.68 3439.66,-75.4 3439.66,-61.37"/>
<polygon fill="black" stroke="black" points="3443.16,-61.78 3439.66,-51.78 3436.16,-61.78 3443.16,-61.78"/>
</g>
<!-- node18 -->
<g id="node21" class="node">
<title>node18</title>
<polygon fill="none" stroke="black" points="3714.32,-27.9 3696.56,-43.12 3624.66,-49.9 3552.77,-43.12 3535.01,-27.9 3584.76,-15.68 3664.56,-15.68 3714.32,-27.9"/>
<text text-anchor="middle" x="3624.66" y="-27.63" font-family="Times,serif" font-size="12.00">CUDA::nvToolsExt</text>
</g>
<!-- node17&#45;&gt;node18 -->
<g id="edge27" class="edge">
<title>node17&#45;&gt;node18</title>
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3624.66,-102.95C3624.66,-91.68 3624.66,-75.4 3624.66,-61.37"/>
<polygon fill="black" stroke="black" points="3628.16,-61.78 3624.66,-51.78 3621.16,-61.78 3628.16,-61.78"/>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 18 KiB

View File

@@ -5,6 +5,7 @@
The hyperparameters defined in the algorithm are: The hyperparameters defined in the algorithm are:
- ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *true*. - ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *true*.
- ***bisection_best*** (*boolean*): If set to *true*, the algorithm will take as *priorAccuracy* the best accuracy computed. If set to *false*, it will take the last accuracy as *priorAccuracy*. Default value: *false*.
- ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*. - ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*.
@@ -26,4 +27,4 @@ The hyperparameters defined in the algorithm are:
## Operation ## Operation
### [Algorithm](./algorithm.md) ### [Base Algorithm](./algorithm.md)

2912
docs/Doxyfile.in Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -105,8 +105,7 @@
2. $numItemsPack \leftarrow 0$ 2. $numItemsPack \leftarrow 0$
10. If 10. If $(Vars == \emptyset \lor tolerance>maxTolerance) \; finished \leftarrow True$
$(Vars == \emptyset \lor tolerance>maxTolerance) \; finished \leftarrow True$
11. $lastAccuracy \leftarrow max(lastAccuracy, actualAccuracy)$ 11. $lastAccuracy \leftarrow max(lastAccuracy, actualAccuracy)$

BIN
docs/logo_small.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

View File

@@ -1,5 +0,0 @@
filter = bayesnet/
exclude-directories = build_debug/lib/
exclude = bayesnet/utils/loguru.*
print-summary = yes
sort = uncovered-percent

View File

@@ -1,168 +0,0 @@
#include "ArffFiles.h"
#include <fstream>
#include <sstream>
#include <map>
#include <iostream>
ArffFiles::ArffFiles() = default;
// Returns a copy of the raw data lines collected by loadCommon().
std::vector<std::string> ArffFiles::getLines() const
{
return lines;
}
// Returns the number of raw data lines collected by loadCommon().
unsigned long int ArffFiles::getSize() const
{
return lines.size();
}
// Returns a copy of the (name, type) attribute pairs; load() removes the class attribute from this list.
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
{
return attributes;
}
// Returns the name of the attribute selected as class by load().
std::string ArffFiles::getClassName() const
{
return className;
}
// Returns the declared type text of the attribute selected as class by load().
std::string ArffFiles::getClassType() const
{
return classType;
}
// Returns a mutable reference to the feature matrix, indexed as X[feature][sample].
std::vector<std::vector<float>>& ArffFiles::getX()
{
return X;
}
// Returns a mutable reference to the integer-coded class labels produced by factorize().
std::vector<int>& ArffFiles::getY()
{
return y;
}
// Reads the file line by line, storing every "@attribute" declaration as a
// (name, type) pair in `attributes` and every data row verbatim in `lines`.
// Empty lines, '%' comments and any other '@' directive are ignored.
// Throws std::invalid_argument when the file cannot be opened or when it
// declares no attribute at all.
void ArffFiles::loadCommon(std::string fileName)
{
    std::ifstream input(fileName);
    if (!input.is_open())
        throw std::invalid_argument("Unable to open file");
    // Kept outside the loop: when a declaration is too short to extract both
    // tokens, the values read from the previous declaration are reused.
    std::string keyword;
    std::string attribute;
    for (std::string line; getline(input, line);) {
        const bool blank = line.empty() || line == "\r" || line == " ";
        if (blank || line[0] == '%')
            continue;
        const bool isAttribute = line.find("@attribute") != std::string::npos
            || line.find("@ATTRIBUTE") != std::string::npos;
        if (isAttribute) {
            std::stringstream tokens(line);
            tokens >> keyword >> attribute;
            // Everything after the name is the type, re-joined with single spaces.
            std::string type;
            for (std::string word; tokens >> word;) {
                type += word + " ";
            }
            attributes.emplace_back(trim(attribute), trim(type));
        } else if (line[0] != '@') {
            lines.push_back(line);
        }
    }
    input.close();
    if (attributes.empty())
        throw std::invalid_argument("No attributes found");
}
// Loads the file taking the class from the last declared attribute
// (classLast == true) or from the first one (classLast == false).
// The class attribute is removed from `attributes` before the dataset is built.
void ArffFiles::load(const std::string& fileName, bool classLast)
{
    loadCommon(fileName); // guarantees at least one attribute or throws
    const auto classPos = classLast ? attributes.end() - 1 : attributes.begin();
    className = classPos->first;
    classType = classPos->second;
    attributes.erase(classPos);
    // With the class gone, the last column index equals the remaining attribute count.
    generateDataset(classLast ? static_cast<int>(attributes.size()) : 0);
}
// Loads the file taking as class the first attribute whose name equals `name`.
// Throws std::invalid_argument("Class name not found") when no attribute matches.
void ArffFiles::load(const std::string& fileName, const std::string& name)
{
    loadCommon(fileName);
    auto candidate = attributes.begin();
    while (candidate != attributes.end() && candidate->first != name)
        ++candidate;
    if (candidate == attributes.end())
        throw std::invalid_argument("Class name not found");
    // Column position of the class inside each data row (taken before the erase).
    const int labelIndex = static_cast<int>(candidate - attributes.begin());
    className = candidate->first;
    classType = candidate->second;
    attributes.erase(candidate);
    generateDataset(labelIndex);
}
// Builds X (indexed as X[feature][sample]) and y from the raw data lines.
//
// labelIndex is the column of each comma-separated row that holds the class
// label; every other column is parsed with std::stof into the next feature row.
// Rows containing a missing value ("?") are dropped from both X and y.
//
// Fixes over the previous version:
//  - a row with several "?" was queued for removal once per "?", so it was
//    erased several times;
//  - rows were erased in ascending order, which shifted the remaining indices
//    and made later erasures hit the wrong row (or run past the end);
//  - a row with more values than declared attributes wrote past the end of X.
//
// Throws std::invalid_argument when a row has too many values (std::stof may
// also throw on values that are not numbers).
void ArffFiles::generateDataset(int labelIndex)
{
    X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
    auto yy = std::vector<std::string>(lines.size(), "");
    auto removeLines = std::vector<int>(); // Rows with missing values, ascending, no duplicates
    for (size_t i = 0; i < lines.size(); i++) {
        std::stringstream ss(lines[i]);
        std::string value;
        int pos = 0;
        size_t xIndex = 0;
        while (getline(ss, value, ',')) {
            if (pos++ == labelIndex) {
                yy[i] = value;
                continue;
            }
            if (xIndex >= X.size()) {
                throw std::invalid_argument("Too many values in data line");
            }
            if (value == "?") {
                X[xIndex++][i] = -1;
                // Rows are visited in order, so a repeated index is always the last one queued.
                const int row = static_cast<int>(i);
                if (removeLines.empty() || removeLines.back() != row) {
                    removeLines.push_back(row);
                }
            } else {
                X[xIndex++][i] = std::stof(value);
            }
        }
    }
    // Erase from the back so the indices still to be processed stay valid.
    for (auto it = removeLines.rbegin(); it != removeLines.rend(); ++it) {
        yy.erase(yy.begin() + *it);
        for (auto& x : X) {
            x.erase(x.begin() + *it);
        }
    }
    y = factorize(yy);
}
// Returns `source` without leading and trailing spaces, single quotes,
// newlines, carriage returns and tabs. A string made only of those
// characters yields an empty string.
std::string ArffFiles::trim(const std::string& source)
{
    const char* const strip = " '\n\r\t";
    const auto first = source.find_first_not_of(strip);
    if (first == std::string::npos) {
        return std::string();
    }
    const auto last = source.find_last_not_of(strip);
    return source.substr(first, last - first + 1);
}
// Encodes each label as an integer: the first distinct label gets 0, the
// next new one 1, and so on. The result has one code per input label, in
// input order.
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{
    std::map<std::string, int> codes;
    std::vector<int> encoded;
    encoded.reserve(labels_t.size());
    for (const auto& label : labels_t) {
        // emplace leaves an existing entry untouched, so a label keeps the
        // code assigned at its first occurrence; a new label receives the
        // number of distinct labels seen so far.
        const auto entry = codes.emplace(label, static_cast<int>(codes.size()));
        encoded.push_back(entry.first->second);
    }
    return encoded;
}

View File

@@ -1,32 +0,0 @@
#ifndef ARFFFILES_H
#define ARFFFILES_H
#include <string>
#include <vector>
// Loader for ARFF-style text files: keeps the declared attributes, the raw
// data lines, and the numeric dataset (X, y) built from them.
class ArffFiles {
private:
// Raw data rows, stored verbatim as read from the file.
std::vector<std::string> lines;
// (name, type) pairs taken from the "@attribute" declarations; load() removes the class attribute.
std::vector<std::pair<std::string, std::string>> attributes;
// Name and declared type text of the attribute chosen as class.
std::string className;
std::string classType;
// Feature matrix indexed as X[feature][sample].
std::vector<std::vector<float>> X;
// Integer-coded class labels.
std::vector<int> y;
// Builds X and y from `lines`; the argument is the column index of the class label.
void generateDataset(int);
// Reads the file named by the argument into `lines` and `attributes`.
void loadCommon(std::string);
public:
ArffFiles();
// Loads a file; the flag selects the last (true) or first (false) attribute as class.
void load(const std::string&, bool = true);
// Loads a file; the second argument is the name of the attribute to use as class.
void load(const std::string&, const std::string&);
std::vector<std::string> getLines() const;
// Number of raw data lines read from the file.
unsigned long int getSize() const;
std::string getClassName() const;
std::string getClassType() const;
// Strips leading/trailing spaces, single quotes, newlines, carriage returns and tabs.
static std::string trim(const std::string&);
std::vector<std::vector<float>>& getX();
std::vector<int>& getY();
std::vector<std::pair<std::string, std::string>> getAttributes() const;
// Maps each label to an integer code assigned in order of first appearance.
static std::vector<int> factorize(const std::vector<std::string>& labels_t);
};
#endif

View File

@@ -1 +0,0 @@
add_library(ArffFiles ArffFiles.cc)

Submodule lib/catch2 updated: bff6e35e2b...029fe3b460

Submodule lib/folding updated: 71d6055be4...2ac43e32ac

Submodule lib/json updated: 199dea11b1...620034ecec

Submodule lib/mdlp updated: 5708dc3de9...7d62d6af4a

BIN
logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 543 KiB

View File

@@ -4,17 +4,19 @@ project(bayesnet_sample)
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD 17)
find_package(Torch REQUIRED) find_package(Torch CONFIG REQUIRED)
find_library(BayesNet NAMES BayesNet.a libBayesNet.a REQUIRED) find_package(bayesnet CONFIG REQUIRED)
find_package(fimdlp CONFIG REQUIRED)
find_package(folding CONFIG REQUIRED)
find_package(arff-files CONFIG REQUIRED)
find_package(nlohmann_json CONFIG REQUIRED)
include_directories(
lib/Files
lib/mdlp
lib/json/include
/usr/local/include
)
add_subdirectory(lib/Files)
add_subdirectory(lib/mdlp)
add_executable(bayesnet_sample sample.cc) add_executable(bayesnet_sample sample.cc)
target_link_libraries(bayesnet_sample ArffFiles mdlp "${TORCH_LIBRARIES}" "${BayesNet}") target_link_libraries(bayesnet_sample PRIVATE
fimdlp::fimdlp
arff-files::arff-files
"${TORCH_LIBRARIES}"
bayesnet::bayesnet
nlohmann_json::nlohmann_json
folding::folding
)

View File

@@ -1,174 +0,0 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include "ArffFiles.h"
#include <fstream>
#include <sstream>
#include <map>
#include <iostream>
ArffFiles::ArffFiles() = default;
// Returns a copy of the raw data lines collected by loadCommon().
std::vector<std::string> ArffFiles::getLines() const
{
return lines;
}
// Returns the number of raw data lines collected by loadCommon().
unsigned long int ArffFiles::getSize() const
{
return lines.size();
}
// Returns a copy of the (name, type) attribute pairs; load() removes the class attribute from this list.
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
{
return attributes;
}
// Returns the name of the attribute selected as class by load().
std::string ArffFiles::getClassName() const
{
return className;
}
// Returns the declared type text of the attribute selected as class by load().
std::string ArffFiles::getClassType() const
{
return classType;
}
// Returns a mutable reference to the feature matrix, indexed as X[feature][sample].
std::vector<std::vector<float>>& ArffFiles::getX()
{
return X;
}
// Returns a mutable reference to the integer-coded class labels produced by factorize().
std::vector<int>& ArffFiles::getY()
{
return y;
}
// Reads the file line by line, storing every "@attribute" declaration as a
// (name, type) pair in `attributes` and every data row verbatim in `lines`.
// Empty lines, '%' comments and any other '@' directive are ignored.
// Throws std::invalid_argument when the file cannot be opened or when it
// declares no attribute at all.
void ArffFiles::loadCommon(std::string fileName)
{
    std::ifstream input(fileName);
    if (!input.is_open())
        throw std::invalid_argument("Unable to open file");
    // Kept outside the loop: when a declaration is too short to extract both
    // tokens, the values read from the previous declaration are reused.
    std::string keyword;
    std::string attribute;
    for (std::string line; getline(input, line);) {
        const bool blank = line.empty() || line == "\r" || line == " ";
        if (blank || line[0] == '%')
            continue;
        const bool isAttribute = line.find("@attribute") != std::string::npos
            || line.find("@ATTRIBUTE") != std::string::npos;
        if (isAttribute) {
            std::stringstream tokens(line);
            tokens >> keyword >> attribute;
            // Everything after the name is the type, re-joined with single spaces.
            std::string type;
            for (std::string word; tokens >> word;) {
                type += word + " ";
            }
            attributes.emplace_back(trim(attribute), trim(type));
        } else if (line[0] != '@') {
            lines.push_back(line);
        }
    }
    input.close();
    if (attributes.empty())
        throw std::invalid_argument("No attributes found");
}
// Loads the file taking the class from the last declared attribute
// (classLast == true) or from the first one (classLast == false).
// The class attribute is removed from `attributes` before the dataset is built.
void ArffFiles::load(const std::string& fileName, bool classLast)
{
    loadCommon(fileName); // guarantees at least one attribute or throws
    const auto classPos = classLast ? attributes.end() - 1 : attributes.begin();
    className = classPos->first;
    classType = classPos->second;
    attributes.erase(classPos);
    // With the class gone, the last column index equals the remaining attribute count.
    generateDataset(classLast ? static_cast<int>(attributes.size()) : 0);
}
// Loads the file taking as class the first attribute whose name equals `name`.
// Throws std::invalid_argument("Class name not found") when no attribute matches.
void ArffFiles::load(const std::string& fileName, const std::string& name)
{
    loadCommon(fileName);
    auto candidate = attributes.begin();
    while (candidate != attributes.end() && candidate->first != name)
        ++candidate;
    if (candidate == attributes.end())
        throw std::invalid_argument("Class name not found");
    // Column position of the class inside each data row (taken before the erase).
    const int labelIndex = static_cast<int>(candidate - attributes.begin());
    className = candidate->first;
    classType = candidate->second;
    attributes.erase(candidate);
    generateDataset(labelIndex);
}
// Builds X (indexed as X[feature][sample]) and y from the raw data lines.
//
// labelIndex is the column of each comma-separated row that holds the class
// label; every other column is parsed with std::stof into the next feature row.
// Rows containing a missing value ("?") are dropped from both X and y.
//
// Fixes over the previous version:
//  - a row with several "?" was queued for removal once per "?", so it was
//    erased several times;
//  - rows were erased in ascending order, which shifted the remaining indices
//    and made later erasures hit the wrong row (or run past the end);
//  - a row with more values than declared attributes wrote past the end of X.
//
// Throws std::invalid_argument when a row has too many values (std::stof may
// also throw on values that are not numbers).
void ArffFiles::generateDataset(int labelIndex)
{
    X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
    auto yy = std::vector<std::string>(lines.size(), "");
    auto removeLines = std::vector<int>(); // Rows with missing values, ascending, no duplicates
    for (size_t i = 0; i < lines.size(); i++) {
        std::stringstream ss(lines[i]);
        std::string value;
        int pos = 0;
        size_t xIndex = 0;
        while (getline(ss, value, ',')) {
            if (pos++ == labelIndex) {
                yy[i] = value;
                continue;
            }
            if (xIndex >= X.size()) {
                throw std::invalid_argument("Too many values in data line");
            }
            if (value == "?") {
                X[xIndex++][i] = -1;
                // Rows are visited in order, so a repeated index is always the last one queued.
                const int row = static_cast<int>(i);
                if (removeLines.empty() || removeLines.back() != row) {
                    removeLines.push_back(row);
                }
            } else {
                X[xIndex++][i] = std::stof(value);
            }
        }
    }
    // Erase from the back so the indices still to be processed stay valid.
    for (auto it = removeLines.rbegin(); it != removeLines.rend(); ++it) {
        yy.erase(yy.begin() + *it);
        for (auto& x : X) {
            x.erase(x.begin() + *it);
        }
    }
    y = factorize(yy);
}
// Returns `source` without leading and trailing spaces, single quotes,
// newlines, carriage returns and tabs. A string made only of those
// characters yields an empty string.
std::string ArffFiles::trim(const std::string& source)
{
    const char* const strip = " '\n\r\t";
    const auto first = source.find_first_not_of(strip);
    if (first == std::string::npos) {
        return std::string();
    }
    const auto last = source.find_last_not_of(strip);
    return source.substr(first, last - first + 1);
}
// Encodes each label as an integer: the first distinct label gets 0, the
// next new one 1, and so on. The result has one code per input label, in
// input order.
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{
    std::map<std::string, int> codes;
    std::vector<int> encoded;
    encoded.reserve(labels_t.size());
    for (const auto& label : labels_t) {
        // emplace leaves an existing entry untouched, so a label keeps the
        // code assigned at its first occurrence; a new label receives the
        // number of distinct labels seen so far.
        const auto entry = codes.emplace(label, static_cast<int>(codes.size()));
        encoded.push_back(entry.first->second);
    }
    return encoded;
}

View File

@@ -1,38 +0,0 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef ARFFFILES_H
#define ARFFFILES_H
#include <string>
#include <vector>
// Loader for ARFF-style text files: keeps the declared attributes, the raw
// data lines, and the numeric dataset (X, y) built from them.
class ArffFiles {
private:
// Raw data rows, stored verbatim as read from the file.
std::vector<std::string> lines;
// (name, type) pairs taken from the "@attribute" declarations; load() removes the class attribute.
std::vector<std::pair<std::string, std::string>> attributes;
// Name and declared type text of the attribute chosen as class.
std::string className;
std::string classType;
// Feature matrix indexed as X[feature][sample].
std::vector<std::vector<float>> X;
// Integer-coded class labels.
std::vector<int> y;
// Builds X and y from `lines`; the argument is the column index of the class label.
void generateDataset(int);
// Reads the file named by the argument into `lines` and `attributes`.
void loadCommon(std::string);
public:
ArffFiles();
// Loads a file; the flag selects the last (true) or first (false) attribute as class.
void load(const std::string&, bool = true);
// Loads a file; the second argument is the name of the attribute to use as class.
void load(const std::string&, const std::string&);
std::vector<std::string> getLines() const;
// Number of raw data lines read from the file.
unsigned long int getSize() const;
std::string getClassName() const;
std::string getClassType() const;
// Strips leading/trailing spaces, single quotes, newlines, carriage returns and tabs.
static std::string trim(const std::string&);
std::vector<std::vector<float>>& getX();
std::vector<int>& getY();
std::vector<std::pair<std::string, std::string>> getAttributes() const;
// Maps each label to an integer code assigned in order of first appearance.
static std::vector<int> factorize(const std::vector<std::string>& labels_t);
};
#endif

View File

@@ -1 +0,0 @@
add_library(ArffFiles ArffFiles.cc)

View File

@@ -1,55 +0,0 @@
// __ _____ _____ _____
// __| | __| | | | JSON for Modern C++
// | | |__ | | | | | | version 3.11.3
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
//
// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
// SPDX-License-Identifier: MIT
#pragma once
#include <utility>
#include <nlohmann/detail/abi_macros.hpp>
#include <nlohmann/detail/conversions/from_json.hpp>
#include <nlohmann/detail/conversions/to_json.hpp>
#include <nlohmann/detail/meta/identity_tag.hpp>
NLOHMANN_JSON_NAMESPACE_BEGIN
/// @brief default serializer: forwards every conversion to the free functions
///        ::nlohmann::from_json / ::nlohmann::to_json
/// @sa https://json.nlohmann.me/api/adl_serializer/
template<typename ValueType, typename>
struct adl_serializer
{
/// @brief convert a JSON value to any value type
/// @note writes the result into @a val; only participates in overload
///       resolution when ::nlohmann::from_json(j, val) is well-formed, and is
///       noexcept exactly when that call is
/// @sa https://json.nlohmann.me/api/adl_serializer/from_json/
template<typename BasicJsonType, typename TargetType = ValueType>
static auto from_json(BasicJsonType && j, TargetType& val) noexcept(
noexcept(::nlohmann::from_json(std::forward<BasicJsonType>(j), val)))
-> decltype(::nlohmann::from_json(std::forward<BasicJsonType>(j), val), void())
{
::nlohmann::from_json(std::forward<BasicJsonType>(j), val);
}
/// @brief convert a JSON value to any value type
/// @note returning variant: the target type is selected with
///       detail::identity_tag<TargetType> and the converted value is returned
///       instead of being written to an out-parameter
/// @sa https://json.nlohmann.me/api/adl_serializer/from_json/
template<typename BasicJsonType, typename TargetType = ValueType>
static auto from_json(BasicJsonType && j) noexcept(
noexcept(::nlohmann::from_json(std::forward<BasicJsonType>(j), detail::identity_tag<TargetType> {})))
-> decltype(::nlohmann::from_json(std::forward<BasicJsonType>(j), detail::identity_tag<TargetType> {}))
{
return ::nlohmann::from_json(std::forward<BasicJsonType>(j), detail::identity_tag<TargetType> {});
}
/// @brief convert any value type to a JSON value
/// @note only participates in overload resolution when
///       ::nlohmann::to_json(j, val) is well-formed; @a val is perfectly forwarded
/// @sa https://json.nlohmann.me/api/adl_serializer/to_json/
template<typename BasicJsonType, typename TargetType = ValueType>
static auto to_json(BasicJsonType& j, TargetType && val) noexcept(
noexcept(::nlohmann::to_json(j, std::forward<TargetType>(val))))
-> decltype(::nlohmann::to_json(j, std::forward<TargetType>(val)), void())
{
::nlohmann::to_json(j, std::forward<TargetType>(val));
}
};
NLOHMANN_JSON_NAMESPACE_END

View File

@@ -1,103 +0,0 @@
// __ _____ _____ _____
// __| | __| | | | JSON for Modern C++
// | | |__ | | | | | | version 3.11.3
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
//
// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstdint> // uint8_t, uint64_t
#include <tuple> // tie
#include <utility> // move
#include <nlohmann/detail/abi_macros.hpp>
NLOHMANN_JSON_NAMESPACE_BEGIN
/// @brief binary container (derived from BinaryType) that can additionally
///        carry an optional numeric subtype
/// @sa https://json.nlohmann.me/api/byte_container_with_subtype/
template<typename BinaryType>
class byte_container_with_subtype : public BinaryType
{
  public:
    using container_type = BinaryType;
    using subtype_type = std::uint64_t;

    /// @brief empty container, no subtype
    /// @sa https://json.nlohmann.me/api/byte_container_with_subtype/byte_container_with_subtype/
    byte_container_with_subtype() noexcept(noexcept(container_type()))
        : container_type()
    {}

    /// @brief copy the bytes of @a b, no subtype
    /// @sa https://json.nlohmann.me/api/byte_container_with_subtype/byte_container_with_subtype/
    byte_container_with_subtype(const container_type& b) noexcept(noexcept(container_type(b)))
        : container_type(b)
    {}

    /// @brief take over the bytes of @a b, no subtype
    /// @sa https://json.nlohmann.me/api/byte_container_with_subtype/byte_container_with_subtype/
    byte_container_with_subtype(container_type&& b) noexcept(noexcept(container_type(std::move(b))))
        : container_type(std::move(b))
    {}

    /// @brief copy the bytes of @a b and tag them with @a subtype_
    /// @sa https://json.nlohmann.me/api/byte_container_with_subtype/byte_container_with_subtype/
    byte_container_with_subtype(const container_type& b, subtype_type subtype_) noexcept(noexcept(container_type(b)))
        : container_type(b)
        , m_subtype(subtype_)
        , m_has_subtype(true)
    {}

    /// @brief take over the bytes of @a b and tag them with @a subtype_
    /// @sa https://json.nlohmann.me/api/byte_container_with_subtype/byte_container_with_subtype/
    byte_container_with_subtype(container_type&& b, subtype_type subtype_) noexcept(noexcept(container_type(std::move(b))))
        : container_type(std::move(b))
        , m_subtype(subtype_)
        , m_has_subtype(true)
    {}

    /// @brief equal when the bytes, the subtype value and the subtype flag all match
    bool operator==(const byte_container_with_subtype& rhs) const
    {
        const BinaryType& lhs_bytes = *this;
        const BinaryType& rhs_bytes = rhs;
        return static_cast<bool>(lhs_bytes == rhs_bytes)
               && m_subtype == rhs.m_subtype
               && m_has_subtype == rhs.m_has_subtype;
    }

    /// @brief negation of operator==
    bool operator!=(const byte_container_with_subtype& rhs) const
    {
        const bool same = (rhs == *this);
        return !same;
    }

    /// @brief sets the binary subtype
    /// @sa https://json.nlohmann.me/api/byte_container_with_subtype/set_subtype/
    void set_subtype(subtype_type subtype_) noexcept
    {
        m_has_subtype = true;
        m_subtype = subtype_;
    }

    /// @brief return the binary subtype, or subtype_type(-1) when none is set
    /// @sa https://json.nlohmann.me/api/byte_container_with_subtype/subtype/
    constexpr subtype_type subtype() const noexcept
    {
        return m_has_subtype ? m_subtype : static_cast<subtype_type>(-1);
    }

    /// @brief return whether the value has a subtype
    /// @sa https://json.nlohmann.me/api/byte_container_with_subtype/has_subtype/
    constexpr bool has_subtype() const noexcept
    {
        return m_has_subtype;
    }

    /// @brief clears the binary subtype (value reset to 0, flag reset to false)
    /// @sa https://json.nlohmann.me/api/byte_container_with_subtype/clear_subtype/
    void clear_subtype() noexcept
    {
        m_has_subtype = false;
        m_subtype = 0;
    }

  private:
    subtype_type m_subtype = 0;   // meaningful only while m_has_subtype is true
    bool m_has_subtype = false;
};
NLOHMANN_JSON_NAMESPACE_END

View File

@@ -1,100 +0,0 @@
// __ _____ _____ _____
// __| | __| | | | JSON for Modern C++
// | | |__ | | | | | | version 3.11.3
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
//
// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
// SPDX-License-Identifier: MIT
#pragma once
// This file contains all macro definitions affecting or depending on the ABI
#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK
#if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH)
#if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 3
#warning "Already included a different version of the library!"
#endif
#endif
#endif
#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum)
#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum)
#define NLOHMANN_JSON_VERSION_PATCH 3 // NOLINT(modernize-macro-to-enum)
#ifndef JSON_DIAGNOSTICS
#define JSON_DIAGNOSTICS 0
#endif
#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
#define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0
#endif
#if JSON_DIAGNOSTICS
#define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag
#else
#define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS
#endif
#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
#define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp
#else
#define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON
#endif
#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION
#define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0
#endif
// Construct the namespace ABI tags component
#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b
#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \
NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b)
#define NLOHMANN_JSON_ABI_TAGS \
NLOHMANN_JSON_ABI_TAGS_CONCAT( \
NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \
NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON)
// Construct the namespace version component
#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \
_v ## major ## _ ## minor ## _ ## patch
#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \
NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch)
#if NLOHMANN_JSON_NAMESPACE_NO_VERSION
#define NLOHMANN_JSON_NAMESPACE_VERSION
#else
#define NLOHMANN_JSON_NAMESPACE_VERSION \
NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \
NLOHMANN_JSON_VERSION_MINOR, \
NLOHMANN_JSON_VERSION_PATCH)
#endif
// Combine namespace components
#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b
#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \
NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b)
#ifndef NLOHMANN_JSON_NAMESPACE
#define NLOHMANN_JSON_NAMESPACE \
nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \
NLOHMANN_JSON_ABI_TAGS, \
NLOHMANN_JSON_NAMESPACE_VERSION)
#endif
#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN
#define NLOHMANN_JSON_NAMESPACE_BEGIN \
namespace nlohmann \
{ \
inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \
NLOHMANN_JSON_ABI_TAGS, \
NLOHMANN_JSON_NAMESPACE_VERSION) \
{
#endif
#ifndef NLOHMANN_JSON_NAMESPACE_END
#define NLOHMANN_JSON_NAMESPACE_END \
} /* namespace (inline namespace) NOLINT(readability/namespace) */ \
} // namespace nlohmann
#endif

View File

@@ -1,497 +0,0 @@
// __ _____ _____ _____
// __| | __| | | | JSON for Modern C++
// | | |__ | | | | | | version 3.11.3
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
//
// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
// SPDX-License-Identifier: MIT
#pragma once
#include <algorithm> // transform
#include <array> // array
#include <forward_list> // forward_list
#include <iterator> // inserter, front_inserter, end
#include <map> // map
#include <string> // string
#include <tuple> // tuple, make_tuple
#include <type_traits> // is_arithmetic, is_same, is_enum, underlying_type, is_convertible
#include <unordered_map> // unordered_map
#include <utility> // pair, declval
#include <valarray> // valarray
#include <nlohmann/detail/exceptions.hpp>
#include <nlohmann/detail/macro_scope.hpp>
#include <nlohmann/detail/meta/cpp_future.hpp>
#include <nlohmann/detail/meta/identity_tag.hpp>
#include <nlohmann/detail/meta/std_fs.hpp>
#include <nlohmann/detail/meta/type_traits.hpp>
#include <nlohmann/detail/string_concat.hpp>
#include <nlohmann/detail/value_t.hpp>
NLOHMANN_JSON_NAMESPACE_BEGIN
namespace detail
{
// Converts a JSON null to std::nullptr_t.
// Throws type_error 302 when j is not null; otherwise sets n to nullptr.
template<typename BasicJsonType>
inline void from_json(const BasicJsonType& j, typename std::nullptr_t& n)
{
if (JSON_HEDLEY_UNLIKELY(!j.is_null()))
{
JSON_THROW(type_error::create(302, concat("type must be null, but is ", j.type_name()), &j));
}
n = nullptr;
}
// overloads for basic_json template parameters
// Reads a JSON number into val. Enabled for arithmetic types other than the
// JSON boolean type. Unsigned, signed and floating-point JSON numbers are each
// read through get_ptr and static_cast to ArithmeticType; every other JSON
// type (including boolean) throws type_error 302.
template < typename BasicJsonType, typename ArithmeticType,
enable_if_t < std::is_arithmetic<ArithmeticType>::value&&
!std::is_same<ArithmeticType, typename BasicJsonType::boolean_t>::value,
int > = 0 >
void get_arithmetic_value(const BasicJsonType& j, ArithmeticType& val)
{
switch (static_cast<value_t>(j))
{
case value_t::number_unsigned:
{
val = static_cast<ArithmeticType>(*j.template get_ptr<const typename BasicJsonType::number_unsigned_t*>());
break;
}
case value_t::number_integer:
{
val = static_cast<ArithmeticType>(*j.template get_ptr<const typename BasicJsonType::number_integer_t*>());
break;
}
case value_t::number_float:
{
val = static_cast<ArithmeticType>(*j.template get_ptr<const typename BasicJsonType::number_float_t*>());
break;
}
// all non-numeric types are listed explicitly and share the error path
case value_t::null:
case value_t::object:
case value_t::array:
case value_t::string:
case value_t::boolean:
case value_t::binary:
case value_t::discarded:
default:
JSON_THROW(type_error::create(302, concat("type must be number, but is ", j.type_name()), &j));
}
}
// Converts a JSON boolean to the basic_json boolean type.
// Throws type_error 302 when j is not a boolean; no implicit conversion from numbers.
template<typename BasicJsonType>
inline void from_json(const BasicJsonType& j, typename BasicJsonType::boolean_t& b)
{
if (JSON_HEDLEY_UNLIKELY(!j.is_boolean()))
{
JSON_THROW(type_error::create(302, concat("type must be boolean, but is ", j.type_name()), &j));
}
b = *j.template get_ptr<const typename BasicJsonType::boolean_t*>();
}
// Converts a JSON string to the basic_json string type.
// Throws type_error 302 when j is not a string.
template<typename BasicJsonType>
inline void from_json(const BasicJsonType& j, typename BasicJsonType::string_t& s)
{
if (JSON_HEDLEY_UNLIKELY(!j.is_string()))
{
JSON_THROW(type_error::create(302, concat("type must be string, but is ", j.type_name()), &j));
}
s = *j.template get_ptr<const typename BasicJsonType::string_t*>();
}
// Converts a JSON string to another string-like type. Enabled when StringType
// is assignable from string_t, has the same value_type, is not string_t itself
// and is not a json_ref. Throws type_error 302 when j is not a string.
template <
typename BasicJsonType, typename StringType,
enable_if_t <
std::is_assignable<StringType&, const typename BasicJsonType::string_t>::value
&& is_detected_exact<typename BasicJsonType::string_t::value_type, value_type_t, StringType>::value
&& !std::is_same<typename BasicJsonType::string_t, StringType>::value
&& !is_json_ref<StringType>::value, int > = 0 >
inline void from_json(const BasicJsonType& j, StringType& s)
{
if (JSON_HEDLEY_UNLIKELY(!j.is_string()))
{
JSON_THROW(type_error::create(302, concat("type must be string, but is ", j.type_name()), &j));
}
s = *j.template get_ptr<const typename BasicJsonType::string_t*>();
}
// Converts a JSON number to the basic_json floating-point type (delegates to get_arithmetic_value).
template<typename BasicJsonType>
inline void from_json(const BasicJsonType& j, typename BasicJsonType::number_float_t& val)
{
get_arithmetic_value(j, val);
}
// Converts a JSON number to the basic_json unsigned integer type (delegates to get_arithmetic_value).
template<typename BasicJsonType>
inline void from_json(const BasicJsonType& j, typename BasicJsonType::number_unsigned_t& val)
{
get_arithmetic_value(j, val);
}
// Converts a JSON number to the basic_json signed integer type (delegates to get_arithmetic_value).
template<typename BasicJsonType>
inline void from_json(const BasicJsonType& j, typename BasicJsonType::number_integer_t& val)
{
get_arithmetic_value(j, val);
}
#if !JSON_DISABLE_ENUM_SERIALIZATION
// Converts a JSON number to an enum: the number is read as the enum's
// underlying type and then static_cast to EnumType (no range validation here).
template<typename BasicJsonType, typename EnumType,
enable_if_t<std::is_enum<EnumType>::value, int> = 0>
inline void from_json(const BasicJsonType& j, EnumType& e)
{
typename std::underlying_type<EnumType>::type val;
get_arithmetic_value(j, val);
e = static_cast<EnumType>(val);
}
#endif // JSON_DISABLE_ENUM_SERIALIZATION
// forward_list doesn't have an insert method
// Converts a JSON array to std::forward_list<T>. Throws type_error 302 when j
// is not an array. The list is cleared first; elements are visited in reverse
// and pushed at the front, so the list ends up in the array's original order.
template<typename BasicJsonType, typename T, typename Allocator,
enable_if_t<is_getable<BasicJsonType, T>::value, int> = 0>
inline void from_json(const BasicJsonType& j, std::forward_list<T, Allocator>& l)
{
if (JSON_HEDLEY_UNLIKELY(!j.is_array()))
{
JSON_THROW(type_error::create(302, concat("type must be array, but is ", j.type_name()), &j));
}
l.clear();
std::transform(j.rbegin(), j.rend(),
std::front_inserter(l), [](const BasicJsonType & i)
{
return i.template get<T>();
});
}
// valarray doesn't have an insert method
// Converts a JSON array to std::valarray<T>. Throws type_error 302 when j is
// not an array. The valarray is resized to j.size() and then filled in place.
template<typename BasicJsonType, typename T,
enable_if_t<is_getable<BasicJsonType, T>::value, int> = 0>
inline void from_json(const BasicJsonType& j, std::valarray<T>& l)
{
if (JSON_HEDLEY_UNLIKELY(!j.is_array()))
{
JSON_THROW(type_error::create(302, concat("type must be array, but is ", j.type_name()), &j));
}
l.resize(j.size());
std::transform(j.begin(), j.end(), std::begin(l),
[](const BasicJsonType & elem)
{
return elem.template get<T>();
});
}
// Fills a C array of N elements from j: element i is read with j.at(i).
// There is no explicit is_array() check in this overload; exactly N elements are read.
template<typename BasicJsonType, typename T, std::size_t N>
auto from_json(const BasicJsonType& j, T (&arr)[N]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays)
-> decltype(j.template get<T>(), void())
{
for (std::size_t i = 0; i < N; ++i)
{
arr[i] = j.at(i).template get<T>();
}
}
// from_json_array_impl overload set, selected through priority_tag<N>.
// NOTE(review): priority_tag is defined elsewhere; presumably a higher N is
// preferred when several overloads are viable — confirm against cpp_future.hpp.

// priority_tag<3>: target is the basic_json array type itself; plain copy of the stored array.
template<typename BasicJsonType>
inline void from_json_array_impl(const BasicJsonType& j, typename BasicJsonType::array_t& arr, priority_tag<3> /*unused*/)
{
arr = *j.template get_ptr<const typename BasicJsonType::array_t*>();
}
// priority_tag<2>: target is std::array<T, N>; exactly N elements are read with j.at(i).
template<typename BasicJsonType, typename T, std::size_t N>
auto from_json_array_impl(const BasicJsonType& j, std::array<T, N>& arr,
priority_tag<2> /*unused*/)
-> decltype(j.template get<T>(), void())
{
for (std::size_t i = 0; i < N; ++i)
{
arr[i] = j.at(i).template get<T>();
}
}
// priority_tag<1>: target container offers reserve(size_type); capacity is
// reserved up front and the result is built in a temporary, then moved into arr.
template<typename BasicJsonType, typename ConstructibleArrayType,
enable_if_t<
std::is_assignable<ConstructibleArrayType&, ConstructibleArrayType>::value,
int> = 0>
auto from_json_array_impl(const BasicJsonType& j, ConstructibleArrayType& arr, priority_tag<1> /*unused*/)
-> decltype(
arr.reserve(std::declval<typename ConstructibleArrayType::size_type>()),
j.template get<typename ConstructibleArrayType::value_type>(),
void())
{
using std::end;
ConstructibleArrayType ret;
ret.reserve(j.size());
std::transform(j.begin(), j.end(),
std::inserter(ret, end(ret)), [](const BasicJsonType & i)
{
// get<BasicJsonType>() returns *this, this won't call a from_json
// method when value_type is BasicJsonType
return i.template get<typename ConstructibleArrayType::value_type>();
});
arr = std::move(ret);
}
// priority_tag<0>: same as the priority_tag<1> overload but without reserve();
// the result is built in a temporary, then moved into arr.
template<typename BasicJsonType, typename ConstructibleArrayType,
enable_if_t<
std::is_assignable<ConstructibleArrayType&, ConstructibleArrayType>::value,
int> = 0>
inline void from_json_array_impl(const BasicJsonType& j, ConstructibleArrayType& arr,
priority_tag<0> /*unused*/)
{
using std::end;
ConstructibleArrayType ret;
std::transform(
j.begin(), j.end(), std::inserter(ret, end(ret)),
[](const BasicJsonType & i)
{
// get<BasicJsonType>() returns *this, this won't call a from_json
// method when value_type is BasicJsonType
return i.template get<typename ConstructibleArrayType::value_type>();
});
arr = std::move(ret);
}
// Converts a JSON array to a constructible array-like type. Excluded by the
// constraints: object-like types, string-like types, the binary type and
// basic_json itself. Throws type_error 302 when j is not an array, then
// dispatches to from_json_array_impl starting at priority_tag<3>.
template < typename BasicJsonType, typename ConstructibleArrayType,
enable_if_t <
is_constructible_array_type<BasicJsonType, ConstructibleArrayType>::value&&
!is_constructible_object_type<BasicJsonType, ConstructibleArrayType>::value&&
!is_constructible_string_type<BasicJsonType, ConstructibleArrayType>::value&&
!std::is_same<ConstructibleArrayType, typename BasicJsonType::binary_t>::value&&
!is_basic_json<ConstructibleArrayType>::value,
int > = 0 >
auto from_json(const BasicJsonType& j, ConstructibleArrayType& arr)
-> decltype(from_json_array_impl(j, arr, priority_tag<3> {}),
j.template get<typename ConstructibleArrayType::value_type>(),
void())
{
if (JSON_HEDLEY_UNLIKELY(!j.is_array()))
{
JSON_THROW(type_error::create(302, concat("type must be array, but is ", j.type_name()), &j));
}
from_json_array_impl(j, arr, priority_tag<3> {});
}
// Builds and returns a std::array<T, sizeof...(Idx)> by brace-initialising it
// from j.at(Idx)... — the array is returned by value rather than assigned
// element by element.
template < typename BasicJsonType, typename T, std::size_t... Idx >
std::array<T, sizeof...(Idx)> from_json_inplace_array_impl(BasicJsonType&& j,
identity_tag<std::array<T, sizeof...(Idx)>> /*unused*/, index_sequence<Idx...> /*unused*/)
{
return { { std::forward<BasicJsonType>(j).at(Idx).template get<T>()... } };
}
// Returning overload for std::array<T, N>, selected via identity_tag.
// Throws type_error 302 when j is not an array, then delegates to
// from_json_inplace_array_impl with the index sequence 0..N-1.
template < typename BasicJsonType, typename T, std::size_t N >
auto from_json(BasicJsonType&& j, identity_tag<std::array<T, N>> tag)
-> decltype(from_json_inplace_array_impl(std::forward<BasicJsonType>(j), tag, make_index_sequence<N> {}))
{
if (JSON_HEDLEY_UNLIKELY(!j.is_array()))
{
JSON_THROW(type_error::create(302, concat("type must be array, but is ", j.type_name()), &j));
}
return from_json_inplace_array_impl(std::forward<BasicJsonType>(j), tag, make_index_sequence<N> {});
}
// Converts a JSON binary value to the basic_json binary type.
// Throws type_error 302 when j is not binary.
template<typename BasicJsonType>
inline void from_json(const BasicJsonType& j, typename BasicJsonType::binary_t& bin)
{
if (JSON_HEDLEY_UNLIKELY(!j.is_binary()))
{
JSON_THROW(type_error::create(302, concat("type must be binary, but is ", j.type_name()), &j));
}
bin = *j.template get_ptr<const typename BasicJsonType::binary_t*>();
}
// Converts a JSON object to a map-like type. Throws type_error 302 when j is
// not an object. Each (key, value) pair of the stored object is converted to
// the target's value_type, with the value read via get<mapped_type>(). The
// result is built in a temporary and then moved into obj.
template<typename BasicJsonType, typename ConstructibleObjectType,
enable_if_t<is_constructible_object_type<BasicJsonType, ConstructibleObjectType>::value, int> = 0>
inline void from_json(const BasicJsonType& j, ConstructibleObjectType& obj)
{
if (JSON_HEDLEY_UNLIKELY(!j.is_object()))
{
JSON_THROW(type_error::create(302, concat("type must be object, but is ", j.type_name()), &j));
}
ConstructibleObjectType ret;
const auto* inner_object = j.template get_ptr<const typename BasicJsonType::object_t*>();
using value_type = typename ConstructibleObjectType::value_type;
std::transform(
inner_object->begin(), inner_object->end(),
std::inserter(ret, ret.begin()),
[](typename BasicJsonType::object_t::value_type const & p)
{
return value_type(p.first, p.second.template get<typename ConstructibleObjectType::mapped_type>());
});
obj = std::move(ret);
}
// Overload for arithmetic types that are NOT one of the basic_json template
// arguments (number_unsigned_t, number_integer_t, number_float_t, boolean_t);
// the enable_if condition below excludes those four.
// Open question carried over from the original note: are explicit overloads
// for boolean_t etc. really necessary when a custom BooleanType is not an
// arithmetic type?
template < typename BasicJsonType, typename ArithmeticType,
           enable_if_t <
               std::is_arithmetic<ArithmeticType>::value&&
               !std::is_same<ArithmeticType, typename BasicJsonType::number_unsigned_t>::value&&
               !std::is_same<ArithmeticType, typename BasicJsonType::number_integer_t>::value&&
               !std::is_same<ArithmeticType, typename BasicJsonType::number_float_t>::value&&
               !std::is_same<ArithmeticType, typename BasicJsonType::boolean_t>::value,
               int > = 0 >
inline void from_json(const BasicJsonType& j, ArithmeticType& val)
{
    using unsigned_ptr_t = const typename BasicJsonType::number_unsigned_t*;
    using integer_ptr_t = const typename BasicJsonType::number_integer_t*;
    using float_ptr_t = const typename BasicJsonType::number_float_t*;
    using boolean_ptr_t = const typename BasicJsonType::boolean_t*;

    // Numbers and booleans are read in their stored type and cast to the target;
    // every other JSON type is rejected with type_error 302.
    switch (static_cast<value_t>(j))
    {
        case value_t::number_unsigned:
            val = static_cast<ArithmeticType>(*j.template get_ptr<unsigned_ptr_t>());
            return;

        case value_t::number_integer:
            val = static_cast<ArithmeticType>(*j.template get_ptr<integer_ptr_t>());
            return;

        case value_t::number_float:
            val = static_cast<ArithmeticType>(*j.template get_ptr<float_ptr_t>());
            return;

        case value_t::boolean:
            val = static_cast<ArithmeticType>(*j.template get_ptr<boolean_ptr_t>());
            return;

        case value_t::null:
        case value_t::object:
        case value_t::array:
        case value_t::string:
        case value_t::binary:
        case value_t::discarded:
        default:
            JSON_THROW(type_error::create(302, concat("type must be number, but is ", j.type_name()), &j));
    }
}
/// Builds a std::tuple<Args...> from a JSON value by converting the element
/// at position Idx to the corresponding type in Args.
/// Args must be given explicitly by the caller; Idx is deduced from the
/// (unnamed) index_sequence argument.
template<typename BasicJsonType, typename... Args, std::size_t... Idx>
std::tuple<Args...> from_json_tuple_impl_base(BasicJsonType&& j, index_sequence<Idx...> /*unused*/)
{
return std::make_tuple(std::forward<BasicJsonType>(j).at(Idx).template get<Args>()...);
}
/// Returns a std::pair<A1, A2> built from elements 0 and 1 of a JSON value.
/// Selected through the identity_tag<std::pair<...>> argument; the
/// priority_tag argument is unnamed and only used for overload selection.
template < typename BasicJsonType, class A1, class A2 >
std::pair<A1, A2> from_json_tuple_impl(BasicJsonType&& j, identity_tag<std::pair<A1, A2>> /*unused*/, priority_tag<0> /*unused*/)
{
return {std::forward<BasicJsonType>(j).at(0).template get<A1>(),
std::forward<BasicJsonType>(j).at(1).template get<A2>()};
}
/// Fills an existing std::pair<A1, A2> from a JSON value by delegating to the
/// by-value pair overload (identity_tag + priority_tag<0>) and assigning the result.
template<typename BasicJsonType, typename A1, typename A2>
inline void from_json_tuple_impl(BasicJsonType&& j, std::pair<A1, A2>& p, priority_tag<1> /*unused*/)
{
p = from_json_tuple_impl(std::forward<BasicJsonType>(j), identity_tag<std::pair<A1, A2>> {}, priority_tag<0> {});
}
/// Returns a std::tuple<Args...> built from a JSON value.
/// Selected through the identity_tag<std::tuple<...>> argument; the actual
/// element-wise conversion is done by from_json_tuple_impl_base.
template<typename BasicJsonType, typename... Args>
std::tuple<Args...> from_json_tuple_impl(BasicJsonType&& j, identity_tag<std::tuple<Args...>> /*unused*/, priority_tag<2> /*unused*/)
{
return from_json_tuple_impl_base<BasicJsonType, Args...>(std::forward<BasicJsonType>(j), index_sequence_for<Args...> {});
}
/// Fills an existing std::tuple<Args...> from a JSON value by assigning the
/// tuple produced by from_json_tuple_impl_base.
template<typename BasicJsonType, typename... Args>
inline void from_json_tuple_impl(BasicJsonType&& j, std::tuple<Args...>& t, priority_tag<3> /*unused*/)
{
t = from_json_tuple_impl_base<BasicJsonType, Args...>(std::forward<BasicJsonType>(j), index_sequence_for<Args...> {});
}
/// Entry point for pair/tuple conversions, both the by-reference and the
/// identity_tag (by-value) forms: `t` is forwarded unchanged to
/// from_json_tuple_impl, starting overload selection at priority_tag<3>.
/// The trailing decltype removes this overload when no from_json_tuple_impl
/// accepts the arguments. Raises type_error 302 through JSON_THROW when `j`
/// is not an array.
template<typename BasicJsonType, typename TupleRelated>
auto from_json(BasicJsonType&& j, TupleRelated&& t)
-> decltype(from_json_tuple_impl(std::forward<BasicJsonType>(j), std::forward<TupleRelated>(t), priority_tag<3> {}))
{
if (JSON_HEDLEY_UNLIKELY(!j.is_array()))
{
JSON_THROW(type_error::create(302, concat("type must be array, but is ", j.type_name()), &j));
}
return from_json_tuple_impl(std::forward<BasicJsonType>(j), std::forward<TupleRelated>(t), priority_tag<3> {});
}
/// Reads a std::map whose Key cannot construct a JSON string_t from a JSON
/// array of [key, value] arrays.
/// `m` is cleared first and then filled entry by entry. Raises type_error 302
/// through JSON_THROW when `j`, or any of its entries, is not an array.
template < typename BasicJsonType, typename Key, typename Value, typename Compare, typename Allocator,
           typename = enable_if_t < !std::is_constructible <
                                        typename BasicJsonType::string_t, Key >::value >>
inline void from_json(const BasicJsonType& j, std::map<Key, Value, Compare, Allocator>& m)
{
    if (JSON_HEDLEY_UNLIKELY(!j.is_array()))
    {
        JSON_THROW(type_error::create(302, concat("type must be array, but is ", j.type_name()), &j));
    }

    m.clear();
    for (const auto& entry : j)
    {
        // Every entry has to be an array itself: element 0 is the key, element 1 the value.
        if (JSON_HEDLEY_UNLIKELY(!entry.is_array()))
        {
            JSON_THROW(type_error::create(302, concat("type must be array, but is ", entry.type_name()), &j));
        }
        m.emplace(entry.at(0).template get<Key>(), entry.at(1).template get<Value>());
    }
}
/// Reads a std::unordered_map whose Key cannot construct a JSON string_t from
/// a JSON array of [key, value] arrays.
/// `m` is cleared first and then filled entry by entry. Raises type_error 302
/// through JSON_THROW when `j`, or any of its entries, is not an array.
template < typename BasicJsonType, typename Key, typename Value, typename Hash, typename KeyEqual, typename Allocator,
           typename = enable_if_t < !std::is_constructible <
                                        typename BasicJsonType::string_t, Key >::value >>
inline void from_json(const BasicJsonType& j, std::unordered_map<Key, Value, Hash, KeyEqual, Allocator>& m)
{
    if (JSON_HEDLEY_UNLIKELY(!j.is_array()))
    {
        JSON_THROW(type_error::create(302, concat("type must be array, but is ", j.type_name()), &j));
    }

    m.clear();
    for (const auto& entry : j)
    {
        // Every entry has to be an array itself: element 0 is the key, element 1 the value.
        if (JSON_HEDLEY_UNLIKELY(!entry.is_array()))
        {
            JSON_THROW(type_error::create(302, concat("type must be array, but is ", entry.type_name()), &j));
        }
        m.emplace(entry.at(0).template get<Key>(), entry.at(1).template get<Value>());
    }
}
#if JSON_HAS_FILESYSTEM || JSON_HAS_EXPERIMENTAL_FILESYSTEM
/// Assigns the string stored in `j` to the filesystem path `p`.
/// Raises type_error 302 through JSON_THROW when `j` is not a string.
template<typename BasicJsonType>
inline void from_json(const BasicJsonType& j, std_fs::path& p)
{
    using string_type = typename BasicJsonType::string_t;

    if (JSON_HEDLEY_UNLIKELY(!j.is_string()))
    {
        JSON_THROW(type_error::create(302, concat("type must be string, but is ", j.type_name()), &j));
    }

    const string_type* const text = j.template get_ptr<const string_type*>();
    p = *text;
}
#endif
/// Function object that forwards to whichever `from_json` overload unqualified
/// lookup selects for (j, val).
/// The call operator is SFINAE-disabled when no such overload exists, returns
/// whatever the selected overload returns, and has the same noexcept
/// specification as that call.
struct from_json_fn
{
template<typename BasicJsonType, typename T>
auto operator()(const BasicJsonType& j, T&& val) const
noexcept(noexcept(from_json(j, std::forward<T>(val))))
-> decltype(from_json(j, std::forward<T>(val)))
{
return from_json(j, std::forward<T>(val));
}
};
} // namespace detail
#ifndef JSON_HAS_CPP_17
/// namespace to hold default `from_json` function
/// to see why this is required:
/// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4381.html
namespace // NOLINT(cert-dcl59-cpp,fuchsia-header-anon-namespaces,google-build-namespaces)
{
#endif
// Reference to the single detail::from_json_fn instance held by static_const.
// The anonymous namespace above is only used when JSON_HAS_CPP_17 is not defined;
// JSON_INLINE_VARIABLE presumably expands to `inline` otherwise - confirm in macro_scope.
JSON_INLINE_VARIABLE constexpr const auto& from_json = // NOLINT(misc-definitions-in-headers)
detail::static_const<detail::from_json_fn>::value;
#ifndef JSON_HAS_CPP_17
} // namespace
#endif
NLOHMANN_JSON_NAMESPACE_END

View File

@@ -1,447 +0,0 @@
// __ _____ _____ _____
// __| | __| | | | JSON for Modern C++
// | | |__ | | | | | | version 3.11.3
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
//
// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
// SPDX-License-Identifier: MIT
#pragma once
#include <algorithm> // copy
#include <iterator> // begin, end
#include <string> // string
#include <tuple> // tuple, get
#include <type_traits> // is_same, is_constructible, is_floating_point, is_enum, underlying_type
#include <utility> // move, forward, declval, pair
#include <valarray> // valarray
#include <vector> // vector
#include <nlohmann/detail/iterators/iteration_proxy.hpp>
#include <nlohmann/detail/macro_scope.hpp>
#include <nlohmann/detail/meta/cpp_future.hpp>
#include <nlohmann/detail/meta/std_fs.hpp>
#include <nlohmann/detail/meta/type_traits.hpp>
#include <nlohmann/detail/value_t.hpp>
NLOHMANN_JSON_NAMESPACE_BEGIN
namespace detail
{
//////////////////
// constructors //
//////////////////
/*
* Note all external_constructor<>::construct functions need to call
* j.m_data.m_value.destroy(j.m_data.m_type) to avoid a memory leak in case j contains an
* allocated value (e.g., a string). See bug issue
* https://github.com/nlohmann/json/issues/2865 for more information.
*/
template<value_t> struct external_constructor;
/// Constructor helper that turns a basic_json into a boolean value.
template<>
struct external_constructor<value_t::boolean>
{
    /// Destroys the value currently held by `j`, then stores `b` as a boolean.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, typename BasicJsonType::boolean_t b) noexcept
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type); // release the old value before overwriting it
        storage.m_type = value_t::boolean;
        storage.m_value = b;
        j.assert_invariant();
    }
};
/// Constructor helper that turns a basic_json into a string value.
/// Every overload first destroys the value currently held by `j`.
template<>
struct external_constructor<value_t::string>
{
    /// Stores a copy of the JSON string `s`.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, const typename BasicJsonType::string_t& s)
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::string;
        storage.m_value = s;
        j.assert_invariant();
    }

    /// Stores the JSON string `s` by moving from it.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, typename BasicJsonType::string_t&& s)
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::string;
        storage.m_value = std::move(s);
        j.assert_invariant();
    }

    /// Stores a string_t created from `str`, for string types other than string_t itself.
    template < typename BasicJsonType, typename CompatibleStringType,
               enable_if_t < !std::is_same<CompatibleStringType, typename BasicJsonType::string_t>::value,
                             int > = 0 >
    static void construct(BasicJsonType& j, const CompatibleStringType& str)
    {
        using string_type = typename BasicJsonType::string_t;
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::string;
        storage.m_value.string = j.template create<string_type>(str);
        j.assert_invariant();
    }
};
/// Constructor helper that turns a basic_json into a binary value.
/// Both overloads first destroy the value currently held by `j`.
template<>
struct external_constructor<value_t::binary>
{
    /// Stores a copy of the binary value `b`.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, const typename BasicJsonType::binary_t& b)
    {
        using binary_type = typename BasicJsonType::binary_t;
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::binary;
        storage.m_value = binary_type(b);
        j.assert_invariant();
    }

    /// Stores the binary value `b` by moving from it.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, typename BasicJsonType::binary_t&& b)
    {
        using binary_type = typename BasicJsonType::binary_t;
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::binary;
        storage.m_value = binary_type(std::move(b));
        j.assert_invariant();
    }
};
/// Constructor helper that turns a basic_json into a floating-point number.
template<>
struct external_constructor<value_t::number_float>
{
    /// Destroys the value currently held by `j`, then stores `val` as number_float.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, typename BasicJsonType::number_float_t val) noexcept
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type); // release the old value before overwriting it
        storage.m_type = value_t::number_float;
        storage.m_value = val;
        j.assert_invariant();
    }
};
/// Constructor helper that turns a basic_json into an unsigned integer number.
template<>
struct external_constructor<value_t::number_unsigned>
{
    /// Destroys the value currently held by `j`, then stores `val` as number_unsigned.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, typename BasicJsonType::number_unsigned_t val) noexcept
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type); // release the old value before overwriting it
        storage.m_type = value_t::number_unsigned;
        storage.m_value = val;
        j.assert_invariant();
    }
};
/// Constructor helper that turns a basic_json into a signed integer number.
template<>
struct external_constructor<value_t::number_integer>
{
    /// Destroys the value currently held by `j`, then stores `val` as number_integer.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, typename BasicJsonType::number_integer_t val) noexcept
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type); // release the old value before overwriting it
        storage.m_type = value_t::number_integer;
        storage.m_value = val;
        j.assert_invariant();
    }
};
/// Constructor helper that turns a basic_json into an array value.
/// Every overload first destroys the value currently held by `j` and updates
/// the parent links of the new elements before checking the invariant.
template<>
struct external_constructor<value_t::array>
{
    /// Stores a copy of the JSON array `arr`.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, const typename BasicJsonType::array_t& arr)
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::array;
        storage.m_value = arr;
        j.set_parents();
        j.assert_invariant();
    }

    /// Stores the JSON array `arr` by moving from it.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, typename BasicJsonType::array_t&& arr)
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::array;
        storage.m_value = std::move(arr);
        j.set_parents();
        j.assert_invariant();
    }

    /// Stores an array_t created from the range [begin(arr), end(arr)), for
    /// container types other than array_t itself.
    template < typename BasicJsonType, typename CompatibleArrayType,
               enable_if_t < !std::is_same<CompatibleArrayType, typename BasicJsonType::array_t>::value,
                             int > = 0 >
    static void construct(BasicJsonType& j, const CompatibleArrayType& arr)
    {
        using std::begin;
        using std::end;
        using array_type = typename BasicJsonType::array_t;

        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::array;
        storage.m_value.array = j.template create<array_type>(begin(arr), end(arr));
        j.set_parents();
        j.assert_invariant();
    }

    /// Stores the elements of a std::vector<bool>, appended one at a time.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, const std::vector<bool>& arr)
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::array;
        storage.m_value = value_t::array;

        auto& elements = *storage.m_value.array;
        elements.reserve(arr.size());
        for (const bool flag : arr)
        {
            elements.push_back(flag);
            j.set_parent(elements.back());
        }
        j.assert_invariant();
    }

    /// Stores the elements of a std::valarray<T>: the array is resized to the
    /// source size and the elements are copied over when there are any.
    template<typename BasicJsonType, typename T,
             enable_if_t<std::is_convertible<T, BasicJsonType>::value, int> = 0>
    static void construct(BasicJsonType& j, const std::valarray<T>& arr)
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::array;
        storage.m_value = value_t::array;

        auto& elements = *storage.m_value.array;
        elements.resize(arr.size());
        if (arr.size() > 0)
        {
            std::copy(std::begin(arr), std::end(arr), elements.begin());
        }
        j.set_parents();
        j.assert_invariant();
    }
};
/// Constructor helper that turns a basic_json into an object value.
/// Every overload first destroys the value currently held by `j` and updates
/// the parent links of the new members before checking the invariant.
template<>
struct external_constructor<value_t::object>
{
    /// Stores a copy of the JSON object `obj`.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, const typename BasicJsonType::object_t& obj)
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::object;
        storage.m_value = obj;
        j.set_parents();
        j.assert_invariant();
    }

    /// Stores the JSON object `obj` by moving from it.
    template<typename BasicJsonType>
    static void construct(BasicJsonType& j, typename BasicJsonType::object_t&& obj)
    {
        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::object;
        storage.m_value = std::move(obj);
        j.set_parents();
        j.assert_invariant();
    }

    /// Stores an object_t created from the range [begin(obj), end(obj)), for
    /// map types other than object_t itself.
    template < typename BasicJsonType, typename CompatibleObjectType,
               enable_if_t < !std::is_same<CompatibleObjectType, typename BasicJsonType::object_t>::value, int > = 0 >
    static void construct(BasicJsonType& j, const CompatibleObjectType& obj)
    {
        using std::begin;
        using std::end;
        using object_type = typename BasicJsonType::object_t;

        auto& storage = j.m_data;
        storage.m_value.destroy(storage.m_type);
        storage.m_type = value_t::object;
        storage.m_value.object = j.template create<object_type>(begin(obj), end(obj));
        j.set_parents();
        j.assert_invariant();
    }
};
/////////////
// to_json //
/////////////
/// Serializes a value whose type is exactly BasicJsonType::boolean_t as a JSON boolean.
template<typename BasicJsonType, typename T,
enable_if_t<std::is_same<T, typename BasicJsonType::boolean_t>::value, int> = 0>
inline void to_json(BasicJsonType& j, T b) noexcept
{
external_constructor<value_t::boolean>::construct(j, b);
}
/// Serializes a std::vector<bool> element reference as a JSON boolean.
/// Enabled when BoolRef is std::vector<bool>::reference or ::const_reference,
/// that type is not already boolean_t (or a reference to it), and it is
/// convertible to boolean_t; the value is cast to boolean_t before storing.
template < typename BasicJsonType, typename BoolRef,
enable_if_t <
((std::is_same<std::vector<bool>::reference, BoolRef>::value
&& !std::is_same <std::vector<bool>::reference, typename BasicJsonType::boolean_t&>::value)
|| (std::is_same<std::vector<bool>::const_reference, BoolRef>::value
&& !std::is_same <detail::uncvref_t<std::vector<bool>::const_reference>,
typename BasicJsonType::boolean_t >::value))
&& std::is_convertible<const BoolRef&, typename BasicJsonType::boolean_t>::value, int > = 0 >
inline void to_json(BasicJsonType& j, const BoolRef& b) noexcept
{
external_constructor<value_t::boolean>::construct(j, static_cast<typename BasicJsonType::boolean_t>(b));
}
/// Serializes any type from which BasicJsonType::string_t is constructible as a JSON string.
template<typename BasicJsonType, typename CompatibleString,
enable_if_t<std::is_constructible<typename BasicJsonType::string_t, CompatibleString>::value, int> = 0>
inline void to_json(BasicJsonType& j, const CompatibleString& s)
{
external_constructor<value_t::string>::construct(j, s);
}
/// Serializes an rvalue string_t as a JSON string, moving it into `j`.
template<typename BasicJsonType>
inline void to_json(BasicJsonType& j, typename BasicJsonType::string_t&& s)
{
external_constructor<value_t::string>::construct(j, std::move(s));
}
/// Serializes any floating-point type as a JSON number_float, after casting
/// the value to BasicJsonType::number_float_t.
template<typename BasicJsonType, typename FloatType,
enable_if_t<std::is_floating_point<FloatType>::value, int> = 0>
inline void to_json(BasicJsonType& j, FloatType val) noexcept
{
external_constructor<value_t::number_float>::construct(j, static_cast<typename BasicJsonType::number_float_t>(val));
}
/// Serializes an integer type compatible with number_unsigned_t (per
/// is_compatible_integer_type) as a JSON number_unsigned, after casting to number_unsigned_t.
template<typename BasicJsonType, typename CompatibleNumberUnsignedType,
enable_if_t<is_compatible_integer_type<typename BasicJsonType::number_unsigned_t, CompatibleNumberUnsignedType>::value, int> = 0>
inline void to_json(BasicJsonType& j, CompatibleNumberUnsignedType val) noexcept
{
external_constructor<value_t::number_unsigned>::construct(j, static_cast<typename BasicJsonType::number_unsigned_t>(val));
}
/// Serializes an integer type compatible with number_integer_t (per
/// is_compatible_integer_type) as a JSON number_integer, after casting to number_integer_t.
template<typename BasicJsonType, typename CompatibleNumberIntegerType,
enable_if_t<is_compatible_integer_type<typename BasicJsonType::number_integer_t, CompatibleNumberIntegerType>::value, int> = 0>
inline void to_json(BasicJsonType& j, CompatibleNumberIntegerType val) noexcept
{
external_constructor<value_t::number_integer>::construct(j, static_cast<typename BasicJsonType::number_integer_t>(val));
}
#if !JSON_DISABLE_ENUM_SERIALIZATION
/// Serializes an enum as its underlying integer value.
/// Only compiled when JSON_DISABLE_ENUM_SERIALIZATION evaluates to false.
template<typename BasicJsonType, typename EnumType,
enable_if_t<std::is_enum<EnumType>::value, int> = 0>
inline void to_json(BasicJsonType& j, EnumType e) noexcept
{
using underlying_type = typename std::underlying_type<EnumType>::type;
// Unsigned underlying types are stored as number_unsigned, all others as number_integer.
static constexpr value_t integral_value_t = std::is_unsigned<underlying_type>::value ? value_t::number_unsigned : value_t::number_integer;
external_constructor<integral_value_t>::construct(j, static_cast<underlying_type>(e));
}
#endif // JSON_DISABLE_ENUM_SERIALIZATION
/// Serializes a std::vector<bool> as a JSON array via the dedicated
/// external_constructor overload for that type.
template<typename BasicJsonType>
inline void to_json(BasicJsonType& j, const std::vector<bool>& e)
{
external_constructor<value_t::array>::construct(j, e);
}
/// Serializes a generic array-like container as a JSON array.
/// Enabled for types that satisfy is_compatible_array_type but are not also a
/// compatible object type, a compatible string type, binary_t, or a basic_json.
template < typename BasicJsonType, typename CompatibleArrayType,
enable_if_t < is_compatible_array_type<BasicJsonType,
CompatibleArrayType>::value&&
!is_compatible_object_type<BasicJsonType, CompatibleArrayType>::value&&
!is_compatible_string_type<BasicJsonType, CompatibleArrayType>::value&&
!std::is_same<typename BasicJsonType::binary_t, CompatibleArrayType>::value&&
!is_basic_json<CompatibleArrayType>::value,
int > = 0 >
inline void to_json(BasicJsonType& j, const CompatibleArrayType& arr)
{
external_constructor<value_t::array>::construct(j, arr);
}
/// Serializes a BasicJsonType::binary_t as a JSON binary value (copying it).
template<typename BasicJsonType>
inline void to_json(BasicJsonType& j, const typename BasicJsonType::binary_t& bin)
{
external_constructor<value_t::binary>::construct(j, bin);
}
/// Serializes a std::valarray<T> as a JSON array.
/// Enabled when T is convertible to BasicJsonType; the elements are copied
/// by the std::valarray overload of external_constructor<value_t::array>.
template<typename BasicJsonType, typename T,
         enable_if_t<std::is_convertible<T, BasicJsonType>::value, int> = 0>
inline void to_json(BasicJsonType& j, const std::valarray<T>& arr)
{
    // `arr` is a const reference and cannot be moved from, so it is passed
    // as-is; wrapping it in std::move would only be a misleading no-op cast.
    external_constructor<value_t::array>::construct(j, arr);
}
/// Serializes an rvalue array_t as a JSON array, moving it into `j`.
template<typename BasicJsonType>
inline void to_json(BasicJsonType& j, typename BasicJsonType::array_t&& arr)
{
external_constructor<value_t::array>::construct(j, std::move(arr));
}
/// Serializes a map-like type as a JSON object.
/// Enabled for types that satisfy is_compatible_object_type and are not a basic_json.
template < typename BasicJsonType, typename CompatibleObjectType,
enable_if_t < is_compatible_object_type<BasicJsonType, CompatibleObjectType>::value&& !is_basic_json<CompatibleObjectType>::value, int > = 0 >
inline void to_json(BasicJsonType& j, const CompatibleObjectType& obj)
{
external_constructor<value_t::object>::construct(j, obj);
}
/// Serializes an rvalue object_t as a JSON object, moving it into `j`.
template<typename BasicJsonType>
inline void to_json(BasicJsonType& j, typename BasicJsonType::object_t&& obj)
{
external_constructor<value_t::object>::construct(j, std::move(obj));
}
/// Serializes a built-in array `const T(&)[N]` as a JSON array.
/// Disabled when string_t is constructible from the array type, so that such
/// arrays (e.g. character arrays) are left to the string overload.
template <
typename BasicJsonType, typename T, std::size_t N,
enable_if_t < !std::is_constructible<typename BasicJsonType::string_t,
const T(&)[N]>::value, // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays)
int > = 0 >
inline void to_json(BasicJsonType& j, const T(&arr)[N]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays)
{
external_constructor<value_t::array>::construct(j, arr);
}
/// Serializes a std::pair by assigning the initializer list { first, second } to `j`.
/// Enabled when BasicJsonType is constructible from both T1 and T2.
template < typename BasicJsonType, typename T1, typename T2, enable_if_t < std::is_constructible<BasicJsonType, T1>::value&& std::is_constructible<BasicJsonType, T2>::value, int > = 0 >
inline void to_json(BasicJsonType& j, const std::pair<T1, T2>& p)
{
j = { p.first, p.second };
}
// for https://github.com/nlohmann/json/pull/1134
/// Serializes an iteration_proxy_value (the element type produced when
/// iterating items()) by assigning the nested initializer list
/// { {key, value} } to `j`.
template<typename BasicJsonType, typename T,
enable_if_t<std::is_same<T, iteration_proxy_value<typename BasicJsonType::iterator>>::value, int> = 0>
inline void to_json(BasicJsonType& j, const T& b)
{
j = { {b.key(), b.value()} };
}
/// Assigns to `j` an initializer list holding std::get<Idx>(t) for every
/// index in the (unnamed) index_sequence argument.
template<typename BasicJsonType, typename Tuple, std::size_t... Idx>
inline void to_json_tuple_impl(BasicJsonType& j, const Tuple& t, index_sequence<Idx...> /*unused*/)
{
j = { std::get<Idx>(t)... };
}
/// Serializes a tuple-like type accepted by is_constructible_tuple, expanding
/// all std::tuple_size<T>::value elements through to_json_tuple_impl.
template<typename BasicJsonType, typename T, enable_if_t<is_constructible_tuple<BasicJsonType, T>::value, int > = 0>
inline void to_json(BasicJsonType& j, const T& t)
{
to_json_tuple_impl(j, t, make_index_sequence<std::tuple_size<T>::value> {});
}
#if JSON_HAS_FILESYSTEM || JSON_HAS_EXPERIMENTAL_FILESYSTEM
/// Serializes a filesystem path by assigning the result of p.string() to `j`.
template<typename BasicJsonType>
inline void to_json(BasicJsonType& j, const std_fs::path& p)
{
j = p.string();
}
#endif
/// Function object that forwards to whichever `to_json` overload unqualified
/// lookup selects for (j, val).
/// The call operator is SFINAE-disabled when no such overload exists, always
/// has return type void, and has the same noexcept specification as the
/// selected call.
struct to_json_fn
{
template<typename BasicJsonType, typename T>
auto operator()(BasicJsonType& j, T&& val) const noexcept(noexcept(to_json(j, std::forward<T>(val))))
-> decltype(to_json(j, std::forward<T>(val)), void())
{
return to_json(j, std::forward<T>(val));
}
};
} // namespace detail
#ifndef JSON_HAS_CPP_17
/// namespace to hold default `to_json` function
/// to see why this is required:
/// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4381.html
namespace // NOLINT(cert-dcl59-cpp,fuchsia-header-anon-namespaces,google-build-namespaces)
{
#endif
// Reference to the single detail::to_json_fn instance held by static_const.
// The anonymous namespace above is only used when JSON_HAS_CPP_17 is not defined;
// JSON_INLINE_VARIABLE presumably expands to `inline` otherwise - confirm in macro_scope.
JSON_INLINE_VARIABLE constexpr const auto& to_json = // NOLINT(misc-definitions-in-headers)
detail::static_const<detail::to_json_fn>::value;
#ifndef JSON_HAS_CPP_17
} // namespace
#endif
NLOHMANN_JSON_NAMESPACE_END

View File

@@ -1,257 +0,0 @@
// __ _____ _____ _____
// __| | __| | | | JSON for Modern C++
// | | |__ | | | | | | version 3.11.3
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
//
// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef> // nullptr_t
#include <exception> // exception
#if JSON_DIAGNOSTICS
#include <numeric> // accumulate
#endif
#include <stdexcept> // runtime_error
#include <string> // to_string
#include <vector> // vector
#include <nlohmann/detail/value_t.hpp>
#include <nlohmann/detail/string_escape.hpp>
#include <nlohmann/detail/input/position_t.hpp>
#include <nlohmann/detail/macro_scope.hpp>
#include <nlohmann/detail/meta/cpp_future.hpp>
#include <nlohmann/detail/meta/type_traits.hpp>
#include <nlohmann/detail/string_concat.hpp>
NLOHMANN_JSON_NAMESPACE_BEGIN
namespace detail
{
////////////////
// exceptions //
////////////////
/// @brief general exception of the @ref basic_json class
/// @sa https://json.nlohmann.me/api/basic_json/exception/
///
/// Carries a numeric id and an explanatory message. The constructor is
/// protected, so objects are created through the derived exception classes;
/// name() and diagnostics() are helpers for assembling their messages.
class exception : public std::exception
{
public:
/// returns the explanatory string
const char* what() const noexcept override
{
return m.what();
}
/// the id of the exception
const int id; // NOLINT(cppcoreguidelines-non-private-member-variables-in-classes)
protected:
// NOTE(review): index 3 presumably refers to what_arg, counting the implicit
// `this` as the first argument - confirm against the JSON_HEDLEY_NON_NULL definition.
JSON_HEDLEY_NON_NULL(3)
exception(int id_, const char* what_arg) : id(id_), m(what_arg) {} // NOLINT(bugprone-throw-keyword-missing)
/// builds the message prefix "[json.exception.<ename>.<id_>] "
static std::string name(const std::string& ename, int id_)
{
return concat("[json.exception.", ename, '.', std::to_string(id_), "] ");
}
/// overload for callers that pass nullptr as context: no diagnostics text
static std::string diagnostics(std::nullptr_t /*leaf_element*/)
{
return "";
}
/// returns "(<path>) " describing where leaf_element sits below its root
/// when JSON_DIAGNOSTICS is enabled and a parent chain exists; returns an
/// empty string otherwise
template<typename BasicJsonType>
static std::string diagnostics(const BasicJsonType* leaf_element)
{
#if JSON_DIAGNOSTICS
// Walk from the leaf towards the root via m_parent, recording for every step
// the array index or object key under which the current value is stored.
std::vector<std::string> tokens;
for (const auto* current = leaf_element; current != nullptr && current->m_parent != nullptr; current = current->m_parent)
{
switch (current->m_parent->type())
{
case value_t::array:
{
// identify the element by comparing addresses with each array slot
for (std::size_t i = 0; i < current->m_parent->m_data.m_value.array->size(); ++i)
{
if (&current->m_parent->m_data.m_value.array->operator[](i) == current)
{
tokens.emplace_back(std::to_string(i));
break;
}
}
break;
}
case value_t::object:
{
// identify the member by comparing addresses with each mapped value
for (const auto& element : *current->m_parent->m_data.m_value.object)
{
if (&element.second == current)
{
tokens.emplace_back(element.first.c_str());
break;
}
}
break;
}
case value_t::null: // LCOV_EXCL_LINE
case value_t::string: // LCOV_EXCL_LINE
case value_t::boolean: // LCOV_EXCL_LINE
case value_t::number_integer: // LCOV_EXCL_LINE
case value_t::number_unsigned: // LCOV_EXCL_LINE
case value_t::number_float: // LCOV_EXCL_LINE
case value_t::binary: // LCOV_EXCL_LINE
case value_t::discarded: // LCOV_EXCL_LINE
default: // LCOV_EXCL_LINE
break; // LCOV_EXCL_LINE
}
}
if (tokens.empty())
{
return "";
}
// Tokens were collected leaf-first, so join them in reverse order, each
// preceded by '/' and passed through detail::escape.
auto str = std::accumulate(tokens.rbegin(), tokens.rend(), std::string{},
[](const std::string & a, const std::string & b)
{
return concat(a, '/', detail::escape(b));
});
return concat('(', str, ") ");
#else
static_cast<void>(leaf_element);
return "";
#endif
}
private:
/// an exception object as storage for error messages
std::runtime_error m;
};
/// @brief exception indicating a parse error
/// @sa https://json.nlohmann.me/api/basic_json/parse_error/
class parse_error : public exception
{
public:
/*!
@brief create a parse error exception
@param[in] id_ the id of the exception
@param[in] pos the position where the error occurred (or with
chars_read_total=0 if the position cannot be
determined)
@param[in] what_arg the explanatory string
@return parse_error object
*/
template<typename BasicJsonContext, enable_if_t<is_basic_json_context<BasicJsonContext>::value, int> = 0>
static parse_error create(int id_, const position_t& pos, const std::string& what_arg, BasicJsonContext context)
{
const std::string w = concat(exception::name("parse_error", id_), "parse error",
position_string(pos), ": ", exception::diagnostics(context), what_arg);
return {id_, pos.chars_read_total, w.c_str()};
}
template<typename BasicJsonContext, enable_if_t<is_basic_json_context<BasicJsonContext>::value, int> = 0>
static parse_error create(int id_, std::size_t byte_, const std::string& what_arg, BasicJsonContext context)
{
const std::string w = concat(exception::name("parse_error", id_), "parse error",
(byte_ != 0 ? (concat(" at byte ", std::to_string(byte_))) : ""),
": ", exception::diagnostics(context), what_arg);
return {id_, byte_, w.c_str()};
}
/*!
@brief byte index of the parse error
The byte index of the last read character in the input file.
@note For an input with n bytes, 1 is the index of the first character and
n+1 is the index of the terminating null byte or the end of file.
This also holds true when reading a byte vector (CBOR or MessagePack).
*/
const std::size_t byte;
private:
parse_error(int id_, std::size_t byte_, const char* what_arg)
: exception(id_, what_arg), byte(byte_) {}
static std::string position_string(const position_t& pos)
{
return concat(" at line ", std::to_string(pos.lines_read + 1),
", column ", std::to_string(pos.chars_read_current_line));
}
};
/// @brief exception indicating errors with iterators
/// @sa https://json.nlohmann.me/api/basic_json/invalid_iterator/
class invalid_iterator : public exception
{
  public:
    /// Builds the exception; the message is the "[json.exception.invalid_iterator.<id>] "
    /// prefix, followed by the diagnostics for `context`, followed by `what_arg`.
    template<typename BasicJsonContext, enable_if_t<is_basic_json_context<BasicJsonContext>::value, int> = 0>
    static invalid_iterator create(int id_, const std::string& what_arg, BasicJsonContext context)
    {
        const std::string prefix = exception::name("invalid_iterator", id_);
        const std::string message = concat(prefix, exception::diagnostics(context), what_arg);
        return {id_, message.c_str()};
    }

  private:
    JSON_HEDLEY_NON_NULL(3)
    invalid_iterator(int id_, const char* what_arg)
        : exception(id_, what_arg) {}
};
/// @brief exception indicating executing a member function with a wrong type
/// @sa https://json.nlohmann.me/api/basic_json/type_error/
class type_error : public exception
{
  public:
    /// Builds the exception; the message is the "[json.exception.type_error.<id>] "
    /// prefix, followed by the diagnostics for `context`, followed by `what_arg`.
    template<typename BasicJsonContext, enable_if_t<is_basic_json_context<BasicJsonContext>::value, int> = 0>
    static type_error create(int id_, const std::string& what_arg, BasicJsonContext context)
    {
        const std::string prefix = exception::name("type_error", id_);
        const std::string message = concat(prefix, exception::diagnostics(context), what_arg);
        return {id_, message.c_str()};
    }

  private:
    JSON_HEDLEY_NON_NULL(3)
    type_error(int id_, const char* what_arg) : exception(id_, what_arg) {}
};
/// @brief exception indicating access out of the defined range
/// @sa https://json.nlohmann.me/api/basic_json/out_of_range/
class out_of_range : public exception
{
  public:
    /// Builds the exception; the message is the "[json.exception.out_of_range.<id>] "
    /// prefix, followed by the diagnostics for `context`, followed by `what_arg`.
    template<typename BasicJsonContext, enable_if_t<is_basic_json_context<BasicJsonContext>::value, int> = 0>
    static out_of_range create(int id_, const std::string& what_arg, BasicJsonContext context)
    {
        const std::string prefix = exception::name("out_of_range", id_);
        const std::string message = concat(prefix, exception::diagnostics(context), what_arg);
        return {id_, message.c_str()};
    }

  private:
    JSON_HEDLEY_NON_NULL(3)
    out_of_range(int id_, const char* what_arg) : exception(id_, what_arg) {}
};
/// @brief exception indicating other library errors
/// @sa https://json.nlohmann.me/api/basic_json/other_error/
class other_error : public exception
{
  public:
    /// Builds the exception; the message is the "[json.exception.other_error.<id>] "
    /// prefix, followed by the diagnostics for `context`, followed by `what_arg`.
    template<typename BasicJsonContext, enable_if_t<is_basic_json_context<BasicJsonContext>::value, int> = 0>
    static other_error create(int id_, const std::string& what_arg, BasicJsonContext context)
    {
        const std::string prefix = exception::name("other_error", id_);
        const std::string message = concat(prefix, exception::diagnostics(context), what_arg);
        return {id_, message.c_str()};
    }

  private:
    JSON_HEDLEY_NON_NULL(3)
    other_error(int id_, const char* what_arg) : exception(id_, what_arg) {}
};
} // namespace detail
NLOHMANN_JSON_NAMESPACE_END

View File

@@ -1,129 +0,0 @@
// __ _____ _____ _____
// __| | __| | | | JSON for Modern C++
// | | |__ | | | | | | version 3.11.3
// |_____|_____|_____|_|___| https://github.com/nlohmann/json
//
// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann <https://nlohmann.me>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstdint> // uint8_t
#include <cstddef> // size_t
#include <functional> // hash
#include <nlohmann/detail/abi_macros.hpp>
#include <nlohmann/detail/value_t.hpp>
NLOHMANN_JSON_NAMESPACE_BEGIN
namespace detail
{
// Mixes the hash value `h` into `seed` and returns the new seed; same formula
// as boost::hash_combine.
inline std::size_t combine(std::size_t seed, std::size_t h) noexcept
{
    const std::size_t mixed = h + 0x9e3779b9 + (seed << 6U) + (seed >> 2U);
    return seed ^ mixed;
}
/*!
@brief compute a hash value for a JSON value

std::hash is used for the leaf values. The numeric value of the JSON type is
always part of the result, so that null, 0, 0U, and false, etc. hash
differently. Objects and arrays are hashed recursively, element by element.

@tparam BasicJsonType basic_json specialization
@param j JSON value to hash
@return hash value of j
*/
template<typename BasicJsonType>
std::size_t hash(const BasicJsonType& j)
{
    using string_t = typename BasicJsonType::string_t;
    using number_integer_t = typename BasicJsonType::number_integer_t;
    using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
    using number_float_t = typename BasicJsonType::number_float_t;

    // every result starts from the numeric value of the type tag
    const auto type = static_cast<std::size_t>(j.type());
    switch (j.type())
    {
        case BasicJsonType::value_t::null:
        case BasicJsonType::value_t::discarded:
            return combine(type, 0);

        case BasicJsonType::value_t::object:
        {
            // size first, then key hash and value hash of every member
            auto result = combine(type, j.size());
            for (const auto& member : j.items())
            {
                result = combine(result, std::hash<string_t> {}(member.key()));
                result = combine(result, hash(member.value()));
            }
            return result;
        }

        case BasicJsonType::value_t::array:
        {
            // size first, then the hash of every element
            auto result = combine(type, j.size());
            for (const auto& item : j)
            {
                result = combine(result, hash(item));
            }
            return result;
        }

        case BasicJsonType::value_t::string:
            return combine(type, std::hash<string_t> {}(j.template get_ref<const string_t&>()));

        case BasicJsonType::value_t::boolean:
            return combine(type, std::hash<bool> {}(j.template get<bool>()));

        case BasicJsonType::value_t::number_integer:
            return combine(type, std::hash<number_integer_t> {}(j.template get<number_integer_t>()));

        case BasicJsonType::value_t::number_unsigned:
            return combine(type, std::hash<number_unsigned_t> {}(j.template get<number_unsigned_t>()));

        case BasicJsonType::value_t::number_float:
            return combine(type, std::hash<number_float_t> {}(j.template get<number_float_t>()));

        case BasicJsonType::value_t::binary:
        {
            // size, has_subtype flag and subtype first, then every byte
            const auto& bin = j.get_binary();
            auto result = combine(type, bin.size());
            result = combine(result, std::hash<bool> {}(bin.has_subtype()));
            result = combine(result, static_cast<std::size_t>(bin.subtype()));
            for (const auto byte : bin)
            {
                result = combine(result, std::hash<std::uint8_t> {}(byte));
            }
            return result;
        }

        default:                   // LCOV_EXCL_LINE
            JSON_ASSERT(false);    // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE
            return 0;              // LCOV_EXCL_LINE
    }
}
} // namespace detail
NLOHMANN_JSON_NAMESPACE_END

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More