Compare commits

119 Commits (SHA1; the author and date columns did not survive extraction)

7076efc2a1
9ee388561f
70c7d3dd3d
400967b4e3
c234308701
4ded6f51eb
b1d317d8f4
7876d1a370
3bdb14bd65
71b05cc1a7
a59689272d
3d8be79b37
619276a5ea
e681099360
5919fbfd34
a26522e62f
86cccb6c7b
d1b235261e
7a8e0391dc
6cfbc482d8
ca54f799ee
06621ea361
a70ac3e883
b987dcbcc4
81fd7df7f0
dd98cf159d
f658149977
fb957ac3fe
b90e558238
64970cf7f7
b571a4da4d
8a9f329ff9
e2781ee525
56a2d3ead0
dc32a0fc47
3d6b4f0614
18844c7da7
43ceefd2c9
e6501502d1
d84adf6172
268a86cbe0
fc4c93b299
86f2bc44fc
f0f3d9ad6e
9a323cd7a3
cb949ac7e5
2c297ea15d
4e4b6e67f4
82847774ee
d0955d9369
2d34eb8c89
0159c397fa
0bbc8328a9
35ca862eca
26eb58b104
6fcc15d39a
9a14133be5
59c1cf5b3b
8e9090d283
02bcab01be
716748e18c
0b31780d39
fa26aa80f7
3eb61905fb
ca0ae4dacf
b34869cc61
27a3e5a5e0
684443a788
6d9badc33b
015b1b0c0f
7bb8e4df01
53710378de
c833e9ba32
f5cb46ee29
fa35681abe
b0bd0e6eee
d43be27821
a2853dd2e5
0341bd5648
22b742f068
2584e8294d
291ba0fb0e
80043d5181
677ec5613d
cccaa6e0af
2e3e0e0fc2
8784a24898
54496c68f1
1f236a70db
ef3c74633c
7efd95095c
0e24135d46
521bfd2a8e
e2e0fb0c40
56b62a67cc
c0fc107abb
d8c44b3b7c
6ab7cd2cbd
b578ea8a2d
9a752d15dc
4992685e94
346b693c79
164c8bd90c
ced29a2c2e
0ec53f405f
f806015b29
8115f25c06
618a1e539c
7aeffba740
e79ea63afb
3c7382a93a
b4a222b100
23ef0cc5f7
793b2d3cd5
ae469b8146
f014928411
c4b563a339
49bb0582e6
b4c5261e01
.clang-format (new file, 4 lines)
@@ -0,0 +1,4 @@
+# .clang-format
+BasedOnStyle: LLVM
+IndentWidth: 4
+ColumnLimit: 120
.clang-uml (file name inferred from the configuration keys; 4 lines changed)
@@ -1,4 +1,4 @@
-compilation_database_dir: build_debug
+compilation_database_dir: build_Debug
 output_directory: diagrams
 diagrams:
   BayesNet:
.devcontainer/Dockerfile (new file, 57 lines)
@@ -0,0 +1,57 @@
+FROM mcr.microsoft.com/devcontainers/cpp:ubuntu22.04
+
+ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.29.3"
+
+# Optionally install the cmake for vcpkg
+COPY ./reinstall-cmake.sh /tmp/
+
+RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \
+        chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \
+    fi \
+    && rm -f /tmp/reinstall-cmake.sh
+
+
+# [Optional] Uncomment this section to install additional vcpkg ports.
+# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install <your-port-name-here>"
+
+# [Optional] Uncomment this section to install additional packages.
+RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
+    && apt-get -y install --no-install-recommends wget software-properties-common libdatetime-perl libcapture-tiny-perl libdatetime-format-dateparse-perl libgd-perl
+
+# Add PPA for GCC 13
+RUN add-apt-repository ppa:ubuntu-toolchain-r/test
+RUN apt-get update
+
+# Install GCC 13.1
+RUN apt-get install -y gcc-13 g++-13 doxygen
+
+# Install lcov 2.1
+RUN wget --quiet https://github.com/linux-test-project/lcov/releases/download/v2.1/lcov-2.1.tar.gz && \
+    tar -xvf lcov-2.1.tar.gz && \
+    cd lcov-2.1 && \
+    make install
+RUN rm lcov-2.1.tar.gz
+RUN rm -fr lcov-2.1
+
+# Install Miniconda
+RUN mkdir -p /opt/conda
+RUN wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh" -O /opt/conda/miniconda.sh && \
+    bash /opt/conda/miniconda.sh -b -p /opt/miniconda
+
+# Add conda to PATH
+ENV PATH=/opt/miniconda/bin:$PATH
+
+# add CXX and CC to the environment with gcc 13
+ENV CXX=/usr/bin/g++-13
+ENV CC=/usr/bin/gcc-13
+
+# link the last gcov version
+RUN rm /usr/bin/gcov
+RUN ln -s /usr/bin/gcov-13 /usr/bin/gcov
+
+# change ownership of /opt/miniconda to vscode user
+RUN chown -R vscode:vscode /opt/miniconda
+
+USER vscode
+RUN conda init
+RUN conda install -y -c conda-forge yaml pytorch
.devcontainer/devcontainer.json (new file, 37 lines)
@@ -0,0 +1,37 @@
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/cpp
+{
+    "name": "C++",
+    "build": {
+        "dockerfile": "Dockerfile"
+    },
+    // "features": {
+    //     "ghcr.io/devcontainers/features/conda:1": {}
+    // }
+    // Features to add to the dev container. More info: https://containers.dev/features.
+    // "features": {},
+    // Use 'forwardPorts' to make a list of ports inside the container available locally.
+    // "forwardPorts": [],
+    // Use 'postCreateCommand' to run commands after the container is created.
+    "postCreateCommand": "make release && make debug && echo 'Done!'",
+    // Configure tool-specific properties.
+    // "customizations": {},
+    "customizations": {
+        // Configure properties specific to VS Code.
+        "vscode": {
+            "settings": {},
+            "extensions": [
+                "ms-vscode.cpptools",
+                "ms-vscode.cpptools-extension-pack",
+                "ms-vscode.cpptools-themes",
+                "ms-vscode.cmake-tools",
+                "ms-azuretools.vscode-docker",
+                "jbenden.c-cpp-flylint",
+                "matepek.vscode-catch2-test-adapter",
+                "GitHub.copilot"
+            ]
+        }
+    }
+    // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
+    // "remoteUser": "root"
+}
.devcontainer/reinstall-cmake.sh (new file, 59 lines)
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information.
+#-------------------------------------------------------------------------------------------------------------
+#
+set -e
+
+CMAKE_VERSION=${1:-"none"}
+
+if [ "${CMAKE_VERSION}" = "none" ]; then
+    echo "No CMake version specified, skipping CMake reinstallation"
+    exit 0
+fi
+
+# Cleanup temporary directory and associated files when exiting the script.
+cleanup() {
+    EXIT_CODE=$?
+    set +e
+    if [[ -n "${TMP_DIR}" ]]; then
+        echo "Executing cleanup of tmp files"
+        rm -Rf "${TMP_DIR}"
+    fi
+    exit $EXIT_CODE
+}
+trap cleanup EXIT
+
+
+echo "Installing CMake..."
+apt-get -y purge --auto-remove cmake
+mkdir -p /opt/cmake
+
+architecture=$(dpkg --print-architecture)
+case "${architecture}" in
+    arm64)
+        ARCH=aarch64 ;;
+    amd64)
+        ARCH=x86_64 ;;
+    *)
+        echo "Unsupported architecture ${architecture}."
+        exit 1
+        ;;
+esac
+
+CMAKE_BINARY_NAME="cmake-${CMAKE_VERSION}-linux-${ARCH}.sh"
+CMAKE_CHECKSUM_NAME="cmake-${CMAKE_VERSION}-SHA-256.txt"
+TMP_DIR=$(mktemp -d -t cmake-XXXXXXXXXX)
+
+echo "${TMP_DIR}"
+cd "${TMP_DIR}"
+
+curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_BINARY_NAME}" -O
+curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_CHECKSUM_NAME}" -O
+
+sha256sum -c --ignore-missing "${CMAKE_CHECKSUM_NAME}"
+sh "${TMP_DIR}/${CMAKE_BINARY_NAME}" --prefix=/opt/cmake --skip-license
+
+ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
+ln -s /opt/cmake/bin/ctest /usr/local/bin/ctest
.github/dependabot.yml (vendored, new file, 12 lines)
@@ -0,0 +1,12 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for more information:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+# https://containers.dev/guide/dependabot
+
+version: 2
+updates:
+  - package-ecosystem: "devcontainers"
+    directory: "/"
+    schedule:
+      interval: weekly
.github/workflows/main.yml (vendored, deleted file, 12 lines)
@@ -1,12 +0,0 @@
-name: CI
-on: push
-
-jobs:
-  tests:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - run: sudo apt-get install ninja-build cmake
-      - run: ninja --version
-      - run: cmake --version
-      - run: g++ --version
.gitignore (vendored, 6 lines changed)
@@ -39,4 +39,10 @@ cmake-build*/**
 puml/**
 .vscode/settings.json
 sample/build
 **/.DS_Store
+docs/manual
+docs/man3
+docs/man
+docs/Doxyfile
+.cache
+
.gitmodules (vendored, 21 lines changed)
@@ -1,13 +1,3 @@
-[submodule "lib/mdlp"]
-	path = lib/mdlp
-	url = https://github.com/rmontanana/mdlp
-	main = main
-	update = merge
-[submodule "lib/catch2"]
-	path = lib/catch2
-	main = v2.x
-	update = merge
-	url = https://github.com/catchorg/Catch2.git
 [submodule "lib/json"]
 	path = lib/json
 	url = https://github.com/nlohmann/json.git
@@ -18,3 +8,14 @@
 	url = https://github.com/rmontanana/folding
 	main = main
 	update = merge
+[submodule "tests/lib/catch2"]
+	path = tests/lib/catch2
+	url = https://github.com/catchorg/Catch2.git
+	main = main
+	update = merge
+[submodule "tests/lib/Files"]
+	path = tests/lib/Files
+	url = https://github.com/rmontanana/ArffFiles
+[submodule "lib/mdlp"]
+	path = lib/mdlp
+	url = https://github.com/rmontanana/mdlp
.sonarlint/connectedMode.json (new file, 4 lines)
@@ -0,0 +1,4 @@
+{
+    "sonarCloudOrganization": "rmontanana",
+    "projectKey": "rmontanana_BayesNet"
+}
.vscode/launch.json (vendored, 8 lines changed)
@@ -5,7 +5,7 @@
         "type": "lldb",
         "request": "launch",
         "name": "sample",
-        "program": "${workspaceFolder}/build_release/sample/bayesnet_sample",
+        "program": "${workspaceFolder}/sample/build/bayesnet_sample",
         "args": [
             "${workspaceFolder}/tests/data/glass.arff"
         ]
@@ -14,11 +14,11 @@
         "type": "lldb",
         "request": "launch",
         "name": "test",
-        "program": "${workspaceFolder}/build_debug/tests/TestBayesNet",
+        "program": "${workspaceFolder}/build_Debug/tests/TestBayesNet",
         "args": [
-            "Block Update"
+            "[XBAODE]"
         ],
-        "cwd": "${workspaceFolder}/build_debug/tests"
+        "cwd": "${workspaceFolder}/build_Debug/tests"
     },
     {
         "name": "(gdb) Launch",
CHANGELOG.md (77 lines changed)
@@ -5,6 +5,78 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+## [1.0.7] 2025-03-16
+
+### Added
+
+- A new hyperparameter to the BoostAODE class, *alphablock*, to control the way α is computed, with the last model or with the ensemble built so far. Default value is *false*.
+- A new hyperparameter to the SPODE class, *parent*, to set the root node of the model. If no value is set the root parameter of the constructor is used.
+- A new hyperparameter to the TAN class, *parent*, to set the root node of the model. If not set the first feature is used as root.
+- A new model named XSPODE, an optimized for speed averaged one dependence estimator.
+- A new model named XSP2DE, an optimized for speed averaged two dependence estimator.
+- A new model named XBAODE, an optimized for speed BoostAODE model.
+- A new model named XBA2DE, an optimized for speed BoostA2DE model.
+
+### Internal
+
+- Optimize ComputeCPT method in the Node class.
+- Add methods getCount and getMaxCount to the CountingSemaphore class, returning the current count and the maximum count of threads respectively.
+
+### Changed
+
+- Hyperparameter *maxTolerance* in the BoostAODE class is now in [1, 6] range (it was in [1, 4] range before).
+
+## [1.0.6] 2024-11-23
+
+### Fixed
+
+- Prevent existing edges from being added to the network in the `add_edge` method.
+- Don't allow nodes or edges to be added to already fitted networks.
+- Number of threads spawned.
+- Network class tests.
+
+### Added
+
+- Library logo generated with <https://openart.ai> to README.md.
+- Link to the coverage report in the README.md coverage label.
+- *convergence_best* hyperparameter to the BoostAODE class, to control the way the prior accuracy is computed if convergence is set. Default value is *false*.
+- SPnDE model.
+- A2DE model.
+- BoostA2DE model.
+- A2DE & SPnDE tests.
+- Add tests to reach 99% of coverage.
+- Add tests to check the correct version of the mdlp, folding and json libraries.
+- Library documentation generated with Doxygen.
+- Link to documentation in the README.md.
+- Three types of smoothing of the Bayesian network: ORIGINAL, LAPLACE and CESTNIK.
+
+### Internal
+
+- Fixed doxygen optional dependency.
+- Add env parallel variable to Makefile.
+- Add CountingSemaphore class to manage the number of threads spawned.
+- Ignore CUDA language in CMake CodeCoverage module.
+- Update mdlp library as a git submodule.
+- Create library ShuffleArffFile to limit the number of samples with a parameter and shuffle them.
+- Refactor catch2 library location to test/lib.
+- Refactor loadDataset function in tests.
+- Remove conditionalEdgeWeights method in BayesMetrics.
+- Refactor Coverage Report generation.
+- Add devcontainer to work on Apple silicon.
+- Change build cmake folder names to Debug & Release.
+- Add a Makefile target (doc) to generate the documentation.
+- Add a Makefile target (doc-install) to install the documentation.
+
+### Libraries versions
+
+- mdlp: 2.0.1
+- Folding: 1.1.0
+- json: 3.11
+- ArffFiles: 1.1.0
+
 ## [1.0.5] 2024-04-20
 
 ### Added
@@ -25,6 +97,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - The worse model count in BoostAODE is reset to 0 every time a new model produces better accuracy, so the tolerance of the model is meant to be the number of **consecutive** models that produce worse accuracy.
 - Default hyperparameter values in BoostAODE: bisection is true, maxTolerance is 3, convergence is true.
 
+### Removed
+
+- The 'predict_single' hyperparameter from the BoostAODE class.
+- The 'repeatSparent' hyperparameter from the BoostAODE class.
+
 ## [1.0.4] 2024-03-06
 
 ### Added
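The 1.0.7 hyperparameters above are plain JSON keys consumed by each classifier's setHyperparameters override (the TAN and SPODE overrides appear later in this diff). A minimal sketch of how a caller sets one; the BoostAODE header path is an assumption drawn from the bayesnet/ layout and is not shown in this diff:

    #include <nlohmann/json.hpp>
    #include "bayesnet/ensembles/BoostAODE.h" // path assumed, not shown in this diff

    int main()
    {
        bayesnet::BoostAODE clf;
        // *alphablock* selects whether alpha is computed with the last model or with
        // the ensemble built so far (see the [1.0.7] Added entry); default is false.
        clf.setHyperparameters(nlohmann::json{ { "alphablock", true } });
        return 0;
    }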
CMakeLists.txt (file name inferred from the project() command)
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.20)
 
 project(BayesNet
-    VERSION 1.0.5
+    VERSION 1.0.7
     DESCRIPTION "Bayesian Network and basic classifiers Library."
     HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
     LANGUAGES CXX
@@ -25,8 +25,12 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -O0 -g")
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -fno-elide-constructors")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
+if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
+endif()
 
 # Options
 # -------
 option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
@@ -45,11 +49,12 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug")
     set(CODE_COVERAGE ON)
 endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
 
+get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
+message(STATUS "Languages=${LANGUAGES}")
 if (CODE_COVERAGE)
     enable_testing()
     include(CodeCoverage)
-    MESSAGE("Code coverage enabled")
+    MESSAGE(STATUS "Code coverage enabled")
     SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
 endif (CODE_COVERAGE)
 
@@ -59,21 +64,21 @@ endif (ENABLE_CLANG_TIDY)
 
 # External libraries - dependencies of BayesNet
 # ---------------------------------------------
-# include(FetchContent)
-add_git_submodule("lib/mdlp")
 add_git_submodule("lib/json")
+add_git_submodule("lib/mdlp")
 
 # Subdirectories
 # --------------
 add_subdirectory(config)
 add_subdirectory(lib/Files)
 add_subdirectory(bayesnet)
 
 # Testing
 # -------
 if (ENABLE_TESTING)
-    MESSAGE("Testing enabled")
-    add_git_submodule("lib/catch2")
+    MESSAGE(STATUS "Testing enabled")
+    add_subdirectory(tests/lib/catch2)
     include(CTest)
     add_subdirectory(tests)
 endif (ENABLE_TESTING)
@@ -86,3 +91,18 @@ install(TARGETS BayesNet
         CONFIGURATIONS Release)
 install(DIRECTORY bayesnet/ DESTINATION include/bayesnet FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h")
 install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h DESTINATION include/bayesnet CONFIGURATIONS Release)
+
+# Documentation
+# -------------
+find_package(Doxygen)
+if (Doxygen_FOUND)
+    set(DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs)
+    set(doxyfile_in ${DOC_DIR}/Doxyfile.in)
+    set(doxyfile ${DOC_DIR}/Doxyfile)
+    configure_file(${doxyfile_in} ${doxyfile} @ONLY)
+    doxygen_add_docs(doxygen
+        WORKING_DIRECTORY ${DOC_DIR}
+        CONFIG_FILE ${doxyfile})
+else (Doxygen_FOUND)
+    MESSAGE("* Doxygen not found")
+endif (Doxygen_FOUND)
Makefile (103 lines changed)
@@ -1,16 +1,22 @@
 SHELL := /bin/bash
 .DEFAULT_GOAL := help
-.PHONY: viewcoverage coverage setup help install uninstall diagrams buildr buildd test clean debug release sample updatebadge
+.PHONY: viewcoverage coverage setup help install uninstall diagrams buildr buildd test clean debug release sample updatebadge doc doc-install
 
-f_release = build_release
-f_debug = build_debug
+f_release = build_Release
+f_debug = build_Debug
 f_diagrams = diagrams
 app_targets = BayesNet
 test_targets = TestBayesNet
 clang-uml = clang-uml
 plantuml = plantuml
+lcov = lcov
+genhtml = genhtml
 dot = dot
-n_procs = -j 16
+docsrcdir = docs/manual
+mansrcdir = docs/man3
+mandestdir = /usr/local/share/man
+sed_command_link = 's/e">LCOV -/e"><a href="https:\/\/rmontanana.github.io\/bayesnet">Back to manual<\/a> LCOV -/g'
+sed_command_diagram = 's/Diagram"/Diagram" width="100%" height="100%" /g'
 
 define ClearTests
 	@for t in $(test_targets); do \
@@ -37,7 +43,7 @@ setup: ## Install dependencies for tests and coverage
 	fi
 	@echo "* You should install plantuml & graphviz for the diagrams"
 
-diagrams: ## Create an UML class diagram & depnendency of the project (diagrams/BayesNet.png)
+diagrams: ## Create an UML class diagram & dependency of the project (diagrams/BayesNet.png)
 	@which $(plantuml) || (echo ">>> Please install plantuml"; exit 1)
 	@which $(dot) || (echo ">>> Please install graphviz"; exit 1)
 	@which $(clang-uml) || (echo ">>> Please install clang-uml"; exit 1)
@@ -52,10 +58,10 @@ diagrams: ## Create an UML class diagram & dependency of the project (diagrams/
 	@$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg
 
 buildd: ## Build the debug targets
-	cmake --build $(f_debug) -t $(app_targets) $(n_procs)
+	cmake --build $(f_debug) -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
 
 buildr: ## Build the release targets
-	cmake --build $(f_release) -t $(app_targets) $(n_procs)
+	cmake --build $(f_release) -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
 
 clean: ## Clean the tests info
 	@echo ">>> Cleaning Debug BayesNet tests...";
@@ -91,15 +97,23 @@ fname = "tests/data/iris.arff"
 sample: ## Build sample
 	@echo ">>> Building Sample...";
 	@if [ -d ./sample/build ]; then rm -rf ./sample/build; fi
-	@cd sample && cmake -B build -S . && cmake --build build -t bayesnet_sample
+	@cd sample && cmake -B build -S . -D CMAKE_BUILD_TYPE=Debug && cmake --build build -t bayesnet_sample
 	sample/build/bayesnet_sample $(fname)
 	@echo ">>> Done";
 
+fname = "tests/data/iris.arff"
+sample2: ## Build sample2
+	@echo ">>> Building Sample...";
+	@if [ -d ./sample/build ]; then rm -rf ./sample/build; fi
+	@cd sample && cmake -B build -S . -D CMAKE_BUILD_TYPE=Debug && cmake --build build -t bayesnet_sample_xspode
+	sample/build/bayesnet_sample_xspode $(fname)
+	@echo ">>> Done";
+
 opt = ""
 test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
-	@echo ">>> Running BayesNet & Platform tests...";
+	@echo ">>> Running BayesNet tests...";
 	@$(MAKE) clean
-	@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
+	@cmake --build $(f_debug) -t $(test_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
 	@for t in $(test_targets); do \
 		echo ">>> Running $$t...";\
 		if [ -f $(f_debug)/tests/$$t ]; then \
@@ -112,31 +126,70 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
 
 coverage: ## Run tests and generate coverage report (build/index.html)
 	@echo ">>> Building tests with coverage..."
 	@$(MAKE) test
-	@gcovr $(f_debug)/tests
-	@echo ">>> Done";
-
-viewcoverage: ## Run tests, generate coverage report and upload it to codecov (build/index.html)
-	@echo ">>> Building tests with coverage..."
-	@$(MAKE) coverage
+	@which $(lcov) || (echo ">>> Please install lcov"; exit 1)
 	@if [ ! -f $(f_debug)/tests/coverage.info ] ; then $(MAKE) test ; fi
 	@echo ">>> Building report..."
 	@cd $(f_debug)/tests; \
-	lcov --directory . --capture --output-file coverage.info >/dev/null 2>&1; \
-	lcov --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
-	lcov --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
-	lcov --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
-	lcov --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
-	lcov --remove coverage.info 'bayesnet/utils/loguru.*' --output-file coverage.info >/dev/null 2>&1; \
-	genhtml coverage.info --output-directory coverage >/dev/null 2>&1;
+	$(lcov) --directory CMakeFiles --capture --demangle-cpp --ignore-errors source,source --output-file coverage.info >/dev/null 2>&1; \
+	$(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
+	$(lcov) --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
+	$(lcov) --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
+	$(lcov) --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
+	$(lcov) --remove coverage.info 'bayesnet/utils/loguru.*' --ignore-errors unused --output-file coverage.info >/dev/null 2>&1; \
+	$(lcov) --remove coverage.info '/opt/miniconda/*' --ignore-errors unused --output-file coverage.info >/dev/null 2>&1; \
+	$(lcov) --summary coverage.info
+	@$(MAKE) updatebadge
 	@xdg-open $(f_debug)/tests/coverage/index.html || open $(f_debug)/tests/coverage/index.html 2>/dev/null
 	@echo ">>> Done";
 
+viewcoverage: ## View the html coverage report
+	@which $(genhtml) >/dev/null || (echo ">>> Please install lcov (genhtml not found)"; exit 1)
+	@if [ ! -d $(docsrcdir)/coverage ]; then mkdir -p $(docsrcdir)/coverage; fi
+	@if [ ! -f $(f_debug)/tests/coverage.info ]; then \
+		echo ">>> No coverage.info file found. Run make coverage first!"; \
+		exit 1; \
+	fi
+	@$(genhtml) $(f_debug)/tests/coverage.info --demangle-cpp --output-directory $(docsrcdir)/coverage --title "BayesNet Coverage Report" -s -k -f --legend >/dev/null 2>&1;
+	@xdg-open $(docsrcdir)/coverage/index.html || open $(docsrcdir)/coverage/index.html 2>/dev/null
+	@echo ">>> Done";
+
 updatebadge: ## Update the coverage badge in README.md
+	@which python || (echo ">>> Please install python"; exit 1)
+	@if [ ! -f $(f_debug)/tests/coverage.info ]; then \
+		echo ">>> No coverage.info file found. Run make coverage first!"; \
+		exit 1; \
+	fi
+	@echo ">>> Updating coverage badge..."
+	@env python update_coverage.py $(f_debug)/tests
+	@echo ">>> Done";
+
+doc: ## Generate documentation
+	@echo ">>> Generating documentation..."
+	@cmake --build $(f_release) -t doxygen
+	@cp -rp diagrams $(docsrcdir)
+	@
+	@if [ "$(shell uname)" = "Darwin" ]; then \
+		sed -i "" $(sed_command_link) $(docsrcdir)/coverage/index.html ; \
+		sed -i "" $(sed_command_diagram) $(docsrcdir)/index.html ; \
+	else \
+		sed -i $(sed_command_link) $(docsrcdir)/coverage/index.html ; \
+		sed -i $(sed_command_diagram) $(docsrcdir)/index.html ; \
+	fi
+	@echo ">>> Done";
+
+docdir = ""
+doc-install: ## Install documentation
+	@echo ">>> Installing documentation..."
+	@if [ "$(docdir)" = "" ]; then \
+		echo "docdir parameter has to be set when calling doc-install, i.e. docdir=../bayesnet_help"; \
+		exit 1; \
+	fi
+	@if [ ! -d $(docdir) ]; then \
+		@$(MAKE) doc; \
+	fi
+	@cp -rp $(docsrcdir)/* $(docdir)
+	@sudo cp -rp $(mansrcdir) $(mandestdir)
+	@echo ">>> Done";
 
 help: ## Show help message
 	@IFS=$$'\n' ; \
 	help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
README.md (55 lines changed)
@@ -1,13 +1,16 @@
-# BayesNet
+# <img src="logo.png" alt="logo" width="50"/> BayesNet
 
 [Badge row; only the link targets survived extraction: MIT license (https://opensource.org/licenses/MIT), Codacy grade (https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade), SonarCloud security and reliability ratings (https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet), coverage report (html/index.html), DOI (https://doi.org/10.5281/zenodo.14210344)]
 
-Bayesian Network Classifiers using libtorch from scratch
+Bayesian Network Classifiers library
 
 ## Dependencies
 
@@ -15,11 +18,17 @@ The only external dependency is [libtorch](https://pytorch.org/cppdocs/installin
 
 ```bash
 wget https://download.pytorch.org/libtorch/nightly/cpu/libtorch-shared-with-deps-latest.zip
-unzip libtorch-shared-with-deps-latest.zips
+unzip libtorch-shared-with-deps-latest.zip
 ```
 
 ## Setup
 
+### Getting the code
+
+```bash
+git clone --recurse-submodules https://github.com/doctorado-ml/bayesnet
+```
+
 ### Release
 
 ```bash
@@ -33,7 +42,13 @@ sudo make install
 ```bash
 make debug
+make test
 ```
 
+### Coverage
+
+```bash
+make coverage
+make viewcoverage
+```
+
 ### Sample app
 
@@ -47,7 +62,37 @@ make sample fname=tests/data/glass.arff
 
 ## Models
 
-### [BoostAODE](docs/BoostAODE.md)
+#### - TAN
+
+#### - KDB
+
+#### - SPODE
+
+#### - SPnDE
+
+#### - AODE
+
+#### - A2DE
+
+#### - [BoostAODE](docs/BoostAODE.md)
+
+#### - BoostA2DE
+
+### With Local Discretization
+
+#### - TANLd
+
+#### - KDBLd
+
+#### - SPODELd
+
+#### - AODELd
+
+## Documentation
+
+### [Manual](https://rmontanana.github.io/bayesnet/)
+
+### [Coverage report](https://rmontanana.github.io/bayesnet/coverage/index.html)
+
 ## Diagrams
bayesnet/BaseClassifier.h (file name inferred from the class declared)
@@ -8,17 +8,19 @@
 #include <vector>
 #include <torch/torch.h>
 #include <nlohmann/json.hpp>
+#include "bayesnet/network/Network.h"
 
 namespace bayesnet {
     enum status_t { NORMAL, WARNING, ERROR };
     class BaseClassifier {
     public:
-        // X is nxm std::vector, y is nx1 std::vector
-        virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
-        // X is nxm tensor, y is nx1 tensor
-        virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
-        virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
-        virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) = 0;
         virtual ~BaseClassifier() = default;
+        // X is nxm std::vector, y is nx1 std::vector
+        virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
+        // X is nxm tensor, y is nx1 tensor
+        virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
+        virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
+        virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
        torch::Tensor virtual predict(torch::Tensor& X) = 0;
        std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
        torch::Tensor virtual predict_proba(torch::Tensor& X) = 0;
@@ -26,8 +28,8 @@ namespace bayesnet {
        status_t virtual getStatus() const = 0;
        float virtual score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0;
        float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
-       int virtual getNumberOfNodes()const = 0;
-       int virtual getNumberOfEdges()const = 0;
+       int virtual getNumberOfNodes() const = 0;
+       int virtual getNumberOfEdges() const = 0;
        int virtual getNumberOfStates() const = 0;
        int virtual getClassNumStates() const = 0;
        std::vector<std::string> virtual show() const = 0;
@@ -35,11 +37,13 @@ namespace bayesnet {
        virtual std::string getVersion() = 0;
        std::vector<std::string> virtual topological_order() = 0;
        std::vector<std::string> virtual getNotes() const = 0;
-       std::string virtual dump_cpt()const = 0;
+       std::string virtual dump_cpt() const = 0;
        virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
        std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
    protected:
-       virtual void trainModel(const torch::Tensor& weights) = 0;
+       virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
        std::vector<std::string> validHyperparameters;
+       std::vector<std::string> notes; // Used to store messages occurred during the fit process
+       status_t status = NORMAL;
    };
 }
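Every fit overload now threads a Smoothing_t value down to Network::fit. A sketch of the call a client makes after this change, assuming Smoothing_t is declared in bayesnet/network/Network.h (the header BaseClassifier.h now includes) with the ORIGINAL, LAPLACE and CESTNIK values the CHANGELOG lists; data shapes follow the comments in the header (X is nxm, y is nx1):

    #include <map>
    #include <string>
    #include <vector>
    #include "bayesnet/classifiers/TAN.h"

    void train(std::vector<std::vector<int>>& X, std::vector<int>& y,
               const std::vector<std::string>& features, const std::string& className,
               std::map<std::string, std::vector<int>>& states)
    {
        bayesnet::TAN clf;
        // The smoothing strategy is now an explicit, mandatory last argument.
        clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::ORIGINAL);
    }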
bayesnet/CMakeLists.txt (file name inferred; it builds the BayesNet library target)
@@ -1,6 +1,6 @@
 include_directories(
-    ${BayesNet_SOURCE_DIR}/lib/mdlp
     ${BayesNet_SOURCE_DIR}/lib/Files
     ${BayesNet_SOURCE_DIR}/lib/log
+    ${BayesNet_SOURCE_DIR}/lib/mdlp/src
     ${BayesNet_SOURCE_DIR}/lib/folding
     ${BayesNet_SOURCE_DIR}/lib/json/include
     ${BayesNet_SOURCE_DIR}
@@ -10,4 +10,4 @@ include_directories(
 file(GLOB_RECURSE Sources "*.cc")
 
 add_library(BayesNet ${Sources})
-target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
+target_link_libraries(BayesNet fimdlp "${TORCH_LIBRARIES}")
bayesnet/classifiers/Classifier.cc (file name inferred from the class defined)
@@ -10,8 +10,7 @@
 
 namespace bayesnet {
     Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
-    const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
-    Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
+    Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
     {
         this->features = features;
         this->className = className;
@@ -23,7 +22,7 @@ namespace bayesnet {
         metrics = Metrics(dataset, features, className, n_classes);
         model.initialize();
         buildModel(weights);
-        trainModel(weights);
+        trainModel(weights, smoothing);
         fitted = true;
         return *this;
     }
@@ -41,20 +40,20 @@ namespace bayesnet {
             throw std::runtime_error(oss.str());
         }
     }
-    void Classifier::trainModel(const torch::Tensor& weights)
+    void Classifier::trainModel(const torch::Tensor& weights, Smoothing_t smoothing)
     {
-        model.fit(dataset, weights, features, className, states);
+        model.fit(dataset, weights, features, className, states, smoothing);
     }
     // X is nxm where n is the number of features and m the number of samples
-    Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
+    Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
     {
         dataset = X;
         buildDataset(y);
         const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
-        return build(features, className, states, weights);
+        return build(features, className, states, weights, smoothing);
     }
     // X is nxm where n is the number of features and m the number of samples
-    Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
+    Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
     {
         dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
         for (int i = 0; i < X.size(); ++i) {
@@ -63,18 +62,18 @@ namespace bayesnet {
         auto ytmp = torch::tensor(y, torch::kInt32);
         buildDataset(ytmp);
         const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
-        return build(features, className, states, weights);
+        return build(features, className, states, weights, smoothing);
     }
-    Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
+    Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
     {
         this->dataset = dataset;
         const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
-        return build(features, className, states, weights);
+        return build(features, className, states, weights, smoothing);
     }
-    Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
+    Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
     {
         this->dataset = dataset;
-        return build(features, className, states, weights);
+        return build(features, className, states, weights, smoothing);
     }
     void Classifier::checkFitParameters()
     {
bayesnet/classifiers/Classifier.h (file name inferred from the include guard)
@@ -8,7 +8,6 @@
 #define CLASSIFIER_H
 #include <torch/torch.h>
 #include "bayesnet/utils/BayesMetrics.h"
-#include "bayesnet/network/Network.h"
 #include "bayesnet/BaseClassifier.h"
 
 namespace bayesnet {
@@ -16,10 +15,10 @@ namespace bayesnet {
    public:
        Classifier(Network model);
        virtual ~Classifier() = default;
-       Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
-       Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
-       Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
-       Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override;
+       Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+       Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+       Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+       Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override;
        void addNodes();
        int getNumberOfNodes() const override;
        int getNumberOfEdges() const override;
@@ -47,14 +46,13 @@ namespace bayesnet {
        std::string className;
        std::map<std::string, std::vector<int>> states;
        torch::Tensor dataset; // (n+1)xm tensor
-       status_t status = NORMAL;
-       std::vector<std::string> notes; // Used to store messages occurred during the fit process
        void checkFitParameters();
        virtual void buildModel(const torch::Tensor& weights) = 0;
-       void trainModel(const torch::Tensor& weights) override;
+       void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
        void buildDataset(torch::Tensor& y);
+       const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
    private:
-       Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
+       Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
    };
 }
 #endif
bayesnet/classifiers/KDB.cc (file name inferred; the include it gains is the one KDB.h drops below)
@@ -3,7 +3,7 @@
 // SPDX-FileType: SOURCE
 // SPDX-License-Identifier: MIT
 // ***************************************************************
 
+#include "bayesnet/utils/bayesnetUtils.h"
 #include "KDB.h"
 
 namespace bayesnet {
bayesnet/classifiers/KDB.h (file name inferred from the include guard)
@@ -7,15 +7,14 @@
 #ifndef KDB_H
 #define KDB_H
 #include <torch/torch.h>
-#include "bayesnet/utils/bayesnetUtils.h"
 #include "Classifier.h"
 namespace bayesnet {
    class KDB : public Classifier {
    private:
        int k;
        float theta;
-       void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
    protected:
+       void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
        void buildModel(const torch::Tensor& weights) override;
    public:
        explicit KDB(int k, float theta = 0.03);
bayesnet/classifiers/KDBLd.cc (file name inferred from the class defined)
@@ -8,7 +8,7 @@
 
 namespace bayesnet {
    KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
-   KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
+   KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
    {
        checkInput(X_, y_);
        features = features_;
@@ -19,7 +19,7 @@ namespace bayesnet {
        states = fit_local_discretization(y);
        // We have discretized the input data
        // 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
-       KDB::fit(dataset, features, className, states);
+       KDB::fit(dataset, features, className, states, smoothing);
        states = localDiscretizationProposal(states, model);
        return *this;
    }
bayesnet/classifiers/KDBLd.h (file name inferred from the class declared)
@@ -15,7 +15,7 @@ namespace bayesnet {
    public:
        explicit KDBLd(int k);
        virtual ~KDBLd() = default;
-       KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
+       KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
        std::vector<std::string> graph(const std::string& name = "KDB") const override;
        torch::Tensor predict(torch::Tensor& X) override;
        static inline std::string version() { return "0.0.1"; };
bayesnet/classifiers/Proposal.cc (file name inferred from the class defined)
@@ -4,7 +4,6 @@
 // SPDX-License-Identifier: MIT
 // ***************************************************************
 
-#include <ArffFiles.h>
 #include "Proposal.h"
 
 namespace bayesnet {
@@ -54,8 +53,7 @@ namespace bayesnet {
                yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
            }
        }
-       auto arff = ArffFiles();
-       auto yxv = arff.factorize(yJoinParents);
+       auto yxv = factorize(yJoinParents);
        auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
        auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
        discretizers[feature]->fit(xvf, yxv);
@@ -72,7 +70,7 @@ namespace bayesnet {
            states[pFeatures[index]] = xStates;
        }
        const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
-       model.fit(pDataset, weights, pFeatures, pClassName, states);
+       model.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::ORIGINAL);
        }
        return states;
    }
@@ -113,4 +111,19 @@ namespace bayesnet {
        }
        return Xtd;
    }
+   std::vector<int> Proposal::factorize(const std::vector<std::string>& labels_t)
+   {
+       std::vector<int> yy;
+       yy.reserve(labels_t.size());
+       std::map<std::string, int> labelMap;
+       int i = 0;
+       for (const std::string& label : labels_t) {
+           if (labelMap.find(label) == labelMap.end()) {
+               labelMap[label] = i++;
+               bool allDigits = std::all_of(label.begin(), label.end(), ::isdigit);
+           }
+           yy.push_back(labelMap[label]);
+       }
+       return yy;
+   }
 }
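The new Proposal::factorize replaces the ArffFiles dependency removed above: it assigns each distinct label a consecutive integer code in order of first appearance. A worked example with hypothetical labels:

    // {"5", "2", "5", "7"} -> {0, 1, 0, 2}
    // "5" is seen first and gets code 0, then "2" gets 1 and "7" gets 2;
    // repeated labels reuse the code already stored in labelMap.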
bayesnet/classifiers/Proposal.h (file name inferred from the members declared)
@@ -27,6 +27,7 @@ namespace bayesnet {
        torch::Tensor y; // y discrete nx1 tensor
        map<std::string, mdlp::CPPFImdlp*> discretizers;
    private:
+       std::vector<int> factorize(const std::vector<std::string>& labels_t);
        torch::Tensor& pDataset; // (n+1)xm tensor
        std::vector<std::string>& pFeatures;
        std::string& pClassName;
bayesnet/classifiers/SPODE.cc (file name inferred from the class defined)
@@ -8,14 +8,29 @@
 
 namespace bayesnet {
 
-   SPODE::SPODE(int root) : Classifier(Network()), root(root) {}
+   SPODE::SPODE(int root) : Classifier(Network()), root(root)
+   {
+       validHyperparameters = { "parent" };
+   }
+
+   void SPODE::setHyperparameters(const nlohmann::json& hyperparameters_)
+   {
+       auto hyperparameters = hyperparameters_;
+       if (hyperparameters.contains("parent")) {
+           root = hyperparameters["parent"];
+           hyperparameters.erase("parent");
+       }
+       Classifier::setHyperparameters(hyperparameters);
+   }
    void SPODE::buildModel(const torch::Tensor& weights)
    {
        // 0. Add all nodes to the model
        addNodes();
        // 1. Add edges from the class node to all other nodes
        // 2. Add edges from the root node to all other nodes
+       if (root >= static_cast<int>(features.size())) {
+           throw std::invalid_argument("The parent node is not in the dataset");
+       }
        for (int i = 0; i < static_cast<int>(features.size()); ++i) {
            model.addEdge(className, features[i]);
            if (i != root) {
bayesnet/classifiers/SPODE.h (file name inferred from the class declared)
@@ -10,14 +10,15 @@
 
 namespace bayesnet {
    class SPODE : public Classifier {
-   private:
-       int root;
-   protected:
-       void buildModel(const torch::Tensor& weights) override;
    public:
        explicit SPODE(int root);
        virtual ~SPODE() = default;
+       void setHyperparameters(const nlohmann::json& hyperparameters_) override;
        std::vector<std::string> graph(const std::string& name = "SPODE") const override;
+   protected:
+       void buildModel(const torch::Tensor& weights) override;
+   private:
+       int root;
    };
 }
 #endif
bayesnet/classifiers/SPODELd.cc (file name inferred from the class defined)
@@ -8,25 +8,25 @@
 
 namespace bayesnet {
    SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
-   SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
+   SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
    {
        checkInput(X_, y_);
        Xf = X_;
        y = y_;
-       return commonFit(features_, className_, states_);
+       return commonFit(features_, className_, states_, smoothing);
    }
 
-   SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
+   SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
    {
        if (!torch::is_floating_point(dataset)) {
            throw std::runtime_error("Dataset must be a floating point tensor");
        }
        Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
        y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
-       return commonFit(features_, className_, states_);
+       return commonFit(features_, className_, states_, smoothing);
    }
 
-   SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
+   SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
    {
        features = features_;
        className = className_;
@@ -34,7 +34,7 @@ namespace bayesnet {
        states = fit_local_discretization(y);
        // We have discretized the input data
        // 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
-       SPODE::fit(dataset, features, className, states);
+       SPODE::fit(dataset, features, className, states, smoothing);
        states = localDiscretizationProposal(states, model);
        return *this;
    }
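The dataset overload of SPODELd::fit requires a single floating-point tensor with the class values as the last row; otherwise it throws. A sketch of assembling such a tensor with hypothetical data, matching the Slice and index calls above:

    #include <torch/torch.h>

    torch::Tensor make_dataset()
    {
        // 4 hypothetical features x 100 samples, plus the class as the last row
        // (all float, since SPODELd::fit checks torch::is_floating_point).
        torch::Tensor Xf = torch::rand({ 4, 100 });
        torch::Tensor y = torch::randint(0, 3, { 1, 100 }).to(torch::kFloat32);
        return torch::cat({ Xf, y }, 0); // fit slices rows 0..n-2 as X, row n-1 as y
    }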
bayesnet/classifiers/SPODELd.h (file name inferred from the class declared)
@@ -14,10 +14,10 @@ namespace bayesnet {
    public:
        explicit SPODELd(int root);
        virtual ~SPODELd() = default;
-       SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
-       SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
-       SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states);
-       std::vector<std::string> graph(const std::string& name = "SPODE") const override;
+       SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+       SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+       SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
+       std::vector<std::string> graph(const std::string& name = "SPODELd") const override;
        torch::Tensor predict(torch::Tensor& X) override;
        static inline std::string version() { return "0.0.1"; };
    };
bayesnet/classifiers/SPnDE.cc (new file, 38 lines)
@@ -0,0 +1,38 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "SPnDE.h"
+
+namespace bayesnet {
+
+    SPnDE::SPnDE(std::vector<int> parents) : Classifier(Network()), parents(parents) {}
+
+    void SPnDE::buildModel(const torch::Tensor& weights)
+    {
+        // 0. Add all nodes to the model
+        addNodes();
+        std::vector<int> attributes;
+        for (int i = 0; i < static_cast<int>(features.size()); ++i) {
+            if (std::find(parents.begin(), parents.end(), i) == parents.end()) {
+                attributes.push_back(i);
+            }
+        }
+        // 1. Add edges from the class node to all other nodes
+        // 2. Add edges from the parents nodes to all other nodes
+        for (const auto& attribute : attributes) {
+            model.addEdge(className, features[attribute]);
+            for (const auto& root : parents) {
+
+                model.addEdge(features[root], features[attribute]);
+            }
+        }
+    }
+    std::vector<std::string> SPnDE::graph(const std::string& name) const
+    {
+        return model.graph(name);
+    }
+
+}
bayesnet/classifiers/SPnDE.h (new file, 26 lines)
@@ -0,0 +1,26 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef SPnDE_H
+#define SPnDE_H
+#include <vector>
+#include "Classifier.h"
+
+namespace bayesnet {
+    class SPnDE : public Classifier {
+    public:
+        explicit SPnDE(std::vector<int> parents);
+        virtual ~SPnDE() = default;
+        std::vector<std::string> graph(const std::string& name = "SPnDE") const override;
+    protected:
+        void buildModel(const torch::Tensor& weights) override;
+    private:
+        std::vector<int> parents;
+
+
+    };
+}
+#endif
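SPnDE generalizes SPODE to n super-parents: buildModel links the class to every non-parent feature and each parent to every non-parent feature. A construction sketch with hypothetical feature indices:

    #include "bayesnet/classifiers/SPnDE.h"

    void example()
    {
        // Features 0 and 3 act as super-parents; every remaining feature gets
        // the class and both parents as its parents in the network.
        bayesnet::SPnDE clf({ 0, 3 });
    }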
bayesnet/classifiers/TAN.cc
@@ -7,8 +7,20 @@
 #include "TAN.h"
 
 namespace bayesnet {
-    TAN::TAN() : Classifier(Network()) {}
+    TAN::TAN() : Classifier(Network())
+    {
+        validHyperparameters = { "parent" };
+    }
+
+    void TAN::setHyperparameters(const nlohmann::json& hyperparameters_)
+    {
+        auto hyperparameters = hyperparameters_;
+        if (hyperparameters.contains("parent")) {
+            parent = hyperparameters["parent"];
+            hyperparameters.erase("parent");
+        }
+        Classifier::setHyperparameters(hyperparameters);
+    }
     void TAN::buildModel(const torch::Tensor& weights)
     {
         // 0. Add all nodes to the model
@@ -23,7 +35,10 @@ namespace bayesnet {
             mi.push_back({ i, mi_value });
         }
         sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;});
-        auto root = mi[mi.size() - 1].first;
+        auto root = parent == -1 ? mi[mi.size() - 1].first : parent;
+        if (root >= static_cast<int>(features.size())) {
+            throw std::invalid_argument("The parent node is not in the dataset");
+        }
         // 2. Compute mutual information between each feature and the class
         auto weights_matrix = metrics.conditionalEdge(weights);
         // 3. Compute the maximum spanning tree
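The change above lets callers pin the root of the TAN tree instead of letting mutual information pick it. A hedged usage sketch, assuming the nlohmann::json-based hyperparameter interface shown in the hunk:

    #include <nlohmann/json.hpp>
    #include "bayesnet/classifiers/TAN.h"

    // Hypothetical: force feature 2 to be the root of the spanning tree.
    bayesnet::TAN clf;
    clf.setHyperparameters(nlohmann::json{ { "parent", 2 } });
    // With no "parent" key (or parent == -1), the feature with the highest
    // mutual information with the class is the root, as before; an
    // out-of-range index now throws std::invalid_argument.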
bayesnet/classifiers/TAN.h
@@ -9,13 +9,15 @@
 #include "Classifier.h"
 namespace bayesnet {
     class TAN : public Classifier {
-    private:
-    protected:
-        void buildModel(const torch::Tensor& weights) override;
     public:
         TAN();
         virtual ~TAN() = default;
+        void setHyperparameters(const nlohmann::json& hyperparameters_) override;
         std::vector<std::string> graph(const std::string& name = "TAN") const override;
+    protected:
+        void buildModel(const torch::Tensor& weights) override;
+    private:
+        int parent = -1;
     };
 }
 #endif
bayesnet/classifiers/TANLd.cc
@@ -8,7 +8,7 @@
 
 namespace bayesnet {
     TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
-    TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
+    TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
     {
         checkInput(X_, y_);
         features = features_;
@@ -19,7 +19,7 @@ namespace bayesnet {
         states = fit_local_discretization(y);
         // We have discretized the input data
         // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
-        TAN::fit(dataset, features, className, states);
+        TAN::fit(dataset, features, className, states, smoothing);
         states = localDiscretizationProposal(states, model);
         return *this;
bayesnet/classifiers/TANLd.h
@@ -15,10 +15,9 @@
     public:
         TANLd();
         virtual ~TANLd() = default;
-        TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
-        std::vector<std::string> graph(const std::string& name = "TAN") const override;
+        TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+        std::vector<std::string> graph(const std::string& name = "TANLd") const override;
         torch::Tensor predict(torch::Tensor& X) override;
-        static inline std::string version() { return "0.0.1"; };
     };
 }
 #endif // !TANLD_H
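A recurring theme in this changeset: fit gains a trailing Smoothing_t parameter so the smoothing scheme travels with the call instead of being fixed at construction. A sketch of a call site under that assumption (variable names hypothetical; the enum values and their effect on alpha come from the trainModel switches later in this diff):

    // Hypothetical call site: X, y, features, className and states prepared as
    // elsewhere in the library. Smoothing_t::ORIGINAL -> alpha = 1/m,
    // Smoothing_t::LAPLACE -> alpha = 1, anything else -> alpha = 0.
    bayesnet::TANLd model;
    model.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE);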
575  bayesnet/classifiers/XSP2DE.cc  Normal file
@@ -0,0 +1,575 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include "XSP2DE.h"
#include <pthread.h> // for pthread_setname_np on Linux
#include <cassert>
#include <cmath>
#include <limits>
#include <stdexcept>
#include <iostream>
#include "bayesnet/utils/TensorUtils.h"

namespace bayesnet {

    // --------------------------------------
    // Constructor
    // --------------------------------------
    XSp2de::XSp2de(int spIndex1, int spIndex2)
        : Classifier(Network())
        , superParent1_{ spIndex1 }
        , superParent2_{ spIndex2 }
        , nFeatures_{ 0 }
        , statesClass_{ 0 }
        , alpha_{ 1.0 }
        , initializer_{ 1.0 }
        , semaphore_{ CountingSemaphore::getInstance() }
    {
        validHyperparameters = { "parent1", "parent2" };
    }

    // --------------------------------------
    // setHyperparameters
    // --------------------------------------
    void XSp2de::setHyperparameters(const nlohmann::json &hyperparameters_)
    {
        auto hyperparameters = hyperparameters_;
        if (hyperparameters.contains("parent1")) {
            superParent1_ = hyperparameters["parent1"];
            hyperparameters.erase("parent1");
        }
        if (hyperparameters.contains("parent2")) {
            superParent2_ = hyperparameters["parent2"];
            hyperparameters.erase("parent2");
        }
        // Hand off anything else to the base Classifier
        Classifier::setHyperparameters(hyperparameters);
    }

    // --------------------------------------
    // fitx
    // --------------------------------------
    void XSp2de::fitx(torch::Tensor & X, torch::Tensor & y, torch::Tensor & weights_, const Smoothing_t smoothing)
    {
        m = X.size(1); // number of samples
        n = X.size(0); // number of features
        dataset = X;

        // Build the dataset in your environment if needed:
        buildDataset(y);

        // Construct the data structures needed for counting
        buildModel(weights_);

        // Accumulate counts & convert to probabilities
        trainModel(weights_, smoothing);
        fitted = true;
    }
    // --------------------------------------
    // buildModel
    // --------------------------------------
    void XSp2de::buildModel(const torch::Tensor &weights)
    {
        nFeatures_ = n;

        // Derive the number of states for each feature from the dataset:
        // states_[f] = max value in dataset[f] + 1.
        states_.resize(nFeatures_);
        for (int f = 0; f < nFeatures_; f++) {
            // This is naive: we take the max in feature f. You might adapt it for real data.
            states_[f] = dataset[f].max().item<int>() + 1;
        }
        // Class states:
        statesClass_ = dataset[-1].max().item<int>() + 1;

        // Initialize the class counts
        classCounts_.resize(statesClass_, 0.0);

        // For sp1 -> p(sp1Val | c)
        sp1FeatureCounts_.resize(states_[superParent1_] * statesClass_, 0.0);

        // For sp2 -> p(sp2Val | c)
        sp2FeatureCounts_.resize(states_[superParent2_] * statesClass_, 0.0);

        // For child features, we store p(childVal | c, sp1Val, sp2Val).
        // childCounts_ holds the raw counts, gathered in one big vector,
        // so we need an offset for each feature.
        childOffsets_.resize(nFeatures_, -1);

        int totalSize = 0;
        for (int f = 0; f < nFeatures_; f++) {
            if (f == superParent1_ || f == superParent2_) {
                // skip the superparents
                childOffsets_[f] = -1;
                continue;
            }
            childOffsets_[f] = totalSize;
            // block size for a single child f:
            //   states_[f] * statesClass_ * states_[superParent1_] * states_[superParent2_]
            totalSize += (states_[f] * statesClass_
                          * states_[superParent1_]
                          * states_[superParent2_]);
        }
        childCounts_.resize(totalSize, 0.0);
    }
    // --------------------------------------
    // trainModel
    // --------------------------------------
    void XSp2de::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing)
    {
        // Accumulate raw counts
        for (int i = 0; i < m; i++) {
            std::vector<int> instance(nFeatures_ + 1);
            for (int f = 0; f < nFeatures_; f++) {
                instance[f] = dataset[f][i].item<int>();
            }
            instance[nFeatures_] = dataset[-1][i].item<int>(); // class
            double w = weights[i].item<double>();
            addSample(instance, w);
        }

        // Choose alpha based on the smoothing scheme:
        switch (smoothing) {
            case bayesnet::Smoothing_t::ORIGINAL:
                alpha_ = 1.0 / m;
                break;
            case bayesnet::Smoothing_t::LAPLACE:
                alpha_ = 1.0;
                break;
            default:
                alpha_ = 0.0; // no smoothing
        }

        // Large initializer factor for numerical stability
        initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_);

        // Convert raw counts to probabilities
        computeProbabilities();
    }

    // --------------------------------------
    // addSample
    // --------------------------------------
    void XSp2de::addSample(const std::vector<int> &instance, double weight)
    {
        if (weight <= 0.0)
            return;

        int c = instance.back();
        // increment classCounts
        classCounts_[c] += weight;

        int sp1Val = instance[superParent1_];
        int sp2Val = instance[superParent2_];

        // p(sp1|c)
        sp1FeatureCounts_[sp1Val * statesClass_ + c] += weight;

        // p(sp2|c)
        sp2FeatureCounts_[sp2Val * statesClass_ + c] += weight;

        // p(childVal | c, sp1Val, sp2Val)
        for (int f = 0; f < nFeatures_; f++) {
            if (f == superParent1_ || f == superParent2_)
                continue;

            int childVal = instance[f];
            int offset = childOffsets_[f];
            // block layout:
            //   offset + sp1Val * (states_[sp2] * states_[f] * statesClass_)
            //          + sp2Val * (states_[f] * statesClass_)
            //          + childVal * statesClass_
            //          + c
            int blockSizeSp2 = states_[superParent2_] * states_[f] * statesClass_;
            int blockSizeChild = states_[f] * statesClass_;

            int idx = offset
                      + sp1Val * blockSizeSp2
                      + sp2Val * blockSizeChild
                      + childVal * statesClass_
                      + c;
            childCounts_[idx] += weight;
        }
    }
    // --------------------------------------
    // computeProbabilities
    // --------------------------------------
    void XSp2de::computeProbabilities()
    {
        double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);

        // classPriors_
        classPriors_.resize(statesClass_, 0.0);
        if (totalCount <= 0.0) {
            // fallback => uniform
            double unif = 1.0 / static_cast<double>(statesClass_);
            for (int c = 0; c < statesClass_; c++) {
                classPriors_[c] = unif;
            }
        } else {
            for (int c = 0; c < statesClass_; c++) {
                classPriors_[c] = (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
            }
        }

        // p(sp1Val | c)
        sp1FeatureProbs_.resize(sp1FeatureCounts_.size());
        int sp1Card = states_[superParent1_];
        for (int spVal = 0; spVal < sp1Card; spVal++) {
            for (int c = 0; c < statesClass_; c++) {
                double denom = classCounts_[c] + alpha_ * sp1Card;
                double num = sp1FeatureCounts_[spVal * statesClass_ + c] + alpha_;
                sp1FeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
            }
        }

        // p(sp2Val | c)
        sp2FeatureProbs_.resize(sp2FeatureCounts_.size());
        int sp2Card = states_[superParent2_];
        for (int spVal = 0; spVal < sp2Card; spVal++) {
            for (int c = 0; c < statesClass_; c++) {
                double denom = classCounts_[c] + alpha_ * sp2Card;
                double num = sp2FeatureCounts_[spVal * statesClass_ + c] + alpha_;
                sp2FeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
            }
        }

        // p(childVal | c, sp1Val, sp2Val)
        childProbs_.resize(childCounts_.size());
        int offset = 0;
        for (int f = 0; f < nFeatures_; f++) {
            if (f == superParent1_ || f == superParent2_)
                continue;

            int fCard = states_[f];
            int sp1Card_ = states_[superParent1_];
            int sp2Card_ = states_[superParent2_];
            int childBlockSizeSp2 = sp2Card_ * fCard * statesClass_;
            int childBlockSizeF = fCard * statesClass_;

            int blockSize = fCard * sp1Card_ * sp2Card_ * statesClass_;
            for (int sp1Val = 0; sp1Val < sp1Card_; sp1Val++) {
                for (int sp2Val = 0; sp2Val < sp2Card_; sp2Val++) {
                    for (int childVal = 0; childVal < fCard; childVal++) {
                        for (int c = 0; c < statesClass_; c++) {
                            // index into childCounts_
                            int idx = offset
                                      + sp1Val * childBlockSizeSp2
                                      + sp2Val * childBlockSizeF
                                      + childVal * statesClass_
                                      + c;
                            double num = childCounts_[idx] + alpha_;
                            // The denominator is the count of (sp1Val, sp2Val, c) plus alpha * fCard;
                            // we obtain it by summing over the childVal dimension of this block.
                            double sumSp1Sp2C = 0.0;
                            for (int cv = 0; cv < fCard; cv++) {
                                int idx2 = offset
                                           + sp1Val * childBlockSizeSp2
                                           + sp2Val * childBlockSizeF
                                           + cv * statesClass_ + c;
                                sumSp1Sp2C += childCounts_[idx2];
                            }
                            double denom = sumSp1Sp2C + alpha_ * fCard;
                            childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
                        }
                    }
                }
            }
            offset += blockSize;
        }
    }
    // --------------------------------------
    // predict_proba (single instance)
    // --------------------------------------
    std::vector<double> XSp2de::predict_proba(const std::vector<int> &instance) const
    {
        if (!fitted) {
            throw std::logic_error(CLASSIFIER_NOT_FITTED);
        }
        std::vector<double> probs(statesClass_, 0.0);

        int sp1Val = instance[superParent1_];
        int sp2Val = instance[superParent2_];

        // Start with p(c) * p(sp1Val | c) * p(sp2Val | c)
        for (int c = 0; c < statesClass_; c++) {
            double pC = classPriors_[c];
            double pSp1C = sp1FeatureProbs_[sp1Val * statesClass_ + c];
            double pSp2C = sp2FeatureProbs_[sp2Val * statesClass_ + c];
            probs[c] = pC * pSp1C * pSp2C * initializer_;
        }

        // Multiply by each child feature f
        int offset = 0;
        for (int f = 0; f < nFeatures_; f++) {
            if (f == superParent1_ || f == superParent2_)
                continue;

            int valF = instance[f];
            int fCard = states_[f];
            int sp1Card = states_[superParent1_];
            int sp2Card = states_[superParent2_];
            int blockSizeSp2 = sp2Card * fCard * statesClass_;
            int blockSizeF = fCard * statesClass_;

            // base index into childProbs_ for this child and (sp1Val, sp2Val)
            int base = offset
                       + sp1Val * blockSizeSp2
                       + sp2Val * blockSizeF
                       + valF * statesClass_;
            for (int c = 0; c < statesClass_; c++) {
                probs[c] *= childProbs_[base + c];
            }
            offset += (fCard * sp1Card * sp2Card * statesClass_);
        }

        // Normalize
        normalize(probs);
        return probs;
    }

    // --------------------------------------
    // predict_proba (batch)
    // --------------------------------------
    std::vector<std::vector<double>> XSp2de::predict_proba(std::vector<std::vector<int>> &test_data)
    {
        int test_size = test_data[0].size();  // each feature is test_data[f], size = #samples
        int sample_size = test_data.size();   // = nFeatures_
        std::vector<std::vector<double>> probabilities(test_size, std::vector<double>(statesClass_, 0.0));

        // same concurrency approach as the other X* classifiers
        int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
        std::vector<std::thread> threads;

        auto worker = [&](const std::vector<std::vector<int>> &samples,
                          int begin,
                          int chunk,
                          int sample_size,
                          std::vector<std::vector<double>> &predictions) {
            std::string threadName = "XSp2de-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
            pthread_setname_np(pthread_self(), threadName.c_str());
#else
            pthread_setname_np(threadName.c_str());
#endif

            std::vector<int> instance(sample_size);
            for (int sample = begin; sample < begin + chunk; ++sample) {
                for (int feature = 0; feature < sample_size; ++feature) {
                    instance[feature] = samples[feature][sample];
                }
                predictions[sample] = predict_proba(instance);
            }
            semaphore_.release();
        };

        for (int begin = 0; begin < test_size; begin += chunk_size) {
            int chunk = std::min(chunk_size, test_size - begin);
            semaphore_.acquire();
            threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
        }
        for (auto &th : threads) {
            th.join();
        }
        return probabilities;
    }
    // --------------------------------------
    // predict (single instance)
    // --------------------------------------
    int XSp2de::predict(const std::vector<int> &instance) const
    {
        auto p = predict_proba(instance);
        return static_cast<int>(std::distance(p.begin(), std::max_element(p.begin(), p.end())));
    }

    // --------------------------------------
    // predict (batch of data)
    // --------------------------------------
    std::vector<int> XSp2de::predict(std::vector<std::vector<int>> &test_data)
    {
        auto probabilities = predict_proba(test_data);
        std::vector<int> predictions(probabilities.size(), 0);

        for (size_t i = 0; i < probabilities.size(); i++) {
            predictions[i] = static_cast<int>(
                std::distance(probabilities[i].begin(),
                              std::max_element(probabilities[i].begin(), probabilities[i].end())));
        }
        return predictions;
    }

    // --------------------------------------
    // predict (torch::Tensor version)
    // --------------------------------------
    torch::Tensor XSp2de::predict(torch::Tensor &X)
    {
        auto X_ = TensorUtils::to_matrix(X);
        auto result_v = predict(X_);
        return torch::tensor(result_v, torch::kInt32);
    }

    // --------------------------------------
    // predict_proba (torch::Tensor version)
    // --------------------------------------
    torch::Tensor XSp2de::predict_proba(torch::Tensor &X)
    {
        auto X_ = TensorUtils::to_matrix(X);
        auto result_v = predict_proba(X_);
        int n_samples = X.size(1);
        torch::Tensor result = torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
        for (int i = 0; i < (int)result_v.size(); ++i) {
            result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
        }
        return result;
    }

    // --------------------------------------
    // score (torch::Tensor version)
    // --------------------------------------
    float XSp2de::score(torch::Tensor &X, torch::Tensor &y)
    {
        torch::Tensor y_pred = predict(X);
        return (y_pred == y).sum().item<float>() / y.size(0);
    }

    // --------------------------------------
    // score (vector version)
    // --------------------------------------
    float XSp2de::score(std::vector<std::vector<int>> &X, std::vector<int> &y)
    {
        auto y_pred = predict(X);
        int correct = 0;
        for (size_t i = 0; i < y_pred.size(); ++i) {
            if (y_pred[i] == y[i]) {
                correct++;
            }
        }
        return static_cast<float>(correct) / static_cast<float>(y_pred.size());
    }
    // --------------------------------------
    // Utility: normalize
    // --------------------------------------
    void XSp2de::normalize(std::vector<double> &v) const
    {
        double sum = 0.0;
        for (auto &val : v) {
            sum += val;
        }
        if (sum > 0.0) {
            for (auto &val : v) {
                val /= sum;
            }
        }
    }

    // --------------------------------------
    // to_string
    // --------------------------------------
    std::string XSp2de::to_string() const
    {
        std::ostringstream oss;
        oss << "----- XSp2de Model -----\n"
            << "nFeatures_ = " << nFeatures_ << "\n"
            << "superParent1_ = " << superParent1_ << "\n"
            << "superParent2_ = " << superParent2_ << "\n"
            << "statesClass_ = " << statesClass_ << "\n\n";

        oss << "States: [";
        for (auto s : states_) oss << s << " ";
        oss << "]\n";

        oss << "classCounts_:\n";
        for (auto v : classCounts_) oss << v << " ";
        oss << "\nclassPriors_:\n";
        for (auto v : classPriors_) oss << v << " ";
        oss << "\nsp1FeatureCounts_ (size=" << sp1FeatureCounts_.size() << ")\n";
        for (auto v : sp1FeatureCounts_) oss << v << " ";
        oss << "\nsp2FeatureCounts_ (size=" << sp2FeatureCounts_.size() << ")\n";
        for (auto v : sp2FeatureCounts_) oss << v << " ";
        oss << "\nchildCounts_ (size=" << childCounts_.size() << ")\n";
        for (auto v : childCounts_) oss << v << " ";

        oss << "\nchildOffsets_:\n";
        for (auto c : childOffsets_) oss << c << " ";

        oss << "\n----------------------------------------\n";
        return oss.str();
    }

    // --------------------------------------
    // Some introspection about the graph
    // --------------------------------------
    int XSp2de::getNumberOfNodes() const
    {
        // nFeatures_ + 1 class node
        return nFeatures_ + 1;
    }

    int XSp2de::getClassNumStates() const
    {
        return statesClass_;
    }

    int XSp2de::getNFeatures() const
    {
        return nFeatures_;
    }

    int XSp2de::getNumberOfStates() const
    {
        // A rough aggregate; you may prefer to sum the actual cardinalities instead.
        return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
    }

    int XSp2de::getNumberOfEdges() const
    {
        // In an SPnDE with n = 2, each child feature receives edges from the class, sp1 and sp2:
        //   class->sp1, class->sp2     => 2 edges
        //   class->child               => (nFeatures - 2) edges
        //   sp1->child, sp2->child     => 2 * (nFeatures - 2) edges
        // total = 2 + 3 * (nFeatures - 2) = 3 * nFeatures - 4
        return 3 * nFeatures_ - 4;
    }

} // namespace bayesnet
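childCounts_ and childProbs_ above are flat vectors indexed by (feature block, sp1Val, sp2Val, childVal, class). A self-contained check of that arithmetic, with hypothetical cardinalities, that can be verified by hand:

    #include <cassert>

    // Hypothetical cardinalities mirroring the layout in addSample():
    //   idx = offset + sp1Val * (sp2Card * fCard * nClasses)
    //                + sp2Val * (fCard * nClasses) + childVal * nClasses + c
    int main() {
        int fCard = 3, sp1Card = 2, sp2Card = 4, nClasses = 2, offset = 0;
        int blockSizeSp2 = sp2Card * fCard * nClasses;   // 24
        int blockSizeChild = fCard * nClasses;           // 6
        int sp1Val = 1, sp2Val = 2, childVal = 0, c = 1;
        int idx = offset + sp1Val * blockSizeSp2 + sp2Val * blockSizeChild
                  + childVal * nClasses + c;
        assert(idx == 24 + 12 + 0 + 1);                  // 37
        assert(blockSizeSp2 * sp1Card == 48);            // one feature's whole block
        return 0;
    }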
75  bayesnet/classifiers/XSP2DE.h  Normal file
@@ -0,0 +1,75 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef XSP2DE_H
#define XSP2DE_H

#include "Classifier.h"
#include "bayesnet/utils/CountingSemaphore.h"
#include <torch/torch.h>
#include <vector>

namespace bayesnet {

    class XSp2de : public Classifier {
    public:
        XSp2de(int spIndex1, int spIndex2);
        void setHyperparameters(const nlohmann::json &hyperparameters_) override;
        void fitx(torch::Tensor &X, torch::Tensor &y, torch::Tensor &weights_, const Smoothing_t smoothing);
        std::vector<double> predict_proba(const std::vector<int> &instance) const;
        std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>> &test_data) override;
        int predict(const std::vector<int> &instance) const;
        std::vector<int> predict(std::vector<std::vector<int>> &test_data) override;
        torch::Tensor predict(torch::Tensor &X) override;
        torch::Tensor predict_proba(torch::Tensor &X) override;

        float score(torch::Tensor &X, torch::Tensor &y) override;
        float score(std::vector<std::vector<int>> &X, std::vector<int> &y) override;
        std::string to_string() const;
        std::vector<std::string> graph(const std::string &title) const override {
            return std::vector<std::string>({ title });
        }

        int getNumberOfNodes() const override;
        int getNumberOfEdges() const override;
        int getNFeatures() const;
        int getClassNumStates() const override;
        int getNumberOfStates() const override;

    protected:
        void buildModel(const torch::Tensor &weights) override;
        void trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing) override;

    private:
        void addSample(const std::vector<int> &instance, double weight);
        void normalize(std::vector<double> &v) const;
        void computeProbabilities();

        int superParent1_;
        int superParent2_;
        int nFeatures_;
        int statesClass_;
        double alpha_;
        double initializer_;

        std::vector<int> states_;
        std::vector<double> classCounts_;
        std::vector<double> classPriors_;
        std::vector<double> sp1FeatureCounts_, sp1FeatureProbs_;
        std::vector<double> sp2FeatureCounts_, sp2FeatureProbs_;
        // childOffsets_[f] is the offset into childCounts_ for feature f.
        // If f is either superParent1_ or superParent2_, childOffsets_[f] = -1.
        std::vector<int> childOffsets_;
        // For each child f we store p(x_f | c, sp1Val, sp2Val): raw counts in
        // childCounts_ and probabilities in childProbs_, in blocks of size
        // states_[f] * statesClass_ * states_[sp1] * states_[sp2].
        std::vector<double> childCounts_;
        std::vector<double> childProbs_;
        CountingSemaphore &semaphore_;
    };

} // namespace bayesnet
#endif // XSP2DE_H
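In math terms, predict_proba in XSP2DE.cc evaluates the SP2DE factorization (written here in LaTeX notation; sp1 and sp2 are the two super-parents):

    P(c \mid \mathbf{x}) \propto P(c)\, P(x_{sp_1} \mid c)\, P(x_{sp_2} \mid c)
        \prod_{f \notin \{sp_1, sp_2\}} P(x_f \mid c, x_{sp_1}, x_{sp_2})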
450  bayesnet/classifiers/XSPODE.cc  Normal file
@@ -0,0 +1,450 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <algorithm>
#include <cmath>
#include <limits>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include "XSPODE.h"
#include "bayesnet/utils/TensorUtils.h"

namespace bayesnet {

    // --------------------------------------
    // Constructor
    // --------------------------------------
    XSpode::XSpode(int spIndex)
        : Classifier(Network()), superParent_{ spIndex }, nFeatures_{ 0 }, statesClass_{ 0 },
          alpha_{ 1.0 }, initializer_{ 1.0 }, semaphore_{ CountingSemaphore::getInstance() }
    {
        validHyperparameters = { "parent" };
    }

    void XSpode::setHyperparameters(const nlohmann::json& hyperparameters_)
    {
        auto hyperparameters = hyperparameters_;
        if (hyperparameters.contains("parent")) {
            superParent_ = hyperparameters["parent"];
            hyperparameters.erase("parent");
        }
        Classifier::setHyperparameters(hyperparameters);
    }
    void XSpode::fitx(torch::Tensor & X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
    {
        m = X.size(1);
        n = X.size(0);
        dataset = X;
        buildDataset(y);
        buildModel(weights_);
        trainModel(weights_, smoothing);
        fitted = true;
    }

    // --------------------------------------
    // buildModel
    // --------------------------------------
    // Initialize the storage needed for the super-parent and child feature
    // counts and probabilities.
    // --------------------------------------
    void XSpode::buildModel(const torch::Tensor& weights)
    {
        nFeatures_ = n;

        // Derive the number of states for each feature and for the class.
        // (This is just one approach; adapt it to match your environment.)
        // We assume states_[f] = max value in X[f] + 1, and for the class
        // statesClass_ = max(y) + 1.
        states_.resize(nFeatures_);
        for (int f = 0; f < nFeatures_; f++) {
            states_[f] = dataset[f].max().item<int>() + 1;
        }
        statesClass_ = dataset[-1].max().item<int>() + 1;

        // Initialize counts
        classCounts_.resize(statesClass_, 0.0);
        // p(x_sp = spVal | c) is stored in spFeatureCounts_[spVal * statesClass_ + c].
        spFeatureCounts_.resize(states_[superParent_] * statesClass_, 0.0);

        // For each child != sp, we store p(childVal | c, spVal) in a separate block of
        // childCounts_, which is sized as the sum over children of
        // (states_[child] * statesClass_ * states_[sp]). childOffsets_[f] indexes
        // the start of each child's block.
        childOffsets_.resize(nFeatures_, -1);
        int totalSize = 0;
        for (int f = 0; f < nFeatures_; f++) {
            if (f == superParent_)
                continue; // skip sp
            childOffsets_[f] = totalSize;
            // block size for this child's counts: states_[f] * statesClass_ * states_[superParent_]
            totalSize += (states_[f] * statesClass_ * states_[superParent_]);
        }
        childCounts_.resize(totalSize, 0.0);
    }
    // --------------------------------------
    // trainModel
    // --------------------------------------
    // Accumulate the weighted counts and convert them to probabilities.
    // We only store conditional probabilities for:
    //   p(x_sp | c)           (the super-parent feature)
    //   p(x_child | c, x_sp)  for all child != sp
    // --------------------------------------
    void XSpode::trainModel(const torch::Tensor& weights,
                            const bayesnet::Smoothing_t smoothing)
    {
        // Accumulate raw counts
        for (int i = 0; i < m; i++) {
            std::vector<int> instance(nFeatures_ + 1);
            for (int f = 0; f < nFeatures_; f++) {
                instance[f] = dataset[f][i].item<int>();
            }
            instance[nFeatures_] = dataset[-1][i].item<int>();
            addSample(instance, weights[i].item<double>());
        }
        switch (smoothing) {
            case bayesnet::Smoothing_t::ORIGINAL:
                alpha_ = 1.0 / m;
                break;
            case bayesnet::Smoothing_t::LAPLACE:
                alpha_ = 1.0;
                break;
            default:
                alpha_ = 0.0; // no smoothing
        }
        initializer_ = std::numeric_limits<double>::max() /
                       (nFeatures_ * nFeatures_); // for numerical stability
        // Convert raw counts to probabilities
        computeProbabilities();
    }

    // --------------------------------------
    // addSample
    // --------------------------------------
    // instance has size nFeatures_ + 1, with the class at the end.
    // We add `weight` to the appropriate counters for each (c, superParentVal, childVal).
    // --------------------------------------
    void XSpode::addSample(const std::vector<int>& instance, double weight)
    {
        if (weight <= 0.0)
            return;

        int c = instance.back();
        // (A) increment classCounts
        classCounts_[c] += weight;

        // (B) increment super-parent counts => p(x_sp | c)
        int spVal = instance[superParent_];
        spFeatureCounts_[spVal * statesClass_ + c] += weight;

        // (C) increment child counts => p(childVal | c, x_sp)
        for (int f = 0; f < nFeatures_; f++) {
            if (f == superParent_)
                continue;
            int childVal = instance[f];
            int offset = childOffsets_[f];
            // Index into childCounts_.
            // Layout: offset + (spVal * states_[f] + childVal) * statesClass_ + c
            int blockSize = states_[f] * statesClass_;
            int idx = offset + spVal * blockSize + childVal * statesClass_ + c;
            childCounts_[idx] += weight;
        }
    }
    // --------------------------------------
    // computeProbabilities
    // --------------------------------------
    // Once all samples have been added as counts, call this to compute:
    //   p(c)
    //   p(x_sp = spVal | c)
    //   p(x_child = v | c, x_sp = spVal)
    // --------------------------------------
    void XSpode::computeProbabilities()
    {
        double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);

        // p(c) => classPriors_
        classPriors_.resize(statesClass_, 0.0);
        if (totalCount <= 0.0) {
            // fallback => uniform
            double unif = 1.0 / static_cast<double>(statesClass_);
            for (int c = 0; c < statesClass_; c++) {
                classPriors_[c] = unif;
            }
        } else {
            for (int c = 0; c < statesClass_; c++) {
                classPriors_[c] = (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
            }
        }

        // p(x_sp | c): the denominator for entry [spVal * statesClass_ + c] is
        // classCounts_[c] + alpha_ * (#states of sp)
        spFeatureProbs_.resize(spFeatureCounts_.size());
        int spCard = states_[superParent_];
        for (int spVal = 0; spVal < spCard; spVal++) {
            for (int c = 0; c < statesClass_; c++) {
                double denom = classCounts_[c] + alpha_ * spCard;
                double num = spFeatureCounts_[spVal * statesClass_ + c] + alpha_;
                spFeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
            }
        }

        // p(x_child | c, x_sp)
        childProbs_.resize(childCounts_.size());
        for (int f = 0; f < nFeatures_; f++) {
            if (f == superParent_)
                continue;
            int offset = childOffsets_[f];
            int childCard = states_[f];

            // For each (spVal, childVal, c) entry in this child's block:
            for (int spVal = 0; spVal < spCard; spVal++) {
                for (int childVal = 0; childVal < childCard; childVal++) {
                    for (int c = 0; c < statesClass_; c++) {
                        int idx = offset + spVal * (childCard * statesClass_) +
                                  childVal * statesClass_ + c;

                        double num = childCounts_[idx] + alpha_;
                        // denominator = spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * (#states of child)
                        double denom = spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * childCard;
                        childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
                    }
                }
            }
        }
    }
    // --------------------------------------
    // predict_proba
    // --------------------------------------
    // For a single instance x of dimension nFeatures_:
    //   P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp)
    // --------------------------------------
    std::vector<double> XSpode::predict_proba(const std::vector<int>& instance) const
    {
        if (!fitted) {
            throw std::logic_error(CLASSIFIER_NOT_FITTED);
        }
        std::vector<double> probs(statesClass_, 0.0);
        // Multiply p(c) × p(x_sp | c)
        int spVal = instance[superParent_];
        for (int c = 0; c < statesClass_; c++) {
            double pc = classPriors_[c];
            double pSpC = spFeatureProbs_[spVal * statesClass_ + c];
            probs[c] = pc * pSpC * initializer_;
        }

        // Multiply by each child's probability p(x_child | c, x_sp)
        for (int feature = 0; feature < nFeatures_; feature++) {
            if (feature == superParent_)
                continue; // skip sp
            int sf = instance[feature];
            int offset = childOffsets_[feature];
            int childCard = states_[feature];
            // Index into childProbs_ = offset + spVal * (childCard * statesClass_)
            //                        + childVal * statesClass_ + c
            int base = offset + spVal * (childCard * statesClass_) + sf * statesClass_;
            for (int c = 0; c < statesClass_; c++) {
                probs[c] *= childProbs_[base + c];
            }
        }

        // Normalize
        normalize(probs);
        return probs;
    }
    std::vector<std::vector<double>> XSpode::predict_proba(std::vector<std::vector<int>>& test_data)
    {
        int test_size = test_data[0].size();
        int sample_size = test_data.size();
        auto probabilities = std::vector<std::vector<double>>(test_size, std::vector<double>(statesClass_));

        int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
        std::vector<std::thread> threads;
        auto worker = [&](const std::vector<std::vector<int>>& samples, int begin,
                          int chunk, int sample_size,
                          std::vector<std::vector<double>>& predictions) {
            std::string threadName = "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
            pthread_setname_np(pthread_self(), threadName.c_str());
#else
            pthread_setname_np(threadName.c_str());
#endif

            std::vector<int> instance(sample_size);
            for (int sample = begin; sample < begin + chunk; ++sample) {
                for (int feature = 0; feature < sample_size; ++feature) {
                    instance[feature] = samples[feature][sample];
                }
                predictions[sample] = predict_proba(instance);
            }
            semaphore_.release();
        };
        for (int begin = 0; begin < test_size; begin += chunk_size) {
            int chunk = std::min(chunk_size, test_size - begin);
            semaphore_.acquire();
            threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
        }
        for (auto& thread : threads) {
            thread.join();
        }
        return probabilities;
    }
    // --------------------------------------
    // Utility: normalize
    // --------------------------------------
    void XSpode::normalize(std::vector<double>& v) const
    {
        double sum = 0.0;
        for (auto val : v) {
            sum += val;
        }
        if (sum <= 0.0) {
            return;
        }
        for (auto& val : v) {
            val /= sum;
        }
    }

    // --------------------------------------
    // representation of the model
    // --------------------------------------
    std::string XSpode::to_string() const
    {
        std::ostringstream oss;
        oss << "----- XSpode Model -----" << std::endl
            << "nFeatures_ = " << nFeatures_ << std::endl
            << "superParent_ = " << superParent_ << std::endl
            << "statesClass_ = " << statesClass_ << std::endl
            << std::endl;

        oss << "States: [";
        for (int s : states_)
            oss << s << " ";
        oss << "]" << std::endl;
        oss << "classCounts_: [";
        for (double c : classCounts_)
            oss << c << " ";
        oss << "]" << std::endl;
        oss << "classPriors_: [";
        for (double c : classPriors_)
            oss << c << " ";
        oss << "]" << std::endl;
        oss << "spFeatureCounts_: size = " << spFeatureCounts_.size() << std::endl << "[";
        for (double c : spFeatureCounts_)
            oss << c << " ";
        oss << "]" << std::endl;
        oss << "spFeatureProbs_: size = " << spFeatureProbs_.size() << std::endl << "[";
        for (double c : spFeatureProbs_)
            oss << c << " ";
        oss << "]" << std::endl;
        oss << "childCounts_: size = " << childCounts_.size() << std::endl << "[";
        for (double cc : childCounts_)
            oss << cc << " ";
        oss << "]" << std::endl;
        oss << "childProbs_: size = " << childProbs_.size() << std::endl << "[";
        for (double cp : childProbs_)
            oss << cp << " ";
        oss << "]" << std::endl;
        oss << "childOffsets_: [";
        for (int co : childOffsets_)
            oss << co << " ";
        oss << "]" << std::endl;
        oss << std::string(40, '-') << std::endl;
        return oss.str();
    }
    int XSpode::getNumberOfNodes() const { return nFeatures_ + 1; }
    int XSpode::getClassNumStates() const { return statesClass_; }
    int XSpode::getNFeatures() const { return nFeatures_; }
    int XSpode::getNumberOfStates() const
    {
        return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
    }
    int XSpode::getNumberOfEdges() const
    {
        return 2 * nFeatures_ + 1;
    }

    // ------------------------------------------------------
    // Predict overrides (classifier interface)
    // ------------------------------------------------------
    int XSpode::predict(const std::vector<int>& instance) const
    {
        auto p = predict_proba(instance);
        return static_cast<int>(std::distance(p.begin(), std::max_element(p.begin(), p.end())));
    }
    std::vector<int> XSpode::predict(std::vector<std::vector<int>>& test_data)
    {
        auto probabilities = predict_proba(test_data);
        std::vector<int> predictions(probabilities.size(), 0);

        for (size_t i = 0; i < probabilities.size(); i++) {
            predictions[i] = std::distance(
                probabilities[i].begin(),
                std::max_element(probabilities[i].begin(), probabilities[i].end()));
        }
        return predictions;
    }
    torch::Tensor XSpode::predict(torch::Tensor& X)
    {
        auto X_ = TensorUtils::to_matrix(X);
        auto result_v = predict(X_);
        return torch::tensor(result_v, torch::kInt32);
    }
    torch::Tensor XSpode::predict_proba(torch::Tensor& X)
    {
        auto X_ = TensorUtils::to_matrix(X);
        auto result_v = predict_proba(X_);
        int n_samples = X.size(1);
        torch::Tensor result = torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
        for (int i = 0; i < static_cast<int>(result_v.size()); ++i) {
            result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
        }
        return result;
    }
    float XSpode::score(torch::Tensor& X, torch::Tensor& y)
    {
        torch::Tensor y_pred = predict(X);
        return (y_pred == y).sum().item<float>() / y.size(0);
    }
    float XSpode::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
    {
        auto y_pred = this->predict(X);
        int correct = 0;
        for (size_t i = 0; i < y_pred.size(); ++i) {
            if (y_pred[i] == y[i]) {
                correct++;
            }
        }
        return static_cast<float>(correct) / static_cast<float>(y_pred.size());
    }
} // namespace bayesnet
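Both X* classifiers parallelize batch prediction the same way: split the samples into chunks, gate thread creation with a counting semaphore, and write each chunk's results into a preallocated output. A standalone sketch of that pattern using C++20's std::counting_semaphore (an assumption; the library uses its own CountingSemaphore wrapper):

    #include <algorithm>
    #include <semaphore>
    #include <thread>
    #include <vector>

    // Minimal sketch: gate concurrent workers with a semaphore, as in predict_proba().
    int main() {
        const int test_size = 1000, chunk_size = 150, max_threads = 8;
        std::vector<double> out(test_size);
        std::counting_semaphore<64> sem(max_threads);
        std::vector<std::thread> threads;
        for (int begin = 0; begin < test_size; begin += chunk_size) {
            int chunk = std::min(chunk_size, test_size - begin);
            sem.acquire(); // block until a worker slot is free
            threads.emplace_back([&, begin, chunk] {
                for (int i = begin; i < begin + chunk; ++i)
                    out[i] = i * 0.5; // stand-in for predict_proba(instance)
                sem.release();
            });
        }
        for (auto& t : threads) t.join();
        return 0;
    }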
76  bayesnet/classifiers/XSPODE.h  Normal file
@@ -0,0 +1,76 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef XSPODE_H
#define XSPODE_H

#include <vector>
#include <torch/torch.h>
#include "Classifier.h"
#include "bayesnet/utils/CountingSemaphore.h"

namespace bayesnet {

    class XSpode : public Classifier {
    public:
        explicit XSpode(int spIndex);
        std::vector<double> predict_proba(const std::vector<int>& instance) const;
        std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
        int predict(const std::vector<int>& instance) const;
        void normalize(std::vector<double>& v) const;
        std::string to_string() const;
        int getNFeatures() const;
        int getNumberOfNodes() const override;
        int getNumberOfEdges() const override;
        int getNumberOfStates() const override;
        int getClassNumStates() const override;
        std::vector<int>& getStates();
        std::vector<std::string> graph(const std::string& title) const override { return std::vector<std::string>({ title }); }
        void fitx(torch::Tensor& X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing);
        void setHyperparameters(const nlohmann::json& hyperparameters_) override;

        //
        // Classifier interface
        //
        torch::Tensor predict(torch::Tensor& X) override;
        std::vector<int> predict(std::vector<std::vector<int>>& X) override;
        torch::Tensor predict_proba(torch::Tensor& X) override;
        float score(torch::Tensor& X, torch::Tensor& y) override;
        float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;

    protected:
        void buildModel(const torch::Tensor& weights) override;
        void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;

    private:
        void addSample(const std::vector<int>& instance, double weight);
        void computeProbabilities();

        int superParent_;
        int nFeatures_;
        int statesClass_;
        std::vector<int> states_; // [states_feat0, ..., states_feat(N-1)] (the class is not included in this array)

        // Class counts
        std::vector<double> classCounts_; // [c], accumulative
        std::vector<double> classPriors_; // [c], after normalization

        // For p(x_sp = spVal | c)
        std::vector<double> spFeatureCounts_; // [spVal * statesClass_ + c]
        std::vector<double> spFeatureProbs_;  // same shape, after normalization

        // For p(x_child = childVal | x_sp = spVal, c):
        // childCounts_ is big enough to hold all child features except sp.
        // For each child f, childOffsets_[f] is the start index; the block is
        // then indexed by (spVal, childVal, c).
        std::vector<double> childCounts_;
        std::vector<double> childProbs_;
        std::vector<int> childOffsets_;

        double alpha_ = 1.0;
        double initializer_; // for numerical stability
        CountingSemaphore& semaphore_;
    };
}

#endif // XSPODE_H
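The estimates produced by computeProbabilities in XSPODE.cc are standard additive smoothing, with alpha set by the Smoothing_t switch (1/m for ORIGINAL, 1 for LAPLACE, 0 otherwise); in LaTeX notation:

    \hat{P}(c) = \frac{N(c) + \alpha}{N + \alpha\,|C|}, \qquad
    \hat{P}(x_{sp} \mid c) = \frac{N(x_{sp}, c) + \alpha}{N(c) + \alpha\,|X_{sp}|}, \qquad
    \hat{P}(x_f \mid c, x_{sp}) = \frac{N(x_f, x_{sp}, c) + \alpha}{N(x_{sp}, c) + \alpha\,|X_f|}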
40  bayesnet/ensembles/A2DE.cc  Normal file
@@ -0,0 +1,40 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include "A2DE.h"

namespace bayesnet {
    A2DE::A2DE(bool predict_voting) : Ensemble(predict_voting)
    {
        validHyperparameters = { "predict_voting" };
    }
    void A2DE::setHyperparameters(const nlohmann::json& hyperparameters_)
    {
        auto hyperparameters = hyperparameters_;
        if (hyperparameters.contains("predict_voting")) {
            predict_voting = hyperparameters["predict_voting"];
            hyperparameters.erase("predict_voting");
        }
        Classifier::setHyperparameters(hyperparameters);
    }
    void A2DE::buildModel(const torch::Tensor& weights)
    {
        models.clear();
        significanceModels.clear();
        for (int i = 0; i < static_cast<int>(features.size()) - 1; ++i) {
            for (int j = i + 1; j < static_cast<int>(features.size()); ++j) {
                auto model = std::make_unique<SPnDE>(std::vector<int>({ i, j }));
                models.push_back(std::move(model));
            }
        }
        n_models = static_cast<unsigned>(models.size());
        significanceModels = std::vector<double>(n_models, 1.0);
    }
    std::vector<std::string> A2DE::graph(const std::string& title) const
    {
        return Ensemble::graph(title);
    }
}
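Since buildModel pairs every i < j, an A2DE ensemble holds one SPnDE per unordered pair of features, i.e. n(n-1)/2 models. A trivial check with a hypothetical feature count:

    #include <cassert>

    // Number of SPnDE models A2DE builds for n features: n * (n - 1) / 2.
    int main() {
        int n = 10;                       // hypothetical feature count
        int n_models = n * (n - 1) / 2;   // one model per pair (i, j), i < j
        assert(n_models == 45);
        return 0;
    }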
22  bayesnet/ensembles/A2DE.h  Normal file
@@ -0,0 +1,22 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef A2DE_H
#define A2DE_H
#include "bayesnet/classifiers/SPnDE.h"
#include "Ensemble.h"
namespace bayesnet {
    class A2DE : public Ensemble {
    public:
        A2DE(bool predict_voting = false);
        virtual ~A2DE() {};
        void setHyperparameters(const nlohmann::json& hyperparameters) override;
        std::vector<std::string> graph(const std::string& title = "A2DE") const override;
    protected:
        void buildModel(const torch::Tensor& weights) override;
    };
}
#endif
bayesnet/ensembles/AODELd.cc
@@ -10,7 +10,7 @@ namespace bayesnet {
     AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
     {
     }
-    AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
+    AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
     {
         checkInput(X_, y_);
         features = features_;
@@ -20,8 +20,9 @@ namespace bayesnet {
         // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
         states = fit_local_discretization(y);
         // We have discretized the input data
-        // 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
-        Ensemble::fit(dataset, features, className, states);
+        // 1st we need to fit the model to build the normal AODE structure, Ensemble::fit
+        // calls buildModel to initialize the base models
+        Ensemble::fit(dataset, features, className, states, smoothing);
         return *this;
 
     }
@@ -34,10 +35,10 @@ namespace bayesnet {
         n_models = models.size();
         significanceModels = std::vector<double>(n_models, 1.0);
     }
-    void AODELd::trainModel(const torch::Tensor& weights)
+    void AODELd::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
     {
         for (const auto& model : models) {
-            model->fit(Xf, y, features, className, states);
+            model->fit(Xf, y, features, className, states, smoothing);
         }
     }
     std::vector<std::string> AODELd::graph(const std::string& name) const
bayesnet/ensembles/AODELd.h
@@ -15,10 +15,10 @@ namespace bayesnet {
     public:
         AODELd(bool predict_voting = true);
         virtual ~AODELd() = default;
-        AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
+        AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing) override;
         std::vector<std::string> graph(const std::string& name = "AODELd") const override;
     protected:
-        void trainModel(const torch::Tensor& weights) override;
+        void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
         void buildModel(const torch::Tensor& weights) override;
     };
 }
268
bayesnet/ensembles/Boost.cc
Normal file
268
bayesnet/ensembles/Boost.cc
Normal file
@@ -0,0 +1,268 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
#include "Boost.h"
|
||||
#include "bayesnet/feature_selection/CFS.h"
|
||||
#include "bayesnet/feature_selection/FCBF.h"
|
||||
#include "bayesnet/feature_selection/IWSS.h"
|
||||
#include <folding.hpp>
|
||||
|
||||
namespace bayesnet {
|
||||
Boost::Boost(bool predict_voting) : Ensemble(predict_voting) {
|
||||
validHyperparameters = {"alpha_block", "order", "convergence", "convergence_best", "bisection",
|
||||
"threshold", "maxTolerance", "predict_voting", "select_features", "block_update"};
|
||||
}
|
||||
void Boost::setHyperparameters(const nlohmann::json &hyperparameters_) {
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("order")) {
|
||||
std::vector<std::string> algos = {Orders.ASC, Orders.DESC, Orders.RAND};
|
||||
order_algorithm = hyperparameters["order"];
|
||||
if (std::find(algos.begin(), algos.end(), order_algorithm) == algos.end()) {
|
||||
throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC +
|
||||
", " + Orders.RAND + "]");
|
||||
}
        hyperparameters.erase("order");
    }
    if (hyperparameters.contains("alpha_block")) {
        alpha_block = hyperparameters["alpha_block"];
        hyperparameters.erase("alpha_block");
    }
    if (hyperparameters.contains("convergence")) {
        convergence = hyperparameters["convergence"];
        hyperparameters.erase("convergence");
    }
    if (hyperparameters.contains("convergence_best")) {
        convergence_best = hyperparameters["convergence_best"];
        hyperparameters.erase("convergence_best");
    }
    if (hyperparameters.contains("bisection")) {
        bisection = hyperparameters["bisection"];
        hyperparameters.erase("bisection");
    }
    if (hyperparameters.contains("threshold")) {
        threshold = hyperparameters["threshold"];
        hyperparameters.erase("threshold");
    }
    if (hyperparameters.contains("maxTolerance")) {
        maxTolerance = hyperparameters["maxTolerance"];
        if (maxTolerance < 1 || maxTolerance > 6)
            throw std::invalid_argument("Invalid maxTolerance value, must be in [1, 6]");
        hyperparameters.erase("maxTolerance");
    }
    if (hyperparameters.contains("predict_voting")) {
        predict_voting = hyperparameters["predict_voting"];
        hyperparameters.erase("predict_voting");
    }
    if (hyperparameters.contains("select_features")) {
        auto selectedAlgorithm = hyperparameters["select_features"];
        std::vector<std::string> algos = {SelectFeatures.IWSS, SelectFeatures.CFS, SelectFeatures.FCBF};
        selectFeatures = true;
        select_features_algorithm = selectedAlgorithm;
        if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
            throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " +
                                        SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
        }
        hyperparameters.erase("select_features");
    }
    if (hyperparameters.contains("block_update")) {
        block_update = hyperparameters["block_update"];
        hyperparameters.erase("block_update");
    }
    if (block_update && alpha_block) {
        throw std::invalid_argument("alpha_block and block_update cannot be true at the same time");
    }
    if (block_update && !bisection) {
        throw std::invalid_argument("block_update needs bisection to be true");
    }
    Classifier::setHyperparameters(hyperparameters);
}
void Boost::add_model(std::unique_ptr<Classifier> model, double significance) {
    models.push_back(std::move(model));
    n_models++;
    significanceModels.push_back(significance);
}
void Boost::remove_last_model() {
    models.pop_back();
    significanceModels.pop_back();
    n_models--;
}
void Boost::buildModel(const torch::Tensor &weights) {
    // Models shall be built in trainModel
    models.clear();
    significanceModels.clear();
    n_models = 0;
    // Prepare the validation dataset
    auto y_ = dataset.index({-1, "..."});
    if (convergence) {
        // Prepare train & validation sets from train data
        auto fold = folding::StratifiedKFold(5, y_, 271);
        auto [train, test] = fold.getFold(0);
        auto train_t = torch::tensor(train);
        auto test_t = torch::tensor(test);
        // Get train and validation sets
        X_train = dataset.index({torch::indexing::Slice(0, dataset.size(0) - 1), train_t});
        y_train = dataset.index({-1, train_t});
        X_test = dataset.index({torch::indexing::Slice(0, dataset.size(0) - 1), test_t});
        y_test = dataset.index({-1, test_t});
        dataset = X_train;
        m = X_train.size(1);
        auto n_classes = states.at(className).size();
        // Build dataset with train data
        buildDataset(y_train);
        metrics = Metrics(dataset, features, className, n_classes);
    } else {
        // Use all data to train
        X_train = dataset.index({torch::indexing::Slice(0, dataset.size(0) - 1), "..."});
        y_train = y_;
    }
}
std::vector<int> Boost::featureSelection(torch::Tensor &weights_) {
    int maxFeatures = 0;
    if (select_features_algorithm == SelectFeatures.CFS) {
        featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
    } else if (select_features_algorithm == SelectFeatures.IWSS) {
        if (threshold < 0 || threshold > 0.5) {
            throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.IWSS + " [0, 0.5]");
        }
        featureSelector =
            new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
    } else if (select_features_algorithm == SelectFeatures.FCBF) {
        if (threshold < 1e-7 || threshold > 1) {
            throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.FCBF + " [1e-7, 1]");
        }
        featureSelector =
            new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
    }
    featureSelector->fit();
    auto featuresUsed = featureSelector->getFeatures();
    delete featureSelector;
    return featuresUsed;
}
std::tuple<torch::Tensor &, double, bool> Boost::update_weights(torch::Tensor &ytrain, torch::Tensor &ypred,
                                                                torch::Tensor &weights) {
    bool terminate = false;
    double alpha_t = 0;
    auto mask_wrong = ypred != ytrain;
    auto mask_right = ypred == ytrain;
    auto masked_weights = weights * mask_wrong.to(weights.dtype());
    double epsilon_t = masked_weights.sum().item<double>();
    // std::cout << "epsilon_t: " << epsilon_t << " count wrong: " << mask_wrong.sum().item<int>() << " count right: "
    // << mask_right.sum().item<int>() << std::endl;
    if (epsilon_t > 0.5) {
        // Invert the weights policy (plot ln(wt))
        // "In each round of AdaBoost, there is a sanity check to ensure that the current base
        // learner is better than random guess" (Zhi-Hua Zhou, 2012)
        terminate = true;
    } else {
        double wt = (1 - epsilon_t) / epsilon_t;
        alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
        // Step 3.2: Update weights for next classifier
        // Step 3.2.1: Update weights of wrong samples
        weights += mask_wrong.to(weights.dtype()) * exp(alpha_t) * weights;
        // Step 3.2.2: Update weights of right samples
        weights += mask_right.to(weights.dtype()) * exp(-alpha_t) * weights;
        // Step 3.3: Normalise the weights
        double totalWeights = torch::sum(weights).item<double>();
        weights = weights / totalWeights;
    }
    return {weights, alpha_t, terminate};
}
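For reference, update_weights above implements the standard AdaBoost bookkeeping: epsilon_t is the weighted error of the candidate classifier and alpha_t = 0.5 * ln((1 - epsilon_t) / epsilon_t) is its amount of say. A worked trace (illustrative, not part of the source): with four samples at uniform weight 0.25 and one of them misclassified, epsilon_t = 0.25 and alpha_t = 0.5 * ln(3) ≈ 0.549; the `weights += ...` formulation scales the wrong sample by 1 + e^alpha_t ≈ 2.732 and each right sample by 1 + e^-alpha_t ≈ 1.577, so after normalisation the wrong sample carries ≈ 0.366 of the mass and each right one ≈ 0.211. The extra "+1" relative to the textbook rule comes from adding the scaled weights to the original weights before normalising.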
std::tuple<torch::Tensor &, double, bool> Boost::update_weights_block(int k, torch::Tensor &ytrain,
                                                                      torch::Tensor &weights) {
    /* Update Block algorithm
        k = # of models in block
        n_models = # of models in ensemble to make predictions
        n_models_bak = # models saved
        models = vector of models to make predictions
        models_bak = models not used to make predictions
        significances_bak = backup of significances vector

        Case list
        A) k = 1, n_models = 1     => n = 0, n_models = n + k
        B) k = 1, n_models = n + 1 => n_models = n + k
        C) k > 1, n_models = k + 1 => n = 1, n_models = n + k
        D) k > 1, n_models = k     => n = 0, n_models = n + k
        E) k > 1, n_models = k + n => n_models = n + k

        A, D) n = 0, k > 0, n_models == k
        1. n_models_bak <- n_models
        2. significances_bak <- significances
        3. significances = vector(k, 1)
        4. Don’t move any classifiers out of models
        5. n_models <- k
        6. Make prediction, compute alpha, update weights
        7. Don’t restore any classifiers to models
        8. significances <- significances_bak
        9. Update last k significances
        10. n_models <- n_models_bak

        B, C, E) n > 0, k > 0, n_models == n + k
        1. n_models_bak <- n_models
        2. significances_bak <- significances
        3. significances = vector(k, 1)
        4. Move first n classifiers to models_bak
        5. n_models <- k
        6. Make prediction, compute alpha, update weights
        7. Insert classifiers in models_bak to be the first n models
        8. significances <- significances_bak
        9. Update last k significances
        10. n_models <- n_models_bak
    */
    //
    // Make predict with only the last k models
    //
    std::unique_ptr<Classifier> model;
    std::vector<std::unique_ptr<Classifier>> models_bak;
    // 1. n_models_bak <- n_models 2. significances_bak <- significances
    auto significance_bak = significanceModels;
    auto n_models_bak = n_models;
    // 3. significances = vector(k, 1)
    significanceModels = std::vector<double>(k, 1.0);
    // 4. Move first n classifiers to models_bak
    // backup the first n_models - k models (if n_models == k, don't backup any)
    for (int i = 0; i < n_models - k; ++i) {
        model = std::move(models[0]);
        models.erase(models.begin());
        models_bak.push_back(std::move(model));
    }
    assert(models.size() == k);
    // 5. n_models <- k
    n_models = k;
    // 6. Make prediction, compute alpha, update weights
    auto ypred = predict(X_train);
    //
    // Update weights
    //
    double alpha_t;
    bool terminate;
    std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
    //
    // Restore the models if needed
    //
    // 7. Insert classifiers in models_bak to be the first n models
    // if n_models_bak == k, don't restore any, because none of them were moved
    if (k != n_models_bak) {
        // Insert in the same order as they were extracted
        int bak_size = models_bak.size();
        for (int i = 0; i < bak_size; ++i) {
            model = std::move(models_bak[bak_size - 1 - i]);
            models_bak.erase(models_bak.end() - 1);
            models.insert(models.begin(), std::move(model));
        }
    }
    // 8. significances <- significances_bak
    significanceModels = significance_bak;
    //
    // Update the significance of the last k models
    //
    // 9. Update last k significances
    for (int i = 0; i < k; ++i) {
        significanceModels[n_models_bak - k + i] = alpha_t;
    }
    // 10. n_models <- n_models_bak
    n_models = n_models_bak;
    return {weights, alpha_t, terminate};
}
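A worked trace of the bookkeeping above (case E, illustrative only): with n_models = 6 and k = 4, the first n = 2 models are moved to models_bak and the significance vector is saved; the remaining 4 models predict X_train with unit significance and a single alpha_t is computed from that joint prediction; the 2 saved models are re-inserted at the front, the saved significances are restored, the last 4 entries are overwritten with alpha_t, and n_models returns to 6.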
} // namespace bayesnet
57 bayesnet/ensembles/Boost.h Normal file
@@ -0,0 +1,57 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef BOOST_H
#define BOOST_H
#include <string>
#include <tuple>
#include <vector>
#include <nlohmann/json.hpp>
#include <torch/torch.h>
#include "Ensemble.h"
#include "bayesnet/feature_selection/FeatureSelect.h"
namespace bayesnet {
const struct {
    std::string CFS = "CFS";
    std::string FCBF = "FCBF";
    std::string IWSS = "IWSS";
} SelectFeatures;
const struct {
    std::string ASC = "asc";
    std::string DESC = "desc";
    std::string RAND = "rand";
} Orders;
class Boost : public Ensemble {
  public:
    explicit Boost(bool predict_voting = false);
    virtual ~Boost() override = default;
    void setHyperparameters(const nlohmann::json& hyperparameters_) override;

  protected:
    std::vector<int> featureSelection(torch::Tensor& weights_);
    void buildModel(const torch::Tensor& weights) override;
    std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights);
    std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights);
    void add_model(std::unique_ptr<Classifier> model, double significance);
    void remove_last_model();
    //
    // Attributes
    //
    torch::Tensor X_train, y_train, X_test, y_test;
    // Hyperparameters
    bool bisection = true; // if true, use the bisection strategy to add k models at once to the ensemble
    int maxTolerance = 3;
    std::string order_algorithm = Orders.DESC; // order to process the KBest features: asc, desc, rand
    bool convergence = true; // if true, stop when the model does not improve
    bool convergence_best = false; // whether to keep the best accuracy so far or the last accuracy as the prior accuracy
    bool selectFeatures = false; // if true, use feature selection
    std::string select_features_algorithm; // selected feature selection algorithm
    FeatureSelect* featureSelector = nullptr;
    double threshold = -1;
    bool block_update = false; // if true, use the block update algorithm; only meaningful if bisection is true
    bool alpha_block = false; // if true, alpha is computed with the ensemble built so far plus the new model
};
}
#endif
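A minimal configuration sketch for the hyperparameters declared above (illustrative only; `clf` is a hypothetical stand-in for any Boost-derived ensemble such as BoostAODE):

    #include <nlohmann/json.hpp>
    nlohmann::json hyperparameters = {
        {"select_features", "CFS"}, // one of CFS, IWSS, FCBF
        {"order", "desc"},          // one of asc, desc, rand
        {"bisection", true},
        {"block_update", true},     // requires bisection == true
        {"maxTolerance", 3},        // must be in [1, 6]
        {"convergence", true},
        {"convergence_best", false}
    };
    clf.setHyperparameters(hyperparameters); // unrecognised keys are rejected downstream

Note that, per the validation in Boost::setHyperparameters, alpha_block and block_update cannot both be true.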
165 bayesnet/ensembles/BoostA2DE.cc Normal file
@@ -0,0 +1,165 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <limits.h>
#include <tuple>
#include <folding.hpp>
#include "BoostA2DE.h"

namespace bayesnet {

BoostA2DE::BoostA2DE(bool predict_voting) : Boost(predict_voting)
{
}
std::vector<int> BoostA2DE::initializeModels(const Smoothing_t smoothing)
{
    torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
    std::vector<int> featuresSelected = featureSelection(weights_);
    if (featuresSelected.size() < 2) {
        notes.push_back("No features selected in initialization");
        status = ERROR;
        return std::vector<int>();
    }
    for (int i = 0; i < featuresSelected.size() - 1; i++) {
        for (int j = i + 1; j < featuresSelected.size(); j++) {
            auto parents = { featuresSelected[i], featuresSelected[j] };
            std::unique_ptr<Classifier> model = std::make_unique<SPnDE>(parents);
            model->fit(dataset, features, className, states, weights_, smoothing);
            models.push_back(std::move(model));
            significanceModels.push_back(1.0); // They will be updated later in trainModel
            n_models++;
        }
    }
    notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
    return featuresSelected;
}
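Since the double loop above instantiates one SPnDE per unordered pair of selected features, f selected features seed the ensemble with f(f - 1)/2 models; for example, 10 selected features yield 45 SPnDE models in the initial pack.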
void BoostA2DE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{
    //
    // Logging setup
    //
    // loguru::set_thread_name("BoostA2DE");
    // loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
    // loguru::add_file("boostA2DE.log", loguru::Truncate, loguru::Verbosity_MAX);

    // Algorithm based on the adaboost algorithm for classification
    // as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
    fitted = true;
    double alpha_t = 0;
    torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
    bool finished = false;
    std::vector<int> featuresUsed;
    if (selectFeatures) {
        featuresUsed = initializeModels(smoothing);
        if (featuresUsed.size() == 0) {
            return;
        }
        auto ypred = predict(X_train);
        std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
        // Update significance of the models
        for (int i = 0; i < n_models; ++i) {
            significanceModels[i] = alpha_t;
        }
        if (finished) {
            return;
        }
    }
    int numItemsPack = 0; // The counter of the models inserted in the current pack
    // Variables to control the accuracy finish condition
    double priorAccuracy = 0.0;
    double improvement = 1.0;
    double convergence_threshold = 1e-4;
    int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
    // Step 0: Set the finish condition
    // epsilon sub t > 0.5 => invert the weights policy
    // validation error is not decreasing
    // run out of features
    bool ascending = order_algorithm == Orders.ASC;
    std::mt19937 g{ 173 };
    std::vector<std::pair<int, int>> pairSelection;
    while (!finished) {
        // Step 1: Build ranking with mutual information
        pairSelection = metrics.SelectKPairs(weights_, featuresUsed, ascending, 0); // Get all the pairs sorted
        if (order_algorithm == Orders.RAND) {
            std::shuffle(pairSelection.begin(), pairSelection.end(), g);
        }
        int k = bisection ? pow(2, tolerance) : 1;
        int counter = 0; // The model counter of the current pack
        // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
        while (counter++ < k && pairSelection.size() > 0) {
            auto feature_pair = pairSelection[0];
            pairSelection.erase(pairSelection.begin());
            std::unique_ptr<Classifier> model;
            model = std::make_unique<SPnDE>(std::vector<int>({ feature_pair.first, feature_pair.second }));
            model->fit(dataset, features, className, states, weights_, smoothing);
            alpha_t = 0.0;
            if (!block_update) {
                auto ypred = model->predict(X_train);
                // Step 3.1: Compute the classifier amount of say
                std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
            }
            // Step 3.4: Store classifier and its accuracy to weigh its future vote
            numItemsPack++;
            models.push_back(std::move(model));
            significanceModels.push_back(alpha_t);
            n_models++;
            // VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
        }
        if (block_update) {
            std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
        }
        if (convergence && !finished) {
            auto y_val_predict = predict(X_test);
            double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
            if (priorAccuracy == 0) {
                priorAccuracy = accuracy;
            } else {
                improvement = accuracy - priorAccuracy;
            }
            if (improvement < convergence_threshold) {
                // VLOG_SCOPE_F(3, "  (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance++;
            } else {
                // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance = 0; // Reset the counter if the model performs better
                numItemsPack = 0;
            }
            if (convergence_best) {
                // Keep the best accuracy until now as the prior accuracy
                priorAccuracy = std::max(accuracy, priorAccuracy);
            } else {
                // Keep the last accuracy obtained as the prior accuracy
                priorAccuracy = accuracy;
            }
        }
        // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
        finished = finished || tolerance > maxTolerance || pairSelection.size() == 0;
    }
    if (tolerance > maxTolerance) {
        if (numItemsPack < n_models) {
            notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
            for (int i = 0; i < numItemsPack; ++i) {
                significanceModels.pop_back();
                models.pop_back();
                n_models--;
            }
        } else {
            notes.push_back("Convergence threshold reached & 0 models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
        }
    }
    if (pairSelection.size() > 0) {
        notes.push_back("Pairs not used in train: " + std::to_string(pairSelection.size()));
        status = WARNING;
    }
    notes.push_back("Number of models: " + std::to_string(n_models));
}
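A note on the pack size used above: with bisection enabled, each pass of the outer loop trains k = 2^tolerance models, so while the validation accuracy fails to improve the packs grow 1, 2, 4, 8, ...; a pass that does improve resets tolerance (and the pack counter) to 0, and once tolerance exceeds maxTolerance training stops and the models of the unfinished pack are discarded.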
std::vector<std::string> BoostA2DE::graph(const std::string& title) const
{
    return Ensemble::graph(title);
}
}
25 bayesnet/ensembles/BoostA2DE.h Normal file
@@ -0,0 +1,25 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef BOOSTA2DE_H
#define BOOSTA2DE_H
#include <string>
#include <vector>
#include "bayesnet/classifiers/SPnDE.h"
#include "Boost.h"
namespace bayesnet {
class BoostA2DE : public Boost {
  public:
    explicit BoostA2DE(bool predict_voting = false);
    virtual ~BoostA2DE() = default;
    std::vector<std::string> graph(const std::string& title = "BoostA2DE") const override;

  protected:
    void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;

  private:
    std::vector<int> initializeModels(const Smoothing_t smoothing);
};
}
#endif
@@ -4,264 +4,43 @@
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <random>
#include <set>
#include <functional>
#include <limits.h>
#include <tuple>
#include <folding.hpp>
#include "bayesnet/feature_selection/CFS.h"
#include "bayesnet/feature_selection/FCBF.h"
#include "bayesnet/feature_selection/IWSS.h"
#include "BoostAODE.h"
#include "bayesnet/classifiers/SPODE.h"
#include <loguru.hpp>
#include <loguru.cpp>

namespace bayesnet {

BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting)
{
    validHyperparameters = {
        "maxModels", "bisection", "order", "convergence", "threshold",
        "select_features", "maxTolerance", "predict_voting", "block_update"
    };

}
void BoostAODE::buildModel(const torch::Tensor& weights)
std::vector<int> BoostAODE::initializeModels(const Smoothing_t smoothing)
{
    // Models shall be built in trainModel
    models.clear();
    significanceModels.clear();
    n_models = 0;
    // Prepare the validation dataset
    auto y_ = dataset.index({ -1, "..." });
    if (convergence) {
        // Prepare train & validation sets from train data
        auto fold = folding::StratifiedKFold(5, y_, 271);
        auto [train, test] = fold.getFold(0);
        auto train_t = torch::tensor(train);
        auto test_t = torch::tensor(test);
        // Get train and validation sets
        X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
        y_train = dataset.index({ -1, train_t });
        X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
        y_test = dataset.index({ -1, test_t });
        dataset = X_train;
        m = X_train.size(1);
        auto n_classes = states.at(className).size();
        // Build dataset with train data
        buildDataset(y_train);
        metrics = Metrics(dataset, features, className, n_classes);
    } else {
        // Use all data to train
        X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
        y_train = y_;
    }
}
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
    auto hyperparameters = hyperparameters_;
    if (hyperparameters.contains("order")) {
        std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
        order_algorithm = hyperparameters["order"];
        if (std::find(algos.begin(), algos.end(), order_algorithm) == algos.end()) {
            throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC + ", " + Orders.RAND + "]");
        }
        hyperparameters.erase("order");
    }
    if (hyperparameters.contains("convergence")) {
        convergence = hyperparameters["convergence"];
        hyperparameters.erase("convergence");
    }
    if (hyperparameters.contains("bisection")) {
        bisection = hyperparameters["bisection"];
        hyperparameters.erase("bisection");
    }
    if (hyperparameters.contains("threshold")) {
        threshold = hyperparameters["threshold"];
        hyperparameters.erase("threshold");
    }
    if (hyperparameters.contains("maxTolerance")) {
        maxTolerance = hyperparameters["maxTolerance"];
        if (maxTolerance < 1 || maxTolerance > 4)
            throw std::invalid_argument("Invalid maxTolerance value, must be in [1, 4]");
        hyperparameters.erase("maxTolerance");
    }
    if (hyperparameters.contains("predict_voting")) {
        predict_voting = hyperparameters["predict_voting"];
        hyperparameters.erase("predict_voting");
    }
    if (hyperparameters.contains("select_features")) {
        auto selectedAlgorithm = hyperparameters["select_features"];
        std::vector<std::string> algos = { SelectFeatures.IWSS, SelectFeatures.CFS, SelectFeatures.FCBF };
        selectFeatures = true;
        select_features_algorithm = selectedAlgorithm;
        if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
            throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " + SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
        }
        hyperparameters.erase("select_features");
    }
    if (hyperparameters.contains("block_update")) {
        block_update = hyperparameters["block_update"];
        hyperparameters.erase("block_update");
    }
    Classifier::setHyperparameters(hyperparameters);
}
std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
{
    bool terminate = false;
    double alpha_t = 0;
    auto mask_wrong = ypred != ytrain;
    auto mask_right = ypred == ytrain;
    auto masked_weights = weights * mask_wrong.to(weights.dtype());
    double epsilon_t = masked_weights.sum().item<double>();
    if (epsilon_t > 0.5) {
        // Invert the weights policy (plot ln(wt))
        // "In each round of AdaBoost, there is a sanity check to ensure that the current base
        // learner is better than random guess" (Zhi-Hua Zhou, 2012)
        terminate = true;
    } else {
        double wt = (1 - epsilon_t) / epsilon_t;
        alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
        // Step 3.2: Update weights for next classifier
        // Step 3.2.1: Update weights of wrong samples
        weights += mask_wrong.to(weights.dtype()) * exp(alpha_t) * weights;
        // Step 3.2.2: Update weights of right samples
        weights += mask_right.to(weights.dtype()) * exp(-alpha_t) * weights;
        // Step 3.3: Normalise the weights
        double totalWeights = torch::sum(weights).item<double>();
        weights = weights / totalWeights;
    }
    return { weights, alpha_t, terminate };
}
std::tuple<torch::Tensor&, double, bool> BoostAODE::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
{
    /* Update Block algorithm
        k = # of models in block
        n_models = # of models in ensemble to make predictions
        n_models_bak = # models saved
        models = vector of models to make predictions
        models_bak = models not used to make predictions
        significances_bak = backup of significances vector

        Case list
        A) k = 1, n_models = 1     => n = 0, n_models = n + k
        B) k = 1, n_models = n + 1 => n_models = n + k
        C) k > 1, n_models = k + 1 => n = 1, n_models = n + k
        D) k > 1, n_models = k     => n = 0, n_models = n + k
        E) k > 1, n_models = k + n => n_models = n + k

        A, D) n = 0, k > 0, n_models == k
        1. n_models_bak <- n_models
        2. significances_bak <- significances
        3. significances = vector(k, 1)
        4. Don’t move any classifiers out of models
        5. n_models <- k
        6. Make prediction, compute alpha, update weights
        7. Don’t restore any classifiers to models
        8. significances <- significances_bak
        9. Update last k significances
        10. n_models <- n_models_bak

        B, C, E) n > 0, k > 0, n_models == n + k
        1. n_models_bak <- n_models
        2. significances_bak <- significances
        3. significances = vector(k, 1)
        4. Move first n classifiers to models_bak
        5. n_models <- k
        6. Make prediction, compute alpha, update weights
        7. Insert classifiers in models_bak to be the first n models
        8. significances <- significances_bak
        9. Update last k significances
        10. n_models <- n_models_bak
    */
    //
    // Make predict with only the last k models
    //
    std::unique_ptr<Classifier> model;
    std::vector<std::unique_ptr<Classifier>> models_bak;
    // 1. n_models_bak <- n_models 2. significances_bak <- significances
    auto significance_bak = significanceModels;
    auto n_models_bak = n_models;
    // 3. significances = vector(k, 1)
    significanceModels = std::vector<double>(k, 1.0);
    // 4. Move first n classifiers to models_bak
    // backup the first n_models - k models (if n_models == k, don't backup any)
    for (int i = 0; i < n_models - k; ++i) {
        model = std::move(models[0]);
        models.erase(models.begin());
        models_bak.push_back(std::move(model));
    }
    assert(models.size() == k);
    // 5. n_models <- k
    n_models = k;
    // 6. Make prediction, compute alpha, update weights
    auto ypred = predict(X_train);
    //
    // Update weights
    //
    double alpha_t;
    bool terminate;
    std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
    //
    // Restore the models if needed
    //
    // 7. Insert classifiers in models_bak to be the first n models
    // if n_models_bak == k, don't restore any, because none of them were moved
    if (k != n_models_bak) {
        // Insert in the same order as they were extracted
        int bak_size = models_bak.size();
        for (int i = 0; i < bak_size; ++i) {
            model = std::move(models_bak[bak_size - 1 - i]);
            models_bak.erase(models_bak.end() - 1);
            models.insert(models.begin(), std::move(model));
        }
    }
    // 8. significances <- significances_bak
    significanceModels = significance_bak;
    //
    // Update the significance of the last k models
    //
    // 9. Update last k significances
    for (int i = 0; i < k; ++i) {
        significanceModels[n_models_bak - k + i] = alpha_t;
    }
    // 10. n_models <- n_models_bak
    n_models = n_models_bak;
    return { weights, alpha_t, terminate };
}
std::vector<int> BoostAODE::initializeModels()
{
    std::vector<int> featuresUsed;
    torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
    int maxFeatures = 0;
    if (select_features_algorithm == SelectFeatures.CFS) {
        featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
    } else if (select_features_algorithm == SelectFeatures.IWSS) {
        if (threshold < 0 || threshold > 0.5) {
            throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.IWSS + " [0, 0.5]");
        }
        featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
    } else if (select_features_algorithm == SelectFeatures.FCBF) {
        if (threshold < 1e-7 || threshold > 1) {
            throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.FCBF + " [1e-7, 1]");
        }
        featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
    }
    featureSelector->fit();
    auto cfsFeatures = featureSelector->getFeatures();
    auto scores = featureSelector->getScores();
    for (const int& feature : cfsFeatures) {
        featuresUsed.push_back(feature);
    std::vector<int> featuresSelected = featureSelection(weights_);
    for (const int& feature : featuresSelected) {
        std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
        model->fit(dataset, features, className, states, weights_);
        model->fit(dataset, features, className, states, weights_, smoothing);
        models.push_back(std::move(model));
        significanceModels.push_back(1.0); // They will be updated later in trainModel
        n_models++;
    }
    notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
    delete featureSelector;
    return featuresUsed;
    notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
    return featuresSelected;
}
void BoostAODE::trainModel(const torch::Tensor& weights)
void BoostAODE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{
    //
    // Logging setup
    //
    // loguru::set_thread_name("BoostAODE");
    // loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
    // loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX);

    // Algorithm based on the adaboost algorithm for classification
    // as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
    fitted = true;
@@ -269,14 +48,16 @@ namespace bayesnet {
    torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
    bool finished = false;
    std::vector<int> featuresUsed;
    n_models = 0;
    if (selectFeatures) {
        featuresUsed = initializeModels();
        featuresUsed = initializeModels(smoothing);
        auto ypred = predict(X_train);
        std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
        // Update significance of the models
        for (int i = 0; i < n_models; ++i) {
            significanceModels[i] = alpha_t;
            significanceModels.push_back(alpha_t);
        }
        // VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models);
        if (finished) {
            return;
        }
@@ -304,17 +85,36 @@ namespace bayesnet {
            { return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
            end(featureSelection)
        );
        int k = pow(2, tolerance);
        int k = bisection ? pow(2, tolerance) : 1;
        int counter = 0; // The model counter of the current pack
        // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
        while (counter++ < k && featureSelection.size() > 0) {
            auto feature = featureSelection[0];
            featureSelection.erase(featureSelection.begin());
            std::unique_ptr<Classifier> model;
            model = std::make_unique<SPODE>(feature);
            model->fit(dataset, features, className, states, weights_);
            model->fit(dataset, features, className, states, weights_, smoothing);
            alpha_t = 0.0;
            if (!block_update) {
                auto ypred = model->predict(X_train);
                torch::Tensor ypred;
                if (alpha_block) {
                    //
                    // Compute the prediction with the current ensemble + model
                    //
                    // Add the model to the ensemble
                    n_models++;
                    models.push_back(std::move(model));
                    significanceModels.push_back(1);
                    // Compute the prediction
                    ypred = predict(X_train);
                    // Remove the model from the ensemble
                    model = std::move(models.back());
                    models.pop_back();
                    significanceModels.pop_back();
                    n_models--;
                } else {
                    ypred = model->predict(X_train);
                }
                // Step 3.1: Compute the classifier amount of say
                std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
            }
@@ -324,6 +124,7 @@ namespace bayesnet {
            models.push_back(std::move(model));
            significanceModels.push_back(alpha_t);
            n_models++;
            // VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size());
        }
        if (block_update) {
            std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
@@ -337,20 +138,28 @@ namespace bayesnet {
                improvement = accuracy - priorAccuracy;
            }
            if (improvement < convergence_threshold) {
                // VLOG_SCOPE_F(3, "  (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance++;
            } else {
                // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance = 0; // Reset the counter if the model performs better
                numItemsPack = 0;
            }
            // Keep the best accuracy until now as the prior accuracy
            priorAccuracy = std::max(accuracy, priorAccuracy);
            // priorAccuracy = accuracy;
            if (convergence_best) {
                // Keep the best accuracy until now as the prior accuracy
                priorAccuracy = std::max(accuracy, priorAccuracy);
            } else {
                // Keep the last accuracy obtained as the prior accuracy
                priorAccuracy = accuracy;
            }
        }
        // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
        finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
    }
    if (tolerance > maxTolerance) {
        if (numItemsPack < n_models) {
            notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
            for (int i = 0; i < numItemsPack; ++i) {
                significanceModels.pop_back();
                models.pop_back();
@@ -358,6 +167,7 @@ namespace bayesnet {
            }
        } else {
            notes.push_back("Convergence threshold reached & 0 models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
        }
    }
    if (featuresUsed.size() != features.size()) {
@@ -6,44 +6,20 @@

#ifndef BOOSTAODE_H
#define BOOSTAODE_H
#include <map>
#include "bayesnet/classifiers/SPODE.h"
#include "bayesnet/feature_selection/FeatureSelect.h"
#include "Ensemble.h"
#include <string>
#include <vector>
#include "Boost.h"

namespace bayesnet {
struct {
    std::string CFS = "CFS";
    std::string FCBF = "FCBF";
    std::string IWSS = "IWSS";
} SelectFeatures;
struct {
    std::string ASC = "asc";
    std::string DESC = "desc";
    std::string RAND = "rand";
} Orders;
class BoostAODE : public Ensemble {
class BoostAODE : public Boost {
  public:
    BoostAODE(bool predict_voting = false);
    explicit BoostAODE(bool predict_voting = false);
    virtual ~BoostAODE() = default;
    std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
    void setHyperparameters(const nlohmann::json& hyperparameters_) override;

  protected:
    void buildModel(const torch::Tensor& weights) override;
    void trainModel(const torch::Tensor& weights) override;
    void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;

  private:
    std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights);
    std::vector<int> initializeModels();
    torch::Tensor X_train, y_train, X_test, y_test;
    // Hyperparameters
    bool bisection = true; // if true, use the bisection strategy to add k models at once to the ensemble
    int maxTolerance = 3;
    std::string order_algorithm; // order to process the KBest features: asc, desc, rand
    bool convergence = true; // if true, stop when the model does not improve
    bool selectFeatures = false; // if true, use feature selection
    std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
    FeatureSelect* featureSelector = nullptr;
    double threshold = -1;
    bool block_update = false;
    std::vector<int> initializeModels(const Smoothing_t smoothing);
};
}
#endif
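The net effect of this refactor, sketched as bare declarations (a summary of the diff, not a verbatim header):

    // Before: BoostAODE derived from Ensemble directly and carried every boosting
    // hyperparameter plus its own update_weights / update_weights_block.
    // After: that machinery lives once in the new Boost base class.
    class Ensemble  : public Classifier { /* averaging / voting prediction */ };
    class Boost     : public Ensemble   { /* hyperparameters, weight updates, feature selection */ };
    class BoostAODE : public Boost      { /* SPODE-specific initializeModels / trainModel */ };
    class BoostA2DE : public Boost      { /* SPnDE-specific initializeModels / trainModel */ };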
@@ -3,22 +3,20 @@
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include "Ensemble.h"

namespace bayesnet {

Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting)
{

};
const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted";
void Ensemble::trainModel(const torch::Tensor& weights)
void Ensemble::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
{
    n_models = models.size();
    for (auto i = 0; i < n_models; ++i) {
        // fit with std::vectors
        models[i]->fit(dataset, features, className, states);
        models[i]->fit(dataset, features, className, states, smoothing);
    }
}
std::vector<int> Ensemble::compute_arg_max(std::vector<std::vector<double>>& X)
@@ -85,17 +83,10 @@ namespace bayesnet {
{
    auto n_states = models[0]->getClassNumStates();
    torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
    auto threads{ std::vector<std::thread>() };
    std::mutex mtx;
    for (auto i = 0; i < n_models; ++i) {
        threads.push_back(std::thread([&, i]() {
            auto ypredict = models[i]->predict_proba(X);
            std::lock_guard<std::mutex> lock(mtx);
            y_pred += ypredict * significanceModels[i];
        }));
    }
    for (auto& thread : threads) {
        thread.join();
        auto ypredict = models[i]->predict_proba(X);
        /*std::cout << "model " << i << " prediction: " << ypredict << " significance " << significanceModels[i] << std::endl;*/
        y_pred += ypredict * significanceModels[i];
    }
    auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
    y_pred /= sum;
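In other words, the loop computes the significance-weighted mixture p(c | x) = Σ_i s_i · p_i(c | x) / Σ_i s_i, where s_i is significanceModels[i]. The removed std::thread/mutex scaffolding changed only how the sum was accumulated, not its value (up to floating-point ordering).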
@@ -105,23 +96,15 @@ namespace bayesnet {
{
    auto n_states = models[0]->getClassNumStates();
    std::vector<std::vector<double>> y_pred(X[0].size(), std::vector<double>(n_states, 0.0));
    auto threads{ std::vector<std::thread>() };
    std::mutex mtx;
    for (auto i = 0; i < n_models; ++i) {
        threads.push_back(std::thread([&, i]() {
            auto ypredict = models[i]->predict_proba(X);
            assert(ypredict.size() == y_pred.size());
            assert(ypredict[0].size() == y_pred[0].size());
            std::lock_guard<std::mutex> lock(mtx);
            // Multiply each prediction by the significance of the model and then add it to the final prediction
            for (auto j = 0; j < ypredict.size(); ++j) {
                std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(),
                    [significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
            }
        }));
    }
    for (auto& thread : threads) {
        thread.join();
        auto ypredict = models[i]->predict_proba(X);
        assert(ypredict.size() == y_pred.size());
        assert(ypredict[0].size() == y_pred[0].size());
        // Multiply each prediction by the significance of the model and then add it to the final prediction
        for (auto j = 0; j < ypredict.size(); ++j) {
            std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(),
                [significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
        }
    }
    auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
    // Divide each element of the prediction by the sum of the significances
@@ -141,17 +124,9 @@ namespace bayesnet {
{
    // Build a m x n_models tensor with the predictions of each model
    torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
    auto threads{ std::vector<std::thread>() };
    std::mutex mtx;
    for (auto i = 0; i < n_models; ++i) {
        threads.push_back(std::thread([&, i]() {
            auto ypredict = models[i]->predict(X);
            std::lock_guard<std::mutex> lock(mtx);
            y_pred.index_put_({ "...", i }, ypredict);
        }));
    }
    for (auto& thread : threads) {
        thread.join();
        auto ypredict = models[i]->predict(X);
        y_pred.index_put_({ "...", i }, ypredict);
    }
    return voting(y_pred);
}
@@ -33,9 +33,15 @@ namespace bayesnet {
    }
    std::string dump_cpt() const override
    {
        return "";
        std::string output;
        for (auto& model : models) {
            output += model->dump_cpt();
            output += std::string(80, '-') + "\n";
        }
        return output;
    }

  protected:
    void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
    torch::Tensor predict_average_voting(torch::Tensor& X);
    std::vector<std::vector<double>> predict_average_voting(std::vector<std::vector<int>>& X);
    torch::Tensor predict_average_proba(torch::Tensor& X);
@@ -43,10 +49,10 @@ namespace bayesnet {
    torch::Tensor compute_arg_max(torch::Tensor& X);
    std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X);
    torch::Tensor voting(torch::Tensor& votes);
    // Attributes
    unsigned n_models;
    std::vector<std::unique_ptr<Classifier>> models;
    std::vector<double> significanceModels;
    void trainModel(const torch::Tensor& weights) override;
    bool predict_voting;
};
}
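With this change, Ensemble::dump_cpt no longer returns an empty string but the concatenated CPT dumps of all members, each followed by a separator line of 80 dashes, roughly (illustrative):

    <CPTs of model 0>
    --------------------------------------------------------------------------------
    <CPTs of model 1>
    --------------------------------------------------------------------------------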
168 bayesnet/ensembles/XBA2DE.cc Normal file
@@ -0,0 +1,168 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <folding.hpp>
#include <limits.h>
#include "XBA2DE.h"
#include "bayesnet/classifiers/XSP2DE.h"
#include "bayesnet/utils/TensorUtils.h"

namespace bayesnet {

XBA2DE::XBA2DE(bool predict_voting) : Boost(predict_voting) {}
std::vector<int> XBA2DE::initializeModels(const Smoothing_t smoothing) {
    torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
    std::vector<int> featuresSelected = featureSelection(weights_);
    if (featuresSelected.size() < 2) {
        notes.push_back("No features selected in initialization");
        status = ERROR;
        return std::vector<int>();
    }
    for (int i = 0; i < featuresSelected.size() - 1; i++) {
        for (int j = i + 1; j < featuresSelected.size(); j++) {
            std::unique_ptr<Classifier> model = std::make_unique<XSp2de>(featuresSelected[i], featuresSelected[j]);
            model->fit(dataset, features, className, states, weights_, smoothing);
            add_model(std::move(model), 1.0);
        }
    }
    notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " +
                    std::to_string(features.size()) + " with " + select_features_algorithm);
    return featuresSelected;
}
void XBA2DE::trainModel(const torch::Tensor &weights, const Smoothing_t smoothing) {
    //
    // Logging setup
    //
    // loguru::set_thread_name("XBA2DE");
    // loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
    // loguru::add_file("boostA2DE.log", loguru::Truncate, loguru::Verbosity_MAX);

    // Algorithm based on the adaboost algorithm for classification
    // as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
    X_train_ = TensorUtils::to_matrix(X_train);
    y_train_ = TensorUtils::to_vector<int>(y_train);
    if (convergence) {
        X_test_ = TensorUtils::to_matrix(X_test);
        y_test_ = TensorUtils::to_vector<int>(y_test);
    }
    fitted = true;
    double alpha_t = 0;
    torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
    bool finished = false;
    std::vector<int> featuresUsed;
    if (selectFeatures) {
        featuresUsed = initializeModels(smoothing);
        if (featuresUsed.size() == 0) {
            return;
        }
        auto ypred = predict(X_train);
        std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
        // Update significance of the models
        for (int i = 0; i < n_models; ++i) {
            significanceModels[i] = alpha_t;
        }
        if (finished) {
            return;
        }
    }
    int numItemsPack = 0; // The counter of the models inserted in the current pack
    // Variables to control the accuracy finish condition
    double priorAccuracy = 0.0;
    double improvement = 1.0;
    double convergence_threshold = 1e-4;
    int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
    // Step 0: Set the finish condition
    // epsilon sub t > 0.5 => invert the weights policy
    // validation error is not decreasing
    // run out of features
    bool ascending = order_algorithm == Orders.ASC;
    std::mt19937 g{173};
    std::vector<std::pair<int, int>> pairSelection;
    while (!finished) {
        // Step 1: Build ranking with mutual information
        pairSelection = metrics.SelectKPairs(weights_, featuresUsed, ascending, 0); // Get all the pairs sorted
        if (order_algorithm == Orders.RAND) {
            std::shuffle(pairSelection.begin(), pairSelection.end(), g);
        }
        int k = bisection ? pow(2, tolerance) : 1;
        int counter = 0; // The model counter of the current pack
        // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
        while (counter++ < k && pairSelection.size() > 0) {
            auto feature_pair = pairSelection[0];
            pairSelection.erase(pairSelection.begin());
            std::unique_ptr<Classifier> model;
            model = std::make_unique<XSp2de>(feature_pair.first, feature_pair.second);
            model->fit(dataset, features, className, states, weights_, smoothing);
            alpha_t = 0.0;
            if (!block_update) {
                auto ypred = model->predict(X_train);
                // Step 3.1: Compute the classifier amount of say
                std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
            }
            // Step 3.4: Store classifier and its accuracy to weigh its future vote
            numItemsPack++;
            models.push_back(std::move(model));
            significanceModels.push_back(alpha_t);
            n_models++;
            // VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models,
            // featuresUsed.size());
        }
        if (block_update) {
            std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
        }
        if (convergence && !finished) {
            auto y_val_predict = predict(X_test);
            double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
            if (priorAccuracy == 0) {
                priorAccuracy = accuracy;
            } else {
                improvement = accuracy - priorAccuracy;
            }
            if (improvement < convergence_threshold) {
                // VLOG_SCOPE_F(3, "  (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f
                // current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance++;
            } else {
                // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f
                // prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance = 0; // Reset the counter if the model performs better
                numItemsPack = 0;
            }
            if (convergence_best) {
                // Keep the best accuracy until now as the prior accuracy
                priorAccuracy = std::max(accuracy, priorAccuracy);
            } else {
                // Keep the last accuracy obtained as the prior accuracy
                priorAccuracy = accuracy;
            }
        }
        // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(),
        // features.size());
        finished = finished || tolerance > maxTolerance || pairSelection.size() == 0;
    }
    if (tolerance > maxTolerance) {
        if (numItemsPack < n_models) {
            notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
            for (int i = 0; i < numItemsPack; ++i) {
                significanceModels.pop_back();
                models.pop_back();
                n_models--;
            }
        } else {
            notes.push_back("Convergence threshold reached & 0 models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d",
            // n_models, numItemsPack);
        }
    }
    if (pairSelection.size() > 0) {
        notes.push_back("Pairs not used in train: " + std::to_string(pairSelection.size()));
        status = WARNING;
    }
    notes.push_back("Number of models: " + std::to_string(n_models));
}
std::vector<std::string> XBA2DE::graph(const std::string &title) const { return Ensemble::graph(title); }
} // namespace bayesnet
28 bayesnet/ensembles/XBA2DE.h Normal file
@@ -0,0 +1,28 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef XBA2DE_H
#define XBA2DE_H
#include <string>
#include <vector>
#include "Boost.h"
namespace bayesnet {
class XBA2DE : public Boost {
  public:
    explicit XBA2DE(bool predict_voting = false);
    virtual ~XBA2DE() = default;
    std::vector<std::string> graph(const std::string& title = "XBA2DE") const override;
    std::string getVersion() override { return version; };

  protected:
    void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;

  private:
    std::vector<int> initializeModels(const Smoothing_t smoothing);
    std::vector<std::vector<int>> X_train_, X_test_;
    std::vector<int> y_train_, y_test_;
    std::string version = "0.9.7";
};
}
#endif
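A tiny usage sketch grounded in the header above (the training call is omitted, since the public fit interface is inherited and not shown in this diff):

    bayesnet::XBA2DE clf;          // defaults to weighted (non-voting) prediction
    std::cout << clf.getVersion(); // prints "0.9.7", the version attribute declared above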
184 bayesnet/ensembles/XBAODE.cc Normal file
@@ -0,0 +1,184 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include "XBAODE.h"
#include "bayesnet/classifiers/XSPODE.h"
#include "bayesnet/utils/TensorUtils.h"
#include <limits.h>
#include <random>
#include <tuple>

namespace bayesnet {
XBAODE::XBAODE() : Boost(false) {
    validHyperparameters = {"alpha_block", "order", "convergence", "convergence_best", "bisection",
                            "threshold", "maxTolerance", "predict_voting", "select_features"};
}
std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing) {
    torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
    std::vector<int> featuresSelected = featureSelection(weights_);
    for (const int &feature : featuresSelected) {
        std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
        model->fit(dataset, features, className, states, weights_, smoothing);
        add_model(std::move(model), 1.0);
    }
    notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " +
                    std::to_string(features.size()) + " with " + select_features_algorithm);
    return featuresSelected;
}
void XBAODE::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing) {
    X_train_ = TensorUtils::to_matrix(X_train);
    y_train_ = TensorUtils::to_vector<int>(y_train);
    if (convergence) {
        X_test_ = TensorUtils::to_matrix(X_test);
        y_test_ = TensorUtils::to_vector<int>(y_test);
    }
    fitted = true;
    double alpha_t;
    torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
    bool finished = false;
    std::vector<int> featuresUsed;
    n_models = 0;
    if (selectFeatures) {
        featuresUsed = initializeModels(smoothing);
        auto ypred = predict(X_train_);
        auto ypred_t = torch::tensor(ypred);
        std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
        // Update significance of the models
        for (const int &feature : featuresUsed) {
            significanceModels.pop_back();
        }
        for (const int &feature : featuresUsed) {
            significanceModels.push_back(alpha_t);
        }
        // VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models);
        if (finished) {
            return;
        }
    }
    int numItemsPack = 0; // The counter of the models inserted in the current pack
    // Variables to control the accuracy finish condition
    double priorAccuracy = 0.0;
    double improvement = 1.0;
    double convergence_threshold = 1e-4;
    int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
    // Step 0: Set the finish conditions
    //   - epsilon sub t > 0.5 => invert the weights_ policy
    //   - validation error is not decreasing
    //   - run out of features
    bool ascending = order_algorithm == bayesnet::Orders.ASC;
    std::mt19937 g{173};
    while (!finished) {
        // Step 1: Build ranking with mutual information
        auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
        if (order_algorithm == bayesnet::Orders.RAND) {
            std::shuffle(featureSelection.begin(), featureSelection.end(), g);
        }
        // Remove used features
        featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(),
                                         [&](auto x) {
                                             return std::find(featuresUsed.begin(), featuresUsed.end(), x) !=
                                                    featuresUsed.end();
                                         }),
                               featureSelection.end());
        int k = bisection ? pow(2, tolerance) : 1;
        int counter = 0; // The model counter of the current pack
        // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
        while (counter++ < k && featureSelection.size() > 0) {
            auto feature = featureSelection[0];
            featureSelection.erase(featureSelection.begin());
            std::unique_ptr<Classifier> model;
            model = std::make_unique<XSpode>(feature);
            model->fit(dataset, features, className, states, weights_, smoothing);
            /*dynamic_cast<XSpode*>(model.get())->fitx(X_train, y_train, weights_, smoothing); // using exclusive XSpode fit method*/
            // DEBUG
            /*std::cout << dynamic_cast<XSpode*>(model.get())->to_string() << std::endl;*/
            // DEBUG
            std::vector<int> ypred;
            if (alpha_block) {
                //
                // Compute the prediction with the current ensemble + model
                //
                // Add the model to the ensemble
                add_model(std::move(model), 1.0);
                // Compute the prediction
                ypred = predict(X_train_);
                model = std::move(models.back());
                // Remove the model from the ensemble
                remove_last_model();
            } else {
                ypred = model->predict(X_train_);
            }
            // Step 3.1: Compute the classifier amount of say
            auto ypred_t = torch::tensor(ypred);
            std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
            // Step 3.4: Store classifier and its accuracy to weigh its future vote
            numItemsPack++;
            featuresUsed.push_back(feature);
            add_model(std::move(model), alpha_t);
            // VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size());
        } // End of the pack
        if (convergence && !finished) {
            auto y_val_predict = predict(X_test);
            double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
            if (priorAccuracy == 0) {
                priorAccuracy = accuracy;
            } else {
                improvement = accuracy - priorAccuracy;
            }
            if (improvement < convergence_threshold) {
                // VLOG_SCOPE_F(3, "  (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance++;
            } else {
                // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance = 0; // Reset the counter if the model performs better
                numItemsPack = 0;
            }
            if (convergence_best) {
                // Keep the best accuracy until now as the prior accuracy
                priorAccuracy = std::max(accuracy, priorAccuracy);
            } else {
                // Keep the last accuracy obtained as the prior accuracy
                priorAccuracy = accuracy;
            }
        }
        // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
        finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
    }
    if (tolerance > maxTolerance) {
        if (numItemsPack < n_models) {
            notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
            for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i) {
                remove_last_model();
            }
            // VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features used.", n_models, featuresUsed.size());
        } else {
            notes.push_back("Convergence threshold reached & 0 models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
        }
    }
    if (featuresUsed.size() != features.size()) {
        notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " +
                        std::to_string(features.size()));
        status = bayesnet::WARNING;
    }
    notes.push_back("Number of models: " + std::to_string(n_models));
    return;
}
} // namespace bayesnet
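For intuition about the update_weights call above: its body lives in the Boost base class and is not part of this comparison. Below is a minimal AdaBoost-style sketch that is merely consistent with the (weights, alpha_t, finished) triple the code destructures; the formula and the 0.5 error cutoff are assumptions, not the library's confirmed implementation.

    #include <cmath>
    #include <tuple>
    #include <vector>

    // Hypothetical stand-in for Boost::update_weights (illustrative only).
    std::tuple<std::vector<double>, double, bool>
    update_weights_sketch(const std::vector<int>& y, const std::vector<int>& ypred, std::vector<double> w) {
        double err = 0.0;
        for (size_t i = 0; i < y.size(); ++i)
            if (y[i] != ypred[i]) err += w[i];          // weighted training error
        if (err <= 0.0 || err >= 0.5)
            return { w, 0.0, true };                    // degenerate learner: signal "finished"
        double alpha = 0.5 * std::log((1.0 - err) / err); // the classifier's "amount of say"
        double sum = 0.0;
        for (size_t i = 0; i < y.size(); ++i) {
            w[i] *= std::exp(y[i] != ypred[i] ? alpha : -alpha); // up-weight mistakes
            sum += w[i];
        }
        for (auto& wi : w) wi /= sum;                   // renormalize to a distribution
        return { w, alpha, false };
    }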
27	bayesnet/ensembles/XBAODE.h	Normal file
@@ -0,0 +1,27 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef XBAODE_H
#define XBAODE_H
#include <vector>
#include <cmath>
#include "Boost.h"

namespace bayesnet {
class XBAODE : public Boost {
  public:
    XBAODE();
    std::string getVersion() override { return version; };

  protected:
    void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;

  private:
    std::vector<int> initializeModels(const Smoothing_t smoothing);
    std::vector<std::vector<int>> X_train_, X_test_;
    std::vector<int> y_train_, y_test_;
    std::string version = "0.9.7";
};
}
#endif // XBAODE_H
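A minimal usage sketch for the new ensemble, assuming the BaseClassifier::fit overload listed in the class diagram at the end of this comparison (features in rows, samples in columns); the tiny dataset is made up.

    #include <map>
    #include <string>
    #include <vector>
    #include "bayesnet/ensembles/XBAODE.h"

    int main() {
        std::vector<std::vector<int>> X = { {0, 1, 0, 1}, {1, 1, 0, 0} }; // 2 features x 4 samples
        std::vector<int> y = { 0, 1, 0, 1 };
        std::vector<std::string> features = { "f0", "f1" };
        std::map<std::string, std::vector<int>> states = {
            { "f0", {0, 1} }, { "f1", {0, 1} }, { "class", {0, 1} }
        };
        bayesnet::XBAODE clf;
        clf.fit(X, y, features, "class", states, bayesnet::Smoothing_t::ORIGINAL);
        auto pred = clf.predict(X); // one label per sample
        return 0;
    }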
@@ -5,20 +5,20 @@
 // ***************************************************************
 
 #include <thread>
 #include <mutex>
 #include <sstream>
 #include <numeric>
 #include <algorithm>
 #include "Network.h"
 #include "bayesnet/utils/bayesnetUtils.h"
+#include "bayesnet/utils/CountingSemaphore.h"
+#include <pthread.h>
+#include <fstream>
 namespace bayesnet {
-    Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
+    Network::Network() : fitted{ false }, classNumStates{ 0 }
     {
     }
-    Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
-    {
-    }
-    Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
-        maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples)
+    Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
+        fitted(other.fitted), samples(other.samples)
     {
         if (samples.defined())
             samples = samples.clone();
@@ -35,16 +35,15 @@ namespace bayesnet {
         nodes.clear();
         samples = torch::Tensor();
     }
-    float Network::getMaxThreads() const
-    {
-        return maxThreads;
-    }
     torch::Tensor& Network::getSamples()
     {
         return samples;
     }
     void Network::addNode(const std::string& name)
     {
+        if (fitted) {
+            throw std::invalid_argument("Cannot add node to a fitted network. Initialize first.");
+        }
         if (name == "") {
             throw std::invalid_argument("Node name cannot be empty");
         }
@@ -94,12 +93,21 @@ namespace bayesnet {
     }
     void Network::addEdge(const std::string& parent, const std::string& child)
     {
+        if (fitted) {
+            throw std::invalid_argument("Cannot add edge to a fitted network. Initialize first.");
+        }
         if (nodes.find(parent) == nodes.end()) {
             throw std::invalid_argument("Parent node " + parent + " does not exist");
         }
         if (nodes.find(child) == nodes.end()) {
             throw std::invalid_argument("Child node " + child + " does not exist");
         }
+        // Check if the edge is already in the graph
+        for (auto& node : nodes[parent]->getChildren()) {
+            if (node->getName() == child) {
+                throw std::invalid_argument("Edge " + parent + " -> " + child + " already exists");
+            }
+        }
         // Temporarily add edge to check for cycles
         nodes[parent]->addChild(nodes[child].get());
         nodes[child]->addParent(nodes[parent].get());
@@ -155,7 +163,7 @@ namespace bayesnet {
         classNumStates = nodes.at(className)->getNumStates();
     }
     // X comes in nxm, where n is the number of features and m the number of samples
-    void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
+    void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
     {
         checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
         this->className = className;
@@ -164,17 +172,17 @@ namespace bayesnet {
         for (int i = 0; i < featureNames.size(); ++i) {
             auto row_feature = X.index({ i, "..." });
         }
-        completeFit(states, weights);
+        completeFit(states, weights, smoothing);
     }
-    void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
+    void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
     {
         checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
         this->className = className;
         this->samples = samples;
-        completeFit(states, weights);
+        completeFit(states, weights, smoothing);
     }
     // input_data comes in nxm, where n is the number of features and m the number of samples
-    void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
+    void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
     {
         const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
         checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
@@ -185,17 +193,43 @@ namespace bayesnet {
             samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
         }
         samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
-        completeFit(states, weights);
+        completeFit(states, weights, smoothing);
     }
-    void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
+    void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
     {
         setStates(states);
-        laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
         std::vector<std::thread> threads;
+        auto& semaphore = CountingSemaphore::getInstance();
+        const double n_samples = static_cast<double>(samples.size(1));
+        auto worker = [&](std::pair<const std::string, std::unique_ptr<Node>>& node, int i) {
+            std::string threadName = "FitWorker-" + std::to_string(i);
+#if defined(__linux__)
+            pthread_setname_np(pthread_self(), threadName.c_str());
+#else
+            pthread_setname_np(threadName.c_str());
+#endif
+            double numStates = static_cast<double>(node.second->getNumStates());
+            double smoothing_factor;
+            switch (smoothing) {
+                case Smoothing_t::ORIGINAL:
+                    smoothing_factor = 1.0 / n_samples;
+                    break;
+                case Smoothing_t::LAPLACE:
+                    smoothing_factor = 1.0;
+                    break;
+                case Smoothing_t::CESTNIK:
+                    smoothing_factor = 1 / numStates;
+                    break;
+                default:
+                    smoothing_factor = 0.0; // No smoothing
+            }
+            node.second->computeCPT(samples, features, smoothing_factor, weights);
+            semaphore.release();
+        };
+        int i = 0;
         for (auto& node : nodes) {
-            threads.emplace_back([this, &node, &weights]() {
-                node.second->computeCPT(samples, features, laplaceSmoothing, weights);
-            });
+            semaphore.acquire();
+            threads.emplace_back(worker, std::ref(node), i++);
         }
         for (auto& thread : threads) {
            thread.join();
@@ -207,14 +241,38 @@ namespace bayesnet {
         if (!fitted) {
             throw std::logic_error("You must call fit() before calling predict()");
         }
+        // Ensure the sample size is equal to the number of features
+        if (samples.size(0) != features.size() - 1) {
+            throw std::invalid_argument("(T) Sample size (" + std::to_string(samples.size(0)) +
+                ") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
+        }
         torch::Tensor result;
+        std::vector<std::thread> threads;
+        std::mutex mtx;
+        auto& semaphore = CountingSemaphore::getInstance();
         result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
-        for (int i = 0; i < samples.size(1); ++i) {
-            const torch::Tensor sample = samples.index({ "...", i });
-            auto psample = predict_sample(sample);
-            auto temp = torch::tensor(psample, torch::kFloat64);
-            // result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64));
-            result.index_put_({ i, "..." }, temp);
-        }
+        auto worker = [&](const torch::Tensor& sample, int i) {
+            std::string threadName = "PredictWorker-" + std::to_string(i);
+#if defined(__linux__)
+            pthread_setname_np(pthread_self(), threadName.c_str());
+#else
+            pthread_setname_np(threadName.c_str());
+#endif
+            auto psample = predict_sample(sample);
+            auto temp = torch::tensor(psample, torch::kFloat64);
+            {
+                std::lock_guard<std::mutex> lock(mtx);
+                result.index_put_({ i, "..." }, temp);
+            }
+            semaphore.release();
+        };
+        for (int i = 0; i < samples.size(1); ++i) {
+            semaphore.acquire();
+            const torch::Tensor sample = samples.index({ "...", i });
+            threads.emplace_back(worker, sample, i);
+        }
+        for (auto& thread : threads) {
+            thread.join();
+        }
         if (proba)
             return result;
@@ -239,18 +297,38 @@ namespace bayesnet {
         if (!fitted) {
             throw std::logic_error("You must call fit() before calling predict()");
         }
-        std::vector<int> predictions;
+        // Ensure the sample size is equal to the number of features
+        if (tsamples.size() != features.size() - 1) {
+            throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
+                ") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
+        }
+        std::vector<int> predictions(tsamples[0].size(), 0);
         std::vector<int> sample;
+        std::vector<std::thread> threads;
+        auto& semaphore = CountingSemaphore::getInstance();
+        auto worker = [&](const std::vector<int>& sample, const int row, int& prediction) {
+            std::string threadName = "(V)PWorker-" + std::to_string(row);
+#if defined(__linux__)
+            pthread_setname_np(pthread_self(), threadName.c_str());
+#else
+            pthread_setname_np(threadName.c_str());
+#endif
+            auto classProbabilities = predict_sample(sample);
+            auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
+            int predictedClass = distance(classProbabilities.begin(), maxElem);
+            prediction = predictedClass;
+            semaphore.release();
+        };
         for (int row = 0; row < tsamples[0].size(); ++row) {
             sample.clear();
             for (int col = 0; col < tsamples.size(); ++col) {
                 sample.push_back(tsamples[col][row]);
             }
-            std::vector<double> classProbabilities = predict_sample(sample);
-            // Find the class with the maximum posterior probability
-            auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
-            int predictedClass = distance(classProbabilities.begin(), maxElem);
-            predictions.push_back(predictedClass);
+            semaphore.acquire();
+            threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
         }
+        for (auto& thread : threads) {
+            thread.join();
+        }
         return predictions;
     }
@@ -261,14 +339,36 @@ namespace bayesnet {
         if (!fitted) {
             throw std::logic_error("You must call fit() before calling predict_proba()");
         }
-        std::vector<std::vector<double>> predictions;
+        // Ensure the sample size is equal to the number of features
+        if (tsamples.size() != features.size() - 1) {
+            throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
+                ") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
+        }
+        std::vector<std::vector<double>> predictions(tsamples[0].size(), std::vector<double>(classNumStates, 0.0));
        std::vector<int> sample;
+        std::vector<std::thread> threads;
+        auto& semaphore = CountingSemaphore::getInstance();
+        auto worker = [&](const std::vector<int>& sample, int row, std::vector<double>& predictions) {
+            std::string threadName = "(V)PWorker-" + std::to_string(row);
+#if defined(__linux__)
+            pthread_setname_np(pthread_self(), threadName.c_str());
+#else
+            pthread_setname_np(threadName.c_str());
+#endif
+            std::vector<double> classProbabilities = predict_sample(sample);
+            predictions = classProbabilities;
+            semaphore.release();
+        };
         for (int row = 0; row < tsamples[0].size(); ++row) {
             sample.clear();
             for (int col = 0; col < tsamples.size(); ++col) {
                 sample.push_back(tsamples[col][row]);
             }
-            predictions.push_back(predict_sample(sample));
+            semaphore.acquire();
+            threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
         }
+        for (auto& thread : threads) {
+            thread.join();
+        }
         return predictions;
     }
@@ -286,11 +386,6 @@ namespace bayesnet {
     // Return 1xn std::vector of probabilities
     std::vector<double> Network::predict_sample(const std::vector<int>& sample)
     {
-        // Ensure the sample size is equal to the number of features
-        if (sample.size() != features.size() - 1) {
-            throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) +
-                ") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
-        }
         std::map<std::string, int> evidence;
         for (int i = 0; i < sample.size(); ++i) {
             evidence[features[i]] = sample[i];
@@ -300,44 +395,26 @@ namespace bayesnet {
     // Return 1xn std::vector of probabilities
     std::vector<double> Network::predict_sample(const torch::Tensor& sample)
     {
-        // Ensure the sample size is equal to the number of features
-        if (sample.size(0) != features.size() - 1) {
-            throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) +
-                ") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
-        }
         std::map<std::string, int> evidence;
         for (int i = 0; i < sample.size(0); ++i) {
             evidence[features[i]] = sample[i].item<int>();
         }
         return exactInference(evidence);
     }
-    double Network::computeFactor(std::map<std::string, int>& completeEvidence)
-    {
-        double result = 1.0;
-        for (auto& node : getNodes()) {
-            result *= node.second->getFactorValue(completeEvidence);
-        }
-        return result;
-    }
     std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
     {
         std::vector<double> result(classNumStates, 0.0);
-        std::vector<std::thread> threads;
-        std::mutex mtx;
+        auto completeEvidence = std::map<std::string, int>(evidence);
         for (int i = 0; i < classNumStates; ++i) {
-            threads.emplace_back([this, &result, &evidence, i, &mtx]() {
-                auto completeEvidence = std::map<std::string, int>(evidence);
-                completeEvidence[getClassName()] = i;
-                double factor = computeFactor(completeEvidence);
-                std::lock_guard<std::mutex> lock(mtx);
-                result[i] = factor;
-            });
-        }
-        for (auto& thread : threads) {
-            thread.join();
+            completeEvidence[getClassName()] = i;
+            double partial = 1.0;
+            for (auto& node : getNodes()) {
+                partial *= node.second->getFactorValue(completeEvidence);
+            }
+            result[i] = partial;
         }
         // Normalize result
-        double sum = accumulate(result.begin(), result.end(), 0.0);
+        double sum = std::accumulate(result.begin(), result.end(), 0.0);
         transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
         return result;
     }
@@ -410,11 +487,7 @@ namespace bayesnet {
                     result.insert(it2, fatherName);
                     ending = false;
                 }
-            } else {
-                throw std::logic_error("Error in topological sort because of node " + feature + " is not in result");
-            }
-        } else {
-            throw std::logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
-        }
+            } else {
+                throw std::logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
+            }
         }
     }
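The rewritten exactInference multiplies one factor per node and then normalizes, i.e. P(c | e) = prod_n f_n(e, c) / sum_c' prod_n f_n(e, c'). A standalone sketch of that normalization step, mirroring the tail of the function above:

    #include <numeric>
    #include <vector>

    // Turn unnormalized class factors into a posterior distribution.
    std::vector<double> normalize(std::vector<double> result) {
        double sum = std::accumulate(result.begin(), result.end(), 0.0);
        for (auto& v : result) v /= sum; // assumes sum > 0, as exactInference does
        return result;
    }
    // e.g. normalize({0.02, 0.06}) yields {0.25, 0.75}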
@@ -10,16 +10,16 @@
 #include <vector>
 #include "bayesnet/config.h"
 #include "Node.h"
+#include "Smoothing.h"
 
 namespace bayesnet {
 
     class Network {
     public:
         Network();
-        explicit Network(float);
         explicit Network(const Network&);
         ~Network() = default;
         torch::Tensor& getSamples();
-        float getMaxThreads() const;
         void addNode(const std::string&);
         void addEdge(const std::string&, const std::string&);
         std::map<std::string, std::unique_ptr<Node>>& getNodes();
@@ -32,9 +32,9 @@ namespace bayesnet {
         /*
         Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
         */
-        void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
-        void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
-        void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
+        void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
+        void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
+        void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
         std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
         torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
         torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
@@ -50,19 +50,16 @@ namespace bayesnet {
     private:
         std::map<std::string, std::unique_ptr<Node>> nodes;
         bool fitted;
-        float maxThreads = 0.95;
         int classNumStates;
         std::vector<std::string> features; // Including classname
         std::string className;
-        double laplaceSmoothing;
         torch::Tensor samples; // n+1xm tensor used to fit the model
         bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
         std::vector<double> predict_sample(const std::vector<int>&);
         std::vector<double> predict_sample(const torch::Tensor&);
         std::vector<double> exactInference(std::map<std::string, int>&);
         double computeFactor(std::map<std::string, int>&);
-        void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
-        void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
+        void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
+        void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
         void setStates(const std::map<std::string, std::vector<int>>&);
     };
 }
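Putting the changed Network API together, a minimal end-to-end sketch; the n x m orientation (features in rows) follows the comments in Network::fit above, the include path is assumed from the Smoothing.h banner below, and the data is made up.

    #include <map>
    #include <string>
    #include <vector>
    #include "bayesnet/network/Network.h" // path assumed

    int main() {
        bayesnet::Network net;
        net.addNode("f0");
        net.addNode("class");
        net.addEdge("class", "f0");                         // naive-Bayes style arc
        std::vector<std::vector<int>> X = { {0, 1, 0, 1} }; // 1 feature x 4 samples
        std::vector<int> y = { 0, 1, 0, 1 };
        std::vector<double> w(4, 1.0);                      // per-sample weights
        std::vector<std::string> names = { "f0" };
        std::map<std::string, std::vector<int>> states = { { "f0", {0, 1} }, { "class", {0, 1} } };
        net.fit(X, y, w, names, "class", states, bayesnet::Smoothing_t::LAPLACE);
        auto pred = net.predict(X);                         // mx1 predictions
        return 0;
    }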
@@ -9,7 +9,7 @@
 namespace bayesnet {
 
     Node::Node(const std::string& name)
-        : name(name), numStates(0), cpTable(torch::Tensor()), parents(std::vector<Node*>()), children(std::vector<Node*>())
+        : name(name)
     {
     }
     void Node::clear()
@@ -90,52 +90,60 @@ namespace bayesnet {
         }
         return result;
     }
-    void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
+    void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
     {
         dimensions.clear();
+        dimensions.reserve(parents.size() + 1);
         // Get dimensions of the CPT
         dimensions.push_back(numStates);
-        transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
-
-        // Create a tensor of zeros with the dimensions of the CPT
-        cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
-        // Fill table with counts
-        auto pos = find(features.begin(), features.end(), name);
-        if (pos == features.end()) {
-            throw std::logic_error("Feature " + name + " not found in dataset");
-        }
-        int name_index = pos - features.begin();
+        for (const auto& parent : parents) {
+            dimensions.push_back(parent->getNumStates());
+        }
+        //transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
+        // Create a tensor initialized with smoothing
+        cpTable = torch::full(dimensions, smoothing, torch::kDouble);
+        // Create a map for quick feature index lookup
+        std::unordered_map<std::string, int> featureIndexMap;
+        for (size_t i = 0; i < features.size(); ++i) {
+            featureIndexMap[features[i]] = i;
+        }
+        // Fill table with counts
+        // Get the index of this node's feature
+        int name_index = featureIndexMap[name];
+        // Get parent indices in dataset
+        std::vector<int> parent_indices;
+        parent_indices.reserve(parents.size());
+        for (const auto& parent : parents) {
+            parent_indices.push_back(featureIndexMap[parent->getName()]);
+        }
+        c10::List<c10::optional<at::Tensor>> coordinates;
         for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
-            c10::List<c10::optional<at::Tensor>> coordinates;
-            coordinates.push_back(dataset.index({ name_index, n_sample }));
-            for (auto parent : parents) {
-                pos = find(features.begin(), features.end(), parent->getName());
-                if (pos == features.end()) {
-                    throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
-                }
-                int parent_index = pos - features.begin();
-                coordinates.push_back(dataset.index({ parent_index, n_sample }));
+            coordinates.clear();
+            auto sample = dataset.index({ "...", n_sample });
+            coordinates.push_back(sample[name_index]);
+            for (size_t i = 0; i < parent_indices.size(); ++i) {
+                coordinates.push_back(sample[parent_indices[i]]);
             }
             // Increment the count of the corresponding coordinate
-            cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item<double>());
+            cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
         }
-        // Normalize the counts
-        cpTable = cpTable / cpTable.sum(0);
+        // Normalize the counts (dividing each row by the sum of the row)
+        cpTable /= cpTable.sum(0, true);
     }
-    float Node::getFactorValue(std::map<std::string, int>& evidence)
+    double Node::getFactorValue(std::map<std::string, int>& evidence)
     {
         c10::List<c10::optional<at::Tensor>> coordinates;
         // following predetermined order of indices in the cpTable (see Node.h)
         coordinates.push_back(at::tensor(evidence[name]));
         transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
-        return cpTable.index({ coordinates }).item<float>();
+        return cpTable.index({ coordinates }).item<double>();
     }
     std::vector<std::string> Node::graph(const std::string& className)
     {
         auto output = std::vector<std::string>();
         auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
-        output.push_back(name + " [shape=circle" + suffix + "] \n");
-        transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); });
+        output.push_back("\"" + name + "\" [shape=circle" + suffix + "] \n");
+        transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return "\"" + name + "\" -> \"" + child->getName() + "\""; });
         return output;
     }
 }
@@ -12,14 +12,6 @@
 #include <torch/torch.h>
 namespace bayesnet {
     class Node {
-    private:
-        std::string name;
-        std::vector<Node*> parents;
-        std::vector<Node*> children;
-        int numStates; // number of states of the variable
-        torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
-        std::vector<int64_t> dimensions; // dimensions of the cpTable
-        std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
     public:
         explicit Node(const std::string&);
         void clear();
@@ -31,12 +23,20 @@ namespace bayesnet {
         std::vector<Node*>& getParents();
         std::vector<Node*>& getChildren();
         torch::Tensor& getCPT();
-        void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
+        void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights);
         int getNumStates() const;
         void setNumStates(int);
         unsigned minFill();
         std::vector<std::string> graph(const std::string& className); // Returns a std::vector of std::strings representing the graph in graphviz format
-        float getFactorValue(std::map<std::string, int>&);
+        double getFactorValue(std::map<std::string, int>&);
+    private:
+        std::string name;
+        std::vector<Node*> parents;
+        std::vector<Node*> children;
+        int numStates = 0; // number of states of the variable
+        torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
+        std::vector<int64_t> dimensions; // dimensions of the cpTable
+        std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
     };
 }
 #endif
17	bayesnet/network/Smoothing.h	Normal file
@@ -0,0 +1,17 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef SMOOTHING_H
#define SMOOTHING_H
namespace bayesnet {
    enum class Smoothing_t {
        NONE = -1,
        ORIGINAL = 0,
        LAPLACE,
        CESTNIK
    };
}
#endif // SMOOTHING_H
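The enum's effect, as wired up in Network::completeFit above, with m samples and a node of card states: ORIGINAL adds 1/m to every CPT cell, LAPLACE adds 1, CESTNIK adds 1/card, and NONE adds 0. A mirror of that switch plus a worked case:

    #include "bayesnet/network/Smoothing.h"

    // Mirrors the switch in Network::completeFit.
    double smoothing_factor(bayesnet::Smoothing_t s, double n_samples, double numStates) {
        switch (s) {
            case bayesnet::Smoothing_t::ORIGINAL: return 1.0 / n_samples;
            case bayesnet::Smoothing_t::LAPLACE:  return 1.0;
            case bayesnet::Smoothing_t::CESTNIK:  return 1.0 / numStates;
            default:                              return 0.0; // NONE
        }
    }
    // e.g. n_samples = 100, numStates = 4:
    //   ORIGINAL -> 0.01, LAPLACE -> 1.0, CESTNIK -> 0.25, NONE -> 0.0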
@@ -4,29 +4,79 @@
 // SPDX-License-Identifier: MIT
 // ***************************************************************
 
 #include <map>
+#include <unordered_map>
+#include <tuple>
 #include "Mst.h"
 #include "BayesMetrics.h"
 namespace bayesnet {
     //samples is n+1xm tensor used to fit the model
     Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
         : samples(samples)
-        , features(features)
         , className(className)
+        , features(features)
         , classNumStates(classNumStates)
     {
     }
     //samples is n+1xm std::vector used to fit the model
     Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
-        : features(features)
+        : samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
         , className(className)
+        , features(features)
         , classNumStates(classNumStates)
-        , samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
     {
         for (int i = 0; i < vsamples.size(); ++i) {
             samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32));
         }
         samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
     }
+    std::vector<std::pair<int, int>> Metrics::SelectKPairs(const torch::Tensor& weights, std::vector<int>& featuresExcluded, bool ascending, unsigned k)
+    {
+        // Return the K best feature pairs
+        auto n = features.size();
+        // compute scores
+        scoresKPairs.clear();
+        pairsKBest.clear();
+        auto labels = samples.index({ -1, "..." });
+        for (int i = 0; i < n - 1; ++i) {
+            if (std::find(featuresExcluded.begin(), featuresExcluded.end(), i) != featuresExcluded.end()) {
+                continue;
+            }
+            for (int j = i + 1; j < n; ++j) {
+                if (std::find(featuresExcluded.begin(), featuresExcluded.end(), j) != featuresExcluded.end()) {
+                    continue;
+                }
+                auto key = std::make_pair(i, j);
+                auto value = conditionalMutualInformation(samples.index({ i, "..." }), samples.index({ j, "..." }), labels, weights);
+                scoresKPairs.push_back({ key, value });
+            }
+        }
+        // sort scores
+        if (ascending) {
+            sort(scoresKPairs.begin(), scoresKPairs.end(), [](auto& a, auto& b)
+                { return a.second < b.second; });
+        } else {
+            sort(scoresKPairs.begin(), scoresKPairs.end(), [](auto& a, auto& b)
+                { return a.second > b.second; });
+        }
+        for (auto& [pairs, score] : scoresKPairs) {
+            pairsKBest.push_back(pairs);
+        }
+        if (k != 0 && k < pairsKBest.size()) {
+            if (ascending) {
+                int limit = pairsKBest.size() - k;
+                for (int i = 0; i < limit; i++) {
+                    pairsKBest.erase(pairsKBest.begin());
+                    scoresKPairs.erase(scoresKPairs.begin());
+                }
+            } else {
+                pairsKBest.resize(k);
+                scoresKPairs.resize(k);
+            }
+        }
+        return pairsKBest;
+    }
     std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
     {
         // Return the K Best features
@@ -66,7 +116,10 @@ namespace bayesnet {
     {
         return scoresKBest;
     }
+    std::vector<std::pair<std::pair<int, int>, double>> Metrics::getScoresKPairs() const
+    {
+        return scoresKPairs;
+    }
     torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
     {
         auto result = std::vector<double>();
@@ -105,14 +158,8 @@ namespace bayesnet {
         }
         return matrix;
     }
-    // To use in Python
-    std::vector<float> Metrics::conditionalEdgeWeights(std::vector<float>& weights_)
-    {
-        const torch::Tensor weights = torch::tensor(weights_);
-        auto matrix = conditionalEdge(weights);
-        std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
-        return v;
-    }
     // Measured in nats (natural logarithm (log) base e)
     // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
     double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
     {
         torch::Tensor counts = feature.bincount(weights);
@@ -151,10 +198,54 @@ namespace bayesnet {
         }
         return entropyValue;
     }
-    // I(X;Y) = H(Y) - H(Y|X)
+    // H(X|Y,C) = sum_{y in Y, c in C} p(x,c) H(X|Y=y,C=c)
+    double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
+    {
+        // Ensure the tensors are of the same length
+        assert(firstFeature.size(0) == secondFeature.size(0) && firstFeature.size(0) == labels.size(0) && firstFeature.size(0) == weights.size(0));
+        // Convert tensors to vectors for easier processing
+        auto firstFeatureData = firstFeature.accessor<int, 1>();
+        auto secondFeatureData = secondFeature.accessor<int, 1>();
+        auto labelsData = labels.accessor<int, 1>();
+        auto weightsData = weights.accessor<double, 1>();
+        int numSamples = firstFeature.size(0);
+        // Maps for joint and marginal probabilities
+        std::map<std::tuple<int, int, int>, double> jointCount;
+        std::map<std::tuple<int, int>, double> marginalCount;
+        // Compute joint and marginal counts
+        for (int i = 0; i < numSamples; ++i) {
+            auto keyJoint = std::make_tuple(firstFeatureData[i], labelsData[i], secondFeatureData[i]);
+            auto keyMarginal = std::make_tuple(firstFeatureData[i], labelsData[i]);
+            jointCount[keyJoint] += weightsData[i];
+            marginalCount[keyMarginal] += weightsData[i];
+        }
+        // Total weight sum
+        double totalWeight = torch::sum(weights).item<double>();
+        if (totalWeight == 0)
+            return 0;
+        // Compute the conditional entropy
+        double conditionalEntropy = 0.0;
+        for (const auto& [keyJoint, jointFreq] : jointCount) {
+            auto [x, c, y] = keyJoint;
+            auto keyMarginal = std::make_tuple(x, c);
+            //double p_xc = marginalCount[keyMarginal] / totalWeight;
+            double p_y_given_xc = jointFreq / marginalCount[keyMarginal];
+            if (p_y_given_xc > 0) {
+                conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_y_given_xc);
+            }
+        }
+        return conditionalEntropy;
+    }
     // I(X;Y) = H(Y) - H(Y|X) ; I(X;Y) >= 0
     double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     {
-        return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
+        return std::max(entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights), 0.0);
     }
+    // I(X;Y|C) = H(X|C) - H(X|Y,C) >= 0
+    double Metrics::conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
+    {
+        return std::max(conditionalEntropy(firstFeature, labels, weights) - conditionalEntropy(firstFeature, secondFeature, labels, weights), 0.0);
+    }
     /*
     Compute the maximum spanning tree considering the weights as distances
@@ -16,21 +16,26 @@ namespace bayesnet {
         Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
         Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
         std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
+        std::vector<std::pair<int, int>> SelectKPairs(const torch::Tensor& weights, std::vector<int>& featuresExcluded, bool ascending = false, unsigned k = 0);
         std::vector<double> getScoresKBest() const;
+        std::vector<std::pair<std::pair<int, int>, double>> getScoresKPairs() const;
         double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
-        std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
+        double conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
         torch::Tensor conditionalEdge(const torch::Tensor& weights);
         std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
+        // Measured in nats (natural logarithm (log) base e)
+        // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
+        double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
+        double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
     protected:
         torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
         std::string className;
-        double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
         std::vector<std::string> features;
         template <class T>
         std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
         {
             std::vector<std::pair<T, T>> result;
-            for (int i = 0; i < source.size(); ++i) {
+            for (int i = 0; i < source.size() - 1; ++i) {
                 T temp = source[i];
                 for (int j = i + 1; j < source.size(); ++j) {
                     result.push_back({ temp, source[j] });
@@ -49,6 +54,8 @@ namespace bayesnet {
         int classNumStates = 0;
         std::vector<double> scoresKBest;
         std::vector<int> featuresKBest; // sorted indices of the features
+        std::vector<std::pair<int, int>> pairsKBest; // sorted indices of the pairs
+        std::vector<std::pair<std::pair<int, int>, double>> scoresKPairs;
         double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
     };
 }
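A minimal sketch of the new pair-selection entry points, assuming the constructors above; the data is made up, and the weights are kFloat64 because the accessor-based conditionalEntropy reads doubles.

    #include <torch/torch.h>
    #include <vector>
    #include "BayesMetrics.h" // repo-relative include path assumed

    int main() {
        std::vector<std::vector<int>> X = { {0, 1, 0, 1}, {1, 1, 0, 0} }; // 2 features x 4 samples
        std::vector<int> y = { 0, 1, 0, 1 };
        bayesnet::Metrics metrics(X, y, { "f0", "f1" }, "class", 2);
        auto weights = torch::full({ 4 }, 0.25, torch::kFloat64);       // uniform sample weights
        std::vector<int> excluded;                                      // consider every feature
        auto pairs = metrics.SelectKPairs(weights, excluded, false, 0); // all pairs, best CMI first
        auto scores = metrics.getScoresKPairs();                        // (pair, score) in the same order
        return 0;
    }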
54	bayesnet/utils/CountingSemaphore.h	Normal file
@@ -0,0 +1,54 @@
#ifndef COUNTING_SEMAPHORE_H
#define COUNTING_SEMAPHORE_H
#include <mutex>
#include <condition_variable>
#include <algorithm>
#include <thread>

class CountingSemaphore {
public:
    static CountingSemaphore& getInstance()
    {
        static CountingSemaphore instance;
        return instance;
    }
    // Delete copy constructor and assignment operator
    CountingSemaphore(const CountingSemaphore&) = delete;
    CountingSemaphore& operator=(const CountingSemaphore&) = delete;
    void acquire()
    {
        std::unique_lock<std::mutex> lock(mtx_);
        cv_.wait(lock, [this]() { return count_ > 0; });
        --count_;
    }
    void release()
    {
        std::lock_guard<std::mutex> lock(mtx_);
        ++count_;
        if (count_ <= max_count_) {
            cv_.notify_one();
        }
    }
    uint getCount() const
    {
        return count_;
    }
    uint getMaxCount() const
    {
        return max_count_;
    }
private:
    CountingSemaphore()
        : max_count_(std::max(1u, static_cast<uint>(0.95 * std::thread::hardware_concurrency()))),
        count_(max_count_)
    {
    }
    std::mutex mtx_;
    std::condition_variable cv_;
    const uint max_count_;
    uint count_;
};
#endif
@@ -53,14 +53,14 @@ namespace bayesnet {
         }
     }
 
-    void insertElement(std::list<int>& variables, int variable)
+    void MST::insertElement(std::list<int>& variables, int variable)
     {
         if (std::find(variables.begin(), variables.end(), variable) == variables.end()) {
             variables.push_front(variable);
         }
     }
 
-    std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
+    std::vector<std::pair<int, int>> MST::reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
     {
         // Create the edges of a DAG from the MST
         // replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted
|
||||
public:
|
||||
MST() = default;
|
||||
MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
|
||||
void insertElement(std::list<int>& variables, int variable);
|
||||
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original);
|
||||
std::vector<std::pair<int, int>> maximumSpanningTree();
|
||||
private:
|
||||
torch::Tensor weights;
|
||||
|
51	bayesnet/utils/TensorUtils.h	Normal file
@@ -0,0 +1,51 @@
#ifndef TENSORUTILS_H
#define TENSORUTILS_H
#include <torch/torch.h>
#include <vector>
namespace bayesnet {
    class TensorUtils {
    public:
        static std::vector<std::vector<int>> to_matrix(const torch::Tensor& X)
        {
            // Ensure tensor is contiguous in memory
            auto X_contig = X.contiguous();

            // Access tensor data pointer directly
            auto data_ptr = X_contig.data_ptr<int>();

            // If you are using int64_t as the data type, use the following lines instead
            //auto data_ptr = X_contig.data_ptr<int64_t>();
            //std::vector<std::vector<int64_t>> data(X.size(0), std::vector<int64_t>(X.size(1)));

            // Prepare output container
            std::vector<std::vector<int>> data(X.size(0), std::vector<int>(X.size(1)));

            // Fill the 2D vector in a single loop using pointer arithmetic
            int rows = X.size(0);
            int cols = X.size(1);
            for (int i = 0; i < rows; ++i) {
                std::copy(data_ptr + i * cols, data_ptr + (i + 1) * cols, data[i].begin());
            }
            return data;
        }
        template <typename T>
        static std::vector<T> to_vector(const torch::Tensor& y)
        {
            // Ensure the tensor is contiguous in memory
            auto y_contig = y.contiguous();

            // Access data pointer
            auto data_ptr = y_contig.data_ptr<T>();

            // Prepare output container
            std::vector<T> data(y.size(0));

            // Copy data efficiently
            std::copy(data_ptr, data_ptr + y.size(0), data.begin());

            return data;
        }
    };
}

#endif // TENSORUTILS_H
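Both helpers assume the tensor's dtype matches what is read back (to_matrix reads int32; the commented lines show the int64_t variant). A minimal usage sketch:

    #include <torch/torch.h>
    #include "bayesnet/utils/TensorUtils.h"

    int main() {
        auto X = torch::randint(0, 2, { 3, 5 }, torch::kInt32); // 3 features x 5 samples
        auto y = torch::randint(0, 2, { 5 }, torch::kInt32);
        auto Xv = bayesnet::TensorUtils::to_matrix(X);          // std::vector<std::vector<int>>
        auto yv = bayesnet::TensorUtils::to_vector<int>(y);     // std::vector<int>
        return 0;
    }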
@@ -137,7 +137,7 @@
 
 include(CMakeParseArguments)
 
-option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE)
+option(CODE_COVERAGE_VERBOSE "Verbose information" TRUE)
 
 # Check prereqs
 find_program( GCOV_PATH gcov )
@@ -160,7 +160,11 @@ foreach(LANG ${LANGUAGES})
         endif()
     elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU"
         AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang")
-        message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
+        if ("${LANG}" MATCHES "CUDA")
+            message(STATUS "Ignoring CUDA")
+        else()
+            message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
+        endif()
     endif()
 endforeach()
@@ -1,36 +1,16 @@
 @startuml
 title clang-uml class diagram model
-class "bayesnet::Metrics" as C_0000736965376885623323
-class C_0000736965376885623323 #aliceblue;line:blue;line.dotted;text:blue {
-+Metrics() = default : void
-+Metrics(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
-+Metrics(const std::vector<std::vector<int>> & vsamples, const std::vector<int> & labels, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
-..
-+SelectKBestWeighted(const torch::Tensor & weights, bool ascending = false, unsigned int k = 0) : std::vector<int>
-+conditionalEdge(const torch::Tensor & weights) : torch::Tensor
-+conditionalEdgeWeights(std::vector<float> & weights) : std::vector<float>
-#doCombinations<T>(const std::vector<T> & source) : std::vector<std::pair<T, T> >
-#entropy(const torch::Tensor & feature, const torch::Tensor & weights) : double
-+getScoresKBest() const : std::vector<double>
-+maximumSpanningTree(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : std::vector<std::pair<int,int>>
-+mutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & weights) : double
-#pop_first<T>(std::vector<T> & v) : T
-__
-#className : std::string
-#features : std::vector<std::string>
-#samples : torch::Tensor
-}
-class "bayesnet::Node" as C_0001303524929067080934
-class C_0001303524929067080934 #aliceblue;line:blue;line.dotted;text:blue {
+class "bayesnet::Node" as C_0010428199432536647474
+class C_0010428199432536647474 #aliceblue;line:blue;line.dotted;text:blue {
 +Node(const std::string &) : void
 ..
 +addChild(Node *) : void
 +addParent(Node *) : void
 +clear() : void
-+computeCPT(const torch::Tensor & dataset, const std::vector<std::string> & features, const double laplaceSmoothing, const torch::Tensor & weights) : void
++computeCPT(const torch::Tensor & dataset, const std::vector<std::string> & features, const double smoothing, const torch::Tensor & weights) : void
 +getCPT() : torch::Tensor &
 +getChildren() : std::vector<Node *> &
-+getFactorValue(std::map<std::string,int> &) : float
++getFactorValue(std::map<std::string,int> &) : double
 +getName() const : std::string
 +getNumStates() const : int
 +getParents() : std::vector<Node *> &
@@ -41,24 +21,29 @@ class C_0001303524929067080934 #aliceblue;line:blue;line.dotted;text:blue {
 +setNumStates(int) : void
 __
 }
-class "bayesnet::Network" as C_0001186707649890429575
-class C_0001186707649890429575 #aliceblue;line:blue;line.dotted;text:blue {
+enum "bayesnet::Smoothing_t" as C_0013393078277439680282
+enum C_0013393078277439680282 {
+NONE
+ORIGINAL
+LAPLACE
+CESTNIK
+}
+class "bayesnet::Network" as C_0009493661199123436603
+class C_0009493661199123436603 #aliceblue;line:blue;line.dotted;text:blue {
 +Network() : void
 +Network(float) : void
 +Network(const Network &) : void
 +~Network() = default : void
 ..
 +addEdge(const std::string &, const std::string &) : void
 +addNode(const std::string &) : void
 +dump_cpt() const : std::string
-+fit(const torch::Tensor & samples, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
-+fit(const torch::Tensor & X, const torch::Tensor & y, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
-+fit(const std::vector<std::vector<int>> & input_data, const std::vector<int> & labels, const std::vector<double> & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states) : void
++fit(const torch::Tensor & samples, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
++fit(const torch::Tensor & X, const torch::Tensor & y, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
++fit(const std::vector<std::vector<int>> & input_data, const std::vector<int> & labels, const std::vector<double> & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
 +getClassName() const : std::string
 +getClassNumStates() const : int
 +getEdges() const : std::vector<std::pair<std::string,std::string>>
 +getFeatures() const : std::vector<std::string>
 +getMaxThreads() const : float
 +getNodes() : std::map<std::string,std::unique_ptr<Node>> &
 +getNumEdges() const : int
 +getSamples() : torch::Tensor &
@@ -76,21 +61,21 @@ class C_0001186707649890429575 #aliceblue;line:blue;line.dotted;text:blue {
 +version() : std::string
 __
 }
-enum "bayesnet::status_t" as C_0000738420730783851375
-enum C_0000738420730783851375 {
+enum "bayesnet::status_t" as C_0005907365846270811004
+enum C_0005907365846270811004 {
 NORMAL
 WARNING
 ERROR
 }
-abstract "bayesnet::BaseClassifier" as C_0000327135989451974539
-abstract C_0000327135989451974539 #aliceblue;line:blue;line.dotted;text:blue {
+abstract "bayesnet::BaseClassifier" as C_0002617087915615796317
+abstract C_0002617087915615796317 #aliceblue;line:blue;line.dotted;text:blue {
 +~BaseClassifier() = default : void
 ..
 {abstract} +dump_cpt() const = 0 : std::string
-{abstract} +fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
-{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
-{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights) = 0 : BaseClassifier &
-{abstract} +fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) = 0 : BaseClassifier &
+{abstract} +fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
+{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
+{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights, const Smoothing_t smoothing) = 0 : BaseClassifier &
+{abstract} +fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
 {abstract} +getClassNumStates() const = 0 : int
 {abstract} +getNotes() const = 0 : std::vector<std::string>
 {abstract} +getNumberOfEdges() const = 0 : int
@@ -109,12 +94,35 @@ abstract C_0000327135989451974539 #aliceblue;line:blue;line.dotted;text:blue {
 {abstract} +setHyperparameters(const nlohmann::json & hyperparameters) = 0 : void
 {abstract} +show() const = 0 : std::vector<std::string>
 {abstract} +topological_order() = 0 : std::vector<std::string>
-{abstract} #trainModel(const torch::Tensor & weights) = 0 : void
+{abstract} #trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) = 0 : void
 __
 #validHyperparameters : std::vector<std::string>
 }
-abstract "bayesnet::Classifier" as C_0002043996622900301644
-abstract C_0002043996622900301644 #aliceblue;line:blue;line.dotted;text:blue {
+class "bayesnet::Metrics" as C_0005895723015084986588
+class C_0005895723015084986588 #aliceblue;line:blue;line.dotted;text:blue {
++Metrics() = default : void
++Metrics(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
++Metrics(const std::vector<std::vector<int>> & vsamples, const std::vector<int> & labels, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
+..
++SelectKBestWeighted(const torch::Tensor & weights, bool ascending = false, unsigned int k = 0) : std::vector<int>
|
||||
+SelectKPairs(const torch::Tensor & weights, std::vector<int> & featuresExcluded, bool ascending = false, unsigned int k = 0) : std::vector<std::pair<int,int>>
|
||||
+conditionalEdge(const torch::Tensor & weights) : torch::Tensor
|
||||
+conditionalEntropy(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & labels, const torch::Tensor & weights) : double
|
||||
+conditionalMutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & labels, const torch::Tensor & weights) : double
|
||||
#doCombinations<T>(const std::vector<T> & source) : std::vector<std::pair<T, T> >
|
||||
+entropy(const torch::Tensor & feature, const torch::Tensor & weights) : double
|
||||
+getScoresKBest() const : std::vector<double>
|
||||
+getScoresKPairs() const : std::vector<std::pair<std::pair<int,int>,double>>
|
||||
+maximumSpanningTree(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : std::vector<std::pair<int,int>>
|
||||
+mutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & weights) : double
|
||||
#pop_first<T>(std::vector<T> & v) : T
|
||||
__
|
||||
#className : std::string
|
||||
#features : std::vector<std::string>
|
||||
#samples : torch::Tensor
|
||||
}
|
||||
abstract "bayesnet::Classifier" as C_0016351972983202413152
|
||||
abstract C_0016351972983202413152 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Classifier(Network model) : void
|
||||
+~Classifier() = default : void
|
||||
..
|
||||
@@ -123,10 +131,10 @@ abstract C_0002043996622900301644 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
{abstract} #buildModel(const torch::Tensor & weights) = 0 : void
|
||||
#checkFitParameters() : void
|
||||
+dump_cpt() const : std::string
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
|
||||
+fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights) : Classifier &
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
|
||||
+fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights, const Smoothing_t smoothing) : Classifier &
|
||||
+getClassNumStates() const : int
|
||||
+getNotes() const : std::vector<std::string>
|
||||
+getNumberOfEdges() const : int
|
||||
@@ -143,7 +151,7 @@ abstract C_0002043996622900301644 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
+show() const : std::vector<std::string>
|
||||
+topological_order() : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights) : void
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
#className : std::string
|
||||
#dataset : torch::Tensor
|
||||
@@ -157,8 +165,8 @@ __
|
||||
#states : std::map<std::string,std::vector<int>>
|
||||
#status : status_t
|
||||
}
|
||||
class "bayesnet::KDB" as C_0001112865019015250005
|
||||
class C_0001112865019015250005 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::KDB" as C_0008902920152122000044
|
||||
class C_0008902920152122000044 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDB(int k, float theta = 0.03) : void
|
||||
+~KDB() = default : void
|
||||
..
|
||||
@@ -167,8 +175,26 @@ class C_0001112865019015250005 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::TAN" as C_0001760994424884323017
|
||||
class C_0001760994424884323017 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::SPODE" as C_0004096182510460307610
|
||||
class C_0004096182510460307610 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODE(int root) : void
|
||||
+~SPODE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPnDE" as C_0016268916386101512883
|
||||
class C_0016268916386101512883 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPnDE(std::vector<int> parents) : void
|
||||
+~SPnDE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "SPnDE") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::TAN" as C_0014087955399074584137
|
||||
class C_0014087955399074584137 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+TAN() : void
|
||||
+~TAN() = default : void
|
||||
..
|
||||
@@ -176,8 +202,8 @@ class C_0001760994424884323017 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+graph(const std::string & name = "TAN") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Proposal" as C_0002219995589162262979
|
||||
class C_0002219995589162262979 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::Proposal" as C_0017759964713298103839
|
||||
class C_0017759964713298103839 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Proposal(torch::Tensor & pDataset, std::vector<std::string> & features_, std::string & className_) : void
|
||||
+~Proposal() : void
|
||||
..
|
||||
@@ -190,74 +216,42 @@ __
|
||||
#discretizers : map<std::string,mdlp::CPPFImdlp *>
|
||||
#y : torch::Tensor
|
||||
}
|
||||
class "bayesnet::TANLd" as C_0001668829096702037834
|
||||
class C_0001668829096702037834 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+TANLd() : void
|
||||
+~TANLd() = default : void
|
||||
class "bayesnet::KDBLd" as C_0002756018222998454702
|
||||
class C_0002756018222998454702 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDBLd(int k) : void
|
||||
+~KDBLd() = default : void
|
||||
..
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : TANLd &
|
||||
+graph(const std::string & name = "TAN") const : std::vector<std::string>
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : KDBLd &
|
||||
+graph(const std::string & name = "KDB") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
abstract "bayesnet::FeatureSelect" as C_0001695326193250580823
|
||||
abstract C_0001695326193250580823 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FeatureSelect(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~FeatureSelect() : void
|
||||
class "bayesnet::SPODELd" as C_0010957245114062042836
|
||||
class C_0010957245114062042836 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODELd(int root) : void
|
||||
+~SPODELd() = default : void
|
||||
..
|
||||
#computeMeritCFS() : double
|
||||
#computeSuFeatures(const int a, const int b) : double
|
||||
#computeSuLabels() : void
|
||||
{abstract} +fit() = 0 : void
|
||||
+getFeatures() const : std::vector<int>
|
||||
+getScores() const : std::vector<double>
|
||||
#initialize() : void
|
||||
#symmetricalUncertainty(int a, int b) : double
|
||||
__
|
||||
#fitted : bool
|
||||
#maxFeatures : int
|
||||
#selectedFeatures : std::vector<int>
|
||||
#selectedScores : std::vector<double>
|
||||
#suFeatures : std::map<std::pair<int,int>,double>
|
||||
#suLabels : std::vector<double>
|
||||
#weights : const torch::Tensor &
|
||||
}
|
||||
class "bayesnet::CFS" as C_0000011627355691342494
|
||||
class C_0000011627355691342494 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+CFS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~CFS() : void
|
||||
..
|
||||
+fit() : void
|
||||
+commonFit(const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+graph(const std::string & name = "SPODELd") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::FCBF" as C_0000144682015341746929
|
||||
class C_0000144682015341746929 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FCBF(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
|
||||
+~FCBF() : void
|
||||
class "bayesnet::TANLd" as C_0013350632773616302678
|
||||
class C_0013350632773616302678 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+TANLd() : void
|
||||
+~TANLd() = default : void
|
||||
..
|
||||
+fit() : void
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : TANLd &
|
||||
+graph(const std::string & name = "TANLd") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
__
|
||||
}
|
||||
class "bayesnet::IWSS" as C_0000008268514674428553
|
||||
class C_0000008268514674428553 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+IWSS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
|
||||
+~IWSS() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPODE" as C_0000512022813807538451
|
||||
class C_0000512022813807538451 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODE(int root) : void
|
||||
+~SPODE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Ensemble" as C_0001985241386355360576
|
||||
class C_0001985241386355360576 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::Ensemble" as C_0015881931090842884611
|
||||
class C_0015881931090842884611 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Ensemble(bool predict_voting = true) : void
|
||||
+~Ensemble() = default : void
|
||||
..
|
||||
@@ -280,7 +274,7 @@ class C_0001985241386355360576 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+score(torch::Tensor & X, torch::Tensor & y) : float
|
||||
+show() const : std::vector<std::string>
|
||||
+topological_order() : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights) : void
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
#voting(torch::Tensor & votes) : torch::Tensor
|
||||
__
|
||||
#models : std::vector<std::unique_ptr<Classifier>>
|
||||
@@ -288,41 +282,223 @@ __
|
||||
#predict_voting : bool
|
||||
#significanceModels : std::vector<double>
|
||||
}
|
||||
class "bayesnet::(anonymous_45089536)" as C_0001186398587753535158
|
||||
class C_0001186398587753535158 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::A2DE" as C_0001410789567057647859
|
||||
class C_0001410789567057647859 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+A2DE(bool predict_voting = false) : void
|
||||
+~A2DE() : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & title = "A2DE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::AODE" as C_0006288892608974306258
|
||||
class C_0006288892608974306258 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODE(bool predict_voting = false) : void
|
||||
+~AODE() : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & title = "AODE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
__
|
||||
}
|
||||
abstract "bayesnet::FeatureSelect" as C_0013562609546004646591
|
||||
abstract C_0013562609546004646591 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FeatureSelect(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~FeatureSelect() : void
|
||||
..
|
||||
#computeMeritCFS() : double
|
||||
#computeSuFeatures(const int a, const int b) : double
|
||||
#computeSuLabels() : void
|
||||
{abstract} +fit() = 0 : void
|
||||
+getFeatures() const : std::vector<int>
|
||||
+getScores() const : std::vector<double>
|
||||
#initialize() : void
|
||||
#symmetricalUncertainty(int a, int b) : double
|
||||
__
|
||||
#fitted : bool
|
||||
#maxFeatures : int
|
||||
#selectedFeatures : std::vector<int>
|
||||
#selectedScores : std::vector<double>
|
||||
#suFeatures : std::map<std::pair<int,int>,double>
|
||||
#suLabels : std::vector<double>
|
||||
#weights : const torch::Tensor &
|
||||
}
|
||||
class "bayesnet::(anonymous_60342586)" as C_0005584545181746538542
|
||||
class C_0005584545181746538542 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_45090163)" as C_0000602764946063116717
|
||||
class C_0000602764946063116717 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::(anonymous_60343240)" as C_0016227156982041949444
|
||||
class C_0016227156982041949444 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::BoostAODE" as C_0000358471592399852382
|
||||
class C_0000358471592399852382 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
class "bayesnet::Boost" as C_0009819322948617116148
|
||||
class C_0009819322948617116148 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Boost(bool predict_voting = false) : void
|
||||
+~Boost() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
#featureSelection(torch::Tensor & weights_) : std::vector<int>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
#update_weights(torch::Tensor & ytrain, torch::Tensor & ypred, torch::Tensor & weights) : std::tuple<torch::Tensor &,double,bool>
|
||||
#update_weights_block(int k, torch::Tensor & ytrain, torch::Tensor & weights) : std::tuple<torch::Tensor &,double,bool>
|
||||
__
|
||||
#X_test : torch::Tensor
|
||||
#X_train : torch::Tensor
|
||||
#bisection : bool
|
||||
#block_update : bool
|
||||
#convergence : bool
|
||||
#convergence_best : bool
|
||||
#featureSelector : FeatureSelect *
|
||||
#maxTolerance : int
|
||||
#order_algorithm : std::string
|
||||
#selectFeatures : bool
|
||||
#select_features_algorithm : std::string
|
||||
#threshold : double
|
||||
#y_test : torch::Tensor
|
||||
#y_train : torch::Tensor
|
||||
}
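The new bayesnet::Boost base class, factored out of BoostAODE, gathers the boosting knobs that BoostAODE and BoostA2DE share (bisection, block_update, maxTolerance, order_algorithm, select_features_algorithm, threshold, convergence). A minimal sketch of configuring them through setHyperparameters(const nlohmann::json &); the include path and the JSON key names are assumptions inferred from the member names above, not confirmed by this diff:

    // Sketch only: keys are guessed from the protected members of bayesnet::Boost.
    #include <nlohmann/json.hpp>
    #include <bayesnet/ensembles/BoostAODE.h>  // assumed include path

    int main() {
        bayesnet::BoostAODE clf(false);        // predict_voting = false, as in the diagram
        nlohmann::json hp;
        hp["bisection"] = true;                // halve the candidate feature set per round
        hp["block_update"] = false;            // per-model weight updates (update_weights)
        hp["maxTolerance"] = 3;                // rounds without improvement before stopping
        hp["select_features"] = "CFS";         // CFS / FCBF / IWSS, per the anonymous structs
        hp["order"] = "ASC";                   // ASC / DESC / RAND
        clf.setHyperparameters(hp);
        return 0;
    }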
class "bayesnet::AODELd" as C_0003898187834670349177
class C_0003898187834670349177 #aliceblue;line:blue;line.dotted;text:blue {
+AODELd(bool predict_voting = true) : void
+~AODELd() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+fit(torch::Tensor & X_, torch::Tensor & y_, const std::vector<std::string> & features_, const std::string & className_, std::map<std::string,std::vector<int>> & states_, const Smoothing_t smoothing) : AODELd &
+graph(const std::string & name = "AODELd") const : std::vector<std::string>
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
__
}
class "bayesnet::(anonymous_60275628)" as C_0009086919615463763584
class C_0009086919615463763584 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60276282)" as C_0015251985607563196159
class C_0015251985607563196159 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::BoostA2DE" as C_0000272055465257861326
class C_0000272055465257861326 #aliceblue;line:blue;line.dotted;text:blue {
+BoostA2DE(bool predict_voting = false) : void
+~BoostA2DE() = default : void
..
+graph(const std::string & title = "BoostA2DE") const : std::vector<std::string>
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
__
}
class "bayesnet::(anonymous_60275502)" as C_0016033655851510053155
class C_0016033655851510053155 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60276156)" as C_0000379522761622473555
class C_0000379522761622473555 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::BoostAODE" as C_0002867772739198819061
class C_0002867772739198819061 #aliceblue;line:blue;line.dotted;text:blue {
+BoostAODE(bool predict_voting = false) : void
+~BoostAODE() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & title = "BoostAODE") const : std::vector<std::string>
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
#trainModel(const torch::Tensor & weights) : void
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
__
}
class "bayesnet::MST" as C_0000131858426172291700
class C_0000131858426172291700 #aliceblue;line:blue;line.dotted;text:blue {
class "bayesnet::CFS" as C_0000093018845530739957
class C_0000093018845530739957 #aliceblue;line:blue;line.dotted;text:blue {
+CFS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
+~CFS() : void
..
+fit() : void
__
}
class "bayesnet::FCBF" as C_0001157456122733975432
class C_0001157456122733975432 #aliceblue;line:blue;line.dotted;text:blue {
+FCBF(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
+~FCBF() : void
..
+fit() : void
__
}
class "bayesnet::IWSS" as C_0000066148117395428429
class C_0000066148117395428429 #aliceblue;line:blue;line.dotted;text:blue {
+IWSS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
+~IWSS() : void
..
+fit() : void
__
}
class "bayesnet::(anonymous_60730495)" as C_0004857727320042830573
class C_0004857727320042830573 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60731150)" as C_0000076541533312623385
class C_0000076541533312623385 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::(anonymous_60653004)" as C_0001444063444142949758
class C_0001444063444142949758 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60653658)" as C_0007139277546931322856
class C_0007139277546931322856 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::(anonymous_60731375)" as C_0010493853592456211189
class C_0010493853592456211189 #aliceblue;line:blue;line.dotted;text:blue {
__
+CFS : std::string
+FCBF : std::string
+IWSS : std::string
}
class "bayesnet::(anonymous_60732030)" as C_0007011438637915849564
class C_0007011438637915849564 #aliceblue;line:blue;line.dotted;text:blue {
__
+ASC : std::string
+DESC : std::string
+RAND : std::string
}
class "bayesnet::MST" as C_0001054867409378333602
class C_0001054867409378333602 #aliceblue;line:blue;line.dotted;text:blue {
+MST() = default : void
+MST(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : void
..
+insertElement(std::list<int> & variables, int variable) : void
+maximumSpanningTree() : std::vector<std::pair<int,int>>
+reorder(std::vector<std::pair<float,std::pair<int,int>>> T, int root_original) : std::vector<std::pair<int,int>>
__
}
class "bayesnet::Graph" as C_0001197041682001898467
class C_0001197041682001898467 #aliceblue;line:blue;line.dotted;text:blue {
class "bayesnet::Graph" as C_0009576333456015187741
class C_0009576333456015187741 #aliceblue;line:blue;line.dotted;text:blue {
+Graph(int V) : void
..
+addEdge(int u, int v, float wt) : void
@@ -332,81 +508,73 @@ class C_0001197041682001898467 #aliceblue;line:blue;line.dotted;text:blue {
+union_set(int u, int v) : void
__
}
class "bayesnet::KDBLd" as C_0000344502277874806837
class C_0000344502277874806837 #aliceblue;line:blue;line.dotted;text:blue {
+KDBLd(int k) : void
+~KDBLd() = default : void
..
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : KDBLd &
+graph(const std::string & name = "KDB") const : std::vector<std::string>
+predict(torch::Tensor & X) : torch::Tensor
{static} +version() : std::string
__
}
class "bayesnet::AODE" as C_0000786111576121788282
class C_0000786111576121788282 #aliceblue;line:blue;line.dotted;text:blue {
+AODE(bool predict_voting = false) : void
+~AODE() : void
..
#buildModel(const torch::Tensor & weights) : void
+graph(const std::string & title = "AODE") const : std::vector<std::string>
+setHyperparameters(const nlohmann::json & hyperparameters) : void
__
}
class "bayesnet::SPODELd" as C_0001369655639257755354
class C_0001369655639257755354 #aliceblue;line:blue;line.dotted;text:blue {
+SPODELd(int root) : void
+~SPODELd() = default : void
..
+commonFit(const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states) : SPODELd &
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
+predict(torch::Tensor & X) : torch::Tensor
{static} +version() : std::string
__
}
class "bayesnet::AODELd" as C_0000487273479333793647
class C_0000487273479333793647 #aliceblue;line:blue;line.dotted;text:blue {
+AODELd(bool predict_voting = true) : void
+~AODELd() = default : void
..
#buildModel(const torch::Tensor & weights) : void
+fit(torch::Tensor & X_, torch::Tensor & y_, const std::vector<std::string> & features_, const std::string & className_, std::map<std::string,std::vector<int>> & states_) : AODELd &
+graph(const std::string & name = "AODELd") const : std::vector<std::string>
#trainModel(const torch::Tensor & weights) : void
__
}
C_0001303524929067080934 --> C_0001303524929067080934 : -parents
C_0001303524929067080934 --> C_0001303524929067080934 : -children
C_0001186707649890429575 o-- C_0001303524929067080934 : -nodes
C_0000327135989451974539 ..> C_0000738420730783851375
C_0002043996622900301644 o-- C_0001186707649890429575 : #model
C_0002043996622900301644 o-- C_0000736965376885623323 : #metrics
C_0002043996622900301644 o-- C_0000738420730783851375 : #status
C_0000327135989451974539 <|-- C_0002043996622900301644
C_0002043996622900301644 <|-- C_0001112865019015250005
C_0002043996622900301644 <|-- C_0001760994424884323017
C_0002219995589162262979 ..> C_0001186707649890429575
C_0001760994424884323017 <|-- C_0001668829096702037834
C_0002219995589162262979 <|-- C_0001668829096702037834
C_0000736965376885623323 <|-- C_0001695326193250580823
C_0001695326193250580823 <|-- C_0000011627355691342494
C_0001695326193250580823 <|-- C_0000144682015341746929
C_0001695326193250580823 <|-- C_0000008268514674428553
C_0002043996622900301644 <|-- C_0000512022813807538451
C_0001985241386355360576 o-- C_0002043996622900301644 : #models
C_0002043996622900301644 <|-- C_0001985241386355360576
C_0000358471592399852382 --> C_0001695326193250580823 : -featureSelector
C_0001985241386355360576 <|-- C_0000358471592399852382
C_0001112865019015250005 <|-- C_0000344502277874806837
C_0002219995589162262979 <|-- C_0000344502277874806837
C_0001985241386355360576 <|-- C_0000786111576121788282
C_0000512022813807538451 <|-- C_0001369655639257755354
C_0002219995589162262979 <|-- C_0001369655639257755354
C_0001985241386355360576 <|-- C_0000487273479333793647
C_0002219995589162262979 <|-- C_0000487273479333793647
C_0010428199432536647474 --> C_0010428199432536647474 : -parents
C_0010428199432536647474 --> C_0010428199432536647474 : -children
C_0009493661199123436603 ..> C_0013393078277439680282
C_0009493661199123436603 o-- C_0010428199432536647474 : -nodes
C_0002617087915615796317 ..> C_0013393078277439680282
C_0002617087915615796317 ..> C_0005907365846270811004
C_0016351972983202413152 ..> C_0013393078277439680282
C_0016351972983202413152 o-- C_0009493661199123436603 : #model
C_0016351972983202413152 o-- C_0005895723015084986588 : #metrics
C_0016351972983202413152 o-- C_0005907365846270811004 : #status
C_0002617087915615796317 <|-- C_0016351972983202413152

'Generated with clang-uml, version 0.5.1
'LLVM version clang version 17.0.6 (Fedora 17.0.6-2.fc39)
C_0016351972983202413152 <|-- C_0008902920152122000044

C_0016351972983202413152 <|-- C_0004096182510460307610

C_0016351972983202413152 <|-- C_0016268916386101512883

C_0016351972983202413152 <|-- C_0014087955399074584137

C_0017759964713298103839 ..> C_0009493661199123436603
C_0002756018222998454702 ..> C_0013393078277439680282
C_0008902920152122000044 <|-- C_0002756018222998454702

C_0017759964713298103839 <|-- C_0002756018222998454702

C_0010957245114062042836 ..> C_0013393078277439680282
C_0004096182510460307610 <|-- C_0010957245114062042836

C_0017759964713298103839 <|-- C_0010957245114062042836

C_0013350632773616302678 ..> C_0013393078277439680282
C_0014087955399074584137 <|-- C_0013350632773616302678

C_0017759964713298103839 <|-- C_0013350632773616302678

C_0015881931090842884611 ..> C_0013393078277439680282
C_0015881931090842884611 o-- C_0016351972983202413152 : #models
C_0016351972983202413152 <|-- C_0015881931090842884611

C_0015881931090842884611 <|-- C_0001410789567057647859

C_0015881931090842884611 <|-- C_0006288892608974306258

C_0005895723015084986588 <|-- C_0013562609546004646591

C_0009819322948617116148 --> C_0013562609546004646591 : #featureSelector
C_0015881931090842884611 <|-- C_0009819322948617116148

C_0003898187834670349177 ..> C_0013393078277439680282
C_0015881931090842884611 <|-- C_0003898187834670349177

C_0017759964713298103839 <|-- C_0003898187834670349177

C_0000272055465257861326 ..> C_0013393078277439680282
C_0009819322948617116148 <|-- C_0000272055465257861326

C_0002867772739198819061 ..> C_0013393078277439680282
C_0009819322948617116148 <|-- C_0002867772739198819061

C_0013562609546004646591 <|-- C_0000093018845530739957

C_0013562609546004646591 <|-- C_0001157456122733975432

C_0013562609546004646591 <|-- C_0000066148117395428429


'Generated with clang-uml, version 0.5.5
'LLVM version clang version 18.1.8 (Fedora 18.1.8-5.fc41)
@enduml
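The substance of this diagram diff is the new bayesnet::Smoothing_t enum (NONE, ORIGINAL, LAPLACE, CESTNIK) threaded as a trailing parameter through every fit() and trainModel() overload of Network, BaseClassifier, Classifier, the *Ld variants and the ensembles. A minimal caller sketch against the new Network signatures; only the method signatures come from the diagram above, while the include path, edge direction, and data layout are assumptions:

    #include <map>
    #include <string>
    #include <vector>
    #include <bayesnet/network/Network.h>  // assumed include path

    int main() {
        bayesnet::Network net;
        net.addNode("A");
        net.addNode("class");
        net.addEdge("class", "A");         // assumed direction: parent -> child
        // Assumed layout: one inner vector per feature, values are discrete state indices.
        std::vector<std::vector<int>> X = { {0, 1, 0, 1} };
        std::vector<int> y = { 0, 1, 0, 1 };
        std::vector<double> w(4, 1.0);     // uniform instance weights
        std::map<std::string, std::vector<int>> states = { {"A", {0, 1}}, {"class", {0, 1}} };
        // New in this changeset: the smoothing strategy is chosen explicitly at fit() time.
        net.fit(X, y, w, {"A"}, "class", states, bayesnet::Smoothing_t::LAPLACE);
        return 0;
    }

Choosing LAPLACE here would correspond to classic add-one smoothing of the CPTs; ORIGINAL and CESTNIK presumably select the library's previous default and Cestnik's m-estimate, though that mapping is an assumption based on the enum names, not something this diff states.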
File diff suppressed because one or more lines are too long
Before: Size 139 KiB → After: Size 196 KiB
@@ -1,128 +1,314 @@
[SVG diff of the BayesNet link-dependency graph, truncated here. The graph is regenerated with Graphviz 12.1.0 (previously 8.1.0) and grows from 1632x288pt to 3725x432pt. The old graph linked BayesNet to libc10.so, libkineto.a, mdlp, and torch_library (which pulled in libtorch.so and torch_cpu_library with libtorch_cpu.so, caffe2::mkl, and dummy (protobuf::libprotobuf)). The new graph keeps those targets, replaces the mdlp node with fimdlp, and adds CUDA dependencies: libc10_cuda.so, /usr/lib64/libcuda.so, /usr/local/cuda/lib64/libcudart.so, libnvToolsExt.so, libnvrtc.so, plus torch_cuda_library with libtorch_cuda.so, torch::cudart, and torch::nvtoolsext.]
|
||||
</g>
|
||||
<!-- node13->node17 -->
|
||||
<g id="edge26" class="edge">
|
||||
<title>node13->node17</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3033.64,-183.25C3144.1,-175.14 3349.47,-158.53 3523.66,-135.8 3534.84,-134.35 3546.67,-132.57 3558.15,-130.72"/>
|
||||
<polygon fill="black" stroke="black" points="3558.68,-134.18 3567.98,-129.1 3557.54,-127.27 3558.68,-134.18"/>
|
||||
</g>
|
||||
<!-- node16 -->
|
||||
<g id="node19" class="node">
|
||||
<title>node16</title>
|
||||
<polygon fill="none" stroke="black" points="3510.78,-27.9 3496.7,-43.12 3439.66,-49.9 3382.63,-43.12 3368.54,-27.9 3408.01,-15.68 3471.31,-15.68 3510.78,-27.9"/>
|
||||
<text text-anchor="middle" x="3439.66" y="-27.63" font-family="Times,serif" font-size="12.00">CUDA::cudart</text>
|
||||
</g>
|
||||
<!-- node15->node16 -->
|
||||
<g id="edge25" class="edge">
|
||||
<title>node15->node16</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3439.66,-102.95C3439.66,-91.68 3439.66,-75.4 3439.66,-61.37"/>
|
||||
<polygon fill="black" stroke="black" points="3443.16,-61.78 3439.66,-51.78 3436.16,-61.78 3443.16,-61.78"/>
|
||||
</g>
|
||||
<!-- node18 -->
|
||||
<g id="node21" class="node">
|
||||
<title>node18</title>
|
||||
<polygon fill="none" stroke="black" points="3714.32,-27.9 3696.56,-43.12 3624.66,-49.9 3552.77,-43.12 3535.01,-27.9 3584.76,-15.68 3664.56,-15.68 3714.32,-27.9"/>
|
||||
<text text-anchor="middle" x="3624.66" y="-27.63" font-family="Times,serif" font-size="12.00">CUDA::nvToolsExt</text>
|
||||
</g>
|
||||
<!-- node17->node18 -->
|
||||
<g id="edge27" class="edge">
|
||||
<title>node17->node18</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3624.66,-102.95C3624.66,-91.68 3624.66,-75.4 3624.66,-61.37"/>
|
||||
<polygon fill="black" stroke="black" points="3628.16,-61.78 3624.66,-51.78 3621.16,-61.78 3628.16,-61.78"/>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 7.1 KiB After Width: | Height: | Size: 18 KiB |
@@ -5,6 +5,7 @@

The hyperparameters defined in the algorithm are:

- ***bisection*** (*boolean*): If set to *true*, the algorithm is allowed to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *true*.
- ***bisection_best*** (*boolean*): If set to *true*, the algorithm takes the best accuracy computed so far as *priorAccuracy*; if set to *false*, it takes the last accuracy. Default value: *false*.

- ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables are processed to choose the parents of the *SPODEs*. Default value: *"desc"*. A configuration sketch using these hyperparameters follows the list.
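A minimal configuration sketch for the hyperparameters above. It assumes the library exposes a `setHyperparameters(nlohmann::json)` entry point on the classifier; treat the call site as illustrative rather than authoritative:

```cpp
#include <bayesnet/ensembles/BoostAODE.h>
#include <nlohmann/json.hpp>

void configure(bayesnet::BoostAODE& clf) {
    nlohmann::json hyperparameters;
    hyperparameters["bisection"] = true;        // allow adding k models at once
    hyperparameters["bisection_best"] = false;  // use the last accuracy as priorAccuracy
    hyperparameters["order"] = "desc";          // descending order for SPODE parents
    clf.setHyperparameters(hyperparameters);    // assumed entry point
}
```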
@@ -26,4 +27,4 @@ The hyperparameters defined in the algorithm are:

## Operation

### [Algorithm](./algorithm.md)
### [Base Algorithm](./algorithm.md)
2912 docs/Doxyfile.in (new file): diff suppressed because it is too large
@@ -105,8 +105,7 @@

2. $numItemsPack \leftarrow 0$

10. If
$(Vars == \emptyset \lor tolerance > maxTolerance) \; finished \leftarrow True$
10. If $(Vars == \emptyset \lor tolerance > maxTolerance) \; finished \leftarrow True$

11. $lastAccuracy \leftarrow max(lastAccuracy, actualAccuracy)$
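Read together, steps 10 and 11 are the early-stopping bookkeeping of the loop; a minimal C++ sketch, where the names mirror the pseudocode and the types are assumptions:

```cpp
#include <algorithm>
#include <vector>

// Step 10: finished when no variables remain or the tolerance budget is exhausted.
// Step 11: keep the best accuracy seen so far.
bool earlyStop(const std::vector<int>& Vars, int tolerance, int maxTolerance,
               double& lastAccuracy, double actualAccuracy) {
    lastAccuracy = std::max(lastAccuracy, actualAccuracy);
    return Vars.empty() || tolerance > maxTolerance;
}
```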
BIN docs/logo_small.png (new file): binary file not shown. After: 11 KiB
@@ -1,5 +0,0 @@
filter = bayesnet/
exclude-directories = build_debug/lib/
exclude = bayesnet/utils/loguru.*
print-summary = yes
sort = uncovered-percent
@@ -1,168 +0,0 @@
#include "ArffFiles.h"
#include <fstream>
#include <sstream>
#include <map>
#include <iostream>

ArffFiles::ArffFiles() = default;

std::vector<std::string> ArffFiles::getLines() const
{
    return lines;
}

unsigned long int ArffFiles::getSize() const
{
    return lines.size();
}

std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
{
    return attributes;
}

std::string ArffFiles::getClassName() const
{
    return className;
}

std::string ArffFiles::getClassType() const
{
    return classType;
}

std::vector<std::vector<float>>& ArffFiles::getX()
{
    return X;
}

std::vector<int>& ArffFiles::getY()
{
    return y;
}

void ArffFiles::loadCommon(std::string fileName)
{
    std::ifstream file(fileName);
    if (!file.is_open()) {
        throw std::invalid_argument("Unable to open file");
    }
    std::string line;
    std::string keyword;
    std::string attribute;
    std::string type;
    std::string type_w;
    while (getline(file, line)) {
        if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
            continue;
        }
        if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
            std::stringstream ss(line);
            ss >> keyword >> attribute;
            type = "";
            while (ss >> type_w)
                type += type_w + " ";
            attributes.emplace_back(trim(attribute), trim(type));
            continue;
        }
        if (line[0] == '@') {
            continue;
        }
        lines.push_back(line);
    }
    file.close();
    if (attributes.empty())
        throw std::invalid_argument("No attributes found");
}

void ArffFiles::load(const std::string& fileName, bool classLast)
{
    int labelIndex;
    loadCommon(fileName);
    if (classLast) {
        className = std::get<0>(attributes.back());
        classType = std::get<1>(attributes.back());
        attributes.pop_back();
        labelIndex = static_cast<int>(attributes.size());
    } else {
        className = std::get<0>(attributes.front());
        classType = std::get<1>(attributes.front());
        attributes.erase(attributes.begin());
        labelIndex = 0;
    }
    generateDataset(labelIndex);
}
void ArffFiles::load(const std::string& fileName, const std::string& name)
{
    int labelIndex;
    loadCommon(fileName);
    bool found = false;
    for (int i = 0; i < attributes.size(); ++i) {
        if (attributes[i].first == name) {
            className = std::get<0>(attributes[i]);
            classType = std::get<1>(attributes[i]);
            attributes.erase(attributes.begin() + i);
            labelIndex = i;
            found = true;
            break;
        }
    }
    if (!found) {
        throw std::invalid_argument("Class name not found");
    }
    generateDataset(labelIndex);
}

void ArffFiles::generateDataset(int labelIndex)
{
    X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
    auto yy = std::vector<std::string>(lines.size(), "");
    auto removeLines = std::vector<int>(); // Lines with missing values
    for (size_t i = 0; i < lines.size(); i++) {
        std::stringstream ss(lines[i]);
        std::string value;
        int pos = 0;
        int xIndex = 0;
        while (getline(ss, value, ',')) {
            if (pos++ == labelIndex) {
                yy[i] = value;
            } else {
                if (value == "?") {
                    X[xIndex++][i] = -1;
                    removeLines.push_back(i);
                } else
                    X[xIndex++][i] = stof(value);
            }
        }
    }
    for (auto i : removeLines) {
        yy.erase(yy.begin() + i);
        for (auto& x : X) {
            x.erase(x.begin() + i);
        }
    }
    y = factorize(yy);
}

std::string ArffFiles::trim(const std::string& source)
{
    std::string s(source);
    s.erase(0, s.find_first_not_of(" '\n\r\t"));
    s.erase(s.find_last_not_of(" '\n\r\t") + 1);
    return s;
}

std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{
    std::vector<int> yy;
    yy.reserve(labels_t.size());
    std::map<std::string, int> labelMap;
    int i = 0;
    for (const std::string& label : labels_t) {
        if (labelMap.find(label) == labelMap.end()) {
            labelMap[label] = i++;
        }
        yy.push_back(labelMap[label]);
    }
    return yy;
}
@@ -1,32 +0,0 @@
#ifndef ARFFFILES_H
#define ARFFFILES_H

#include <string>
#include <vector>

class ArffFiles {
private:
    std::vector<std::string> lines;
    std::vector<std::pair<std::string, std::string>> attributes;
    std::string className;
    std::string classType;
    std::vector<std::vector<float>> X;
    std::vector<int> y;
    void generateDataset(int);
    void loadCommon(std::string);
public:
    ArffFiles();
    void load(const std::string&, bool = true);
    void load(const std::string&, const std::string&);
    std::vector<std::string> getLines() const;
    unsigned long int getSize() const;
    std::string getClassName() const;
    std::string getClassType() const;
    static std::string trim(const std::string&);
    std::vector<std::vector<float>>& getX();
    std::vector<int>& getY();
    std::vector<std::pair<std::string, std::string>> getAttributes() const;
    static std::vector<int> factorize(const std::vector<std::string>& labels_t);
};

#endif
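For reference, a minimal usage sketch of the loader removed above (the dataset path is hypothetical):

```cpp
#include "ArffFiles.h"
#include <iostream>

int main() {
    ArffFiles handler;
    handler.load("iris.arff", true);   // class attribute is the last one
    auto& X = handler.getX();          // features as float vectors
    auto& y = handler.getY();          // labels factorized to ints
    std::cout << handler.getClassName() << ": " << handler.getSize()
              << " samples, " << X.size() << " attributes, "
              << y.size() << " labels\n";
}
```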
@@ -1 +0,0 @@
add_library(ArffFiles ArffFiles.cc)
Submodule lib/catch2 updated: bff6e35e2b...029fe3b460
Submodule lib/folding updated: 71d6055be4...9652853d69
Submodule lib/json updated: 199dea11b1...620034ecec
2009 lib/log/loguru.cpp (new file): diff suppressed because it is too large
1475 lib/log/loguru.hpp (new file): diff suppressed because it is too large
Submodule lib/mdlp updated: 5708dc3de9...7d62d6af4a
@@ -5,16 +5,23 @@ project(bayesnet_sample)

set(CMAKE_CXX_STANDARD 17)

find_package(Torch REQUIRED)
find_library(BayesNet NAMES BayesNet.a libBayesNet.a REQUIRED)
find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a REQUIRED)
find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet)
find_library(FImdlp NAMES libfimdlp.a PATHS REQUIRED)

message(STATUS "FImdlp=${FImdlp}")
message(STATUS "FImdlp_INCLUDE_DIRS=${FImdlp_INCLUDE_DIRS}")
message(STATUS "BayesNet=${BayesNet}")
message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")

include_directories(
    lib/Files
    lib/mdlp
    ../tests/lib/Files
    lib/json/include
    /usr/local/include
    /usr/local/include/fimdlp/
)

add_subdirectory(lib/Files)
add_subdirectory(lib/mdlp)
add_executable(bayesnet_sample sample.cc)
target_link_libraries(bayesnet_sample ArffFiles mdlp "${TORCH_LIBRARIES}" "${BayesNet}")
target_link_libraries(bayesnet_sample ${FImdlp} "${TORCH_LIBRARIES}" "${BayesNet}")
add_executable(bayesnet_sample_xspode sample_xspode.cc)
target_link_libraries(bayesnet_sample_xspode ${FImdlp} "${TORCH_LIBRARIES}" "${BayesNet}")
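A plausible way to configure and build this sample against the new find_library/find_path lookups; all paths and the invocation are illustrative and depend on where libtorch, BayesNet and fimdlp are installed:

```bash
cd sample
cmake -B build -DCMAKE_PREFIX_PATH="$HOME/libtorch;/usr/local"
cmake --build build
./build/bayesnet_sample glass.arff
```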
@@ -1,174 +0,0 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

(remainder of sample/lib/Files/ArffFiles.cc is identical to lib/Files/ArffFiles.cc shown above)
@@ -1,38 +0,0 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

(remainder of sample/lib/Files/ArffFiles.h is identical to lib/Files/ArffFiles.h shown above)
@@ -1 +0,0 @@
add_library(ArffFiles ArffFiles.cc)
@@ -1,11 +0,0 @@
cmake_minimum_required(VERSION 3.20)
project(mdlp)

if (POLICY CMP0135)
    cmake_policy(SET CMP0135 NEW)
endif ()

set(CMAKE_CXX_STANDARD 11)

add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
@@ -1,222 +0,0 @@
#include <numeric>
#include <algorithm>
#include <set>
#include <cmath>
#include "CPPFImdlp.h"
#include "Metrics.h"

namespace mdlp {

    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
        max_depth(max_depth_),
        proposed_cuts(proposed)
    {
    }

    CPPFImdlp::CPPFImdlp() = default;

    CPPFImdlp::~CPPFImdlp() = default;

    size_t CPPFImdlp::compute_max_num_cut_points() const
    {
        // Set the actual maximum number of cut points as a number or as a percentage of the number of samples
        if (proposed_cuts == 0) {
            return numeric_limits<size_t>::max();
        }
        if (proposed_cuts < 0 || proposed_cuts > static_cast<float>(X.size())) {
            throw invalid_argument("wrong proposed num_cuts value");
        }
        if (proposed_cuts < 1)
            return static_cast<size_t>(round(static_cast<float>(X.size()) * proposed_cuts));
        return static_cast<size_t>(proposed_cuts);
    }

    void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
    {
        X = X_;
        y = y_;
        num_cut_points = compute_max_num_cut_points();
        depth = 0;
        discretizedData.clear();
        cutPoints.clear();
        if (X.size() != y.size()) {
            throw invalid_argument("X and y must have the same size");
        }
        if (X.empty() || y.empty()) {
            throw invalid_argument("X and y must have at least one element");
        }
        if (min_length < 3) {
            throw invalid_argument("min_length must be greater than 2");
        }
        if (max_depth < 1) {
            throw invalid_argument("max_depth must be greater than 0");
        }
        indices = sortIndices(X_, y_);
        metrics.setData(y, indices);
        computeCutPoints(0, X.size(), 1);
        sort(cutPoints.begin(), cutPoints.end());
        if (num_cut_points > 0) {
            // Select the best (with lower entropy) cut points
            while (cutPoints.size() > num_cut_points) {
                resizeCutPoints();
            }
        }
    }

    pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
    {
        size_t n;
        size_t m;
        size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
        size_t idxNext = cut + 1 < end ? cut + 1 : cut;
        bool backWall; // true if duplicates reach the beginning of the interval
        precision_t previous;
        precision_t actual;
        precision_t next;
        previous = X[indices[idxPrev]];
        actual = X[indices[cut]];
        next = X[indices[idxNext]];
        // definition 2 of the paper => X[t-1] < X[t]
        // get the first equal value of X in the interval
        while (idxPrev > start && actual == previous) {
            previous = X[indices[--idxPrev]];
        }
        backWall = idxPrev == start && actual == previous;
        // get the last equal value of X in the interval
        while (idxNext < end - 1 && actual == next) {
            next = X[indices[++idxNext]];
        }
        // # of duplicates before cutpoint
        n = cut - 1 - idxPrev;
        // # of duplicates after cutpoint
        m = idxNext - cut - 1;
        // Decide which values to use
        cut = cut + (backWall ? m + 1 : -n);
        actual = X[indices[cut]];
        return { (actual + previous) / 2, cut };
    }

    void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
    {
        size_t cut;
        pair<precision_t, size_t> result;
        // Check if the interval length and the depth are Ok
        if (end - start < min_length || depth_ > max_depth)
            return;
        depth = depth_ > depth ? depth_ : depth;
        cut = getCandidate(start, end);
        if (cut == numeric_limits<size_t>::max())
            return;
        if (mdlp(start, cut, end)) {
            result = valueCutPoint(start, cut, end);
            cut = result.second;
            cutPoints.push_back(result.first);
            computeCutPoints(start, cut, depth_ + 1);
            computeCutPoints(cut, end, depth_ + 1);
        }
    }

    size_t CPPFImdlp::getCandidate(size_t start, size_t end)
    {
        /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
           E(A, TA; S) is minimal amongst all the candidate cut points. */
        size_t candidate = numeric_limits<size_t>::max();
        size_t elements = end - start;
        bool sameValues = true;
        precision_t entropy_left;
        precision_t entropy_right;
        precision_t minEntropy;
        // Check if all the values of the variable in the interval are the same
        for (size_t idx = start + 1; idx < end; idx++) {
            if (X[indices[idx]] != X[indices[start]]) {
                sameValues = false;
                break;
            }
        }
        if (sameValues)
            return candidate;
        minEntropy = metrics.entropy(start, end);
        for (size_t idx = start + 1; idx < end; idx++) {
            // Cutpoints are always on boundaries (definition 2)
            if (y[indices[idx]] == y[indices[idx - 1]])
                continue;
            entropy_left = precision_t(idx - start) / static_cast<precision_t>(elements) * metrics.entropy(start, idx);
            entropy_right = precision_t(end - idx) / static_cast<precision_t>(elements) * metrics.entropy(idx, end);
            if (entropy_left + entropy_right < minEntropy) {
                minEntropy = entropy_left + entropy_right;
                candidate = idx;
            }
        }
        return candidate;
    }

    bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
    {
        int k;
        int k1;
        int k2;
        precision_t ig;
        precision_t delta;
        precision_t ent;
        precision_t ent1;
        precision_t ent2;
        auto N = precision_t(end - start);
        k = metrics.computeNumClasses(start, end);
        k1 = metrics.computeNumClasses(start, cut);
        k2 = metrics.computeNumClasses(cut, end);
        ent = metrics.entropy(start, end);
        ent1 = metrics.entropy(start, cut);
        ent2 = metrics.entropy(cut, end);
        ig = metrics.informationGain(start, cut, end);
        delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
            (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
        precision_t term = 1 / N * (log2(N - 1) + delta);
        return ig > term;
    }

    // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
    indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
    {
        indices_t idx(X_.size());
        iota(idx.begin(), idx.end(), 0);
        stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
            if (X_[i1] == X_[i2])
                return y_[i1] < y_[i2];
            else
                return X_[i1] < X_[i2];
            });
        return idx;
    }

    void CPPFImdlp::resizeCutPoints()
    {
        // Compute the entropy of each interval induced by the cutpoint set and discard the cut whose interval has the biggest value
        precision_t maxEntropy = 0;
        precision_t entropy;
        size_t maxEntropyIdx = 0;
        size_t begin = 0;
        size_t end;
        for (size_t idx = 0; idx < cutPoints.size(); idx++) {
            end = begin;
            while (X[indices[end]] < cutPoints[idx] && end < X.size())
                end++;
            entropy = metrics.entropy(begin, end);
            if (entropy > maxEntropy) {
                maxEntropy = entropy;
                maxEntropyIdx = idx;
            }
            begin = end;
        }
        cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
    }
    labels_t& CPPFImdlp::transform(const samples_t& data)
    {
        discretizedData.clear();
        discretizedData.reserve(data.size());
        for (const precision_t& item : data) {
            auto upper = upper_bound(cutPoints.begin(), cutPoints.end(), item);
            discretizedData.push_back(upper - cutPoints.begin());
        }
        return discretizedData;
    }
}
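For readability, the acceptance test implemented in `mdlp(start, cut, end)` above is the Fayyad & Irani MDLP criterion. In the notation of the code (N samples in the interval; k, k1, k2 class counts; ent, ent1, ent2 entropies; ig the information gain) it reads:

```latex
\Delta = \log_2\!\left(3^{k} - 2\right) - \left[\,k \cdot ent - k_1 \cdot ent_1 - k_2 \cdot ent_2\,\right],
\qquad
\text{accept the cut iff}\quad ig > \frac{\log_2(N - 1) + \Delta}{N}
```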
@@ -1,51 +0,0 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef CPPFIMDLP_H
#define CPPFIMDLP_H

#include "typesFImdlp.h"
#include "Metrics.h"
#include <limits>
#include <utility>
#include <string>

namespace mdlp {
    class CPPFImdlp {
    protected:
        size_t min_length = 3;
        int depth = 0;
        int max_depth = numeric_limits<int>::max();
        float proposed_cuts = 0;
        indices_t indices = indices_t();
        samples_t X = samples_t();
        labels_t y = labels_t();
        Metrics metrics = Metrics(y, indices);
        cutPoints_t cutPoints;
        size_t num_cut_points = numeric_limits<size_t>::max();
        labels_t discretizedData = labels_t();

        static indices_t sortIndices(samples_t&, labels_t&);

        void computeCutPoints(size_t, size_t, int);
        void resizeCutPoints();
        bool mdlp(size_t, size_t, size_t);
        size_t getCandidate(size_t, size_t);
        size_t compute_max_num_cut_points() const;
        pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);

    public:
        CPPFImdlp();
        CPPFImdlp(size_t, int, float);
        ~CPPFImdlp();
        void fit(samples_t&, labels_t&);
        inline cutPoints_t getCutPoints() const { return cutPoints; };
        labels_t& transform(const samples_t&);
        inline int get_depth() const { return depth; };
        static inline string version() { return "1.1.2"; };
    };
}
#endif
@@ -1,21 +0,0 @@
MIT License

Copyright (c) 2022 Ricardo Montañana Gómez

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,78 +0,0 @@
#include "Metrics.h"
#include <set>
#include <cmath>

using namespace std;
namespace mdlp {
    Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_),
        numClasses(computeNumClasses(0, indices.size()))
    {
    }

    int Metrics::computeNumClasses(size_t start, size_t end)
    {
        set<int> nClasses;
        for (auto i = start; i < end; ++i) {
            nClasses.insert(y[indices[i]]);
        }
        return static_cast<int>(nClasses.size());
    }

    void Metrics::setData(const labels_t& y_, const indices_t& indices_)
    {
        indices = indices_;
        y = y_;
        numClasses = computeNumClasses(0, indices.size());
        entropyCache.clear();
        igCache.clear();
    }

    precision_t Metrics::entropy(size_t start, size_t end)
    {
        precision_t p;
        precision_t ventropy = 0;
        int nElements = 0;
        labels_t counts(numClasses + 1, 0);
        if (end - start < 2)
            return 0;
        if (entropyCache.find({ start, end }) != entropyCache.end()) {
            return entropyCache[{start, end}];
        }
        for (auto i = &indices[start]; i != &indices[end]; ++i) {
            counts[y[*i]]++;
            nElements++;
        }
        for (auto count : counts) {
            if (count > 0) {
                p = static_cast<precision_t>(count) / static_cast<precision_t>(nElements);
                ventropy -= p * log2(p);
            }
        }
        entropyCache[{start, end}] = ventropy;
        return ventropy;
    }

    precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
    {
        precision_t iGain;
        precision_t entropyInterval;
        precision_t entropyLeft;
        precision_t entropyRight;
        size_t nElementsLeft = cut - start;
        size_t nElementsRight = end - cut;
        size_t nElements = end - start;
        if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
            return igCache[make_tuple(start, cut, end)];
        }
        entropyInterval = entropy(start, end);
        entropyLeft = entropy(start, cut);
        entropyRight = entropy(cut, end);
        iGain = entropyInterval -
            (static_cast<precision_t>(nElementsLeft) * entropyLeft +
                static_cast<precision_t>(nElementsRight) * entropyRight) /
            static_cast<precision_t>(nElements);
        igCache[make_tuple(start, cut, end)] = iGain;
        return iGain;
    }

}
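As a reference for the two cached quantities above, `entropy` and `informationGain` compute the standard Shannon definitions, matching the loops in the code (S is the interval, S1 and S2 its two halves around the cut T, and p_c the class proportions):

```latex
E(S) = -\sum_{c} p_c \log_2 p_c,
\qquad
IG(S, T) = E(S) - \frac{|S_1|}{|S|}\,E(S_1) - \frac{|S_2|}{|S|}\,E(S_2)
```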
@@ -1,28 +0,0 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef CCMETRICS_H
#define CCMETRICS_H

#include "typesFImdlp.h"

namespace mdlp {
    class Metrics {
    protected:
        labels_t& y;
        indices_t& indices;
        int numClasses;
        cacheEnt_t entropyCache = cacheEnt_t();
        cacheIg_t igCache = cacheIg_t();
    public:
        Metrics(labels_t&, indices_t&);
        void setData(const labels_t&, const indices_t&);
        int computeNumClasses(size_t, size_t);
        precision_t entropy(size_t, size_t);
        precision_t informationGain(size_t, size_t, size_t);
    };
}
#endif
@@ -1,41 +0,0 @@
[Build](https://github.com/rmontanana/mdlp/actions/workflows/build.yml)
[SonarCloud](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)

# mdlp

Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf).

The implementation tries to mitigate the problem of different label values appearing with the same value of the variable:

- It sorts the values of the variable, using the label values as a tie-breaker.
- Once a valid split candidate is found, it checks whether the previous value equals the current one and moves the cut to the previous distinct value, or to the next one if the former is not possible.

Other features:

- Intervals in which all values of the variable are the same are not considered for cutpoints.
- Intervals must have more than two examples to be evaluated.

The algorithm returns the cut points for the variable, as the sketch below illustrates.
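A minimal usage sketch of the discretizer, assuming the types from `typesFImdlp.h`; the data values are illustrative:

```cpp
#include "CPPFImdlp.h"
#include <iostream>

int main() {
    mdlp::samples_t X = { 4.7f, 4.9f, 5.0f, 5.1f, 5.6f, 6.0f, 6.2f, 6.7f };
    mdlp::labels_t  y = { 0, 0, 0, 0, 1, 1, 1, 1 };
    mdlp::CPPFImdlp discretizer;
    discretizer.fit(X, y);                          // computes the cut points
    for (auto cut : discretizer.getCutPoints())     // midpoints between boundary values
        std::cout << "cut point: " << cut << "\n";
    auto& Xd = discretizer.transform(X);            // interval index per sample
    std::cout << "first sample falls in bin " << Xd[0] << "\n";
}
```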
## Sample

To run the sample, just execute the following commands:

```bash
cd sample
cmake -B build
cd build
make
./sample -f iris -m 2
./sample -h
```

## Test

To run the tests and see coverage (llvm and gcovr have to be installed), execute the following commands:

```bash
cd tests
./test
```
@@ -1,24 +0,0 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef TYPES_H
#define TYPES_H

#include <vector>
#include <map>
#include <stdexcept>

using namespace std;
namespace mdlp {
    typedef float precision_t;
    typedef vector<precision_t> samples_t;
    typedef vector<int> labels_t;
    typedef vector<size_t> indices_t;
    typedef vector<precision_t> cutPoints_t;
    typedef map<pair<int, int>, precision_t> cacheEnt_t;
    typedef map<tuple<int, int, int>, precision_t> cacheIg_t;
}
#endif
@@ -4,9 +4,9 @@
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <ArffFiles.h>
#include <ArffFiles.hpp>
#include <CPPFImdlp.h>
#include <bayesnet/ensembles/BoostAODE.h>
#include <bayesnet/ensembles/XBAODE.h>

std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
@@ -57,12 +57,25 @@ int main(int argc, char* argv[])
    std::vector<std::string> features;
    std::string className;
    map<std::string, std::vector<int>> states;
    auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
    auto clf = bayesnet::XBAODE();
    std::cout << "Library version: " << clf.getVersion() << std::endl;
    tie(X, y, features, className, states) = loadDataset(file_name, true);
    clf.fit(X, y, features, className, states);
    torch::Tensor weights = torch::full({ X.size(1) }, 15, torch::kDouble);
    torch::Tensor dataset;
    try {
        auto yresized = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
        dataset = torch::cat({ X, yresized }, 0);
    }
    catch (const std::exception& e) {
        std::stringstream oss;
        oss << "* Error in X and y dimensions *\n";
        oss << "X dimensions: " << X.sizes() << "\n";
        oss << "y dimensions: " << y.sizes();
        throw std::runtime_error(oss.str());
    }
    clf.fit(dataset, features, className, states, weights, bayesnet::Smoothing_t::LAPLACE);
    auto score = clf.score(X, y);
    std::cout << "File: " << file_name << " score: " << score << std::endl;
    std::cout << "File: " << file_name << " Model: XBAODE score: " << score << std::endl;
    return 0;
}
65 sample/sample_xspode.cc (new file)
@@ -0,0 +1,65 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <ArffFiles.hpp>
#include <CPPFImdlp.h>
#include <bayesnet/ensembles/BoostAODE.h>
#include <bayesnet/classifiers/XSPODE.h>

std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
    std::vector<mdlp::labels_t> Xd;
    auto fimdlp = mdlp::CPPFImdlp();
    for (int i = 0; i < X.size(); i++) {
        fimdlp.fit(X[i], y);
        mdlp::labels_t& xd = fimdlp.transform(X[i]);
        Xd.push_back(xd);
    }
    return Xd;
}
tuple<std::vector<std::vector<int>>, std::vector<int>, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last)
{
    auto handler = ArffFiles();
    handler.load(name, class_last);
    // Get Dataset X, y
    std::vector<mdlp::samples_t>& X = handler.getX();
    mdlp::labels_t y = handler.getY();
    // Get className & Features
    auto className = handler.getClassName();
    std::vector<std::string> features;
    auto attributes = handler.getAttributes();
    transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
    torch::Tensor Xd;
    auto states = map<std::string, std::vector<int>>();
    auto Xr = discretizeDataset(X, y);
    for (int i = 0; i < features.size(); ++i) {
        states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
        auto item = states.at(features[i]);
        iota(begin(item), end(item), 0);
    }
    states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
    iota(begin(states.at(className)), end(states.at(className)), 0);
    return { Xr, y, features, className, states };
}

int main(int argc, char* argv[])
{
    if (argc < 2) {
        std::cerr << "Usage: " << argv[0] << " <file_name>" << std::endl;
        return 1;
    }
    std::string file_name = argv[1];
    bayesnet::BaseClassifier* clf = new bayesnet::XSpode(0);
    std::cout << "Library version: " << clf->getVersion() << std::endl;
    auto [X, y, features, className, states] = loadDataset(file_name, true);
    torch::Tensor weights = torch::full({ static_cast<long>(X[0].size()) }, 1.0 / X[0].size(), torch::kDouble);
    clf->fit(X, y, features, className, states, bayesnet::Smoothing_t::ORIGINAL);
    auto score = clf->score(X, y);
    std::cout << "File: " << file_name << " Model: XSpode(0) score: " << score << std::endl;
    delete clf;
    return 0;
}
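Given the usage message in `main` above, an invocation of the new sample might look like this (the dataset file name is hypothetical):

```bash
./bayesnet_sample_xspode glass.arff
```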
@@ -1,24 +1,33 @@
if(ENABLE_TESTING)
    include_directories(
        ${BayesNet_SOURCE_DIR}/lib/Files
        ${BayesNet_SOURCE_DIR}/lib/mdlp
        ${BayesNet_SOURCE_DIR}/tests/lib/Files
        ${BayesNet_SOURCE_DIR}/lib/folding
        ${BayesNet_SOURCE_DIR}/lib/mdlp/src
        ${BayesNet_SOURCE_DIR}/lib/log
        ${BayesNet_SOURCE_DIR}/lib/json/include
        ${BayesNet_SOURCE_DIR}
        ${CMAKE_BINARY_DIR}/configured_files/include
    )
    file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
    add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc
        TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc
        TestUtils.cc TestBayesEnsemble.cc ${BayesNet_SOURCES})
    target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
    add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc TestXSPnDE.cc TestXBA2DE.cc
        TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestXBAODE.cc TestA2DE.cc
        TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc TestXSPODE.cc ${BayesNet_SOURCES})
    target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp PRIVATE Catch2::Catch2WithMain)
    add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
    add_test(NAME Network COMMAND TestBayesNet "[Network]")
    add_test(NAME Node COMMAND TestBayesNet "[Node]")
    add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
    add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
    add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]")
    add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]")
    add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
    add_test(NAME XSPODE COMMAND TestBayesNet "[XSPODE]")
    add_test(NAME XSPnDE COMMAND TestBayesNet "[XSPnDE]")
    add_test(NAME XBAODE COMMAND TestBayesNet "[XBAODE]")
    add_test(NAME XBA2DE COMMAND TestBayesNet "[XBA2DE]")
    add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
    add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
    add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
    add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
    add_test(NAME Models COMMAND TestBayesNet "[Models]")
    add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
    add_test(NAME Modules COMMAND TestBayesNet "[Modules]")
    add_test(NAME Network COMMAND TestBayesNet "[Network]")
    add_test(NAME Node COMMAND TestBayesNet "[Node]")
    add_test(NAME MST COMMAND TestBayesNet "[MST]")
endif(ENABLE_TESTING)
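With `ENABLE_TESTING` on, the targets registered above can be run individually through CTest; a typical invocation (the build directory name is illustrative):

```bash
cmake -B build_Debug -DENABLE_TESTING=ON
cmake --build build_Debug
ctest --test-dir build_Debug -R XBAODE --output-on-failure
```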
Some files were not shown because too many files have changed in this diff.