Compare commits
87 Commits
block_upda
...
main
Author | SHA1 | Date | |
---|---|---|---|
43ceefd2c9 | |||
e6501502d1 | |||
d84adf6172 | |||
268a86cbe0 | |||
fc4c93b299 | |||
86f2bc44fc | |||
f0f3d9ad6e | |||
9a323cd7a3 | |||
cb949ac7e5 | |||
2c297ea15d | |||
4e4b6e67f4 | |||
82847774ee | |||
d0955d9369 | |||
2d34eb8c89 | |||
0159c397fa | |||
0bbc8328a9 | |||
35ca862eca | |||
26eb58b104 | |||
6fcc15d39a | |||
9a14133be5 | |||
59c1cf5b3b | |||
8e9090d283 | |||
02bcab01be | |||
716748e18c | |||
0b31780d39 | |||
fa26aa80f7 | |||
3eb61905fb | |||
ca0ae4dacf | |||
b34869cc61 | |||
27a3e5a5e0 | |||
684443a788 | |||
6d9badc33b | |||
015b1b0c0f | |||
7bb8e4df01 | |||
53710378de | |||
c833e9ba32 | |||
f5cb46ee29 | |||
fa35681abe | |||
b0bd0e6eee | |||
d43be27821 | |||
a2853dd2e5 | |||
0341bd5648 | |||
22b742f068 | |||
2584e8294d | |||
291ba0fb0e | |||
80043d5181 | |||
677ec5613d | |||
cccaa6e0af | |||
2e3e0e0fc2 | |||
8784a24898 | |||
54496c68f1 | |||
1f236a70db | |||
ef3c74633c | |||
7efd95095c | |||
0e24135d46 | |||
521bfd2a8e | |||
e2e0fb0c40 | |||
56b62a67cc | |||
c0fc107abb | |||
d8c44b3b7c | |||
6ab7cd2cbd | |||
b578ea8a2d | |||
9a752d15dc | |||
4992685e94 | |||
346b693c79 | |||
164c8bd90c | |||
ced29a2c2e | |||
0ec53f405f | |||
f806015b29 | |||
8115f25c06 | |||
618a1e539c | |||
7aeffba740 | |||
e79ea63afb | |||
3c7382a93a | |||
b4a222b100 | |||
23ef0cc5f7 | |||
793b2d3cd5 | |||
ae469b8146 | |||
f014928411 | |||
c4b563a339 | |||
49bb0582e6 | |||
b4c5261e01 | |||
b956aa3873 | |||
1f06631f69 | |||
6dd589bd61 | |||
6475f10825 | |||
7d906b24d1 |
39
.clang-uml
Normal file
39
.clang-uml
Normal file
@ -0,0 +1,39 @@
|
||||
compilation_database_dir: build_Debug
|
||||
output_directory: diagrams
|
||||
diagrams:
|
||||
BayesNet:
|
||||
type: class
|
||||
glob:
|
||||
- bayesnet/*.h
|
||||
- bayesnet/classifiers/*.h
|
||||
- bayesnet/classifiers/*.cc
|
||||
- bayesnet/ensembles/*.h
|
||||
- bayesnet/ensembles/*.cc
|
||||
- bayesnet/feature_selection/*.h
|
||||
- bayesnet/feature_selection/*.cc
|
||||
- bayesnet/network/*.h
|
||||
- bayesnet/network/*.cc
|
||||
- bayesnet/utils/*.h
|
||||
- bayesnet/utils/*.cc
|
||||
include:
|
||||
# Only include entities from the following namespaces
|
||||
namespaces:
|
||||
- bayesnet
|
||||
exclude:
|
||||
access:
|
||||
- private
|
||||
plantuml:
|
||||
style:
|
||||
# Apply this style to all classes in the diagram
|
||||
class: "#aliceblue;line:blue;line.dotted;text:blue"
|
||||
# Apply this style to all packages in the diagram
|
||||
package: "#back:grey"
|
||||
# Make all template instantiation relations point upwards and draw them
|
||||
# as green and dotted lines
|
||||
instantiation: "up[#green,dotted]"
|
||||
cmd: "/usr/bin/plantuml -tsvg \"diagrams/{}.puml\""
|
||||
before:
|
||||
- 'title clang-uml class diagram model'
|
||||
mermaid:
|
||||
before:
|
||||
- 'classDiagram'
|
57
.devcontainer/Dockerfile
Normal file
57
.devcontainer/Dockerfile
Normal file
@ -0,0 +1,57 @@
|
||||
FROM mcr.microsoft.com/devcontainers/cpp:ubuntu22.04
|
||||
|
||||
ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.29.3"
|
||||
|
||||
# Optionally install the cmake for vcpkg
|
||||
COPY ./reinstall-cmake.sh /tmp/
|
||||
|
||||
RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \
|
||||
chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \
|
||||
fi \
|
||||
&& rm -f /tmp/reinstall-cmake.sh
|
||||
|
||||
|
||||
# [Optional] Uncomment this section to install additional vcpkg ports.
|
||||
# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install <your-port-name-here>"
|
||||
|
||||
# [Optional] Uncomment this section to install additional packages.
|
||||
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
|
||||
&& apt-get -y install --no-install-recommends wget software-properties-common libdatetime-perl libcapture-tiny-perl libdatetime-format-dateparse-perl libgd-perl
|
||||
|
||||
# Add PPA for GCC 13
|
||||
RUN add-apt-repository ppa:ubuntu-toolchain-r/test
|
||||
RUN apt-get update
|
||||
|
||||
# Install GCC 13.1
|
||||
RUN apt-get install -y gcc-13 g++-13 doxygen
|
||||
|
||||
# Install lcov 2.1
|
||||
RUN wget --quiet https://github.com/linux-test-project/lcov/releases/download/v2.1/lcov-2.1.tar.gz && \
|
||||
tar -xvf lcov-2.1.tar.gz && \
|
||||
cd lcov-2.1 && \
|
||||
make install
|
||||
RUN rm lcov-2.1.tar.gz
|
||||
RUN rm -fr lcov-2.1
|
||||
|
||||
# Install Miniconda
|
||||
RUN mkdir -p /opt/conda
|
||||
RUN wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh" -O /opt/conda/miniconda.sh && \
|
||||
bash /opt/conda/miniconda.sh -b -p /opt/miniconda
|
||||
|
||||
# Add conda to PATH
|
||||
ENV PATH=/opt/miniconda/bin:$PATH
|
||||
|
||||
# add CXX and CC to the environment with gcc 13
|
||||
ENV CXX=/usr/bin/g++-13
|
||||
ENV CC=/usr/bin/gcc-13
|
||||
|
||||
# link the last gcov version
|
||||
RUN rm /usr/bin/gcov
|
||||
RUN ln -s /usr/bin/gcov-13 /usr/bin/gcov
|
||||
|
||||
# change ownership of /opt/miniconda to vscode user
|
||||
RUN chown -R vscode:vscode /opt/miniconda
|
||||
|
||||
USER vscode
|
||||
RUN conda init
|
||||
RUN conda install -y -c conda-forge yaml pytorch
|
37
.devcontainer/devcontainer.json
Normal file
37
.devcontainer/devcontainer.json
Normal file
@ -0,0 +1,37 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/cpp
|
||||
{
|
||||
"name": "C++",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile"
|
||||
},
|
||||
// "features": {
|
||||
// "ghcr.io/devcontainers/features/conda:1": {}
|
||||
// }
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
// "features": {},
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
// Use 'postCreateCommand' to run commands after the container is created.
|
||||
"postCreateCommand": "make release && make debug && echo 'Done!'",
|
||||
// Configure tool-specific properties.
|
||||
// "customizations": {},
|
||||
"customizations": {
|
||||
// Configure properties specific to VS Code.
|
||||
"vscode": {
|
||||
"settings": {},
|
||||
"extensions": [
|
||||
"ms-vscode.cpptools",
|
||||
"ms-vscode.cpptools-extension-pack",
|
||||
"ms-vscode.cpptools-themes",
|
||||
"ms-vscode.cmake-tools",
|
||||
"ms-azuretools.vscode-docker",
|
||||
"jbenden.c-cpp-flylint",
|
||||
"matepek.vscode-catch2-test-adapter",
|
||||
"GitHub.copilot"
|
||||
]
|
||||
}
|
||||
}
|
||||
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "root"
|
||||
}
|
59
.devcontainer/reinstall-cmake.sh
Normal file
59
.devcontainer/reinstall-cmake.sh
Normal file
@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env bash
|
||||
#-------------------------------------------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information.
|
||||
#-------------------------------------------------------------------------------------------------------------
|
||||
#
|
||||
set -e
|
||||
|
||||
CMAKE_VERSION=${1:-"none"}
|
||||
|
||||
if [ "${CMAKE_VERSION}" = "none" ]; then
|
||||
echo "No CMake version specified, skipping CMake reinstallation"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Cleanup temporary directory and associated files when exiting the script.
|
||||
cleanup() {
|
||||
EXIT_CODE=$?
|
||||
set +e
|
||||
if [[ -n "${TMP_DIR}" ]]; then
|
||||
echo "Executing cleanup of tmp files"
|
||||
rm -Rf "${TMP_DIR}"
|
||||
fi
|
||||
exit $EXIT_CODE
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
|
||||
echo "Installing CMake..."
|
||||
apt-get -y purge --auto-remove cmake
|
||||
mkdir -p /opt/cmake
|
||||
|
||||
architecture=$(dpkg --print-architecture)
|
||||
case "${architecture}" in
|
||||
arm64)
|
||||
ARCH=aarch64 ;;
|
||||
amd64)
|
||||
ARCH=x86_64 ;;
|
||||
*)
|
||||
echo "Unsupported architecture ${architecture}."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
CMAKE_BINARY_NAME="cmake-${CMAKE_VERSION}-linux-${ARCH}.sh"
|
||||
CMAKE_CHECKSUM_NAME="cmake-${CMAKE_VERSION}-SHA-256.txt"
|
||||
TMP_DIR=$(mktemp -d -t cmake-XXXXXXXXXX)
|
||||
|
||||
echo "${TMP_DIR}"
|
||||
cd "${TMP_DIR}"
|
||||
|
||||
curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_BINARY_NAME}" -O
|
||||
curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_CHECKSUM_NAME}" -O
|
||||
|
||||
sha256sum -c --ignore-missing "${CMAKE_CHECKSUM_NAME}"
|
||||
sh "${TMP_DIR}/${CMAKE_BINARY_NAME}" --prefix=/opt/cmake --skip-license
|
||||
|
||||
ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
|
||||
ln -s /opt/cmake/bin/ctest /usr/local/bin/ctest
|
12
.github/dependabot.yml
vendored
Normal file
12
.github/dependabot.yml
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
# To get started with Dependabot version updates, you'll need to specify which
|
||||
# package ecosystems to update and where the package manifests are located.
|
||||
# Please see the documentation for more information:
|
||||
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||
# https://containers.dev/guide/dependabot
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "devcontainers"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: weekly
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -39,4 +39,9 @@ cmake-build*/**
|
||||
puml/**
|
||||
.vscode/settings.json
|
||||
sample/build
|
||||
**/.DS_Store
|
||||
docs/manual
|
||||
docs/man3
|
||||
docs/man
|
||||
docs/Doxyfile
|
||||
|
||||
|
21
.gitmodules
vendored
21
.gitmodules
vendored
@ -1,13 +1,3 @@
|
||||
[submodule "lib/mdlp"]
|
||||
path = lib/mdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
||||
main = main
|
||||
update = merge
|
||||
[submodule "lib/catch2"]
|
||||
path = lib/catch2
|
||||
main = v2.x
|
||||
update = merge
|
||||
url = https://github.com/catchorg/Catch2.git
|
||||
[submodule "lib/json"]
|
||||
path = lib/json
|
||||
url = https://github.com/nlohmann/json.git
|
||||
@ -18,3 +8,14 @@
|
||||
url = https://github.com/rmontanana/folding
|
||||
main = main
|
||||
update = merge
|
||||
[submodule "tests/lib/catch2"]
|
||||
path = tests/lib/catch2
|
||||
url = https://github.com/catchorg/Catch2.git
|
||||
main = main
|
||||
update = merge
|
||||
[submodule "tests/lib/Files"]
|
||||
path = tests/lib/Files
|
||||
url = https://github.com/rmontanana/ArffFiles
|
||||
[submodule "lib/mdlp"]
|
||||
path = lib/mdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
||||
|
4
.sonarlint/connectedMode.json
Normal file
4
.sonarlint/connectedMode.json
Normal file
@ -0,0 +1,4 @@
|
||||
{
|
||||
"sonarCloudOrganization": "rmontanana",
|
||||
"projectKey": "rmontanana_BayesNet"
|
||||
}
|
6
.vscode/launch.json
vendored
6
.vscode/launch.json
vendored
@ -14,11 +14,11 @@
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "test",
|
||||
"program": "${workspaceFolder}/build_debug/tests/TestBayesNet",
|
||||
"program": "${workspaceFolder}/build_Debug/tests/TestBayesNet",
|
||||
"args": [
|
||||
"Block Update"
|
||||
"No features selected"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/build_debug/tests"
|
||||
"cwd": "${workspaceFolder}/build_Debug/tests"
|
||||
},
|
||||
{
|
||||
"name": "(gdb) Launch",
|
||||
|
59
CHANGELOG.md
59
CHANGELOG.md
@ -5,7 +5,57 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [unreleased]
|
||||
## [Unreleased]
|
||||
|
||||
## [1.0.6] 2024-11-23
|
||||
|
||||
### Fixed
|
||||
|
||||
- Prevent existing edges to be added to the network in the `add_edge` method.
|
||||
- Don't allow to add nodes or edges on already fiited networks.
|
||||
- Number of threads spawned
|
||||
- Network class tests
|
||||
|
||||
### Added
|
||||
|
||||
- Library logo generated with <https://openart.ai> to README.md
|
||||
- Link to the coverage report in the README.md coverage label.
|
||||
- *convergence_best* hyperparameter to the BoostAODE class, to control the way the prior accuracy is computed if convergence is set. Default value is *false*.
|
||||
- SPnDE model.
|
||||
- A2DE model.
|
||||
- BoostA2DE model.
|
||||
- A2DE & SPnDE tests.
|
||||
- Add tests to reach 99% of coverage.
|
||||
- Add tests to check the correct version of the mdlp, folding and json libraries.
|
||||
- Library documentation generated with Doxygen.
|
||||
- Link to documentation in the README.md.
|
||||
- Three types of smoothing the Bayesian Network ORIGINAL, LAPLACE and CESTNIK.
|
||||
|
||||
### Internal
|
||||
|
||||
- Fixed doxygen optional dependency
|
||||
- Add env parallel variable to Makefile
|
||||
- Add CountingSemaphore class to manage the number of threads spawned.
|
||||
- Ignore CUDA language in CMake CodeCoverage module.
|
||||
- Update mdlp library as a git submodule.
|
||||
- Create library ShuffleArffFile to limit the number of samples with a parameter and shuffle them.
|
||||
- Refactor catch2 library location to test/lib
|
||||
- Refactor loadDataset function in tests.
|
||||
- Remove conditionalEdgeWeights method in BayesMetrics.
|
||||
- Refactor Coverage Report generation.
|
||||
- Add devcontainer to work on apple silicon.
|
||||
- Change build cmake folder names to Debug & Release.
|
||||
- Add a Makefile target (doc) to generate the documentation.
|
||||
- Add a Makefile target (doc-install) to install the documentation.
|
||||
|
||||
### Libraries versions
|
||||
|
||||
- mdlp: 2.0.1
|
||||
- Folding: 1.1.0
|
||||
- json: 3.11
|
||||
- ArffFiles: 1.1.0
|
||||
|
||||
## [1.0.5] 2024-04-20
|
||||
|
||||
### Added
|
||||
|
||||
@ -16,6 +66,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Badges of coverage and code quality (codacy) in README.md. Coverage badge is updated with *make viewcoverage*
|
||||
- Tests to reach 97% of coverage.
|
||||
- Copyright header to source files.
|
||||
- Diagrams to README.md: UML class diagram & dependency diagram
|
||||
- Action to create diagrams to Makefile: *make diagrams*
|
||||
|
||||
### Changed
|
||||
|
||||
@ -23,6 +75,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- The worse model count in BoostAODE is reset to 0 every time a new model produces better accuracy, so the tolerance of the model is meant to be the number of **consecutive** models that produce worse accuracy.
|
||||
- Default hyperparameter values in BoostAODE: bisection is true, maxTolerance is 3, convergence is true
|
||||
|
||||
### Removed
|
||||
|
||||
- The 'predict_single' hyperparameter from the BoostAODE class.
|
||||
- The 'repeatSparent' hyperparameter from the BoostAODE class.
|
||||
|
||||
## [1.0.4] 2024-03-06
|
||||
|
||||
### Added
|
||||
|
5
CMakeGraphVizOptions.cmake
Normal file
5
CMakeGraphVizOptions.cmake
Normal file
@ -0,0 +1,5 @@
|
||||
# Set the default graph title
|
||||
set(GRAPHVIZ_GRAPH_NAME "BayesNet dependency graph")
|
||||
|
||||
set(GRAPHVIZ_SHARED_LIBS OFF)
|
||||
set(GRAPHVIZ_STATIC_LIBS ON)
|
@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(BayesNet
|
||||
VERSION 1.0.4.1
|
||||
VERSION 1.0.6
|
||||
DESCRIPTION "Bayesian Network and basic classifiers Library."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
|
||||
LANGUAGES CXX
|
||||
@ -25,8 +25,12 @@ set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -O0 -g")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fprofile-arcs -ftest-coverage -fno-elide-constructors")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-default-inline")
|
||||
endif()
|
||||
|
||||
# Options
|
||||
# -------
|
||||
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
|
||||
@ -45,11 +49,12 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
set(CODE_COVERAGE ON)
|
||||
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
|
||||
|
||||
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||
message(STATUS "Languages=${LANGUAGES}")
|
||||
if (CODE_COVERAGE)
|
||||
enable_testing()
|
||||
include(CodeCoverage)
|
||||
MESSAGE("Code coverage enabled")
|
||||
MESSAGE(STATUS "Code coverage enabled")
|
||||
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
endif (CODE_COVERAGE)
|
||||
|
||||
@ -59,21 +64,21 @@ endif (ENABLE_CLANG_TIDY)
|
||||
|
||||
# External libraries - dependencies of BayesNet
|
||||
# ---------------------------------------------
|
||||
|
||||
# include(FetchContent)
|
||||
add_git_submodule("lib/mdlp")
|
||||
add_git_submodule("lib/json")
|
||||
add_git_submodule("lib/mdlp")
|
||||
|
||||
# Subdirectories
|
||||
# --------------
|
||||
add_subdirectory(config)
|
||||
add_subdirectory(lib/Files)
|
||||
add_subdirectory(bayesnet)
|
||||
|
||||
# Testing
|
||||
# -------
|
||||
if (ENABLE_TESTING)
|
||||
MESSAGE("Testing enabled")
|
||||
add_git_submodule("lib/catch2")
|
||||
MESSAGE(STATUS "Testing enabled")
|
||||
add_subdirectory(tests/lib/catch2)
|
||||
include(CTest)
|
||||
add_subdirectory(tests)
|
||||
endif (ENABLE_TESTING)
|
||||
@ -85,4 +90,19 @@ install(TARGETS BayesNet
|
||||
LIBRARY DESTINATION lib
|
||||
CONFIGURATIONS Release)
|
||||
install(DIRECTORY bayesnet/ DESTINATION include/bayesnet FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h")
|
||||
install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h DESTINATION include/bayesnet CONFIGURATIONS Release)
|
||||
install(FILES ${CMAKE_BINARY_DIR}/configured_files/include/bayesnet/config.h DESTINATION include/bayesnet CONFIGURATIONS Release)
|
||||
|
||||
# Documentation
|
||||
# -------------
|
||||
find_package(Doxygen)
|
||||
if (Doxygen_FOUND)
|
||||
set(DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs)
|
||||
set(doxyfile_in ${DOC_DIR}/Doxyfile.in)
|
||||
set(doxyfile ${DOC_DIR}/Doxyfile)
|
||||
configure_file(${doxyfile_in} ${doxyfile} @ONLY)
|
||||
doxygen_add_docs(doxygen
|
||||
WORKING_DIRECTORY ${DOC_DIR}
|
||||
CONFIG_FILE ${doxyfile})
|
||||
else (Doxygen_FOUND)
|
||||
MESSAGE("* Doxygen not found")
|
||||
endif (Doxygen_FOUND)
|
||||
|
109
Makefile
109
Makefile
@ -1,12 +1,22 @@
|
||||
SHELL := /bin/bash
|
||||
.DEFAULT_GOAL := help
|
||||
.PHONY: viewcoverage coverage setup help install uninstall buildr buildd test clean debug release sample updatebadge
|
||||
.PHONY: viewcoverage coverage setup help install uninstall diagrams buildr buildd test clean debug release sample updatebadge doc doc-install
|
||||
|
||||
f_release = build_release
|
||||
f_debug = build_debug
|
||||
f_release = build_Release
|
||||
f_debug = build_Debug
|
||||
f_diagrams = diagrams
|
||||
app_targets = BayesNet
|
||||
test_targets = TestBayesNet
|
||||
n_procs = -j 16
|
||||
clang-uml = clang-uml
|
||||
plantuml = plantuml
|
||||
lcov = lcov
|
||||
genhtml = genhtml
|
||||
dot = dot
|
||||
docsrcdir = docs/manual
|
||||
mansrcdir = docs/man3
|
||||
mandestdir = /usr/local/share/man
|
||||
sed_command_link = 's/e">LCOV -/e"><a href="https:\/\/rmontanana.github.io\/bayesnet">Back to manual<\/a> LCOV -/g'
|
||||
sed_command_diagram = 's/Diagram"/Diagram" width="100%" height="100%" /g'
|
||||
|
||||
define ClearTests
|
||||
@for t in $(test_targets); do \
|
||||
@ -31,17 +41,27 @@ setup: ## Install dependencies for tests and coverage
|
||||
pip install gcovr; \
|
||||
sudo dnf install lcov;\
|
||||
fi
|
||||
@echo "* You should install plantuml & graphviz for the diagrams"
|
||||
|
||||
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
|
||||
diagrams: ## Create an UML class diagram & dependency of the project (diagrams/BayesNet.png)
|
||||
@which $(plantuml) || (echo ">>> Please install plantuml"; exit 1)
|
||||
@which $(dot) || (echo ">>> Please install graphviz"; exit 1)
|
||||
@which $(clang-uml) || (echo ">>> Please install clang-uml"; exit 1)
|
||||
@export PLANTUML_LIMIT_SIZE=16384
|
||||
@echo ">>> Creating UML class diagram of the project...";
|
||||
@$(clang-uml) -p
|
||||
@cd $(f_diagrams); \
|
||||
$(plantuml) -tsvg BayesNet.puml
|
||||
@echo ">>> Creating dependency graph diagram of the project...";
|
||||
$(MAKE) debug
|
||||
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
|
||||
cd $(f_debug) && cmake .. --graphviz=dependency.dot
|
||||
@$(dot) -Tsvg $(f_debug)/dependency.dot.BayesNet -o $(f_diagrams)/dependency.svg
|
||||
|
||||
buildd: ## Build the debug targets
|
||||
cmake --build $(f_debug) -t $(app_targets) $(n_procs)
|
||||
cmake --build $(f_debug) -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
|
||||
|
||||
buildr: ## Build the release targets
|
||||
cmake --build $(f_release) -t $(app_targets) $(n_procs)
|
||||
cmake --build $(f_release) -t $(app_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
|
||||
|
||||
clean: ## Clean the tests info
|
||||
@echo ">>> Cleaning Debug BayesNet tests...";
|
||||
@ -83,9 +103,9 @@ sample: ## Build sample
|
||||
|
||||
opt = ""
|
||||
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
|
||||
@echo ">>> Running BayesNet & Platform tests...";
|
||||
@echo ">>> Running BayesNet tests...";
|
||||
@$(MAKE) clean
|
||||
@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
|
||||
@cmake --build $(f_debug) -t $(test_targets) --parallel $(CMAKE_BUILD_PARALLEL_LEVEL)
|
||||
@for t in $(test_targets); do \
|
||||
echo ">>> Running $$t...";\
|
||||
if [ -f $(f_debug)/tests/$$t ]; then \
|
||||
@ -98,31 +118,70 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
|
||||
|
||||
coverage: ## Run tests and generate coverage report (build/index.html)
|
||||
@echo ">>> Building tests with coverage..."
|
||||
@$(MAKE) test
|
||||
@gcovr $(f_debug)/tests
|
||||
@echo ">>> Done";
|
||||
|
||||
viewcoverage: ## Run tests, generate coverage report and upload it to codecov (build/index.html)
|
||||
@echo ">>> Building tests with coverage..."
|
||||
@$(MAKE) coverage
|
||||
@which $(lcov) || (echo ">>ease install lcov"; exit 1)
|
||||
@if [ ! -f $(f_debug)/tests/coverage.info ] ; then $(MAKE) test ; fi
|
||||
@echo ">>> Building report..."
|
||||
@cd $(f_debug)/tests; \
|
||||
lcov --directory . --capture --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
lcov --remove coverage.info 'bayesnet/utils/loguru.*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
genhtml coverage.info --output-directory coverage >/dev/null 2>&1;
|
||||
$(lcov) --directory CMakeFiles --capture --demangle-cpp --ignore-errors source,source --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info '/usr/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'lib/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'libtorch/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'tests/*' --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info 'bayesnet/utils/loguru.*' --ignore-errors unused --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --remove coverage.info '/opt/miniconda/*' --ignore-errors unused --output-file coverage.info >/dev/null 2>&1; \
|
||||
$(lcov) --summary coverage.info
|
||||
@$(MAKE) updatebadge
|
||||
@xdg-open $(f_debug)/tests/coverage/index.html || open $(f_debug)/tests/coverage/index.html 2>/dev/null
|
||||
@echo ">>> Done";
|
||||
|
||||
viewcoverage: ## View the html coverage report
|
||||
@which $(genhtml) >/dev/null || (echo ">>> Please install lcov (genhtml not found)"; exit 1)
|
||||
@if [ ! -d $(docsrcdir)/coverage ]; then mkdir -p $(docsrcdir)/coverage; fi
|
||||
@if [ ! -f $(f_debug)/tests/coverage.info ]; then \
|
||||
echo ">>> No coverage.info file found. Run make coverage first!"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@$(genhtml) $(f_debug)/tests/coverage.info --demangle-cpp --output-directory $(docsrcdir)/coverage --title "BayesNet Coverage Report" -s -k -f --legend >/dev/null 2>&1;
|
||||
@xdg-open $(docsrcdir)/coverage/index.html || open $(docsrcdir)/coverage/index.html 2>/dev/null
|
||||
@echo ">>> Done";
|
||||
|
||||
updatebadge: ## Update the coverage badge in README.md
|
||||
@which python || (echo ">>> Please install python"; exit 1)
|
||||
@if [ ! -f $(f_debug)/tests/coverage.info ]; then \
|
||||
echo ">>> No coverage.info file found. Run make coverage first!"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@echo ">>> Updating coverage badge..."
|
||||
@env python update_coverage.py $(f_debug)/tests
|
||||
@echo ">>> Done";
|
||||
|
||||
doc: ## Generate documentation
|
||||
@echo ">>> Generating documentation..."
|
||||
@cmake --build $(f_release) -t doxygen
|
||||
@cp -rp diagrams $(docsrcdir)
|
||||
@
|
||||
@if [ "$(shell uname)" = "Darwin" ]; then \
|
||||
sed -i "" $(sed_command_link) $(docsrcdir)/coverage/index.html ; \
|
||||
sed -i "" $(sed_command_diagram) $(docsrcdir)/index.html ; \
|
||||
else \
|
||||
sed -i $(sed_command_link) $(docsrcdir)/coverage/index.html ; \
|
||||
sed -i $(sed_command_diagram) $(docsrcdir)/index.html ; \
|
||||
fi
|
||||
@echo ">>> Done";
|
||||
|
||||
docdir = ""
|
||||
doc-install: ## Install documentation
|
||||
@echo ">>> Installing documentation..."
|
||||
@if [ "$(docdir)" = "" ]; then \
|
||||
echo "docdir parameter has to be set when calling doc-install, i.e. docdir=../bayesnet_help"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@if [ ! -d $(docdir) ]; then \
|
||||
@$(MAKE) doc; \
|
||||
fi
|
||||
@cp -rp $(docsrcdir)/* $(docdir)
|
||||
@sudo cp -rp $(mansrcdir) $(mandestdir)
|
||||
@echo ">>> Done";
|
||||
|
||||
help: ## Show help message
|
||||
@IFS=$$'\n' ; \
|
||||
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
|
||||
|
63
README.md
63
README.md
@ -1,13 +1,16 @@
|
||||
# BayesNet
|
||||
# <img src="logo.png" alt="logo" width="50"/> BayesNet
|
||||
|
||||
![C++](https://img.shields.io/badge/c++-%2300599C.svg?style=flat&logo=c%2B%2B&logoColor=white)
|
||||
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](<https://opensource.org/licenses/MIT>)
|
||||
![Gitea Release](https://img.shields.io/gitea/v/release/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000)
|
||||
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/cf3e0ac71d764650b1bf4d8d00d303b1)](https://app.codacy.com/gh/Doctorado-ML/BayesNet/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
||||
[![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||
[![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
|
||||
![Static Badge](https://img.shields.io/badge/Coverage-97,2%25-green)
|
||||
[![Coverage Badge](https://img.shields.io/badge/Coverage-99,1%25-green)](html/index.html)
|
||||
[![DOI](https://zenodo.org/badge/667782806.svg)](https://doi.org/10.5281/zenodo.14210344)
|
||||
|
||||
Bayesian Network Classifiers using libtorch from scratch
|
||||
Bayesian Network Classifiers library
|
||||
|
||||
## Dependencies
|
||||
|
||||
@ -20,6 +23,12 @@ unzip libtorch-shared-with-deps-latest.zips
|
||||
|
||||
## Setup
|
||||
|
||||
### Getting the code
|
||||
|
||||
```bash
|
||||
git clone --recurse-submodules https://github.com/doctorado-ml/bayesnet
|
||||
```
|
||||
|
||||
### Release
|
||||
|
||||
```bash
|
||||
@ -33,7 +42,13 @@ sudo make install
|
||||
```bash
|
||||
make debug
|
||||
make test
|
||||
```
|
||||
|
||||
### Coverage
|
||||
|
||||
```bash
|
||||
make coverage
|
||||
make viewcoverage
|
||||
```
|
||||
|
||||
### Sample app
|
||||
@ -47,4 +62,44 @@ make sample fname=tests/data/glass.arff
|
||||
|
||||
## Models
|
||||
|
||||
### [BoostAODE](docs/BoostAODE.md)
|
||||
#### - TAN
|
||||
|
||||
#### - KDB
|
||||
|
||||
#### - SPODE
|
||||
|
||||
#### - SPnDE
|
||||
|
||||
#### - AODE
|
||||
|
||||
#### - A2DE
|
||||
|
||||
#### - [BoostAODE](docs/BoostAODE.md)
|
||||
|
||||
#### - BoostA2DE
|
||||
|
||||
### With Local Discretization
|
||||
|
||||
#### - TANLd
|
||||
|
||||
#### - KDBLd
|
||||
|
||||
#### - SPODELd
|
||||
|
||||
#### - AODELd
|
||||
|
||||
## Documentation
|
||||
|
||||
### [Manual](https://rmontanana.github.io/bayesnet/)
|
||||
|
||||
### [Coverage report](https://rmontanana.github.io/bayesnet/coverage/index.html)
|
||||
|
||||
## Diagrams
|
||||
|
||||
### UML Class Diagram
|
||||
|
||||
![BayesNet UML Class Diagram](diagrams/BayesNet.svg)
|
||||
|
||||
### Dependency Diagram
|
||||
|
||||
![BayesNet Dependency Diagram](diagrams/dependency.svg)
|
||||
|
@ -8,16 +8,18 @@
|
||||
#include <vector>
|
||||
#include <torch/torch.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "bayesnet/network/Network.h"
|
||||
|
||||
namespace bayesnet {
|
||||
enum status_t { NORMAL, WARNING, ERROR };
|
||||
class BaseClassifier {
|
||||
public:
|
||||
// X is nxm std::vector, y is nx1 std::vector
|
||||
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
|
||||
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||
// X is nxm tensor, y is nx1 tensor
|
||||
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) = 0;
|
||||
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
|
||||
virtual ~BaseClassifier() = default;
|
||||
torch::Tensor virtual predict(torch::Tensor& X) = 0;
|
||||
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
|
||||
@ -39,7 +41,7 @@ namespace bayesnet {
|
||||
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
|
||||
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
|
||||
protected:
|
||||
virtual void trainModel(const torch::Tensor& weights) = 0;
|
||||
virtual void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) = 0;
|
||||
std::vector<std::string> validHyperparameters;
|
||||
};
|
||||
}
|
@ -1,6 +1,5 @@
|
||||
include_directories(
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp
|
||||
${BayesNet_SOURCE_DIR}/lib/Files
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
||||
${BayesNet_SOURCE_DIR}/lib/folding
|
||||
${BayesNet_SOURCE_DIR}/lib/json/include
|
||||
${BayesNet_SOURCE_DIR}
|
||||
@ -10,4 +9,4 @@ include_directories(
|
||||
file(GLOB_RECURSE Sources "*.cc")
|
||||
|
||||
add_library(BayesNet ${Sources})
|
||||
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
|
||||
target_link_libraries(BayesNet fimdlp "${TORCH_LIBRARIES}")
|
||||
|
@ -11,7 +11,7 @@
|
||||
namespace bayesnet {
|
||||
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
|
||||
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
|
||||
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
|
||||
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
this->features = features;
|
||||
this->className = className;
|
||||
@ -23,7 +23,7 @@ namespace bayesnet {
|
||||
metrics = Metrics(dataset, features, className, n_classes);
|
||||
model.initialize();
|
||||
buildModel(weights);
|
||||
trainModel(weights);
|
||||
trainModel(weights, smoothing);
|
||||
fitted = true;
|
||||
return *this;
|
||||
}
|
||||
@ -41,20 +41,20 @@ namespace bayesnet {
|
||||
throw std::runtime_error(oss.str());
|
||||
}
|
||||
}
|
||||
void Classifier::trainModel(const torch::Tensor& weights)
|
||||
void Classifier::trainModel(const torch::Tensor& weights, Smoothing_t smoothing)
|
||||
{
|
||||
model.fit(dataset, weights, features, className, states);
|
||||
model.fit(dataset, weights, features, className, states, smoothing);
|
||||
}
|
||||
// X is nxm where n is the number of features and m the number of samples
|
||||
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
|
||||
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
dataset = X;
|
||||
buildDataset(y);
|
||||
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
|
||||
return build(features, className, states, weights);
|
||||
return build(features, className, states, weights, smoothing);
|
||||
}
|
||||
// X is nxm where n is the number of features and m the number of samples
|
||||
Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
|
||||
Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
|
||||
for (int i = 0; i < X.size(); ++i) {
|
||||
@ -63,18 +63,18 @@ namespace bayesnet {
|
||||
auto ytmp = torch::tensor(y, torch::kInt32);
|
||||
buildDataset(ytmp);
|
||||
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
|
||||
return build(features, className, states, weights);
|
||||
return build(features, className, states, weights, smoothing);
|
||||
}
|
||||
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
|
||||
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
this->dataset = dataset;
|
||||
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
|
||||
return build(features, className, states, weights);
|
||||
return build(features, className, states, weights, smoothing);
|
||||
}
|
||||
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
|
||||
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
this->dataset = dataset;
|
||||
return build(features, className, states, weights);
|
||||
return build(features, className, states, weights, smoothing);
|
||||
}
|
||||
void Classifier::checkFitParameters()
|
||||
{
|
||||
|
@ -8,7 +8,6 @@
|
||||
#define CLASSIFIER_H
|
||||
#include <torch/torch.h>
|
||||
#include "bayesnet/utils/BayesMetrics.h"
|
||||
#include "bayesnet/network/Network.h"
|
||||
#include "bayesnet/BaseClassifier.h"
|
||||
|
||||
namespace bayesnet {
|
||||
@ -16,10 +15,10 @@ namespace bayesnet {
|
||||
public:
|
||||
Classifier(Network model);
|
||||
virtual ~Classifier() = default;
|
||||
Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
|
||||
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
|
||||
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
|
||||
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override;
|
||||
Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
void addNodes();
|
||||
int getNumberOfNodes() const override;
|
||||
int getNumberOfEdges() const override;
|
||||
@ -51,10 +50,10 @@ namespace bayesnet {
|
||||
std::vector<std::string> notes; // Used to store messages occurred during the fit process
|
||||
void checkFitParameters();
|
||||
virtual void buildModel(const torch::Tensor& weights) = 0;
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
void buildDataset(torch::Tensor& y);
|
||||
private:
|
||||
Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
|
||||
Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
namespace bayesnet {
|
||||
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
|
||||
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
checkInput(X_, y_);
|
||||
features = features_;
|
||||
@ -19,7 +19,7 @@ namespace bayesnet {
|
||||
states = fit_local_discretization(y);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
|
||||
KDB::fit(dataset, features, className, states);
|
||||
KDB::fit(dataset, features, className, states, smoothing);
|
||||
states = localDiscretizationProposal(states, model);
|
||||
return *this;
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ namespace bayesnet {
|
||||
public:
|
||||
explicit KDBLd(int k);
|
||||
virtual ~KDBLd() = default;
|
||||
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
std::vector<std::string> graph(const std::string& name = "KDB") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
static inline std::string version() { return "0.0.1"; };
|
||||
|
@ -4,7 +4,6 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <ArffFiles.h>
|
||||
#include "Proposal.h"
|
||||
|
||||
namespace bayesnet {
|
||||
@ -54,8 +53,7 @@ namespace bayesnet {
|
||||
yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
|
||||
}
|
||||
}
|
||||
auto arff = ArffFiles();
|
||||
auto yxv = arff.factorize(yJoinParents);
|
||||
auto yxv = factorize(yJoinParents);
|
||||
auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
|
||||
auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
|
||||
discretizers[feature]->fit(xvf, yxv);
|
||||
@ -72,7 +70,7 @@ namespace bayesnet {
|
||||
states[pFeatures[index]] = xStates;
|
||||
}
|
||||
const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
|
||||
model.fit(pDataset, weights, pFeatures, pClassName, states);
|
||||
model.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::ORIGINAL);
|
||||
}
|
||||
return states;
|
||||
}
|
||||
@ -113,4 +111,19 @@ namespace bayesnet {
|
||||
}
|
||||
return Xtd;
|
||||
}
|
||||
std::vector<int> Proposal::factorize(const std::vector<std::string>& labels_t)
|
||||
{
|
||||
std::vector<int> yy;
|
||||
yy.reserve(labels_t.size());
|
||||
std::map<std::string, int> labelMap;
|
||||
int i = 0;
|
||||
for (const std::string& label : labels_t) {
|
||||
if (labelMap.find(label) == labelMap.end()) {
|
||||
labelMap[label] = i++;
|
||||
bool allDigits = std::all_of(label.begin(), label.end(), ::isdigit);
|
||||
}
|
||||
yy.push_back(labelMap[label]);
|
||||
}
|
||||
return yy;
|
||||
}
|
||||
}
|
@ -27,6 +27,7 @@ namespace bayesnet {
|
||||
torch::Tensor y; // y discrete nx1 tensor
|
||||
map<std::string, mdlp::CPPFImdlp*> discretizers;
|
||||
private:
|
||||
std::vector<int> factorize(const std::vector<std::string>& labels_t);
|
||||
torch::Tensor& pDataset; // (n+1)xm tensor
|
||||
std::vector<std::string>& pFeatures;
|
||||
std::string& pClassName;
|
||||
|
@ -8,25 +8,25 @@
|
||||
|
||||
namespace bayesnet {
|
||||
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
|
||||
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
checkInput(X_, y_);
|
||||
Xf = X_;
|
||||
y = y_;
|
||||
return commonFit(features_, className_, states_);
|
||||
return commonFit(features_, className_, states_, smoothing);
|
||||
}
|
||||
|
||||
SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
if (!torch::is_floating_point(dataset)) {
|
||||
throw std::runtime_error("Dataset must be a floating point tensor");
|
||||
}
|
||||
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
|
||||
y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
|
||||
return commonFit(features_, className_, states_);
|
||||
return commonFit(features_, className_, states_, smoothing);
|
||||
}
|
||||
|
||||
SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
features = features_;
|
||||
className = className_;
|
||||
@ -34,7 +34,7 @@ namespace bayesnet {
|
||||
states = fit_local_discretization(y);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
|
||||
SPODE::fit(dataset, features, className, states);
|
||||
SPODE::fit(dataset, features, className, states, smoothing);
|
||||
states = localDiscretizationProposal(states, model);
|
||||
return *this;
|
||||
}
|
||||
|
@ -14,10 +14,10 @@ namespace bayesnet {
|
||||
public:
|
||||
explicit SPODELd(int root);
|
||||
virtual ~SPODELd() = default;
|
||||
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states);
|
||||
std::vector<std::string> graph(const std::string& name = "SPODE") const override;
|
||||
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||
std::vector<std::string> graph(const std::string& name = "SPODELd") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
static inline std::string version() { return "0.0.1"; };
|
||||
};
|
||||
|
38
bayesnet/classifiers/SPnDE.cc
Normal file
38
bayesnet/classifiers/SPnDE.cc
Normal file
@ -0,0 +1,38 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "SPnDE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
SPnDE::SPnDE(std::vector<int> parents) : Classifier(Network()), parents(parents) {}
|
||||
|
||||
void SPnDE::buildModel(const torch::Tensor& weights)
|
||||
{
|
||||
// 0. Add all nodes to the model
|
||||
addNodes();
|
||||
std::vector<int> attributes;
|
||||
for (int i = 0; i < static_cast<int>(features.size()); ++i) {
|
||||
if (std::find(parents.begin(), parents.end(), i) == parents.end()) {
|
||||
attributes.push_back(i);
|
||||
}
|
||||
}
|
||||
// 1. Add edges from the class node to all other nodes
|
||||
// 2. Add edges from the parents nodes to all other nodes
|
||||
for (const auto& attribute : attributes) {
|
||||
model.addEdge(className, features[attribute]);
|
||||
for (const auto& root : parents) {
|
||||
|
||||
model.addEdge(features[root], features[attribute]);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<std::string> SPnDE::graph(const std::string& name) const
|
||||
{
|
||||
return model.graph(name);
|
||||
}
|
||||
|
||||
}
|
26
bayesnet/classifiers/SPnDE.h
Normal file
26
bayesnet/classifiers/SPnDE.h
Normal file
@ -0,0 +1,26 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef SPnDE_H
|
||||
#define SPnDE_H
|
||||
#include <vector>
|
||||
#include "Classifier.h"
|
||||
|
||||
namespace bayesnet {
|
||||
class SPnDE : public Classifier {
|
||||
public:
|
||||
explicit SPnDE(std::vector<int> parents);
|
||||
virtual ~SPnDE() = default;
|
||||
std::vector<std::string> graph(const std::string& name = "SPnDE") const override;
|
||||
protected:
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
private:
|
||||
std::vector<int> parents;
|
||||
|
||||
|
||||
};
|
||||
}
|
||||
#endif
|
@ -8,7 +8,7 @@
|
||||
|
||||
namespace bayesnet {
|
||||
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
|
||||
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
checkInput(X_, y_);
|
||||
features = features_;
|
||||
@ -19,7 +19,7 @@ namespace bayesnet {
|
||||
states = fit_local_discretization(y);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
|
||||
TAN::fit(dataset, features, className, states);
|
||||
TAN::fit(dataset, features, className, states, smoothing);
|
||||
states = localDiscretizationProposal(states, model);
|
||||
return *this;
|
||||
|
||||
|
@ -15,10 +15,9 @@ namespace bayesnet {
|
||||
public:
|
||||
TANLd();
|
||||
virtual ~TANLd() = default;
|
||||
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
|
||||
std::vector<std::string> graph(const std::string& name = "TAN") const override;
|
||||
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||
std::vector<std::string> graph(const std::string& name = "TANLd") const override;
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
static inline std::string version() { return "0.0.1"; };
|
||||
};
|
||||
}
|
||||
#endif // !TANLD_H
|
40
bayesnet/ensembles/A2DE.cc
Normal file
40
bayesnet/ensembles/A2DE.cc
Normal file
@ -0,0 +1,40 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "A2DE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
A2DE::A2DE(bool predict_voting) : Ensemble(predict_voting)
|
||||
{
|
||||
validHyperparameters = { "predict_voting" };
|
||||
}
|
||||
void A2DE::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("predict_voting")) {
|
||||
predict_voting = hyperparameters["predict_voting"];
|
||||
hyperparameters.erase("predict_voting");
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
void A2DE::buildModel(const torch::Tensor& weights)
|
||||
{
|
||||
models.clear();
|
||||
significanceModels.clear();
|
||||
for (int i = 0; i < features.size() - 1; ++i) {
|
||||
for (int j = i + 1; j < features.size(); ++j) {
|
||||
auto model = std::make_unique<SPnDE>(std::vector<int>({ i, j }));
|
||||
models.push_back(std::move(model));
|
||||
}
|
||||
}
|
||||
n_models = static_cast<unsigned>(models.size());
|
||||
significanceModels = std::vector<double>(n_models, 1.0);
|
||||
}
|
||||
std::vector<std::string> A2DE::graph(const std::string& title) const
|
||||
{
|
||||
return Ensemble::graph(title);
|
||||
}
|
||||
}
|
22
bayesnet/ensembles/A2DE.h
Normal file
22
bayesnet/ensembles/A2DE.h
Normal file
@ -0,0 +1,22 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef A2DE_H
|
||||
#define A2DE_H
|
||||
#include "bayesnet/classifiers/SPnDE.h"
|
||||
#include "Ensemble.h"
|
||||
namespace bayesnet {
|
||||
class A2DE : public Ensemble {
|
||||
public:
|
||||
A2DE(bool predict_voting = false);
|
||||
virtual ~A2DE() {};
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
std::vector<std::string> graph(const std::string& title = "A2DE") const override;
|
||||
protected:
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
};
|
||||
}
|
||||
#endif
|
@ -10,7 +10,7 @@ namespace bayesnet {
|
||||
AODELd::AODELd(bool predict_voting) : Ensemble(predict_voting), Proposal(dataset, features, className)
|
||||
{
|
||||
}
|
||||
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
|
||||
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||
{
|
||||
checkInput(X_, y_);
|
||||
features = features_;
|
||||
@ -20,8 +20,9 @@ namespace bayesnet {
|
||||
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
|
||||
states = fit_local_discretization(y);
|
||||
// We have discretized the input data
|
||||
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
|
||||
Ensemble::fit(dataset, features, className, states);
|
||||
// 1st we need to fit the model to build the normal AODE structure, Ensemble::fit
|
||||
// calls buildModel to initialize the base models
|
||||
Ensemble::fit(dataset, features, className, states, smoothing);
|
||||
return *this;
|
||||
|
||||
}
|
||||
@ -34,10 +35,10 @@ namespace bayesnet {
|
||||
n_models = models.size();
|
||||
significanceModels = std::vector<double>(n_models, 1.0);
|
||||
}
|
||||
void AODELd::trainModel(const torch::Tensor& weights)
|
||||
void AODELd::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
for (const auto& model : models) {
|
||||
model->fit(Xf, y, features, className, states);
|
||||
model->fit(Xf, y, features, className, states, smoothing);
|
||||
}
|
||||
}
|
||||
std::vector<std::string> AODELd::graph(const std::string& name) const
|
||||
|
@ -15,10 +15,10 @@ namespace bayesnet {
|
||||
public:
|
||||
AODELd(bool predict_voting = true);
|
||||
virtual ~AODELd() = default;
|
||||
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
|
||||
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing) override;
|
||||
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
|
||||
protected:
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
};
|
||||
}
|
||||
|
246
bayesnet/ensembles/Boost.cc
Normal file
246
bayesnet/ensembles/Boost.cc
Normal file
@ -0,0 +1,246 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
#include <folding.hpp>
|
||||
#include "bayesnet/feature_selection/CFS.h"
|
||||
#include "bayesnet/feature_selection/FCBF.h"
|
||||
#include "bayesnet/feature_selection/IWSS.h"
|
||||
#include "Boost.h"
|
||||
|
||||
namespace bayesnet {
|
||||
Boost::Boost(bool predict_voting) : Ensemble(predict_voting)
|
||||
{
|
||||
validHyperparameters = { "order", "convergence", "convergence_best", "bisection", "threshold", "maxTolerance",
|
||||
"predict_voting", "select_features", "block_update" };
|
||||
}
|
||||
void Boost::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("order")) {
|
||||
std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
|
||||
order_algorithm = hyperparameters["order"];
|
||||
if (std::find(algos.begin(), algos.end(), order_algorithm) == algos.end()) {
|
||||
throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC + ", " + Orders.RAND + "]");
|
||||
}
|
||||
hyperparameters.erase("order");
|
||||
}
|
||||
if (hyperparameters.contains("convergence")) {
|
||||
convergence = hyperparameters["convergence"];
|
||||
hyperparameters.erase("convergence");
|
||||
}
|
||||
if (hyperparameters.contains("convergence_best")) {
|
||||
convergence_best = hyperparameters["convergence_best"];
|
||||
hyperparameters.erase("convergence_best");
|
||||
}
|
||||
if (hyperparameters.contains("bisection")) {
|
||||
bisection = hyperparameters["bisection"];
|
||||
hyperparameters.erase("bisection");
|
||||
}
|
||||
if (hyperparameters.contains("threshold")) {
|
||||
threshold = hyperparameters["threshold"];
|
||||
hyperparameters.erase("threshold");
|
||||
}
|
||||
if (hyperparameters.contains("maxTolerance")) {
|
||||
maxTolerance = hyperparameters["maxTolerance"];
|
||||
if (maxTolerance < 1 || maxTolerance > 4)
|
||||
throw std::invalid_argument("Invalid maxTolerance value, must be greater in [1, 4]");
|
||||
hyperparameters.erase("maxTolerance");
|
||||
}
|
||||
if (hyperparameters.contains("predict_voting")) {
|
||||
predict_voting = hyperparameters["predict_voting"];
|
||||
hyperparameters.erase("predict_voting");
|
||||
}
|
||||
if (hyperparameters.contains("select_features")) {
|
||||
auto selectedAlgorithm = hyperparameters["select_features"];
|
||||
std::vector<std::string> algos = { SelectFeatures.IWSS, SelectFeatures.CFS, SelectFeatures.FCBF };
|
||||
selectFeatures = true;
|
||||
select_features_algorithm = selectedAlgorithm;
|
||||
if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
|
||||
throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " + SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
|
||||
}
|
||||
hyperparameters.erase("select_features");
|
||||
}
|
||||
if (hyperparameters.contains("block_update")) {
|
||||
block_update = hyperparameters["block_update"];
|
||||
hyperparameters.erase("block_update");
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
void Boost::buildModel(const torch::Tensor& weights)
|
||||
{
|
||||
// Models shall be built in trainModel
|
||||
models.clear();
|
||||
significanceModels.clear();
|
||||
n_models = 0;
|
||||
// Prepare the validation dataset
|
||||
auto y_ = dataset.index({ -1, "..." });
|
||||
if (convergence) {
|
||||
// Prepare train & validation sets from train data
|
||||
auto fold = folding::StratifiedKFold(5, y_, 271);
|
||||
auto [train, test] = fold.getFold(0);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
// Get train and validation sets
|
||||
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
|
||||
y_train = dataset.index({ -1, train_t });
|
||||
X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
|
||||
y_test = dataset.index({ -1, test_t });
|
||||
dataset = X_train;
|
||||
m = X_train.size(1);
|
||||
auto n_classes = states.at(className).size();
|
||||
// Build dataset with train data
|
||||
buildDataset(y_train);
|
||||
metrics = Metrics(dataset, features, className, n_classes);
|
||||
} else {
|
||||
// Use all data to train
|
||||
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
|
||||
y_train = y_;
|
||||
}
|
||||
}
|
||||
std::vector<int> Boost::featureSelection(torch::Tensor& weights_)
|
||||
{
|
||||
int maxFeatures = 0;
|
||||
if (select_features_algorithm == SelectFeatures.CFS) {
|
||||
featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
|
||||
} else if (select_features_algorithm == SelectFeatures.IWSS) {
|
||||
if (threshold < 0 || threshold >0.5) {
|
||||
throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.IWSS + " [0, 0.5]");
|
||||
}
|
||||
featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||
} else if (select_features_algorithm == SelectFeatures.FCBF) {
|
||||
if (threshold < 1e-7 || threshold > 1) {
|
||||
throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.FCBF + " [1e-7, 1]");
|
||||
}
|
||||
featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||
}
|
||||
featureSelector->fit();
|
||||
auto featuresUsed = featureSelector->getFeatures();
|
||||
delete featureSelector;
|
||||
return featuresUsed;
|
||||
}
|
||||
std::tuple<torch::Tensor&, double, bool> Boost::update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
|
||||
{
|
||||
bool terminate = false;
|
||||
double alpha_t = 0;
|
||||
auto mask_wrong = ypred != ytrain;
|
||||
auto mask_right = ypred == ytrain;
|
||||
auto masked_weights = weights * mask_wrong.to(weights.dtype());
|
||||
double epsilon_t = masked_weights.sum().item<double>();
|
||||
if (epsilon_t > 0.5) {
|
||||
// Inverse the weights policy (plot ln(wt))
|
||||
// "In each round of AdaBoost, there is a sanity check to ensure that the current base
|
||||
// learner is better than random guess" (Zhi-Hua Zhou, 2012)
|
||||
terminate = true;
|
||||
} else {
|
||||
double wt = (1 - epsilon_t) / epsilon_t;
|
||||
alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
|
||||
// Step 3.2: Update weights for next classifier
|
||||
// Step 3.2.1: Update weights of wrong samples
|
||||
weights += mask_wrong.to(weights.dtype()) * exp(alpha_t) * weights;
|
||||
// Step 3.2.2: Update weights of right samples
|
||||
weights += mask_right.to(weights.dtype()) * exp(-alpha_t) * weights;
|
||||
// Step 3.3: Normalise the weights
|
||||
double totalWeights = torch::sum(weights).item<double>();
|
||||
weights = weights / totalWeights;
|
||||
}
|
||||
return { weights, alpha_t, terminate };
|
||||
}
|
||||
std::tuple<torch::Tensor&, double, bool> Boost::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
|
||||
{
|
||||
/* Update Block algorithm
|
||||
k = # of models in block
|
||||
n_models = # of models in ensemble to make predictions
|
||||
n_models_bak = # models saved
|
||||
models = vector of models to make predictions
|
||||
models_bak = models not used to make predictions
|
||||
significances_bak = backup of significances vector
|
||||
|
||||
Case list
|
||||
A) k = 1, n_models = 1 => n = 0 , n_models = n + k
|
||||
B) k = 1, n_models = n + 1 => n_models = n + k
|
||||
C) k > 1, n_models = k + 1 => n= 1, n_models = n + k
|
||||
D) k > 1, n_models = k => n = 0, n_models = n + k
|
||||
E) k > 1, n_models = k + n => n_models = n + k
|
||||
|
||||
A, D) n=0, k > 0, n_models == k
|
||||
1. n_models_bak <- n_models
|
||||
2. significances_bak <- significances
|
||||
3. significances = vector(k, 1)
|
||||
4. Don’t move any classifiers out of models
|
||||
5. n_models <- k
|
||||
6. Make prediction, compute alpha, update weights
|
||||
7. Don’t restore any classifiers to models
|
||||
8. significances <- significances_bak
|
||||
9. Update last k significances
|
||||
10. n_models <- n_models_bak
|
||||
|
||||
B, C, E) n > 0, k > 0, n_models == n + k
|
||||
1. n_models_bak <- n_models
|
||||
2. significances_bak <- significances
|
||||
3. significances = vector(k, 1)
|
||||
4. Move first n classifiers to models_bak
|
||||
5. n_models <- k
|
||||
6. Make prediction, compute alpha, update weights
|
||||
7. Insert classifiers in models_bak to be the first n models
|
||||
8. significances <- significances_bak
|
||||
9. Update last k significances
|
||||
10. n_models <- n_models_bak
|
||||
*/
|
||||
//
|
||||
// Make predict with only the last k models
|
||||
//
|
||||
std::unique_ptr<Classifier> model;
|
||||
std::vector<std::unique_ptr<Classifier>> models_bak;
|
||||
// 1. n_models_bak <- n_models 2. significances_bak <- significances
|
||||
auto significance_bak = significanceModels;
|
||||
auto n_models_bak = n_models;
|
||||
// 3. significances = vector(k, 1)
|
||||
significanceModels = std::vector<double>(k, 1.0);
|
||||
// 4. Move first n classifiers to models_bak
|
||||
// backup the first n_models - k models (if n_models == k, don't backup any)
|
||||
for (int i = 0; i < n_models - k; ++i) {
|
||||
model = std::move(models[0]);
|
||||
models.erase(models.begin());
|
||||
models_bak.push_back(std::move(model));
|
||||
}
|
||||
assert(models.size() == k);
|
||||
// 5. n_models <- k
|
||||
n_models = k;
|
||||
// 6. Make prediction, compute alpha, update weights
|
||||
auto ypred = predict(X_train);
|
||||
//
|
||||
// Update weights
|
||||
//
|
||||
double alpha_t;
|
||||
bool terminate;
|
||||
std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
|
||||
//
|
||||
// Restore the models if needed
|
||||
//
|
||||
// 7. Insert classifiers in models_bak to be the first n models
|
||||
// if n_models_bak == k, don't restore any, because none of them were moved
|
||||
if (k != n_models_bak) {
|
||||
// Insert in the same order as they were extracted
|
||||
int bak_size = models_bak.size();
|
||||
for (int i = 0; i < bak_size; ++i) {
|
||||
model = std::move(models_bak[bak_size - 1 - i]);
|
||||
models_bak.erase(models_bak.end() - 1);
|
||||
models.insert(models.begin(), std::move(model));
|
||||
}
|
||||
}
|
||||
// 8. significances <- significances_bak
|
||||
significanceModels = significance_bak;
|
||||
//
|
||||
// Update the significance of the last k models
|
||||
//
|
||||
// 9. Update last k significances
|
||||
for (int i = 0; i < k; ++i) {
|
||||
significanceModels[n_models_bak - k + i] = alpha_t;
|
||||
}
|
||||
// 10. n_models <- n_models_bak
|
||||
n_models = n_models_bak;
|
||||
return { weights, alpha_t, terminate };
|
||||
}
|
||||
}
|
52
bayesnet/ensembles/Boost.h
Normal file
52
bayesnet/ensembles/Boost.h
Normal file
@ -0,0 +1,52 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef BOOST_H
|
||||
#define BOOST_H
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <torch/torch.h>
|
||||
#include "Ensemble.h"
|
||||
#include "bayesnet/feature_selection/FeatureSelect.h"
|
||||
namespace bayesnet {
|
||||
const struct {
|
||||
std::string CFS = "CFS";
|
||||
std::string FCBF = "FCBF";
|
||||
std::string IWSS = "IWSS";
|
||||
}SelectFeatures;
|
||||
const struct {
|
||||
std::string ASC = "asc";
|
||||
std::string DESC = "desc";
|
||||
std::string RAND = "rand";
|
||||
}Orders;
|
||||
class Boost : public Ensemble {
|
||||
public:
|
||||
explicit Boost(bool predict_voting = false);
|
||||
virtual ~Boost() = default;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
||||
protected:
|
||||
std::vector<int> featureSelection(torch::Tensor& weights_);
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights);
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights);
|
||||
torch::Tensor X_train, y_train, X_test, y_test;
|
||||
// Hyperparameters
|
||||
bool bisection = true; // if true, use bisection stratety to add k models at once to the ensemble
|
||||
int maxTolerance = 3;
|
||||
std::string order_algorithm; // order to process the KBest features asc, desc, rand
|
||||
bool convergence = true; //if true, stop when the model does not improve
|
||||
bool convergence_best = false; // wether to keep the best accuracy to the moment or the last accuracy as prior accuracy
|
||||
bool selectFeatures = false; // if true, use feature selection
|
||||
std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
|
||||
FeatureSelect* featureSelector = nullptr;
|
||||
double threshold = -1;
|
||||
bool block_update = false;
|
||||
|
||||
};
|
||||
}
|
||||
#endif
|
170
bayesnet/ensembles/BoostA2DE.cc
Normal file
170
bayesnet/ensembles/BoostA2DE.cc
Normal file
@ -0,0 +1,170 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <set>
|
||||
#include <functional>
|
||||
#include <limits.h>
|
||||
#include <tuple>
|
||||
#include <folding.hpp>
|
||||
#include "bayesnet/feature_selection/CFS.h"
|
||||
#include "bayesnet/feature_selection/FCBF.h"
|
||||
#include "bayesnet/feature_selection/IWSS.h"
|
||||
#include "BoostA2DE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
BoostA2DE::BoostA2DE(bool predict_voting) : Boost(predict_voting)
|
||||
{
|
||||
}
|
||||
std::vector<int> BoostA2DE::initializeModels(const Smoothing_t smoothing)
|
||||
{
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
std::vector<int> featuresSelected = featureSelection(weights_);
|
||||
if (featuresSelected.size() < 2) {
|
||||
notes.push_back("No features selected in initialization");
|
||||
status = ERROR;
|
||||
return std::vector<int>();
|
||||
}
|
||||
for (int i = 0; i < featuresSelected.size() - 1; i++) {
|
||||
for (int j = i + 1; j < featuresSelected.size(); j++) {
|
||||
auto parents = { featuresSelected[i], featuresSelected[j] };
|
||||
std::unique_ptr<Classifier> model = std::make_unique<SPnDE>(parents);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(1.0); // They will be updated later in trainModel
|
||||
n_models++;
|
||||
}
|
||||
}
|
||||
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||
return featuresSelected;
|
||||
}
|
||||
void BoostA2DE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
//
|
||||
// Logging setup
|
||||
//
|
||||
// loguru::set_thread_name("BoostA2DE");
|
||||
// loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
|
||||
// loguru::add_file("boostA2DE.log", loguru::Truncate, loguru::Verbosity_MAX);
|
||||
|
||||
// Algorithm based on the adaboost algorithm for classification
|
||||
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
|
||||
fitted = true;
|
||||
double alpha_t = 0;
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels(smoothing);
|
||||
if (featuresUsed.size() == 0) {
|
||||
return;
|
||||
}
|
||||
auto ypred = predict(X_train);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
// Update significance of the models
|
||||
for (int i = 0; i < n_models; ++i) {
|
||||
significanceModels[i] = alpha_t;
|
||||
}
|
||||
if (finished) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
int numItemsPack = 0; // The counter of the models inserted in the current pack
|
||||
// Variables to control the accuracy finish condition
|
||||
double priorAccuracy = 0.0;
|
||||
double improvement = 1.0;
|
||||
double convergence_threshold = 1e-4;
|
||||
int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
|
||||
// Step 0: Set the finish condition
|
||||
// epsilon sub t > 0.5 => inverse the weights policy
|
||||
// validation error is not decreasing
|
||||
// run out of features
|
||||
bool ascending = order_algorithm == Orders.ASC;
|
||||
std::mt19937 g{ 173 };
|
||||
std::vector<std::pair<int, int>> pairSelection;
|
||||
while (!finished) {
|
||||
// Step 1: Build ranking with mutual information
|
||||
pairSelection = metrics.SelectKPairs(weights_, featuresUsed, ascending, 0); // Get all the pairs sorted
|
||||
if (order_algorithm == Orders.RAND) {
|
||||
std::shuffle(pairSelection.begin(), pairSelection.end(), g);
|
||||
}
|
||||
int k = bisection ? pow(2, tolerance) : 1;
|
||||
int counter = 0; // The model counter of the current pack
|
||||
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
|
||||
while (counter++ < k && pairSelection.size() > 0) {
|
||||
auto feature_pair = pairSelection[0];
|
||||
pairSelection.erase(pairSelection.begin());
|
||||
std::unique_ptr<Classifier> model;
|
||||
model = std::make_unique<SPnDE>(std::vector<int>({ feature_pair.first, feature_pair.second }));
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
alpha_t = 0.0;
|
||||
if (!block_update) {
|
||||
auto ypred = model->predict(X_train);
|
||||
// Step 3.1: Compute the classifier amout of say
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
}
|
||||
// Step 3.4: Store classifier and its accuracy to weigh its future vote
|
||||
numItemsPack++;
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(alpha_t);
|
||||
n_models++;
|
||||
// VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
|
||||
}
|
||||
if (block_update) {
|
||||
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
|
||||
}
|
||||
if (convergence && !finished) {
|
||||
auto y_val_predict = predict(X_test);
|
||||
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
|
||||
if (priorAccuracy == 0) {
|
||||
priorAccuracy = accuracy;
|
||||
} else {
|
||||
improvement = accuracy - priorAccuracy;
|
||||
}
|
||||
if (improvement < convergence_threshold) {
|
||||
// VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance++;
|
||||
} else {
|
||||
// VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance = 0; // Reset the counter if the model performs better
|
||||
numItemsPack = 0;
|
||||
}
|
||||
if (convergence_best) {
|
||||
// Keep the best accuracy until now as the prior accuracy
|
||||
priorAccuracy = std::max(accuracy, priorAccuracy);
|
||||
} else {
|
||||
// Keep the last accuray obtained as the prior accuracy
|
||||
priorAccuracy = accuracy;
|
||||
}
|
||||
}
|
||||
// VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
|
||||
finished = finished || tolerance > maxTolerance || pairSelection.size() == 0;
|
||||
}
|
||||
if (tolerance > maxTolerance) {
|
||||
if (numItemsPack < n_models) {
|
||||
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
|
||||
for (int i = 0; i < numItemsPack; ++i) {
|
||||
significanceModels.pop_back();
|
||||
models.pop_back();
|
||||
n_models--;
|
||||
}
|
||||
} else {
|
||||
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
||||
}
|
||||
}
|
||||
if (pairSelection.size() > 0) {
|
||||
notes.push_back("Pairs not used in train: " + std::to_string(pairSelection.size()));
|
||||
status = WARNING;
|
||||
}
|
||||
notes.push_back("Number of models: " + std::to_string(n_models));
|
||||
}
|
||||
std::vector<std::string> BoostA2DE::graph(const std::string& title) const
|
||||
{
|
||||
return Ensemble::graph(title);
|
||||
}
|
||||
}
|
25
bayesnet/ensembles/BoostA2DE.h
Normal file
25
bayesnet/ensembles/BoostA2DE.h
Normal file
@ -0,0 +1,25 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef BOOSTA2DE_H
|
||||
#define BOOSTA2DE_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "bayesnet/classifiers/SPnDE.h"
|
||||
#include "Boost.h"
|
||||
namespace bayesnet {
|
||||
class BoostA2DE : public Boost {
|
||||
public:
|
||||
explicit BoostA2DE(bool predict_voting = false);
|
||||
virtual ~BoostA2DE() = default;
|
||||
std::vector<std::string> graph(const std::string& title = "BoostA2DE") const override;
|
||||
protected:
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
private:
|
||||
std::vector<int> initializeModels(const Smoothing_t smoothing);
|
||||
};
|
||||
}
|
||||
#endif
|
@ -4,276 +4,41 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <random>
|
||||
#include <set>
|
||||
#include <functional>
|
||||
#include <limits.h>
|
||||
#include <tuple>
|
||||
#include <folding.hpp>
|
||||
#include "bayesnet/feature_selection/CFS.h"
|
||||
#include "bayesnet/feature_selection/FCBF.h"
|
||||
#include "bayesnet/feature_selection/IWSS.h"
|
||||
#include "BoostAODE.h"
|
||||
|
||||
#include "bayesnet/utils/loguru.cpp"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
BoostAODE::BoostAODE(bool predict_voting) : Ensemble(predict_voting)
|
||||
BoostAODE::BoostAODE(bool predict_voting) : Boost(predict_voting)
|
||||
{
|
||||
validHyperparameters = {
|
||||
"maxModels", "bisection", "order", "convergence", "threshold",
|
||||
"select_features", "maxTolerance", "predict_voting", "block_update"
|
||||
};
|
||||
|
||||
}
|
||||
void BoostAODE::buildModel(const torch::Tensor& weights)
|
||||
std::vector<int> BoostAODE::initializeModels(const Smoothing_t smoothing)
|
||||
{
|
||||
// Models shall be built in trainModel
|
||||
models.clear();
|
||||
significanceModels.clear();
|
||||
n_models = 0;
|
||||
// Prepare the validation dataset
|
||||
auto y_ = dataset.index({ -1, "..." });
|
||||
if (convergence) {
|
||||
// Prepare train & validation sets from train data
|
||||
auto fold = folding::StratifiedKFold(5, y_, 271);
|
||||
auto [train, test] = fold.getFold(0);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
// Get train and validation sets
|
||||
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
|
||||
y_train = dataset.index({ -1, train_t });
|
||||
X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
|
||||
y_test = dataset.index({ -1, test_t });
|
||||
dataset = X_train;
|
||||
m = X_train.size(1);
|
||||
auto n_classes = states.at(className).size();
|
||||
// Build dataset with train data
|
||||
buildDataset(y_train);
|
||||
metrics = Metrics(dataset, features, className, n_classes);
|
||||
} else {
|
||||
// Use all data to train
|
||||
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
|
||||
y_train = y_;
|
||||
}
|
||||
}
|
||||
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
|
||||
{
|
||||
auto hyperparameters = hyperparameters_;
|
||||
if (hyperparameters.contains("order")) {
|
||||
std::vector<std::string> algos = { Orders.ASC, Orders.DESC, Orders.RAND };
|
||||
order_algorithm = hyperparameters["order"];
|
||||
if (std::find(algos.begin(), algos.end(), order_algorithm) == algos.end()) {
|
||||
throw std::invalid_argument("Invalid order algorithm, valid values [" + Orders.ASC + ", " + Orders.DESC + ", " + Orders.RAND + "]");
|
||||
}
|
||||
hyperparameters.erase("order");
|
||||
}
|
||||
if (hyperparameters.contains("convergence")) {
|
||||
convergence = hyperparameters["convergence"];
|
||||
hyperparameters.erase("convergence");
|
||||
}
|
||||
if (hyperparameters.contains("bisection")) {
|
||||
bisection = hyperparameters["bisection"];
|
||||
hyperparameters.erase("bisection");
|
||||
}
|
||||
if (hyperparameters.contains("threshold")) {
|
||||
threshold = hyperparameters["threshold"];
|
||||
hyperparameters.erase("threshold");
|
||||
}
|
||||
if (hyperparameters.contains("maxTolerance")) {
|
||||
maxTolerance = hyperparameters["maxTolerance"];
|
||||
if (maxTolerance < 1 || maxTolerance > 4)
|
||||
throw std::invalid_argument("Invalid maxTolerance value, must be greater in [1, 4]");
|
||||
hyperparameters.erase("maxTolerance");
|
||||
}
|
||||
if (hyperparameters.contains("predict_voting")) {
|
||||
predict_voting = hyperparameters["predict_voting"];
|
||||
hyperparameters.erase("predict_voting");
|
||||
}
|
||||
if (hyperparameters.contains("select_features")) {
|
||||
auto selectedAlgorithm = hyperparameters["select_features"];
|
||||
std::vector<std::string> algos = { SelectFeatures.IWSS, SelectFeatures.CFS, SelectFeatures.FCBF };
|
||||
selectFeatures = true;
|
||||
select_features_algorithm = selectedAlgorithm;
|
||||
if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
|
||||
throw std::invalid_argument("Invalid selectFeatures value, valid values [" + SelectFeatures.IWSS + ", " + SelectFeatures.CFS + ", " + SelectFeatures.FCBF + "]");
|
||||
}
|
||||
hyperparameters.erase("select_features");
|
||||
}
|
||||
if (hyperparameters.contains("block_update")) {
|
||||
block_update = hyperparameters["block_update"];
|
||||
hyperparameters.erase("block_update");
|
||||
}
|
||||
Classifier::setHyperparameters(hyperparameters);
|
||||
}
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights(torch::Tensor& ytrain, torch::Tensor& ypred, torch::Tensor& weights)
|
||||
{
|
||||
bool terminate = false;
|
||||
double alpha_t = 0;
|
||||
auto mask_wrong = ypred != ytrain;
|
||||
auto mask_right = ypred == ytrain;
|
||||
auto masked_weights = weights * mask_wrong.to(weights.dtype());
|
||||
double epsilon_t = masked_weights.sum().item<double>();
|
||||
if (epsilon_t > 0.5) {
|
||||
// Inverse the weights policy (plot ln(wt))
|
||||
// "In each round of AdaBoost, there is a sanity check to ensure that the current base
|
||||
// learner is better than random guess" (Zhi-Hua Zhou, 2012)
|
||||
terminate = true;
|
||||
} else {
|
||||
double wt = (1 - epsilon_t) / epsilon_t;
|
||||
alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
|
||||
// Step 3.2: Update weights for next classifier
|
||||
// Step 3.2.1: Update weights of wrong samples
|
||||
weights += mask_wrong.to(weights.dtype()) * exp(alpha_t) * weights;
|
||||
// Step 3.2.2: Update weights of right samples
|
||||
weights += mask_right.to(weights.dtype()) * exp(-alpha_t) * weights;
|
||||
// Step 3.3: Normalise the weights
|
||||
double totalWeights = torch::sum(weights).item<double>();
|
||||
weights = weights / totalWeights;
|
||||
}
|
||||
return { weights, alpha_t, terminate };
|
||||
}
|
||||
std::tuple<torch::Tensor&, double, bool> BoostAODE::update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights)
|
||||
{
|
||||
/* Update Block algorithm
|
||||
k = # of models in block
|
||||
n_models = # of models in ensemble to make predictions
|
||||
n_models_bak = # models saved
|
||||
models = vector of models to make predictions
|
||||
models_bak = models not used to make predictions
|
||||
significances_bak = backup of significances vector
|
||||
|
||||
Case list
|
||||
A) k = 1, n_models = 1 => n = 0 , n_models = n + k
|
||||
B) k = 1, n_models = n + 1 => n_models = n + k
|
||||
C) k > 1, n_models = k + 1 => n= 1, n_models = n + k
|
||||
D) k > 1, n_models = k => n = 0, n_models = n + k
|
||||
E) k > 1, n_models = k + n => n_models = n + k
|
||||
|
||||
A, D) n=0, k > 0, n_models == k
|
||||
1. n_models_bak <- n_models
|
||||
2. significances_bak <- significances
|
||||
3. significances = vector(k, 1)
|
||||
4. Don’t move any classifiers out of models
|
||||
5. n_models <- k
|
||||
6. Make prediction, compute alpha, update weights
|
||||
7. Don’t restore any classifiers to models
|
||||
8. significances <- significances_bak
|
||||
9. Update last k significances
|
||||
10. n_models <- n_models_bak
|
||||
|
||||
B, C, E) n > 0, k > 0, n_models == n + k
|
||||
1. n_models_bak <- n_models
|
||||
2. significances_bak <- significances
|
||||
3. significances = vector(k, 1)
|
||||
4. Move first n classifiers to models_bak
|
||||
5. n_models <- k
|
||||
6. Make prediction, compute alpha, update weights
|
||||
7. Insert classifiers in models_bak to be the first n models
|
||||
8. significances <- significances_bak
|
||||
9. Update last k significances
|
||||
10. n_models <- n_models_bak
|
||||
*/
|
||||
//
|
||||
// Make predict with only the last k models
|
||||
//
|
||||
std::unique_ptr<Classifier> model;
|
||||
std::vector<std::unique_ptr<Classifier>> models_bak;
|
||||
// 1. n_models_bak <- n_models 2. significances_bak <- significances
|
||||
auto significance_bak = significanceModels;
|
||||
auto n_models_bak = n_models;
|
||||
// 3. significances = vector(k, 1)
|
||||
significanceModels = std::vector<double>(k, 1.0);
|
||||
// 4. Move first n classifiers to models_bak
|
||||
// backup the first n_models - k models (if n_models == k, don't backup any)
|
||||
VLOG_SCOPE_F(1, "upd_weights_block n_models=%d k=%d", n_models, k);
|
||||
for (int i = 0; i < n_models - k; ++i) {
|
||||
model = std::move(models[0]);
|
||||
models.erase(models.begin());
|
||||
models_bak.push_back(std::move(model));
|
||||
}
|
||||
assert(models.size() == k);
|
||||
// 5. n_models <- k
|
||||
n_models = k;
|
||||
// 6. Make prediction, compute alpha, update weights
|
||||
auto ypred = predict(X_train);
|
||||
//
|
||||
// Update weights
|
||||
//
|
||||
double alpha_t;
|
||||
bool terminate;
|
||||
std::tie(weights, alpha_t, terminate) = update_weights(y_train, ypred, weights);
|
||||
//
|
||||
// Restore the models if needed
|
||||
//
|
||||
// 7. Insert classifiers in models_bak to be the first n models
|
||||
// if n_models_bak == k, don't restore any, because none of them were moved
|
||||
if (k != n_models_bak) {
|
||||
// Insert in the same order as they were extracted
|
||||
int bak_size = models_bak.size();
|
||||
for (int i = 0; i < bak_size; ++i) {
|
||||
model = std::move(models_bak[bak_size - 1 - i]);
|
||||
models_bak.erase(models_bak.end() - 1);
|
||||
models.insert(models.begin(), std::move(model));
|
||||
}
|
||||
}
|
||||
// 8. significances <- significances_bak
|
||||
significanceModels = significance_bak;
|
||||
//
|
||||
// Update the significance of the last k models
|
||||
//
|
||||
// 9. Update last k significances
|
||||
for (int i = 0; i < k; ++i) {
|
||||
significanceModels[n_models_bak - k + i] = alpha_t;
|
||||
}
|
||||
// 10. n_models <- n_models_bak
|
||||
n_models = n_models_bak;
|
||||
return { weights, alpha_t, terminate };
|
||||
}
|
||||
std::vector<int> BoostAODE::initializeModels()
|
||||
{
|
||||
std::vector<int> featuresUsed;
|
||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||
int maxFeatures = 0;
|
||||
if (select_features_algorithm == SelectFeatures.CFS) {
|
||||
featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
|
||||
} else if (select_features_algorithm == SelectFeatures.IWSS) {
|
||||
if (threshold < 0 || threshold >0.5) {
|
||||
throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.IWSS + " [0, 0.5]");
|
||||
}
|
||||
featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||
} else if (select_features_algorithm == SelectFeatures.FCBF) {
|
||||
if (threshold < 1e-7 || threshold > 1) {
|
||||
throw std::invalid_argument("Invalid threshold value for " + SelectFeatures.FCBF + " [1e-7, 1]");
|
||||
}
|
||||
featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
|
||||
}
|
||||
featureSelector->fit();
|
||||
auto cfsFeatures = featureSelector->getFeatures();
|
||||
auto scores = featureSelector->getScores();
|
||||
for (int i = 0; i < cfsFeatures.size(); ++i) {
|
||||
LOG_F(INFO, "Feature: %d Score: %f", cfsFeatures[i], scores[i]);
|
||||
}
|
||||
for (const int& feature : cfsFeatures) {
|
||||
featuresUsed.push_back(feature);
|
||||
std::vector<int> featuresSelected = featureSelection(weights_);
|
||||
for (const int& feature : featuresSelected) {
|
||||
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(1.0); // They will be updated later in trainModel
|
||||
n_models++;
|
||||
}
|
||||
notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||
delete featureSelector;
|
||||
return featuresUsed;
|
||||
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||
return featuresSelected;
|
||||
}
|
||||
void BoostAODE::trainModel(const torch::Tensor& weights)
|
||||
void BoostAODE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
//
|
||||
// Logging setup
|
||||
//
|
||||
loguru::set_thread_name("BoostAODE");
|
||||
loguru::g_stderr_verbosity = loguru::Verbosity_OFF;;
|
||||
loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX);
|
||||
// loguru::set_thread_name("BoostAODE");
|
||||
// loguru::g_stderr_verbosity = loguru::Verbosity_OFF;
|
||||
// loguru::add_file("boostAODE.log", loguru::Truncate, loguru::Verbosity_MAX);
|
||||
|
||||
// Algorithm based on the adaboost algorithm for classification
|
||||
// as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
|
||||
fitted = true;
|
||||
@ -282,7 +47,7 @@ namespace bayesnet {
|
||||
bool finished = false;
|
||||
std::vector<int> featuresUsed;
|
||||
if (selectFeatures) {
|
||||
featuresUsed = initializeModels();
|
||||
featuresUsed = initializeModels(smoothing);
|
||||
auto ypred = predict(X_train);
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
// Update significance of the models
|
||||
@ -292,11 +57,6 @@ namespace bayesnet {
|
||||
if (finished) {
|
||||
return;
|
||||
}
|
||||
LOG_F(INFO, "Initial models: %d", n_models);
|
||||
LOG_F(INFO, "Significances: ");
|
||||
for (int i = 0; i < n_models; ++i) {
|
||||
LOG_F(INFO, "i=%d significance=%f", i, significanceModels[i]);
|
||||
}
|
||||
}
|
||||
int numItemsPack = 0; // The counter of the models inserted in the current pack
|
||||
// Variables to control the accuracy finish condition
|
||||
@ -313,7 +73,6 @@ namespace bayesnet {
|
||||
while (!finished) {
|
||||
// Step 1: Build ranking with mutual information
|
||||
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
|
||||
VLOG_SCOPE_F(1, "featureSelection.size: %zu featuresUsed.size: %zu", featureSelection.size(), featuresUsed.size());
|
||||
if (order_algorithm == Orders.RAND) {
|
||||
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
|
||||
}
|
||||
@ -322,24 +81,20 @@ namespace bayesnet {
|
||||
{ return std::find(begin(featuresUsed), end(featuresUsed), x) != end(featuresUsed);}),
|
||||
end(featureSelection)
|
||||
);
|
||||
int k = pow(2, tolerance);
|
||||
int k = bisection ? pow(2, tolerance) : 1;
|
||||
int counter = 0; // The model counter of the current pack
|
||||
VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
|
||||
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
|
||||
while (counter++ < k && featureSelection.size() > 0) {
|
||||
auto feature = featureSelection[0];
|
||||
featureSelection.erase(featureSelection.begin());
|
||||
std::unique_ptr<Classifier> model;
|
||||
model = std::make_unique<SPODE>(feature);
|
||||
model->fit(dataset, features, className, states, weights_);
|
||||
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||
alpha_t = 0.0;
|
||||
if (!block_update) {
|
||||
auto ypred = model->predict(X_train);
|
||||
// Step 3.1: Compute the classifier amout of say
|
||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred, weights_);
|
||||
if (finished) {
|
||||
VLOG_SCOPE_F(2, "** epsilon_t > 0.5 **");
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Step 3.4: Store classifier and its accuracy to weigh its future vote
|
||||
numItemsPack++;
|
||||
@ -347,7 +102,7 @@ namespace bayesnet {
|
||||
models.push_back(std::move(model));
|
||||
significanceModels.push_back(alpha_t);
|
||||
n_models++;
|
||||
VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
|
||||
// VLOG_SCOPE_F(2, "numItemsPack: %d n_models: %d featuresUsed: %zu", numItemsPack, n_models, featuresUsed.size());
|
||||
}
|
||||
if (block_update) {
|
||||
std::tie(weights_, alpha_t, finished) = update_weights_block(k, y_train, weights_);
|
||||
@ -357,37 +112,40 @@ namespace bayesnet {
|
||||
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
|
||||
if (priorAccuracy == 0) {
|
||||
priorAccuracy = accuracy;
|
||||
VLOG_SCOPE_F(3, "First accuracy: %f", priorAccuracy);
|
||||
} else {
|
||||
improvement = accuracy - priorAccuracy;
|
||||
}
|
||||
if (improvement < convergence_threshold) {
|
||||
VLOG_SCOPE_F(3, "(improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
// VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance++;
|
||||
} else {
|
||||
VLOG_SCOPE_F(3, "*(improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
// VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
||||
tolerance = 0; // Reset the counter if the model performs better
|
||||
numItemsPack = 0;
|
||||
}
|
||||
// Keep the best accuracy until now as the prior accuracy
|
||||
priorAccuracy = std::max(accuracy, priorAccuracy);
|
||||
// priorAccuracy = accuracy;
|
||||
if (convergence_best) {
|
||||
// Keep the best accuracy until now as the prior accuracy
|
||||
priorAccuracy = std::max(accuracy, priorAccuracy);
|
||||
} else {
|
||||
// Keep the last accuray obtained as the prior accuracy
|
||||
priorAccuracy = accuracy;
|
||||
}
|
||||
}
|
||||
VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
|
||||
// VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
|
||||
finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
|
||||
}
|
||||
if (tolerance > maxTolerance) {
|
||||
if (numItemsPack < n_models) {
|
||||
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
|
||||
VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
|
||||
for (int i = 0; i < numItemsPack; ++i) {
|
||||
significanceModels.pop_back();
|
||||
models.pop_back();
|
||||
n_models--;
|
||||
}
|
||||
} else {
|
||||
VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
||||
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
||||
}
|
||||
}
|
||||
if (featuresUsed.size() != features.size()) {
|
||||
|
@ -6,44 +6,21 @@
|
||||
|
||||
#ifndef BOOSTAODE_H
|
||||
#define BOOSTAODE_H
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "bayesnet/classifiers/SPODE.h"
|
||||
#include "bayesnet/feature_selection/FeatureSelect.h"
|
||||
#include "Ensemble.h"
|
||||
#include "Boost.h"
|
||||
|
||||
namespace bayesnet {
|
||||
struct {
|
||||
std::string CFS = "CFS";
|
||||
std::string FCBF = "FCBF";
|
||||
std::string IWSS = "IWSS";
|
||||
}SelectFeatures;
|
||||
struct {
|
||||
std::string ASC = "asc";
|
||||
std::string DESC = "desc";
|
||||
std::string RAND = "rand";
|
||||
}Orders;
|
||||
class BoostAODE : public Ensemble {
|
||||
class BoostAODE : public Boost {
|
||||
public:
|
||||
BoostAODE(bool predict_voting = false);
|
||||
explicit BoostAODE(bool predict_voting = false);
|
||||
virtual ~BoostAODE() = default;
|
||||
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
||||
protected:
|
||||
void buildModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
private:
|
||||
std::tuple<torch::Tensor&, double, bool> update_weights_block(int k, torch::Tensor& ytrain, torch::Tensor& weights);
|
||||
std::vector<int> initializeModels();
|
||||
torch::Tensor X_train, y_train, X_test, y_test;
|
||||
// Hyperparameters
|
||||
bool bisection = true; // if true, use bisection stratety to add k models at once to the ensemble
|
||||
int maxTolerance = 3;
|
||||
std::string order_algorithm; // order to process the KBest features asc, desc, rand
|
||||
bool convergence = true; //if true, stop when the model does not improve
|
||||
bool selectFeatures = false; // if true, use feature selection
|
||||
std::string select_features_algorithm = Orders.DESC; // Selected feature selection algorithm
|
||||
FeatureSelect* featureSelector = nullptr;
|
||||
double threshold = -1;
|
||||
bool block_update = false;
|
||||
std::vector<int> initializeModels(const Smoothing_t smoothing);
|
||||
};
|
||||
}
|
||||
#endif
|
@ -3,22 +3,21 @@
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "Ensemble.h"
|
||||
#include "bayesnet/utils/CountingSemaphore.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
Ensemble::Ensemble(bool predict_voting) : Classifier(Network()), n_models(0), predict_voting(predict_voting)
|
||||
{
|
||||
|
||||
};
|
||||
const std::string ENSEMBLE_NOT_FITTED = "Ensemble has not been fitted";
|
||||
void Ensemble::trainModel(const torch::Tensor& weights)
|
||||
void Ensemble::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
n_models = models.size();
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
// fit with std::vectors
|
||||
models[i]->fit(dataset, features, className, states);
|
||||
models[i]->fit(dataset, features, className, states, smoothing);
|
||||
}
|
||||
}
|
||||
std::vector<int> Ensemble::compute_arg_max(std::vector<std::vector<double>>& X)
|
||||
@ -85,17 +84,9 @@ namespace bayesnet {
|
||||
{
|
||||
auto n_states = models[0]->getClassNumStates();
|
||||
torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
|
||||
auto threads{ std::vector<std::thread>() };
|
||||
std::mutex mtx;
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
threads.push_back(std::thread([&, i]() {
|
||||
auto ypredict = models[i]->predict_proba(X);
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
y_pred += ypredict * significanceModels[i];
|
||||
}));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
auto ypredict = models[i]->predict_proba(X);
|
||||
y_pred += ypredict * significanceModels[i];
|
||||
}
|
||||
auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
|
||||
y_pred /= sum;
|
||||
@ -105,23 +96,15 @@ namespace bayesnet {
|
||||
{
|
||||
auto n_states = models[0]->getClassNumStates();
|
||||
std::vector<std::vector<double>> y_pred(X[0].size(), std::vector<double>(n_states, 0.0));
|
||||
auto threads{ std::vector<std::thread>() };
|
||||
std::mutex mtx;
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
threads.push_back(std::thread([&, i]() {
|
||||
auto ypredict = models[i]->predict_proba(X);
|
||||
assert(ypredict.size() == y_pred.size());
|
||||
assert(ypredict[0].size() == y_pred[0].size());
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
// Multiply each prediction by the significance of the model and then add it to the final prediction
|
||||
for (auto j = 0; j < ypredict.size(); ++j) {
|
||||
std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(),
|
||||
[significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
|
||||
}
|
||||
}));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
auto ypredict = models[i]->predict_proba(X);
|
||||
assert(ypredict.size() == y_pred.size());
|
||||
assert(ypredict[0].size() == y_pred[0].size());
|
||||
// Multiply each prediction by the significance of the model and then add it to the final prediction
|
||||
for (auto j = 0; j < ypredict.size(); ++j) {
|
||||
std::transform(y_pred[j].begin(), y_pred[j].end(), ypredict[j].begin(), y_pred[j].begin(),
|
||||
[significanceModels = significanceModels[i]](double x, double y) { return x + y * significanceModels; });
|
||||
}
|
||||
}
|
||||
auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
|
||||
//Divide each element of the prediction by the sum of the significances
|
||||
@ -141,17 +124,9 @@ namespace bayesnet {
|
||||
{
|
||||
// Build a m x n_models tensor with the predictions of each model
|
||||
torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
|
||||
auto threads{ std::vector<std::thread>() };
|
||||
std::mutex mtx;
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
threads.push_back(std::thread([&, i]() {
|
||||
auto ypredict = models[i]->predict(X);
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
y_pred.index_put_({ "...", i }, ypredict);
|
||||
}));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
auto ypredict = models[i]->predict(X);
|
||||
y_pred.index_put_({ "...", i }, ypredict);
|
||||
}
|
||||
return voting(y_pred);
|
||||
}
|
||||
|
@ -46,7 +46,7 @@ namespace bayesnet {
|
||||
unsigned n_models;
|
||||
std::vector<std::unique_ptr<Classifier>> models;
|
||||
std::vector<double> significanceModels;
|
||||
void trainModel(const torch::Tensor& weights) override;
|
||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||
bool predict_voting;
|
||||
};
|
||||
}
|
||||
|
@ -5,20 +5,20 @@
|
||||
// ***************************************************************
|
||||
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include "Network.h"
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
#include "bayesnet/utils/CountingSemaphore.h"
|
||||
#include <pthread.h>
|
||||
#include <fstream>
|
||||
namespace bayesnet {
|
||||
Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
|
||||
Network::Network() : fitted{ false }, classNumStates{ 0 }
|
||||
{
|
||||
}
|
||||
Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
|
||||
{
|
||||
|
||||
}
|
||||
Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
|
||||
maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples)
|
||||
Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
|
||||
fitted(other.fitted), samples(other.samples)
|
||||
{
|
||||
if (samples.defined())
|
||||
samples = samples.clone();
|
||||
@ -35,16 +35,15 @@ namespace bayesnet {
|
||||
nodes.clear();
|
||||
samples = torch::Tensor();
|
||||
}
|
||||
float Network::getMaxThreads() const
|
||||
{
|
||||
return maxThreads;
|
||||
}
|
||||
torch::Tensor& Network::getSamples()
|
||||
{
|
||||
return samples;
|
||||
}
|
||||
void Network::addNode(const std::string& name)
|
||||
{
|
||||
if (fitted) {
|
||||
throw std::invalid_argument("Cannot add node to a fitted network. Initialize first.");
|
||||
}
|
||||
if (name == "") {
|
||||
throw std::invalid_argument("Node name cannot be empty");
|
||||
}
|
||||
@ -94,12 +93,21 @@ namespace bayesnet {
|
||||
}
|
||||
void Network::addEdge(const std::string& parent, const std::string& child)
|
||||
{
|
||||
if (fitted) {
|
||||
throw std::invalid_argument("Cannot add edge to a fitted network. Initialize first.");
|
||||
}
|
||||
if (nodes.find(parent) == nodes.end()) {
|
||||
throw std::invalid_argument("Parent node " + parent + " does not exist");
|
||||
}
|
||||
if (nodes.find(child) == nodes.end()) {
|
||||
throw std::invalid_argument("Child node " + child + " does not exist");
|
||||
}
|
||||
// Check if the edge is already in the graph
|
||||
for (auto& node : nodes[parent]->getChildren()) {
|
||||
if (node->getName() == child) {
|
||||
throw std::invalid_argument("Edge " + parent + " -> " + child + " already exists");
|
||||
}
|
||||
}
|
||||
// Temporarily add edge to check for cycles
|
||||
nodes[parent]->addChild(nodes[child].get());
|
||||
nodes[child]->addParent(nodes[parent].get());
|
||||
@ -155,7 +163,7 @@ namespace bayesnet {
|
||||
classNumStates = nodes.at(className)->getNumStates();
|
||||
}
|
||||
// X comes in nxm, where n is the number of features and m the number of samples
|
||||
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
|
||||
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
|
||||
this->className = className;
|
||||
@ -164,17 +172,17 @@ namespace bayesnet {
|
||||
for (int i = 0; i < featureNames.size(); ++i) {
|
||||
auto row_feature = X.index({ i, "..." });
|
||||
}
|
||||
completeFit(states, weights);
|
||||
completeFit(states, weights, smoothing);
|
||||
}
|
||||
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
|
||||
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
|
||||
this->className = className;
|
||||
this->samples = samples;
|
||||
completeFit(states, weights);
|
||||
completeFit(states, weights, smoothing);
|
||||
}
|
||||
// input_data comes in nxm, where n is the number of features and m the number of samples
|
||||
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
|
||||
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
|
||||
{
|
||||
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
|
||||
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
|
||||
@ -185,17 +193,43 @@ namespace bayesnet {
|
||||
samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
|
||||
}
|
||||
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
|
||||
completeFit(states, weights);
|
||||
completeFit(states, weights, smoothing);
|
||||
}
|
||||
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
|
||||
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
|
||||
{
|
||||
setStates(states);
|
||||
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
|
||||
std::vector<std::thread> threads;
|
||||
auto& semaphore = CountingSemaphore::getInstance();
|
||||
const double n_samples = static_cast<double>(samples.size(1));
|
||||
auto worker = [&](std::pair<const std::string, std::unique_ptr<Node>>& node, int i) {
|
||||
std::string threadName = "FitWorker-" + std::to_string(i);
|
||||
#if defined(__linux__)
|
||||
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||
#else
|
||||
pthread_setname_np(threadName.c_str());
|
||||
#endif
|
||||
double numStates = static_cast<double>(node.second->getNumStates());
|
||||
double smoothing_factor;
|
||||
switch (smoothing) {
|
||||
case Smoothing_t::ORIGINAL:
|
||||
smoothing_factor = 1.0 / n_samples;
|
||||
break;
|
||||
case Smoothing_t::LAPLACE:
|
||||
smoothing_factor = 1.0;
|
||||
break;
|
||||
case Smoothing_t::CESTNIK:
|
||||
smoothing_factor = 1 / numStates;
|
||||
break;
|
||||
default:
|
||||
smoothing_factor = 0.0; // No smoothing
|
||||
}
|
||||
node.second->computeCPT(samples, features, smoothing_factor, weights);
|
||||
semaphore.release();
|
||||
};
|
||||
int i = 0;
|
||||
for (auto& node : nodes) {
|
||||
threads.emplace_back([this, &node, &weights]() {
|
||||
node.second->computeCPT(samples, features, laplaceSmoothing, weights);
|
||||
});
|
||||
semaphore.acquire();
|
||||
threads.emplace_back(worker, std::ref(node), i++);
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
@ -207,14 +241,38 @@ namespace bayesnet {
|
||||
if (!fitted) {
|
||||
throw std::logic_error("You must call fit() before calling predict()");
|
||||
}
|
||||
// Ensure the sample size is equal to the number of features
|
||||
if (samples.size(0) != features.size() - 1) {
|
||||
throw std::invalid_argument("(T) Sample size (" + std::to_string(samples.size(0)) +
|
||||
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
torch::Tensor result;
|
||||
std::vector<std::thread> threads;
|
||||
std::mutex mtx;
|
||||
auto& semaphore = CountingSemaphore::getInstance();
|
||||
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
|
||||
for (int i = 0; i < samples.size(1); ++i) {
|
||||
const torch::Tensor sample = samples.index({ "...", i });
|
||||
auto worker = [&](const torch::Tensor& sample, int i) {
|
||||
std::string threadName = "PredictWorker-" + std::to_string(i);
|
||||
#if defined(__linux__)
|
||||
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||
#else
|
||||
pthread_setname_np(threadName.c_str());
|
||||
#endif
|
||||
auto psample = predict_sample(sample);
|
||||
auto temp = torch::tensor(psample, torch::kFloat64);
|
||||
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64));
|
||||
result.index_put_({ i, "..." }, temp);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
result.index_put_({ i, "..." }, temp);
|
||||
}
|
||||
semaphore.release();
|
||||
};
|
||||
for (int i = 0; i < samples.size(1); ++i) {
|
||||
semaphore.acquire();
|
||||
const torch::Tensor sample = samples.index({ "...", i });
|
||||
threads.emplace_back(worker, sample, i);
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
if (proba)
|
||||
return result;
|
||||
@ -239,18 +297,38 @@ namespace bayesnet {
|
||||
if (!fitted) {
|
||||
throw std::logic_error("You must call fit() before calling predict()");
|
||||
}
|
||||
std::vector<int> predictions;
|
||||
// Ensure the sample size is equal to the number of features
|
||||
if (tsamples.size() != features.size() - 1) {
|
||||
throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
|
||||
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
std::vector<int> predictions(tsamples[0].size(), 0);
|
||||
std::vector<int> sample;
|
||||
std::vector<std::thread> threads;
|
||||
auto& semaphore = CountingSemaphore::getInstance();
|
||||
auto worker = [&](const std::vector<int>& sample, const int row, int& prediction) {
|
||||
std::string threadName = "(V)PWorker-" + std::to_string(row);
|
||||
#if defined(__linux__)
|
||||
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||
#else
|
||||
pthread_setname_np(threadName.c_str());
|
||||
#endif
|
||||
auto classProbabilities = predict_sample(sample);
|
||||
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
|
||||
int predictedClass = distance(classProbabilities.begin(), maxElem);
|
||||
prediction = predictedClass;
|
||||
semaphore.release();
|
||||
};
|
||||
for (int row = 0; row < tsamples[0].size(); ++row) {
|
||||
sample.clear();
|
||||
for (int col = 0; col < tsamples.size(); ++col) {
|
||||
sample.push_back(tsamples[col][row]);
|
||||
}
|
||||
std::vector<double> classProbabilities = predict_sample(sample);
|
||||
// Find the class with the maximum posterior probability
|
||||
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
|
||||
int predictedClass = distance(classProbabilities.begin(), maxElem);
|
||||
predictions.push_back(predictedClass);
|
||||
semaphore.acquire();
|
||||
threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
return predictions;
|
||||
}
|
||||
@ -261,14 +339,36 @@ namespace bayesnet {
|
||||
if (!fitted) {
|
||||
throw std::logic_error("You must call fit() before calling predict_proba()");
|
||||
}
|
||||
std::vector<std::vector<double>> predictions;
|
||||
// Ensure the sample size is equal to the number of features
|
||||
if (tsamples.size() != features.size() - 1) {
|
||||
throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
|
||||
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
std::vector<std::vector<double>> predictions(tsamples[0].size(), std::vector<double>(classNumStates, 0.0));
|
||||
std::vector<int> sample;
|
||||
std::vector<std::thread> threads;
|
||||
auto& semaphore = CountingSemaphore::getInstance();
|
||||
auto worker = [&](const std::vector<int>& sample, int row, std::vector<double>& predictions) {
|
||||
std::string threadName = "(V)PWorker-" + std::to_string(row);
|
||||
#if defined(__linux__)
|
||||
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||
#else
|
||||
pthread_setname_np(threadName.c_str());
|
||||
#endif
|
||||
std::vector<double> classProbabilities = predict_sample(sample);
|
||||
predictions = classProbabilities;
|
||||
semaphore.release();
|
||||
};
|
||||
for (int row = 0; row < tsamples[0].size(); ++row) {
|
||||
sample.clear();
|
||||
for (int col = 0; col < tsamples.size(); ++col) {
|
||||
sample.push_back(tsamples[col][row]);
|
||||
}
|
||||
predictions.push_back(predict_sample(sample));
|
||||
semaphore.acquire();
|
||||
threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
return predictions;
|
||||
}
|
||||
@ -286,11 +386,6 @@ namespace bayesnet {
|
||||
// Return 1xn std::vector of probabilities
|
||||
std::vector<double> Network::predict_sample(const std::vector<int>& sample)
|
||||
{
|
||||
// Ensure the sample size is equal to the number of features
|
||||
if (sample.size() != features.size() - 1) {
|
||||
throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) +
|
||||
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
std::map<std::string, int> evidence;
|
||||
for (int i = 0; i < sample.size(); ++i) {
|
||||
evidence[features[i]] = sample[i];
|
||||
@ -300,44 +395,26 @@ namespace bayesnet {
|
||||
// Return 1xn std::vector of probabilities
|
||||
std::vector<double> Network::predict_sample(const torch::Tensor& sample)
|
||||
{
|
||||
// Ensure the sample size is equal to the number of features
|
||||
if (sample.size(0) != features.size() - 1) {
|
||||
throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) +
|
||||
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
|
||||
}
|
||||
std::map<std::string, int> evidence;
|
||||
for (int i = 0; i < sample.size(0); ++i) {
|
||||
evidence[features[i]] = sample[i].item<int>();
|
||||
}
|
||||
return exactInference(evidence);
|
||||
}
|
||||
double Network::computeFactor(std::map<std::string, int>& completeEvidence)
|
||||
{
|
||||
double result = 1.0;
|
||||
for (auto& node : getNodes()) {
|
||||
result *= node.second->getFactorValue(completeEvidence);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
|
||||
{
|
||||
std::vector<double> result(classNumStates, 0.0);
|
||||
std::vector<std::thread> threads;
|
||||
std::mutex mtx;
|
||||
auto completeEvidence = std::map<std::string, int>(evidence);
|
||||
for (int i = 0; i < classNumStates; ++i) {
|
||||
threads.emplace_back([this, &result, &evidence, i, &mtx]() {
|
||||
auto completeEvidence = std::map<std::string, int>(evidence);
|
||||
completeEvidence[getClassName()] = i;
|
||||
double factor = computeFactor(completeEvidence);
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
result[i] = factor;
|
||||
});
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
completeEvidence[getClassName()] = i;
|
||||
double partial = 1.0;
|
||||
for (auto& node : getNodes()) {
|
||||
partial *= node.second->getFactorValue(completeEvidence);
|
||||
}
|
||||
result[i] = partial;
|
||||
}
|
||||
// Normalize result
|
||||
double sum = accumulate(result.begin(), result.end(), 0.0);
|
||||
double sum = std::accumulate(result.begin(), result.end(), 0.0);
|
||||
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
|
||||
return result;
|
||||
}
|
||||
@ -410,11 +487,7 @@ namespace bayesnet {
|
||||
result.insert(it2, fatherName);
|
||||
ending = false;
|
||||
}
|
||||
} else {
|
||||
throw std::logic_error("Error in topological sort because of node " + feature + " is not in result");
|
||||
}
|
||||
} else {
|
||||
throw std::logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -12,14 +12,18 @@
|
||||
#include "Node.h"
|
||||
|
||||
namespace bayesnet {
|
||||
enum class Smoothing_t {
|
||||
NONE = -1,
|
||||
ORIGINAL = 0,
|
||||
LAPLACE,
|
||||
CESTNIK
|
||||
};
|
||||
class Network {
|
||||
public:
|
||||
Network();
|
||||
explicit Network(float);
|
||||
explicit Network(const Network&);
|
||||
~Network() = default;
|
||||
torch::Tensor& getSamples();
|
||||
float getMaxThreads() const;
|
||||
void addNode(const std::string&);
|
||||
void addEdge(const std::string&, const std::string&);
|
||||
std::map<std::string, std::unique_ptr<Node>>& getNodes();
|
||||
@ -32,9 +36,9 @@ namespace bayesnet {
|
||||
/*
|
||||
Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
|
||||
*/
|
||||
void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
|
||||
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
|
||||
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
|
||||
void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||
std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
|
||||
torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
|
||||
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
|
||||
@ -50,19 +54,16 @@ namespace bayesnet {
|
||||
private:
|
||||
std::map<std::string, std::unique_ptr<Node>> nodes;
|
||||
bool fitted;
|
||||
float maxThreads = 0.95;
|
||||
int classNumStates;
|
||||
std::vector<std::string> features; // Including classname
|
||||
std::string className;
|
||||
double laplaceSmoothing;
|
||||
torch::Tensor samples; // n+1xm tensor used to fit the model
|
||||
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
|
||||
std::vector<double> predict_sample(const std::vector<int>&);
|
||||
std::vector<double> predict_sample(const torch::Tensor&);
|
||||
std::vector<double> exactInference(std::map<std::string, int>&);
|
||||
double computeFactor(std::map<std::string, int>&);
|
||||
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
|
||||
void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
|
||||
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
|
||||
void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
|
||||
void setStates(const std::map<std::string, std::vector<int>>&);
|
||||
};
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
namespace bayesnet {
|
||||
|
||||
Node::Node(const std::string& name)
|
||||
: name(name), numStates(0), cpTable(torch::Tensor()), parents(std::vector<Node*>()), children(std::vector<Node*>())
|
||||
: name(name)
|
||||
{
|
||||
}
|
||||
void Node::clear()
|
||||
@ -90,52 +90,54 @@ namespace bayesnet {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
|
||||
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
|
||||
{
|
||||
dimensions.clear();
|
||||
// Get dimensions of the CPT
|
||||
dimensions.push_back(numStates);
|
||||
transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
|
||||
|
||||
// Create a tensor of zeros with the dimensions of the CPT
|
||||
cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
|
||||
cpTable = torch::zeros(dimensions, torch::kDouble) + smoothing;
|
||||
// Fill table with counts
|
||||
auto pos = find(features.begin(), features.end(), name);
|
||||
if (pos == features.end()) {
|
||||
throw std::logic_error("Feature " + name + " not found in dataset");
|
||||
}
|
||||
int name_index = pos - features.begin();
|
||||
c10::List<c10::optional<at::Tensor>> coordinates;
|
||||
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
|
||||
c10::List<c10::optional<at::Tensor>> coordinates;
|
||||
coordinates.push_back(dataset.index({ name_index, n_sample }));
|
||||
coordinates.clear();
|
||||
auto sample = dataset.index({ "...", n_sample });
|
||||
coordinates.push_back(sample[name_index]);
|
||||
for (auto parent : parents) {
|
||||
pos = find(features.begin(), features.end(), parent->getName());
|
||||
if (pos == features.end()) {
|
||||
throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
|
||||
}
|
||||
int parent_index = pos - features.begin();
|
||||
coordinates.push_back(dataset.index({ parent_index, n_sample }));
|
||||
coordinates.push_back(sample[parent_index]);
|
||||
}
|
||||
// Increment the count of the corresponding coordinate
|
||||
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item<double>());
|
||||
cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
|
||||
}
|
||||
// Normalize the counts
|
||||
// Divide each row by the sum of the row
|
||||
cpTable = cpTable / cpTable.sum(0);
|
||||
}
|
||||
float Node::getFactorValue(std::map<std::string, int>& evidence)
|
||||
double Node::getFactorValue(std::map<std::string, int>& evidence)
|
||||
{
|
||||
c10::List<c10::optional<at::Tensor>> coordinates;
|
||||
// following predetermined order of indices in the cpTable (see Node.h)
|
||||
coordinates.push_back(at::tensor(evidence[name]));
|
||||
transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
|
||||
return cpTable.index({ coordinates }).item<float>();
|
||||
return cpTable.index({ coordinates }).item<double>();
|
||||
}
|
||||
std::vector<std::string> Node::graph(const std::string& className)
|
||||
{
|
||||
auto output = std::vector<std::string>();
|
||||
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
|
||||
output.push_back(name + " [shape=circle" + suffix + "] \n");
|
||||
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); });
|
||||
output.push_back("\"" + name + "\" [shape=circle" + suffix + "] \n");
|
||||
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return "\"" + name + "\" -> \"" + child->getName() + "\""; });
|
||||
return output;
|
||||
}
|
||||
}
|
@ -12,14 +12,6 @@
|
||||
#include <torch/torch.h>
|
||||
namespace bayesnet {
|
||||
class Node {
|
||||
private:
|
||||
std::string name;
|
||||
std::vector<Node*> parents;
|
||||
std::vector<Node*> children;
|
||||
int numStates; // number of states of the variable
|
||||
torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
|
||||
std::vector<int64_t> dimensions; // dimensions of the cpTable
|
||||
std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
|
||||
public:
|
||||
explicit Node(const std::string&);
|
||||
void clear();
|
||||
@ -31,12 +23,20 @@ namespace bayesnet {
|
||||
std::vector<Node*>& getParents();
|
||||
std::vector<Node*>& getChildren();
|
||||
torch::Tensor& getCPT();
|
||||
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
|
||||
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights);
|
||||
int getNumStates() const;
|
||||
void setNumStates(int);
|
||||
unsigned minFill();
|
||||
std::vector<std::string> graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format
|
||||
float getFactorValue(std::map<std::string, int>&);
|
||||
double getFactorValue(std::map<std::string, int>&);
|
||||
private:
|
||||
std::string name;
|
||||
std::vector<Node*> parents;
|
||||
std::vector<Node*> children;
|
||||
int numStates = 0; // number of states of the variable
|
||||
torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
|
||||
std::vector<int64_t> dimensions; // dimensions of the cpTable
|
||||
std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
|
||||
};
|
||||
}
|
||||
#endif
|
@ -4,29 +4,79 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <tuple>
|
||||
#include "Mst.h"
|
||||
#include "BayesMetrics.h"
|
||||
namespace bayesnet {
|
||||
//samples is n+1xm tensor used to fit the model
|
||||
Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
|
||||
: samples(samples)
|
||||
, features(features)
|
||||
, className(className)
|
||||
, features(features)
|
||||
, classNumStates(classNumStates)
|
||||
{
|
||||
}
|
||||
//samples is n+1xm std::vector used to fit the model
|
||||
Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
|
||||
: features(features)
|
||||
: samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
|
||||
, className(className)
|
||||
, features(features)
|
||||
, classNumStates(classNumStates)
|
||||
, samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
|
||||
{
|
||||
for (int i = 0; i < vsamples.size(); ++i) {
|
||||
samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32));
|
||||
}
|
||||
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
|
||||
}
|
||||
std::vector<std::pair<int, int>> Metrics::SelectKPairs(const torch::Tensor& weights, std::vector<int>& featuresExcluded, bool ascending, unsigned k)
|
||||
{
|
||||
// Return the K Best features
|
||||
auto n = features.size();
|
||||
// compute scores
|
||||
scoresKPairs.clear();
|
||||
pairsKBest.clear();
|
||||
auto labels = samples.index({ -1, "..." });
|
||||
for (int i = 0; i < n - 1; ++i) {
|
||||
if (std::find(featuresExcluded.begin(), featuresExcluded.end(), i) != featuresExcluded.end()) {
|
||||
continue;
|
||||
}
|
||||
for (int j = i + 1; j < n; ++j) {
|
||||
if (std::find(featuresExcluded.begin(), featuresExcluded.end(), j) != featuresExcluded.end()) {
|
||||
continue;
|
||||
}
|
||||
auto key = std::make_pair(i, j);
|
||||
auto value = conditionalMutualInformation(samples.index({ i, "..." }), samples.index({ j, "..." }), labels, weights);
|
||||
scoresKPairs.push_back({ key, value });
|
||||
}
|
||||
}
|
||||
// sort scores
|
||||
if (ascending) {
|
||||
sort(scoresKPairs.begin(), scoresKPairs.end(), [](auto& a, auto& b)
|
||||
{ return a.second < b.second; });
|
||||
|
||||
} else {
|
||||
sort(scoresKPairs.begin(), scoresKPairs.end(), [](auto& a, auto& b)
|
||||
{ return a.second > b.second; });
|
||||
}
|
||||
for (auto& [pairs, score] : scoresKPairs) {
|
||||
pairsKBest.push_back(pairs);
|
||||
}
|
||||
if (k != 0 && k < pairsKBest.size()) {
|
||||
if (ascending) {
|
||||
int limit = pairsKBest.size() - k;
|
||||
for (int i = 0; i < limit; i++) {
|
||||
pairsKBest.erase(pairsKBest.begin());
|
||||
scoresKPairs.erase(scoresKPairs.begin());
|
||||
}
|
||||
} else {
|
||||
pairsKBest.resize(k);
|
||||
scoresKPairs.resize(k);
|
||||
}
|
||||
}
|
||||
return pairsKBest;
|
||||
}
|
||||
std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
|
||||
{
|
||||
// Return the K Best features
|
||||
@ -66,7 +116,10 @@ namespace bayesnet {
|
||||
{
|
||||
return scoresKBest;
|
||||
}
|
||||
|
||||
std::vector<std::pair<std::pair<int, int>, double>> Metrics::getScoresKPairs() const
|
||||
{
|
||||
return scoresKPairs;
|
||||
}
|
||||
torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
|
||||
{
|
||||
auto result = std::vector<double>();
|
||||
@ -105,14 +158,8 @@ namespace bayesnet {
|
||||
}
|
||||
return matrix;
|
||||
}
|
||||
// To use in Python
|
||||
std::vector<float> Metrics::conditionalEdgeWeights(std::vector<float>& weights_)
|
||||
{
|
||||
const torch::Tensor weights = torch::tensor(weights_);
|
||||
auto matrix = conditionalEdge(weights);
|
||||
std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
|
||||
return v;
|
||||
}
|
||||
// Measured in nats (natural logarithm (log) base e)
|
||||
// Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
|
||||
double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
|
||||
{
|
||||
torch::Tensor counts = feature.bincount(weights);
|
||||
@ -151,10 +198,54 @@ namespace bayesnet {
|
||||
}
|
||||
return entropyValue;
|
||||
}
|
||||
// I(X;Y) = H(Y) - H(Y|X)
|
||||
// H(X|Y,C) = sum_{y in Y, c in C} p(x,c) H(X|Y=y,C=c)
|
||||
double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
|
||||
{
|
||||
// Ensure the tensors are of the same length
|
||||
assert(firstFeature.size(0) == secondFeature.size(0) && firstFeature.size(0) == labels.size(0) && firstFeature.size(0) == weights.size(0));
|
||||
// Convert tensors to vectors for easier processing
|
||||
auto firstFeatureData = firstFeature.accessor<int, 1>();
|
||||
auto secondFeatureData = secondFeature.accessor<int, 1>();
|
||||
auto labelsData = labels.accessor<int, 1>();
|
||||
auto weightsData = weights.accessor<double, 1>();
|
||||
int numSamples = firstFeature.size(0);
|
||||
// Maps for joint and marginal probabilities
|
||||
std::map<std::tuple<int, int, int>, double> jointCount;
|
||||
std::map<std::tuple<int, int>, double> marginalCount;
|
||||
// Compute joint and marginal counts
|
||||
for (int i = 0; i < numSamples; ++i) {
|
||||
auto keyJoint = std::make_tuple(firstFeatureData[i], labelsData[i], secondFeatureData[i]);
|
||||
auto keyMarginal = std::make_tuple(firstFeatureData[i], labelsData[i]);
|
||||
|
||||
jointCount[keyJoint] += weightsData[i];
|
||||
marginalCount[keyMarginal] += weightsData[i];
|
||||
}
|
||||
// Total weight sum
|
||||
double totalWeight = torch::sum(weights).item<double>();
|
||||
if (totalWeight == 0)
|
||||
return 0;
|
||||
// Compute the conditional entropy
|
||||
double conditionalEntropy = 0.0;
|
||||
for (const auto& [keyJoint, jointFreq] : jointCount) {
|
||||
auto [x, c, y] = keyJoint;
|
||||
auto keyMarginal = std::make_tuple(x, c);
|
||||
//double p_xc = marginalCount[keyMarginal] / totalWeight;
|
||||
double p_y_given_xc = jointFreq / marginalCount[keyMarginal];
|
||||
if (p_y_given_xc > 0) {
|
||||
conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_y_given_xc);
|
||||
}
|
||||
}
|
||||
return conditionalEntropy;
|
||||
}
|
||||
// I(X;Y) = H(Y) - H(Y|X) ; I(X;Y) >= 0
|
||||
double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
|
||||
{
|
||||
return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
|
||||
return std::max(entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights), 0.0);
|
||||
}
|
||||
// I(X;Y|C) = H(X|C) - H(X|Y,C) >= 0
|
||||
double Metrics::conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
|
||||
{
|
||||
return std::max(conditionalEntropy(firstFeature, labels, weights) - conditionalEntropy(firstFeature, secondFeature, labels, weights), 0.0);
|
||||
}
|
||||
/*
|
||||
Compute the maximum spanning tree considering the weights as distances
|
||||
|
@ -16,21 +16,26 @@ namespace bayesnet {
|
||||
Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
|
||||
Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
|
||||
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
|
||||
std::vector<std::pair<int, int>> SelectKPairs(const torch::Tensor& weights, std::vector<int>& featuresExcluded, bool ascending = false, unsigned k = 0);
|
||||
std::vector<double> getScoresKBest() const;
|
||||
std::vector<std::pair<std::pair<int, int>, double>> getScoresKPairs() const;
|
||||
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
|
||||
std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
|
||||
double conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
|
||||
torch::Tensor conditionalEdge(const torch::Tensor& weights);
|
||||
std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
|
||||
// Measured in nats (natural logarithm (log) base e)
|
||||
// Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas p. 14
|
||||
double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
|
||||
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
|
||||
protected:
|
||||
torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
|
||||
std::string className;
|
||||
double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
|
||||
std::vector<std::string> features;
|
||||
template <class T>
|
||||
std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
|
||||
{
|
||||
std::vector<std::pair<T, T>> result;
|
||||
for (int i = 0; i < source.size(); ++i) {
|
||||
for (int i = 0; i < source.size() - 1; ++i) {
|
||||
T temp = source[i];
|
||||
for (int j = i + 1; j < source.size(); ++j) {
|
||||
result.push_back({ temp, source[j] });
|
||||
@ -49,6 +54,8 @@ namespace bayesnet {
|
||||
int classNumStates = 0;
|
||||
std::vector<double> scoresKBest;
|
||||
std::vector<int> featuresKBest; // sorted indices of the features
|
||||
std::vector<std::pair<int, int>> pairsKBest; // sorted indices of the pairs
|
||||
std::vector<std::pair<std::pair<int, int>, double>> scoresKPairs;
|
||||
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
|
||||
};
|
||||
}
|
||||
|
46
bayesnet/utils/CountingSemaphore.h
Normal file
46
bayesnet/utils/CountingSemaphore.h
Normal file
@ -0,0 +1,46 @@
|
||||
#ifndef COUNTING_SEMAPHORE_H
|
||||
#define COUNTING_SEMAPHORE_H
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <algorithm>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <thread>
|
||||
|
||||
class CountingSemaphore {
|
||||
public:
|
||||
static CountingSemaphore& getInstance()
|
||||
{
|
||||
static CountingSemaphore instance;
|
||||
return instance;
|
||||
}
|
||||
// Delete copy constructor and assignment operator
|
||||
CountingSemaphore(const CountingSemaphore&) = delete;
|
||||
CountingSemaphore& operator=(const CountingSemaphore&) = delete;
|
||||
void acquire()
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mtx_);
|
||||
cv_.wait(lock, [this]() { return count_ > 0; });
|
||||
--count_;
|
||||
}
|
||||
void release()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
++count_;
|
||||
if (count_ <= max_count_) {
|
||||
cv_.notify_one();
|
||||
}
|
||||
}
|
||||
private:
|
||||
CountingSemaphore()
|
||||
: max_count_(std::max(1u, static_cast<uint>(0.95 * std::thread::hardware_concurrency()))),
|
||||
count_(max_count_)
|
||||
{
|
||||
}
|
||||
std::mutex mtx_;
|
||||
std::condition_variable cv_;
|
||||
const uint max_count_;
|
||||
uint count_;
|
||||
};
|
||||
#endif
|
@ -53,14 +53,14 @@ namespace bayesnet {
|
||||
}
|
||||
}
|
||||
|
||||
void insertElement(std::list<int>& variables, int variable)
|
||||
void MST::insertElement(std::list<int>& variables, int variable)
|
||||
{
|
||||
if (std::find(variables.begin(), variables.end(), variable) == variables.end()) {
|
||||
variables.push_front(variable);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
|
||||
std::vector<std::pair<int, int>> MST::reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
|
||||
{
|
||||
// Create the edges of a DAG from the MST
|
||||
// replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted
|
||||
|
@ -14,6 +14,8 @@ namespace bayesnet {
|
||||
public:
|
||||
MST() = default;
|
||||
MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
|
||||
void insertElement(std::list<int>& variables, int variable);
|
||||
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original);
|
||||
std::vector<std::pair<int, int>> maximumSpanningTree();
|
||||
private:
|
||||
torch::Tensor weights;
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -137,7 +137,7 @@
|
||||
|
||||
include(CMakeParseArguments)
|
||||
|
||||
option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE)
|
||||
option(CODE_COVERAGE_VERBOSE "Verbose information" TRUE)
|
||||
|
||||
# Check prereqs
|
||||
find_program( GCOV_PATH gcov )
|
||||
@ -160,7 +160,11 @@ foreach(LANG ${LANGUAGES})
|
||||
endif()
|
||||
elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU"
|
||||
AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang")
|
||||
message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
|
||||
if ("${LANG}" MATCHES "CUDA")
|
||||
message(STATUS "Ignoring CUDA")
|
||||
else()
|
||||
message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
|
Binary file not shown.
580
diagrams/BayesNet.puml
Normal file
580
diagrams/BayesNet.puml
Normal file
@ -0,0 +1,580 @@
|
||||
@startuml
|
||||
title clang-uml class diagram model
|
||||
class "bayesnet::Node" as C_0010428199432536647474
|
||||
class C_0010428199432536647474 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Node(const std::string &) : void
|
||||
..
|
||||
+addChild(Node *) : void
|
||||
+addParent(Node *) : void
|
||||
+clear() : void
|
||||
+computeCPT(const torch::Tensor & dataset, const std::vector<std::string> & features, const double smoothing, const torch::Tensor & weights) : void
|
||||
+getCPT() : torch::Tensor &
|
||||
+getChildren() : std::vector<Node *> &
|
||||
+getFactorValue(std::map<std::string,int> &) : double
|
||||
+getName() const : std::string
|
||||
+getNumStates() const : int
|
||||
+getParents() : std::vector<Node *> &
|
||||
+graph(const std::string & clasName) : std::vector<std::string>
|
||||
+minFill() : unsigned int
|
||||
+removeChild(Node *) : void
|
||||
+removeParent(Node *) : void
|
||||
+setNumStates(int) : void
|
||||
__
|
||||
}
|
||||
enum "bayesnet::Smoothing_t" as C_0013393078277439680282
|
||||
enum C_0013393078277439680282 {
|
||||
NONE
|
||||
ORIGINAL
|
||||
LAPLACE
|
||||
CESTNIK
|
||||
}
|
||||
class "bayesnet::Network" as C_0009493661199123436603
|
||||
class C_0009493661199123436603 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Network() : void
|
||||
+Network(const Network &) : void
|
||||
+~Network() = default : void
|
||||
..
|
||||
+addEdge(const std::string &, const std::string &) : void
|
||||
+addNode(const std::string &) : void
|
||||
+dump_cpt() const : std::string
|
||||
+fit(const torch::Tensor & samples, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
|
||||
+fit(const torch::Tensor & X, const torch::Tensor & y, const torch::Tensor & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
|
||||
+fit(const std::vector<std::vector<int>> & input_data, const std::vector<int> & labels, const std::vector<double> & weights, const std::vector<std::string> & featureNames, const std::string & className, const std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : void
|
||||
+getClassName() const : std::string
|
||||
+getClassNumStates() const : int
|
||||
+getEdges() const : std::vector<std::pair<std::string,std::string>>
|
||||
+getFeatures() const : std::vector<std::string>
|
||||
+getNodes() : std::map<std::string,std::unique_ptr<Node>> &
|
||||
+getNumEdges() const : int
|
||||
+getSamples() : torch::Tensor &
|
||||
+getStates() const : int
|
||||
+graph(const std::string & title) const : std::vector<std::string>
|
||||
+initialize() : void
|
||||
+predict(const std::vector<std::vector<int>> &) : std::vector<int>
|
||||
+predict(const torch::Tensor &) : torch::Tensor
|
||||
+predict_proba(const std::vector<std::vector<int>> &) : std::vector<std::vector<double>>
|
||||
+predict_proba(const torch::Tensor &) : torch::Tensor
|
||||
+predict_tensor(const torch::Tensor & samples, const bool proba) : torch::Tensor
|
||||
+score(const std::vector<std::vector<int>> &, const std::vector<int> &) : double
|
||||
+show() const : std::vector<std::string>
|
||||
+topological_sort() : std::vector<std::string>
|
||||
+version() : std::string
|
||||
__
|
||||
}
|
||||
enum "bayesnet::status_t" as C_0005907365846270811004
|
||||
enum C_0005907365846270811004 {
|
||||
NORMAL
|
||||
WARNING
|
||||
ERROR
|
||||
}
|
||||
abstract "bayesnet::BaseClassifier" as C_0002617087915615796317
|
||||
abstract C_0002617087915615796317 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+~BaseClassifier() = default : void
|
||||
..
|
||||
{abstract} +dump_cpt() const = 0 : std::string
|
||||
{abstract} +fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
|
||||
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
|
||||
{abstract} +fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights, const Smoothing_t smoothing) = 0 : BaseClassifier &
|
||||
{abstract} +fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) = 0 : BaseClassifier &
|
||||
{abstract} +getClassNumStates() const = 0 : int
|
||||
{abstract} +getNotes() const = 0 : std::vector<std::string>
|
||||
{abstract} +getNumberOfEdges() const = 0 : int
|
||||
{abstract} +getNumberOfNodes() const = 0 : int
|
||||
{abstract} +getNumberOfStates() const = 0 : int
|
||||
{abstract} +getStatus() const = 0 : status_t
|
||||
+getValidHyperparameters() : std::vector<std::string> &
|
||||
{abstract} +getVersion() = 0 : std::string
|
||||
{abstract} +graph(const std::string & title = "") const = 0 : std::vector<std::string>
|
||||
{abstract} +predict(std::vector<std::vector<int>> & X) = 0 : std::vector<int>
|
||||
{abstract} +predict(torch::Tensor & X) = 0 : torch::Tensor
|
||||
{abstract} +predict_proba(std::vector<std::vector<int>> & X) = 0 : std::vector<std::vector<double>>
|
||||
{abstract} +predict_proba(torch::Tensor & X) = 0 : torch::Tensor
|
||||
{abstract} +score(std::vector<std::vector<int>> & X, std::vector<int> & y) = 0 : float
|
||||
{abstract} +score(torch::Tensor & X, torch::Tensor & y) = 0 : float
|
||||
{abstract} +setHyperparameters(const nlohmann::json & hyperparameters) = 0 : void
|
||||
{abstract} +show() const = 0 : std::vector<std::string>
|
||||
{abstract} +topological_order() = 0 : std::vector<std::string>
|
||||
{abstract} #trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) = 0 : void
|
||||
__
|
||||
#validHyperparameters : std::vector<std::string>
|
||||
}
|
||||
class "bayesnet::Metrics" as C_0005895723015084986588
|
||||
class C_0005895723015084986588 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Metrics() = default : void
|
||||
+Metrics(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
|
||||
+Metrics(const std::vector<std::vector<int>> & vsamples, const std::vector<int> & labels, const std::vector<std::string> & features, const std::string & className, const int classNumStates) : void
|
||||
..
|
||||
+SelectKBestWeighted(const torch::Tensor & weights, bool ascending = false, unsigned int k = 0) : std::vector<int>
|
||||
+SelectKPairs(const torch::Tensor & weights, std::vector<int> & featuresExcluded, bool ascending = false, unsigned int k = 0) : std::vector<std::pair<int,int>>
|
||||
+conditionalEdge(const torch::Tensor & weights) : torch::Tensor
|
||||
+conditionalEntropy(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & labels, const torch::Tensor & weights) : double
|
||||
+conditionalMutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & labels, const torch::Tensor & weights) : double
|
||||
#doCombinations<T>(const std::vector<T> & source) : std::vector<std::pair<T, T> >
|
||||
+entropy(const torch::Tensor & feature, const torch::Tensor & weights) : double
|
||||
+getScoresKBest() const : std::vector<double>
|
||||
+getScoresKPairs() const : std::vector<std::pair<std::pair<int,int>,double>>
|
||||
+maximumSpanningTree(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : std::vector<std::pair<int,int>>
|
||||
+mutualInformation(const torch::Tensor & firstFeature, const torch::Tensor & secondFeature, const torch::Tensor & weights) : double
|
||||
#pop_first<T>(std::vector<T> & v) : T
|
||||
__
|
||||
#className : std::string
|
||||
#features : std::vector<std::string>
|
||||
#samples : torch::Tensor
|
||||
}
|
||||
abstract "bayesnet::Classifier" as C_0016351972983202413152
|
||||
abstract C_0016351972983202413152 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Classifier(Network model) : void
|
||||
+~Classifier() = default : void
|
||||
..
|
||||
+addNodes() : void
|
||||
#buildDataset(torch::Tensor & y) : void
|
||||
{abstract} #buildModel(const torch::Tensor & weights) = 0 : void
|
||||
#checkFitParameters() : void
|
||||
+dump_cpt() const : std::string
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
|
||||
+fit(std::vector<std::vector<int>> & X, std::vector<int> & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : Classifier &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const torch::Tensor & weights, const Smoothing_t smoothing) : Classifier &
|
||||
+getClassNumStates() const : int
|
||||
+getNotes() const : std::vector<std::string>
|
||||
+getNumberOfEdges() const : int
|
||||
+getNumberOfNodes() const : int
|
||||
+getNumberOfStates() const : int
|
||||
+getStatus() const : status_t
|
||||
+getVersion() : std::string
|
||||
+predict(std::vector<std::vector<int>> & X) : std::vector<int>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
+predict_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
|
||||
+predict_proba(torch::Tensor & X) : torch::Tensor
|
||||
+score(torch::Tensor & X, torch::Tensor & y) : float
|
||||
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
+show() const : std::vector<std::string>
|
||||
+topological_order() : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
#className : std::string
|
||||
#dataset : torch::Tensor
|
||||
#features : std::vector<std::string>
|
||||
#fitted : bool
|
||||
#m : unsigned int
|
||||
#metrics : Metrics
|
||||
#model : Network
|
||||
#n : unsigned int
|
||||
#notes : std::vector<std::string>
|
||||
#states : std::map<std::string,std::vector<int>>
|
||||
#status : status_t
|
||||
}
|
||||
class "bayesnet::KDB" as C_0008902920152122000044
|
||||
class C_0008902920152122000044 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDB(int k, float theta = 0.03) : void
|
||||
+~KDB() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "KDB") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPODE" as C_0004096182510460307610
|
||||
class C_0004096182510460307610 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODE(int root) : void
|
||||
+~SPODE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "SPODE") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPnDE" as C_0016268916386101512883
|
||||
class C_0016268916386101512883 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPnDE(std::vector<int> parents) : void
|
||||
+~SPnDE() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "SPnDE") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::TAN" as C_0014087955399074584137
|
||||
class C_0014087955399074584137 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+TAN() : void
|
||||
+~TAN() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & name = "TAN") const : std::vector<std::string>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Proposal" as C_0017759964713298103839
|
||||
class C_0017759964713298103839 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Proposal(torch::Tensor & pDataset, std::vector<std::string> & features_, std::string & className_) : void
|
||||
+~Proposal() : void
|
||||
..
|
||||
#checkInput(const torch::Tensor & X, const torch::Tensor & y) : void
|
||||
#fit_local_discretization(const torch::Tensor & y) : std::map<std::string,std::vector<int>>
|
||||
#localDiscretizationProposal(const std::map<std::string,std::vector<int>> & states, Network & model) : std::map<std::string,std::vector<int>>
|
||||
#prepareX(torch::Tensor & X) : torch::Tensor
|
||||
__
|
||||
#Xf : torch::Tensor
|
||||
#discretizers : map<std::string,mdlp::CPPFImdlp *>
|
||||
#y : torch::Tensor
|
||||
}
|
||||
class "bayesnet::KDBLd" as C_0002756018222998454702
|
||||
class C_0002756018222998454702 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+KDBLd(int k) : void
|
||||
+~KDBLd() = default : void
|
||||
..
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : KDBLd &
|
||||
+graph(const std::string & name = "KDB") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::SPODELd" as C_0010957245114062042836
|
||||
class C_0010957245114062042836 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+SPODELd(int root) : void
|
||||
+~SPODELd() = default : void
|
||||
..
|
||||
+commonFit(const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+fit(torch::Tensor & dataset, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : SPODELd &
|
||||
+graph(const std::string & name = "SPODELd") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
{static} +version() : std::string
|
||||
__
|
||||
}
|
||||
class "bayesnet::TANLd" as C_0013350632773616302678
|
||||
class C_0013350632773616302678 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+TANLd() : void
|
||||
+~TANLd() = default : void
|
||||
..
|
||||
+fit(torch::Tensor & X, torch::Tensor & y, const std::vector<std::string> & features, const std::string & className, std::map<std::string,std::vector<int>> & states, const Smoothing_t smoothing) : TANLd &
|
||||
+graph(const std::string & name = "TANLd") const : std::vector<std::string>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
__
|
||||
}
|
||||
class "bayesnet::Ensemble" as C_0015881931090842884611
|
||||
class C_0015881931090842884611 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Ensemble(bool predict_voting = true) : void
|
||||
+~Ensemble() = default : void
|
||||
..
|
||||
#compute_arg_max(std::vector<std::vector<double>> & X) : std::vector<int>
|
||||
#compute_arg_max(torch::Tensor & X) : torch::Tensor
|
||||
+dump_cpt() const : std::string
|
||||
+getNumberOfEdges() const : int
|
||||
+getNumberOfNodes() const : int
|
||||
+getNumberOfStates() const : int
|
||||
+graph(const std::string & title) const : std::vector<std::string>
|
||||
+predict(std::vector<std::vector<int>> & X) : std::vector<int>
|
||||
+predict(torch::Tensor & X) : torch::Tensor
|
||||
#predict_average_proba(torch::Tensor & X) : torch::Tensor
|
||||
#predict_average_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
|
||||
#predict_average_voting(torch::Tensor & X) : torch::Tensor
|
||||
#predict_average_voting(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
|
||||
+predict_proba(std::vector<std::vector<int>> & X) : std::vector<std::vector<double>>
|
||||
+predict_proba(torch::Tensor & X) : torch::Tensor
|
||||
+score(std::vector<std::vector<int>> & X, std::vector<int> & y) : float
|
||||
+score(torch::Tensor & X, torch::Tensor & y) : float
|
||||
+show() const : std::vector<std::string>
|
||||
+topological_order() : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
#voting(torch::Tensor & votes) : torch::Tensor
|
||||
__
|
||||
#models : std::vector<std::unique_ptr<Classifier>>
|
||||
#n_models : unsigned int
|
||||
#predict_voting : bool
|
||||
#significanceModels : std::vector<double>
|
||||
}
|
||||
class "bayesnet::A2DE" as C_0001410789567057647859
|
||||
class C_0001410789567057647859 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+A2DE(bool predict_voting = false) : void
|
||||
+~A2DE() : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & title = "A2DE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::AODE" as C_0006288892608974306258
|
||||
class C_0006288892608974306258 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODE(bool predict_voting = false) : void
|
||||
+~AODE() : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+graph(const std::string & title = "AODE") const : std::vector<std::string>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters) : void
|
||||
__
|
||||
}
|
||||
abstract "bayesnet::FeatureSelect" as C_0013562609546004646591
|
||||
abstract C_0013562609546004646591 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FeatureSelect(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~FeatureSelect() : void
|
||||
..
|
||||
#computeMeritCFS() : double
|
||||
#computeSuFeatures(const int a, const int b) : double
|
||||
#computeSuLabels() : void
|
||||
{abstract} +fit() = 0 : void
|
||||
+getFeatures() const : std::vector<int>
|
||||
+getScores() const : std::vector<double>
|
||||
#initialize() : void
|
||||
#symmetricalUncertainty(int a, int b) : double
|
||||
__
|
||||
#fitted : bool
|
||||
#maxFeatures : int
|
||||
#selectedFeatures : std::vector<int>
|
||||
#selectedScores : std::vector<double>
|
||||
#suFeatures : std::map<std::pair<int,int>,double>
|
||||
#suLabels : std::vector<double>
|
||||
#weights : const torch::Tensor &
|
||||
}
|
||||
class "bayesnet::(anonymous_60342586)" as C_0005584545181746538542
|
||||
class C_0005584545181746538542 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60343240)" as C_0016227156982041949444
|
||||
class C_0016227156982041949444 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::Boost" as C_0009819322948617116148
|
||||
class C_0009819322948617116148 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Boost(bool predict_voting = false) : void
|
||||
+~Boost() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
#featureSelection(torch::Tensor & weights_) : std::vector<int>
|
||||
+setHyperparameters(const nlohmann::json & hyperparameters_) : void
|
||||
#update_weights(torch::Tensor & ytrain, torch::Tensor & ypred, torch::Tensor & weights) : std::tuple<torch::Tensor &,double,bool>
|
||||
#update_weights_block(int k, torch::Tensor & ytrain, torch::Tensor & weights) : std::tuple<torch::Tensor &,double,bool>
|
||||
__
|
||||
#X_test : torch::Tensor
|
||||
#X_train : torch::Tensor
|
||||
#bisection : bool
|
||||
#block_update : bool
|
||||
#convergence : bool
|
||||
#convergence_best : bool
|
||||
#featureSelector : FeatureSelect *
|
||||
#maxTolerance : int
|
||||
#order_algorithm : std::string
|
||||
#selectFeatures : bool
|
||||
#select_features_algorithm : std::string
|
||||
#threshold : double
|
||||
#y_test : torch::Tensor
|
||||
#y_train : torch::Tensor
|
||||
}
|
||||
class "bayesnet::AODELd" as C_0003898187834670349177
|
||||
class C_0003898187834670349177 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+AODELd(bool predict_voting = true) : void
|
||||
+~AODELd() = default : void
|
||||
..
|
||||
#buildModel(const torch::Tensor & weights) : void
|
||||
+fit(torch::Tensor & X_, torch::Tensor & y_, const std::vector<std::string> & features_, const std::string & className_, std::map<std::string,std::vector<int>> & states_, const Smoothing_t smoothing) : AODELd &
|
||||
+graph(const std::string & name = "AODELd") const : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::(anonymous_60275628)" as C_0009086919615463763584
|
||||
class C_0009086919615463763584 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60276282)" as C_0015251985607563196159
|
||||
class C_0015251985607563196159 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::BoostA2DE" as C_0000272055465257861326
|
||||
class C_0000272055465257861326 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+BoostA2DE(bool predict_voting = false) : void
|
||||
+~BoostA2DE() = default : void
|
||||
..
|
||||
+graph(const std::string & title = "BoostA2DE") const : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::(anonymous_60275502)" as C_0016033655851510053155
|
||||
class C_0016033655851510053155 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60276156)" as C_0000379522761622473555
|
||||
class C_0000379522761622473555 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::BoostAODE" as C_0002867772739198819061
|
||||
class C_0002867772739198819061 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+BoostAODE(bool predict_voting = false) : void
|
||||
+~BoostAODE() = default : void
|
||||
..
|
||||
+graph(const std::string & title = "BoostAODE") const : std::vector<std::string>
|
||||
#trainModel(const torch::Tensor & weights, const Smoothing_t smoothing) : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::CFS" as C_0000093018845530739957
|
||||
class C_0000093018845530739957 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+CFS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights) : void
|
||||
+~CFS() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::FCBF" as C_0001157456122733975432
|
||||
class C_0001157456122733975432 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+FCBF(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
|
||||
+~FCBF() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::IWSS" as C_0000066148117395428429
|
||||
class C_0000066148117395428429 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+IWSS(const torch::Tensor & samples, const std::vector<std::string> & features, const std::string & className, const int maxFeatures, const int classNumStates, const torch::Tensor & weights, const double threshold) : void
|
||||
+~IWSS() : void
|
||||
..
|
||||
+fit() : void
|
||||
__
|
||||
}
|
||||
class "bayesnet::(anonymous_60730495)" as C_0004857727320042830573
|
||||
class C_0004857727320042830573 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60731150)" as C_0000076541533312623385
|
||||
class C_0000076541533312623385 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60653004)" as C_0001444063444142949758
|
||||
class C_0001444063444142949758 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60653658)" as C_0007139277546931322856
|
||||
class C_0007139277546931322856 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60731375)" as C_0010493853592456211189
|
||||
class C_0010493853592456211189 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+CFS : std::string
|
||||
+FCBF : std::string
|
||||
+IWSS : std::string
|
||||
}
|
||||
class "bayesnet::(anonymous_60732030)" as C_0007011438637915849564
|
||||
class C_0007011438637915849564 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
__
|
||||
+ASC : std::string
|
||||
+DESC : std::string
|
||||
+RAND : std::string
|
||||
}
|
||||
class "bayesnet::MST" as C_0001054867409378333602
|
||||
class C_0001054867409378333602 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+MST() = default : void
|
||||
+MST(const std::vector<std::string> & features, const torch::Tensor & weights, const int root) : void
|
||||
..
|
||||
+insertElement(std::list<int> & variables, int variable) : void
|
||||
+maximumSpanningTree() : std::vector<std::pair<int,int>>
|
||||
+reorder(std::vector<std::pair<float,std::pair<int,int>>> T, int root_original) : std::vector<std::pair<int,int>>
|
||||
__
|
||||
}
|
||||
class "bayesnet::Graph" as C_0009576333456015187741
|
||||
class C_0009576333456015187741 #aliceblue;line:blue;line.dotted;text:blue {
|
||||
+Graph(int V) : void
|
||||
..
|
||||
+addEdge(int u, int v, float wt) : void
|
||||
+find_set(int i) : int
|
||||
+get_mst() : std::vector<std::pair<float,std::pair<int,int>>>
|
||||
+kruskal_algorithm() : void
|
||||
+union_set(int u, int v) : void
|
||||
__
|
||||
}
|
||||
C_0010428199432536647474 --> C_0010428199432536647474 : -parents
|
||||
C_0010428199432536647474 --> C_0010428199432536647474 : -children
|
||||
C_0009493661199123436603 ..> C_0013393078277439680282
|
||||
C_0009493661199123436603 o-- C_0010428199432536647474 : -nodes
|
||||
C_0002617087915615796317 ..> C_0013393078277439680282
|
||||
C_0002617087915615796317 ..> C_0005907365846270811004
|
||||
C_0016351972983202413152 ..> C_0013393078277439680282
|
||||
C_0016351972983202413152 o-- C_0009493661199123436603 : #model
|
||||
C_0016351972983202413152 o-- C_0005895723015084986588 : #metrics
|
||||
C_0016351972983202413152 o-- C_0005907365846270811004 : #status
|
||||
C_0002617087915615796317 <|-- C_0016351972983202413152
|
||||
|
||||
C_0016351972983202413152 <|-- C_0008902920152122000044
|
||||
|
||||
C_0016351972983202413152 <|-- C_0004096182510460307610
|
||||
|
||||
C_0016351972983202413152 <|-- C_0016268916386101512883
|
||||
|
||||
C_0016351972983202413152 <|-- C_0014087955399074584137
|
||||
|
||||
C_0017759964713298103839 ..> C_0009493661199123436603
|
||||
C_0002756018222998454702 ..> C_0013393078277439680282
|
||||
C_0008902920152122000044 <|-- C_0002756018222998454702
|
||||
|
||||
C_0017759964713298103839 <|-- C_0002756018222998454702
|
||||
|
||||
C_0010957245114062042836 ..> C_0013393078277439680282
|
||||
C_0004096182510460307610 <|-- C_0010957245114062042836
|
||||
|
||||
C_0017759964713298103839 <|-- C_0010957245114062042836
|
||||
|
||||
C_0013350632773616302678 ..> C_0013393078277439680282
|
||||
C_0014087955399074584137 <|-- C_0013350632773616302678
|
||||
|
||||
C_0017759964713298103839 <|-- C_0013350632773616302678
|
||||
|
||||
C_0015881931090842884611 ..> C_0013393078277439680282
|
||||
C_0015881931090842884611 o-- C_0016351972983202413152 : #models
|
||||
C_0016351972983202413152 <|-- C_0015881931090842884611
|
||||
|
||||
C_0015881931090842884611 <|-- C_0001410789567057647859
|
||||
|
||||
C_0015881931090842884611 <|-- C_0006288892608974306258
|
||||
|
||||
C_0005895723015084986588 <|-- C_0013562609546004646591
|
||||
|
||||
C_0009819322948617116148 --> C_0013562609546004646591 : #featureSelector
|
||||
C_0015881931090842884611 <|-- C_0009819322948617116148
|
||||
|
||||
C_0003898187834670349177 ..> C_0013393078277439680282
|
||||
C_0015881931090842884611 <|-- C_0003898187834670349177
|
||||
|
||||
C_0017759964713298103839 <|-- C_0003898187834670349177
|
||||
|
||||
C_0000272055465257861326 ..> C_0013393078277439680282
|
||||
C_0009819322948617116148 <|-- C_0000272055465257861326
|
||||
|
||||
C_0002867772739198819061 ..> C_0013393078277439680282
|
||||
C_0009819322948617116148 <|-- C_0002867772739198819061
|
||||
|
||||
C_0013562609546004646591 <|-- C_0000093018845530739957
|
||||
|
||||
C_0013562609546004646591 <|-- C_0001157456122733975432
|
||||
|
||||
C_0013562609546004646591 <|-- C_0000066148117395428429
|
||||
|
||||
|
||||
'Generated with clang-uml, version 0.5.5
|
||||
'LLVM version clang version 18.1.8 (Fedora 18.1.8-5.fc41)
|
||||
@enduml
|
1
diagrams/BayesNet.svg
Normal file
1
diagrams/BayesNet.svg
Normal file
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 196 KiB |
314
diagrams/dependency.svg
Normal file
314
diagrams/dependency.svg
Normal file
@ -0,0 +1,314 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<!-- Generated by graphviz version 12.1.0 (20240811.2233)
|
||||
-->
|
||||
<!-- Title: BayesNet Pages: 1 -->
|
||||
<svg width="3725pt" height="432pt"
|
||||
viewBox="0.00 0.00 3724.84 431.80" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 427.8)">
|
||||
<title>BayesNet</title>
|
||||
<polygon fill="white" stroke="none" points="-4,4 -4,-427.8 3720.84,-427.8 3720.84,4 -4,4"/>
|
||||
<!-- node0 -->
|
||||
<g id="node1" class="node">
|
||||
<title>node0</title>
|
||||
<polygon fill="none" stroke="black" points="1655.43,-398.35 1655.43,-413.26 1625.69,-423.8 1583.63,-423.8 1553.89,-413.26 1553.89,-398.35 1583.63,-387.8 1625.69,-387.8 1655.43,-398.35"/>
|
||||
<text text-anchor="middle" x="1604.66" y="-401.53" font-family="Times,serif" font-size="12.00">BayesNet</text>
|
||||
</g>
|
||||
<!-- node1 -->
|
||||
<g id="node2" class="node">
|
||||
<title>node1</title>
|
||||
<polygon fill="none" stroke="black" points="413.32,-257.8 372.39,-273.03 206.66,-279.8 40.93,-273.03 0,-257.8 114.69,-245.59 298.64,-245.59 413.32,-257.8"/>
|
||||
<text text-anchor="middle" x="206.66" y="-257.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libc10.so</text>
|
||||
</g>
|
||||
<!-- node0->node1 -->
|
||||
<g id="edge1" class="edge">
|
||||
<title>node0->node1</title>
|
||||
<path fill="none" stroke="black" d="M1553.59,-400.53C1451.65,-391.91 1215.69,-371.61 1017.66,-351.8 773.36,-327.37 488.07,-295.22 329.31,-277.01"/>
|
||||
<polygon fill="black" stroke="black" points="329.93,-273.56 319.6,-275.89 329.14,-280.51 329.93,-273.56"/>
|
||||
</g>
|
||||
<!-- node2 -->
|
||||
<g id="node3" class="node">
|
||||
<title>node2</title>
|
||||
<polygon fill="none" stroke="black" points="894.21,-257.8 848.35,-273.03 662.66,-279.8 476.98,-273.03 431.12,-257.8 559.61,-245.59 765.71,-245.59 894.21,-257.8"/>
|
||||
<text text-anchor="middle" x="662.66" y="-257.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libc10_cuda.so</text>
|
||||
</g>
|
||||
<!-- node0->node2 -->
|
||||
<g id="edge2" class="edge">
|
||||
<title>node0->node2</title>
|
||||
<path fill="none" stroke="black" d="M1555.34,-397.37C1408.12,-375.18 969.52,-309.06 767.13,-278.55"/>
|
||||
<polygon fill="black" stroke="black" points="767.81,-275.12 757.4,-277.09 766.77,-282.04 767.81,-275.12"/>
|
||||
</g>
|
||||
<!-- node3 -->
|
||||
<g id="node4" class="node">
|
||||
<title>node3</title>
|
||||
<polygon fill="none" stroke="black" points="1338.68,-257.8 1296.49,-273.03 1125.66,-279.8 954.84,-273.03 912.65,-257.8 1030.86,-245.59 1220.46,-245.59 1338.68,-257.8"/>
|
||||
<text text-anchor="middle" x="1125.66" y="-257.53" font-family="Times,serif" font-size="12.00">/home/rmontanana/Code/libtorch/lib/libkineto.a</text>
|
||||
</g>
|
||||
<!-- node0->node3 -->
|
||||
<g id="edge3" class="edge">
|
||||
<title>node0->node3</title>
|
||||
<path fill="none" stroke="black" d="M1566.68,-393.54C1484.46,-369.17 1289.3,-311.32 1188.44,-281.41"/>
|
||||
<polygon fill="black" stroke="black" points="1189.53,-278.09 1178.95,-278.6 1187.54,-284.8 1189.53,-278.09"/>
|
||||
</g>
|
||||
<!-- node4 -->
|
||||
<g id="node5" class="node">
|
||||
<title>node4</title>
|
||||
<polygon fill="none" stroke="black" points="1552.26,-257.8 1532.93,-273.03 1454.66,-279.8 1376.4,-273.03 1357.07,-257.8 1411.23,-245.59 1498.1,-245.59 1552.26,-257.8"/>
|
||||
<text text-anchor="middle" x="1454.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/lib64/libcuda.so</text>
|
||||
</g>
|
||||
<!-- node0->node4 -->
|
||||
<g id="edge4" class="edge">
|
||||
<title>node0->node4</title>
|
||||
<path fill="none" stroke="black" d="M1586.27,-387.39C1559.5,-362.05 1509.72,-314.92 1479.65,-286.46"/>
|
||||
<polygon fill="black" stroke="black" points="1482.13,-283.99 1472.46,-279.65 1477.31,-289.07 1482.13,-283.99"/>
|
||||
</g>
|
||||
<!-- node5 -->
|
||||
<g id="node6" class="node">
|
||||
<title>node5</title>
|
||||
<polygon fill="none" stroke="black" points="1873.26,-257.8 1843.23,-273.03 1721.66,-279.8 1600.09,-273.03 1570.06,-257.8 1654.19,-245.59 1789.13,-245.59 1873.26,-257.8"/>
|
||||
<text text-anchor="middle" x="1721.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/local/cuda/lib64/libcudart.so</text>
|
||||
</g>
|
||||
<!-- node0->node5 -->
|
||||
<g id="edge5" class="edge">
|
||||
<title>node0->node5</title>
|
||||
<path fill="none" stroke="black" d="M1619.76,-387.77C1628.83,-377.46 1640.53,-363.98 1650.66,-351.8 1668.32,-330.59 1687.84,-306.03 1701.94,-288.1"/>
|
||||
<polygon fill="black" stroke="black" points="1704.43,-290.59 1707.84,-280.56 1698.92,-286.27 1704.43,-290.59"/>
|
||||
</g>
|
||||
<!-- node6 -->
|
||||
<g id="node7" class="node">
|
||||
<title>node6</title>
|
||||
<polygon fill="none" stroke="black" points="2231.79,-257.8 2198.1,-273.03 2061.66,-279.8 1925.23,-273.03 1891.53,-257.8 1985.95,-245.59 2137.38,-245.59 2231.79,-257.8"/>
|
||||
<text text-anchor="middle" x="2061.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/local/cuda/lib64/libnvToolsExt.so</text>
|
||||
</g>
|
||||
<!-- node0->node6 -->
|
||||
<g id="edge6" class="edge">
|
||||
<title>node0->node6</title>
|
||||
<path fill="none" stroke="black" d="M1642.06,-393.18C1721.31,-368.56 1906.71,-310.95 2002.32,-281.24"/>
|
||||
<polygon fill="black" stroke="black" points="2003.28,-284.61 2011.79,-278.3 2001.21,-277.92 2003.28,-284.61"/>
|
||||
</g>
|
||||
<!-- node7 -->
|
||||
<g id="node8" class="node">
|
||||
<title>node7</title>
|
||||
<polygon fill="none" stroke="black" points="2541.44,-257.8 2512.56,-273.03 2395.66,-279.8 2278.76,-273.03 2249.89,-257.8 2330.79,-245.59 2460.54,-245.59 2541.44,-257.8"/>
|
||||
<text text-anchor="middle" x="2395.66" y="-257.53" font-family="Times,serif" font-size="12.00">/usr/local/cuda/lib64/libnvrtc.so</text>
|
||||
</g>
|
||||
<!-- node0->node7 -->
|
||||
<g id="edge7" class="edge">
|
||||
<title>node0->node7</title>
|
||||
<path fill="none" stroke="black" d="M1651.19,-396.45C1780.36,-373.26 2144.76,-307.85 2311.05,-277.99"/>
|
||||
<polygon fill="black" stroke="black" points="2311.47,-281.47 2320.7,-276.26 2310.24,-274.58 2311.47,-281.47"/>
|
||||
</g>
|
||||
<!-- node8 -->
|
||||
<g id="node9" class="node">
|
||||
<title>node8</title>
|
||||
<polygon fill="none" stroke="black" points="1642.01,-326.35 1642.01,-341.26 1620.13,-351.8 1589.19,-351.8 1567.31,-341.26 1567.31,-326.35 1589.19,-315.8 1620.13,-315.8 1642.01,-326.35"/>
|
||||
<text text-anchor="middle" x="1604.66" y="-329.53" font-family="Times,serif" font-size="12.00">fimdlp</text>
|
||||
</g>
|
||||
<!-- node0->node8 -->
|
||||
<g id="edge8" class="edge">
|
||||
<title>node0->node8</title>
|
||||
<path fill="none" stroke="black" d="M1604.66,-387.5C1604.66,-380.21 1604.66,-371.53 1604.66,-363.34"/>
|
||||
<polygon fill="black" stroke="black" points="1608.16,-363.42 1604.66,-353.42 1601.16,-363.42 1608.16,-363.42"/>
|
||||
</g>
|
||||
<!-- node19 -->
|
||||
<g id="node10" class="node">
|
||||
<title>node19</title>
|
||||
<polygon fill="none" stroke="black" points="2709.74,-267.37 2634.66,-279.8 2559.58,-267.37 2588.26,-247.24 2681.06,-247.24 2709.74,-267.37"/>
|
||||
<text text-anchor="middle" x="2634.66" y="-257.53" font-family="Times,serif" font-size="12.00">torch_library</text>
|
||||
</g>
|
||||
<!-- node0->node19 -->
|
||||
<g id="edge29" class="edge">
|
||||
<title>node0->node19</title>
|
||||
<path fill="none" stroke="black" d="M1655.87,-399.32C1798.23,-383.79 2210.64,-336.94 2550.66,-279.8 2559.43,-278.33 2568.68,-276.62 2577.72,-274.86"/>
|
||||
<polygon fill="black" stroke="black" points="2578.38,-278.3 2587.5,-272.92 2577.01,-271.43 2578.38,-278.3"/>
|
||||
</g>
|
||||
<!-- node8->node1 -->
|
||||
<g id="edge9" class="edge">
|
||||
<title>node8->node1</title>
|
||||
<path fill="none" stroke="black" d="M1566.84,-331.58C1419.81,-326.72 872.06,-307.69 421.66,-279.8 401.07,-278.53 379.38,-277.02 358.03,-275.43"/>
|
||||
<polygon fill="black" stroke="black" points="358.3,-271.94 348.06,-274.67 357.77,-278.92 358.3,-271.94"/>
|
||||
</g>
|
||||
<!-- node8->node2 -->
|
||||
<g id="edge10" class="edge">
|
||||
<title>node8->node2</title>
|
||||
<path fill="none" stroke="black" d="M1566.86,-330C1445.11,-320.95 1057.97,-292.18 831.67,-275.36"/>
|
||||
<polygon fill="black" stroke="black" points="832.09,-271.89 821.86,-274.63 831.57,-278.87 832.09,-271.89"/>
|
||||
</g>
|
||||
<!-- node8->node3 -->
|
||||
<g id="edge11" class="edge">
|
||||
<title>node8->node3</title>
|
||||
<path fill="none" stroke="black" d="M1567.08,-327.31C1495.4,-316.84 1336.86,-293.67 1230.62,-278.14"/>
|
||||
<polygon fill="black" stroke="black" points="1231.44,-274.72 1221.04,-276.74 1230.42,-281.65 1231.44,-274.72"/>
|
||||
</g>
|
||||
<!-- node8->node4 -->
|
||||
<g id="edge12" class="edge">
|
||||
<title>node8->node4</title>
|
||||
<path fill="none" stroke="black" d="M1578.53,-320.61C1555.96,-310.08 1522.92,-294.66 1496.64,-282.4"/>
|
||||
<polygon fill="black" stroke="black" points="1498.12,-279.22 1487.58,-278.17 1495.16,-285.57 1498.12,-279.22"/>
|
||||
</g>
|
||||
<!-- node8->node5 -->
|
||||
<g id="edge13" class="edge">
|
||||
<title>node8->node5</title>
|
||||
<path fill="none" stroke="black" d="M1627.78,-318.97C1644.15,-309.18 1666.44,-295.84 1685.2,-284.62"/>
|
||||
<polygon fill="black" stroke="black" points="1686.83,-287.73 1693.61,-279.59 1683.23,-281.72 1686.83,-287.73"/>
|
||||
</g>
|
||||
<!-- node8->node6 -->
|
||||
<g id="edge14" class="edge">
|
||||
<title>node8->node6</title>
|
||||
<path fill="none" stroke="black" d="M1642.45,-327.02C1712.36,-316.31 1863.89,-293.1 1964.32,-277.71"/>
|
||||
<polygon fill="black" stroke="black" points="1964.84,-281.18 1974.2,-276.2 1963.78,-274.26 1964.84,-281.18"/>
|
||||
</g>
|
||||
<!-- node8->node7 -->
|
||||
<g id="edge15" class="edge">
|
||||
<title>node8->node7</title>
|
||||
<path fill="none" stroke="black" d="M1642.33,-330.01C1740.75,-322.64 2013.75,-301.7 2240.66,-279.8 2254.16,-278.5 2268.32,-277.06 2282.35,-275.58"/>
|
||||
<polygon fill="black" stroke="black" points="2282.49,-279.08 2292.06,-274.54 2281.75,-272.12 2282.49,-279.08"/>
|
||||
</g>
|
||||
<!-- node8->node19 -->
|
||||
<g id="edge16" class="edge">
|
||||
<title>node8->node19</title>
|
||||
<path fill="none" stroke="black" d="M1642.25,-332.63C1770.06,-331.64 2199.48,-324.94 2550.66,-279.8 2560.1,-278.59 2570.07,-276.92 2579.71,-275.1"/>
|
||||
<polygon fill="black" stroke="black" points="2580.21,-278.57 2589.34,-273.21 2578.86,-271.7 2580.21,-278.57"/>
|
||||
</g>
|
||||
<!-- node20 -->
|
||||
<g id="node11" class="node">
|
||||
<title>node20</title>
|
||||
<polygon fill="none" stroke="black" points="2606.81,-185.8 2533.89,-201.03 2238.66,-207.8 1943.43,-201.03 1870.52,-185.8 2074.82,-173.59 2402.5,-173.59 2606.81,-185.8"/>
|
||||
<text text-anchor="middle" x="2238.66" y="-185.53" font-family="Times,serif" font-size="12.00">-Wl,--no-as-needed,"/home/rmontanana/Code/libtorch/lib/libtorch.so" -Wl,--as-needed</text>
|
||||
</g>
|
||||
<!-- node19->node20 -->
|
||||
<g id="edge17" class="edge">
|
||||
<title>node19->node20</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2583.63,-250.21C2572.76,-248.03 2561.34,-245.79 2550.66,-243.8 2482.14,-231.05 2404.92,-217.93 2344.44,-207.93"/>
|
||||
<polygon fill="black" stroke="black" points="2345.28,-204.52 2334.84,-206.34 2344.14,-211.42 2345.28,-204.52"/>
|
||||
</g>
|
||||
<!-- node9 -->
|
||||
<g id="node12" class="node">
|
||||
<title>node9</title>
|
||||
<polygon fill="none" stroke="black" points="2542.56,-123.37 2445.66,-135.8 2348.77,-123.37 2385.78,-103.24 2505.55,-103.24 2542.56,-123.37"/>
|
||||
<text text-anchor="middle" x="2445.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch_cpu_library</text>
|
||||
</g>
|
||||
<!-- node19->node9 -->
|
||||
<g id="edge18" class="edge">
|
||||
<title>node19->node9</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2635.72,-246.84C2636.4,-227.49 2634.61,-192.58 2615.66,-171.8 2601.13,-155.87 2551.93,-141.56 2510.18,-131.84"/>
|
||||
<polygon fill="black" stroke="black" points="2511.2,-128.48 2500.67,-129.68 2509.65,-135.31 2511.2,-128.48"/>
|
||||
</g>
|
||||
<!-- node13 -->
|
||||
<g id="node16" class="node">
|
||||
<title>node13</title>
|
||||
<polygon fill="none" stroke="black" points="3056.45,-195.37 2953.66,-207.8 2850.87,-195.37 2890.13,-175.24 3017.19,-175.24 3056.45,-195.37"/>
|
||||
<text text-anchor="middle" x="2953.66" y="-185.53" font-family="Times,serif" font-size="12.00">torch_cuda_library</text>
|
||||
</g>
|
||||
<!-- node19->node13 -->
|
||||
<g id="edge22" class="edge">
|
||||
<title>node19->node13</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2685.21,-249.71C2741.11,-237.45 2831.21,-217.67 2891.42,-204.46"/>
|
||||
<polygon fill="black" stroke="black" points="2891.8,-207.96 2900.82,-202.4 2890.3,-201.13 2891.8,-207.96"/>
|
||||
</g>
|
||||
<!-- node10 -->
|
||||
<g id="node13" class="node">
|
||||
<title>node10</title>
|
||||
<polygon fill="none" stroke="black" points="2362.4,-27.9 2285.6,-43.12 1974.66,-49.9 1663.72,-43.12 1586.93,-27.9 1802.1,-15.68 2147.22,-15.68 2362.4,-27.9"/>
|
||||
<text text-anchor="middle" x="1974.66" y="-27.63" font-family="Times,serif" font-size="12.00">-Wl,--no-as-needed,"/home/rmontanana/Code/libtorch/lib/libtorch_cpu.so" -Wl,--as-needed</text>
|
||||
</g>
|
||||
<!-- node9->node10 -->
|
||||
<g id="edge19" class="edge">
|
||||
<title>node9->node10</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2381.16,-105.31C2301.63,-91.15 2165.65,-66.92 2073.05,-50.43"/>
|
||||
<polygon fill="black" stroke="black" points="2073.93,-47.03 2063.48,-48.72 2072.71,-53.92 2073.93,-47.03"/>
|
||||
</g>
|
||||
<!-- node11 -->
|
||||
<g id="node14" class="node">
|
||||
<title>node11</title>
|
||||
<polygon fill="none" stroke="black" points="2510.72,-37.46 2445.66,-49.9 2380.61,-37.46 2405.46,-17.34 2485.87,-17.34 2510.72,-37.46"/>
|
||||
<text text-anchor="middle" x="2445.66" y="-27.63" font-family="Times,serif" font-size="12.00">caffe2::mkl</text>
|
||||
</g>
|
||||
<!-- node9->node11 -->
|
||||
<g id="edge20" class="edge">
|
||||
<title>node9->node11</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2445.66,-102.95C2445.66,-91.68 2445.66,-75.4 2445.66,-61.37"/>
|
||||
<polygon fill="black" stroke="black" points="2449.16,-61.78 2445.66,-51.78 2442.16,-61.78 2449.16,-61.78"/>
|
||||
</g>
|
||||
<!-- node12 -->
|
||||
<g id="node15" class="node">
|
||||
<title>node12</title>
|
||||
<polygon fill="none" stroke="black" points="2794.95,-41.76 2661.66,-63.8 2528.37,-41.76 2579.28,-6.09 2744.04,-6.09 2794.95,-41.76"/>
|
||||
<text text-anchor="middle" x="2661.66" y="-34.75" font-family="Times,serif" font-size="12.00">dummy</text>
|
||||
<text text-anchor="middle" x="2661.66" y="-20.5" font-family="Times,serif" font-size="12.00">(protobuf::libprotobuf)</text>
|
||||
</g>
|
||||
<!-- node9->node12 -->
|
||||
<g id="edge21" class="edge">
|
||||
<title>node9->node12</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2481.82,-102.76C2512.55,-90.82 2557.5,-73.36 2594.77,-58.89"/>
|
||||
<polygon fill="black" stroke="black" points="2595.6,-62.32 2603.65,-55.44 2593.06,-55.79 2595.6,-62.32"/>
|
||||
</g>
|
||||
<!-- node13->node9 -->
|
||||
<g id="edge28" class="edge">
|
||||
<title>node13->node9</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2880.59,-179.79C2799.97,-169.71 2666.42,-152.57 2551.66,-135.8 2540.2,-134.13 2528.06,-132.27 2516.24,-130.41"/>
|
||||
<polygon fill="black" stroke="black" points="2516.96,-126.98 2506.54,-128.86 2515.87,-133.89 2516.96,-126.98"/>
|
||||
</g>
|
||||
<!-- node14 -->
|
||||
<g id="node17" class="node">
|
||||
<title>node14</title>
|
||||
<polygon fill="none" stroke="black" points="3346.69,-113.8 3268.85,-129.03 2953.66,-135.8 2638.48,-129.03 2560.63,-113.8 2778.75,-101.59 3128.58,-101.59 3346.69,-113.8"/>
|
||||
<text text-anchor="middle" x="2953.66" y="-113.53" font-family="Times,serif" font-size="12.00">-Wl,--no-as-needed,"/home/rmontanana/Code/libtorch/lib/libtorch_cuda.so" -Wl,--as-needed</text>
|
||||
</g>
|
||||
<!-- node13->node14 -->
|
||||
<g id="edge23" class="edge">
|
||||
<title>node13->node14</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M2953.66,-174.97C2953.66,-167.13 2953.66,-157.01 2953.66,-147.53"/>
|
||||
<polygon fill="black" stroke="black" points="2957.16,-147.59 2953.66,-137.59 2950.16,-147.59 2957.16,-147.59"/>
|
||||
</g>
|
||||
<!-- node15 -->
|
||||
<g id="node18" class="node">
|
||||
<title>node15</title>
|
||||
<polygon fill="none" stroke="black" points="3514.74,-123.37 3439.66,-135.8 3364.58,-123.37 3393.26,-103.24 3486.06,-103.24 3514.74,-123.37"/>
|
||||
<text text-anchor="middle" x="3439.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch::cudart</text>
|
||||
</g>
|
||||
<!-- node13->node15 -->
|
||||
<g id="edge24" class="edge">
|
||||
<title>node13->node15</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3028.35,-180.51C3109.24,-171.17 3241.96,-154.78 3355.66,-135.8 3364.43,-134.34 3373.69,-132.63 3382.72,-130.88"/>
|
||||
<polygon fill="black" stroke="black" points="3383.38,-134.31 3392.51,-128.93 3382.02,-127.45 3383.38,-134.31"/>
|
||||
</g>
|
||||
<!-- node17 -->
|
||||
<g id="node20" class="node">
|
||||
<title>node17</title>
|
||||
<polygon fill="none" stroke="black" points="3716.84,-123.37 3624.66,-135.8 3532.48,-123.37 3567.69,-103.24 3681.63,-103.24 3716.84,-123.37"/>
|
||||
<text text-anchor="middle" x="3624.66" y="-113.53" font-family="Times,serif" font-size="12.00">torch::nvtoolsext</text>
|
||||
</g>
|
||||
<!-- node13->node17 -->
|
||||
<g id="edge26" class="edge">
|
||||
<title>node13->node17</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3033.64,-183.25C3144.1,-175.14 3349.47,-158.53 3523.66,-135.8 3534.84,-134.35 3546.67,-132.57 3558.15,-130.72"/>
|
||||
<polygon fill="black" stroke="black" points="3558.68,-134.18 3567.98,-129.1 3557.54,-127.27 3558.68,-134.18"/>
|
||||
</g>
|
||||
<!-- node16 -->
|
||||
<g id="node19" class="node">
|
||||
<title>node16</title>
|
||||
<polygon fill="none" stroke="black" points="3510.78,-27.9 3496.7,-43.12 3439.66,-49.9 3382.63,-43.12 3368.54,-27.9 3408.01,-15.68 3471.31,-15.68 3510.78,-27.9"/>
|
||||
<text text-anchor="middle" x="3439.66" y="-27.63" font-family="Times,serif" font-size="12.00">CUDA::cudart</text>
|
||||
</g>
|
||||
<!-- node15->node16 -->
|
||||
<g id="edge25" class="edge">
|
||||
<title>node15->node16</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3439.66,-102.95C3439.66,-91.68 3439.66,-75.4 3439.66,-61.37"/>
|
||||
<polygon fill="black" stroke="black" points="3443.16,-61.78 3439.66,-51.78 3436.16,-61.78 3443.16,-61.78"/>
|
||||
</g>
|
||||
<!-- node18 -->
|
||||
<g id="node21" class="node">
|
||||
<title>node18</title>
|
||||
<polygon fill="none" stroke="black" points="3714.32,-27.9 3696.56,-43.12 3624.66,-49.9 3552.77,-43.12 3535.01,-27.9 3584.76,-15.68 3664.56,-15.68 3714.32,-27.9"/>
|
||||
<text text-anchor="middle" x="3624.66" y="-27.63" font-family="Times,serif" font-size="12.00">CUDA::nvToolsExt</text>
|
||||
</g>
|
||||
<!-- node17->node18 -->
|
||||
<g id="edge27" class="edge">
|
||||
<title>node17->node18</title>
|
||||
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M3624.66,-102.95C3624.66,-91.68 3624.66,-75.4 3624.66,-61.37"/>
|
||||
<polygon fill="black" stroke="black" points="3628.16,-61.78 3624.66,-51.78 3621.16,-61.78 3628.16,-61.78"/>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 18 KiB |
@ -5,6 +5,7 @@
|
||||
The hyperparameters defined in the algorithm are:
|
||||
|
||||
- ***bisection*** (*boolean*): If set to true allows the algorithm to add *k* models at once (as specified in the algorithm) to the ensemble. Default value: *true*.
|
||||
- ***bisection_best*** (*boolean*): If set to *true*, the algorithm will take as *priorAccuracy* the best accuracy computed. If set to *false⁺ it will take the last accuracy as *priorAccuracy*. Default value: *false*.
|
||||
|
||||
- ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*.
|
||||
|
||||
@ -26,4 +27,4 @@ The hyperparameters defined in the algorithm are:
|
||||
|
||||
## Operation
|
||||
|
||||
### [Algorithm](./algorithm.md)
|
||||
### [Base Algorithm](./algorithm.md)
|
||||
|
2912
docs/Doxyfile.in
Normal file
2912
docs/Doxyfile.in
Normal file
File diff suppressed because it is too large
Load Diff
@ -105,8 +105,7 @@
|
||||
|
||||
2. $numItemsPack \leftarrow 0$
|
||||
|
||||
10. If
|
||||
$(Vars == \emptyset \lor tolerance>maxTolerance) \; finished \leftarrow True$
|
||||
10. If $(Vars == \emptyset \lor tolerance>maxTolerance) \; finished \leftarrow True$
|
||||
|
||||
11. $lastAccuracy \leftarrow max(lastAccuracy, actualAccuracy)$
|
||||
|
||||
|
BIN
docs/logo_small.png
Normal file
BIN
docs/logo_small.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 11 KiB |
@ -1,5 +0,0 @@
|
||||
filter = bayesnet/
|
||||
exclude-directories = build_debug/lib/
|
||||
exclude = bayesnet/utils/loguru.*
|
||||
print-summary = yes
|
||||
sort = uncovered-percent
|
@ -1,168 +0,0 @@
|
||||
#include "ArffFiles.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
ArffFiles::ArffFiles() = default;
|
||||
|
||||
std::vector<std::string> ArffFiles::getLines() const
|
||||
{
|
||||
return lines;
|
||||
}
|
||||
|
||||
unsigned long int ArffFiles::getSize() const
|
||||
{
|
||||
return lines.size();
|
||||
}
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
|
||||
{
|
||||
return attributes;
|
||||
}
|
||||
|
||||
std::string ArffFiles::getClassName() const
|
||||
{
|
||||
return className;
|
||||
}
|
||||
|
||||
std::string ArffFiles::getClassType() const
|
||||
{
|
||||
return classType;
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>>& ArffFiles::getX()
|
||||
{
|
||||
return X;
|
||||
}
|
||||
|
||||
std::vector<int>& ArffFiles::getY()
|
||||
{
|
||||
return y;
|
||||
}
|
||||
|
||||
void ArffFiles::loadCommon(std::string fileName)
|
||||
{
|
||||
std::ifstream file(fileName);
|
||||
if (!file.is_open()) {
|
||||
throw std::invalid_argument("Unable to open file");
|
||||
}
|
||||
std::string line;
|
||||
std::string keyword;
|
||||
std::string attribute;
|
||||
std::string type;
|
||||
std::string type_w;
|
||||
while (getline(file, line)) {
|
||||
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
||||
continue;
|
||||
}
|
||||
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
|
||||
std::stringstream ss(line);
|
||||
ss >> keyword >> attribute;
|
||||
type = "";
|
||||
while (ss >> type_w)
|
||||
type += type_w + " ";
|
||||
attributes.emplace_back(trim(attribute), trim(type));
|
||||
continue;
|
||||
}
|
||||
if (line[0] == '@') {
|
||||
continue;
|
||||
}
|
||||
lines.push_back(line);
|
||||
}
|
||||
file.close();
|
||||
if (attributes.empty())
|
||||
throw std::invalid_argument("No attributes found");
|
||||
}
|
||||
|
||||
void ArffFiles::load(const std::string& fileName, bool classLast)
|
||||
{
|
||||
int labelIndex;
|
||||
loadCommon(fileName);
|
||||
if (classLast) {
|
||||
className = std::get<0>(attributes.back());
|
||||
classType = std::get<1>(attributes.back());
|
||||
attributes.pop_back();
|
||||
labelIndex = static_cast<int>(attributes.size());
|
||||
} else {
|
||||
className = std::get<0>(attributes.front());
|
||||
classType = std::get<1>(attributes.front());
|
||||
attributes.erase(attributes.begin());
|
||||
labelIndex = 0;
|
||||
}
|
||||
generateDataset(labelIndex);
|
||||
}
|
||||
void ArffFiles::load(const std::string& fileName, const std::string& name)
|
||||
{
|
||||
int labelIndex;
|
||||
loadCommon(fileName);
|
||||
bool found = false;
|
||||
for (int i = 0; i < attributes.size(); ++i) {
|
||||
if (attributes[i].first == name) {
|
||||
className = std::get<0>(attributes[i]);
|
||||
classType = std::get<1>(attributes[i]);
|
||||
attributes.erase(attributes.begin() + i);
|
||||
labelIndex = i;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
throw std::invalid_argument("Class name not found");
|
||||
}
|
||||
generateDataset(labelIndex);
|
||||
}
|
||||
|
||||
void ArffFiles::generateDataset(int labelIndex)
|
||||
{
|
||||
X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
|
||||
auto yy = std::vector<std::string>(lines.size(), "");
|
||||
auto removeLines = std::vector<int>(); // Lines with missing values
|
||||
for (size_t i = 0; i < lines.size(); i++) {
|
||||
std::stringstream ss(lines[i]);
|
||||
std::string value;
|
||||
int pos = 0;
|
||||
int xIndex = 0;
|
||||
while (getline(ss, value, ',')) {
|
||||
if (pos++ == labelIndex) {
|
||||
yy[i] = value;
|
||||
} else {
|
||||
if (value == "?") {
|
||||
X[xIndex++][i] = -1;
|
||||
removeLines.push_back(i);
|
||||
} else
|
||||
X[xIndex++][i] = stof(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto i : removeLines) {
|
||||
yy.erase(yy.begin() + i);
|
||||
for (auto& x : X) {
|
||||
x.erase(x.begin() + i);
|
||||
}
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
|
||||
std::string ArffFiles::trim(const std::string& source)
|
||||
{
|
||||
std::string s(source);
|
||||
s.erase(0, s.find_first_not_of(" '\n\r\t"));
|
||||
s.erase(s.find_last_not_of(" '\n\r\t") + 1);
|
||||
return s;
|
||||
}
|
||||
|
||||
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
|
||||
{
|
||||
std::vector<int> yy;
|
||||
yy.reserve(labels_t.size());
|
||||
std::map<std::string, int> labelMap;
|
||||
int i = 0;
|
||||
for (const std::string& label : labels_t) {
|
||||
if (labelMap.find(label) == labelMap.end()) {
|
||||
labelMap[label] = i++;
|
||||
}
|
||||
yy.push_back(labelMap[label]);
|
||||
}
|
||||
return yy;
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class ArffFiles {
|
||||
private:
|
||||
std::vector<std::string> lines;
|
||||
std::vector<std::pair<std::string, std::string>> attributes;
|
||||
std::string className;
|
||||
std::string classType;
|
||||
std::vector<std::vector<float>> X;
|
||||
std::vector<int> y;
|
||||
void generateDataset(int);
|
||||
void loadCommon(std::string);
|
||||
public:
|
||||
ArffFiles();
|
||||
void load(const std::string&, bool = true);
|
||||
void load(const std::string&, const std::string&);
|
||||
std::vector<std::string> getLines() const;
|
||||
unsigned long int getSize() const;
|
||||
std::string getClassName() const;
|
||||
std::string getClassType() const;
|
||||
static std::string trim(const std::string&);
|
||||
std::vector<std::vector<float>>& getX();
|
||||
std::vector<int>& getY();
|
||||
std::vector<std::pair<std::string, std::string>> getAttributes() const;
|
||||
static std::vector<int> factorize(const std::vector<std::string>& labels_t);
|
||||
};
|
||||
|
||||
#endif
|
@ -1 +0,0 @@
|
||||
add_library(ArffFiles ArffFiles.cc)
|
@ -1 +1 @@
|
||||
Subproject commit bff6e35e2b239217f3940ed52429f94b745adc50
|
||||
Subproject commit 029fe3b4609dd84cd939b73357f37bbb75bcf82f
|
@ -1 +1 @@
|
||||
Subproject commit 71d6055be4488cf2e6443123ae8fc4a63ae289dc
|
||||
Subproject commit 2ac43e32ac1eac0c986702ec526cf5367a565ef0
|
2
lib/json
2
lib/json
@ -1 +1 @@
|
||||
Subproject commit 199dea11b17c533721b26249e2dcaee6ca1d51d3
|
||||
Subproject commit 378e091795a70fced276cd882bd8a6a428668fe5
|
2009
lib/log/loguru.cpp
Normal file
2009
lib/log/loguru.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1475
lib/log/loguru.hpp
Normal file
1475
lib/log/loguru.hpp
Normal file
File diff suppressed because it is too large
Load Diff
2
lib/mdlp
2
lib/mdlp
@ -1 +1 @@
|
||||
Subproject commit 5708dc3de944fc22d61a2dd071b63aa338e04db3
|
||||
Subproject commit 7d62d6af4a6ca944a3bbde0b61f651fd4b2d3f57
|
@ -5,16 +5,21 @@ project(bayesnet_sample)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
find_package(Torch REQUIRED)
|
||||
find_library(BayesNet NAMES BayesNet.a libBayesNet.a REQUIRED)
|
||||
find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a REQUIRED)
|
||||
find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet)
|
||||
find_library(FImdlp NAMES libfimdlp.a PATHS REQUIRED)
|
||||
|
||||
message(STATUS "FImdlp=${FImdlp}")
|
||||
message(STATUS "FImdlp_INCLUDE_DIRS=${FImdlp_INCLUDE_DIRS}")
|
||||
message(STATUS "BayesNet=${BayesNet}")
|
||||
message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
|
||||
|
||||
include_directories(
|
||||
lib/Files
|
||||
lib/mdlp
|
||||
../tests/lib/Files
|
||||
lib/json/include
|
||||
/usr/local/include
|
||||
${FImdlp_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
add_subdirectory(lib/Files)
|
||||
add_subdirectory(lib/mdlp)
|
||||
add_executable(bayesnet_sample sample.cc)
|
||||
target_link_libraries(bayesnet_sample ArffFiles mdlp "${TORCH_LIBRARIES}" "${BayesNet}")
|
||||
target_link_libraries(bayesnet_sample fimdlp "${TORCH_LIBRARIES}" "${BayesNet}")
|
@ -1,174 +0,0 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include "ArffFiles.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
ArffFiles::ArffFiles() = default;
|
||||
|
||||
std::vector<std::string> ArffFiles::getLines() const
|
||||
{
|
||||
return lines;
|
||||
}
|
||||
|
||||
unsigned long int ArffFiles::getSize() const
|
||||
{
|
||||
return lines.size();
|
||||
}
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
|
||||
{
|
||||
return attributes;
|
||||
}
|
||||
|
||||
std::string ArffFiles::getClassName() const
|
||||
{
|
||||
return className;
|
||||
}
|
||||
|
||||
std::string ArffFiles::getClassType() const
|
||||
{
|
||||
return classType;
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>>& ArffFiles::getX()
|
||||
{
|
||||
return X;
|
||||
}
|
||||
|
||||
std::vector<int>& ArffFiles::getY()
|
||||
{
|
||||
return y;
|
||||
}
|
||||
|
||||
void ArffFiles::loadCommon(std::string fileName)
|
||||
{
|
||||
std::ifstream file(fileName);
|
||||
if (!file.is_open()) {
|
||||
throw std::invalid_argument("Unable to open file");
|
||||
}
|
||||
std::string line;
|
||||
std::string keyword;
|
||||
std::string attribute;
|
||||
std::string type;
|
||||
std::string type_w;
|
||||
while (getline(file, line)) {
|
||||
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
||||
continue;
|
||||
}
|
||||
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
|
||||
std::stringstream ss(line);
|
||||
ss >> keyword >> attribute;
|
||||
type = "";
|
||||
while (ss >> type_w)
|
||||
type += type_w + " ";
|
||||
attributes.emplace_back(trim(attribute), trim(type));
|
||||
continue;
|
||||
}
|
||||
if (line[0] == '@') {
|
||||
continue;
|
||||
}
|
||||
lines.push_back(line);
|
||||
}
|
||||
file.close();
|
||||
if (attributes.empty())
|
||||
throw std::invalid_argument("No attributes found");
|
||||
}
|
||||
|
||||
void ArffFiles::load(const std::string& fileName, bool classLast)
|
||||
{
|
||||
int labelIndex;
|
||||
loadCommon(fileName);
|
||||
if (classLast) {
|
||||
className = std::get<0>(attributes.back());
|
||||
classType = std::get<1>(attributes.back());
|
||||
attributes.pop_back();
|
||||
labelIndex = static_cast<int>(attributes.size());
|
||||
} else {
|
||||
className = std::get<0>(attributes.front());
|
||||
classType = std::get<1>(attributes.front());
|
||||
attributes.erase(attributes.begin());
|
||||
labelIndex = 0;
|
||||
}
|
||||
generateDataset(labelIndex);
|
||||
}
|
||||
void ArffFiles::load(const std::string& fileName, const std::string& name)
|
||||
{
|
||||
int labelIndex;
|
||||
loadCommon(fileName);
|
||||
bool found = false;
|
||||
for (int i = 0; i < attributes.size(); ++i) {
|
||||
if (attributes[i].first == name) {
|
||||
className = std::get<0>(attributes[i]);
|
||||
classType = std::get<1>(attributes[i]);
|
||||
attributes.erase(attributes.begin() + i);
|
||||
labelIndex = i;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
throw std::invalid_argument("Class name not found");
|
||||
}
|
||||
generateDataset(labelIndex);
|
||||
}
|
||||
|
||||
void ArffFiles::generateDataset(int labelIndex)
|
||||
{
|
||||
X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
|
||||
auto yy = std::vector<std::string>(lines.size(), "");
|
||||
auto removeLines = std::vector<int>(); // Lines with missing values
|
||||
for (size_t i = 0; i < lines.size(); i++) {
|
||||
std::stringstream ss(lines[i]);
|
||||
std::string value;
|
||||
int pos = 0;
|
||||
int xIndex = 0;
|
||||
while (getline(ss, value, ',')) {
|
||||
if (pos++ == labelIndex) {
|
||||
yy[i] = value;
|
||||
} else {
|
||||
if (value == "?") {
|
||||
X[xIndex++][i] = -1;
|
||||
removeLines.push_back(i);
|
||||
} else
|
||||
X[xIndex++][i] = stof(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto i : removeLines) {
|
||||
yy.erase(yy.begin() + i);
|
||||
for (auto& x : X) {
|
||||
x.erase(x.begin() + i);
|
||||
}
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
|
||||
std::string ArffFiles::trim(const std::string& source)
|
||||
{
|
||||
std::string s(source);
|
||||
s.erase(0, s.find_first_not_of(" '\n\r\t"));
|
||||
s.erase(s.find_last_not_of(" '\n\r\t") + 1);
|
||||
return s;
|
||||
}
|
||||
|
||||
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
|
||||
{
|
||||
std::vector<int> yy;
|
||||
yy.reserve(labels_t.size());
|
||||
std::map<std::string, int> labelMap;
|
||||
int i = 0;
|
||||
for (const std::string& label : labels_t) {
|
||||
if (labelMap.find(label) == labelMap.end()) {
|
||||
labelMap[label] = i++;
|
||||
}
|
||||
yy.push_back(labelMap[label]);
|
||||
}
|
||||
return yy;
|
||||
}
|
@ -1,38 +0,0 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class ArffFiles {
|
||||
private:
|
||||
std::vector<std::string> lines;
|
||||
std::vector<std::pair<std::string, std::string>> attributes;
|
||||
std::string className;
|
||||
std::string classType;
|
||||
std::vector<std::vector<float>> X;
|
||||
std::vector<int> y;
|
||||
void generateDataset(int);
|
||||
void loadCommon(std::string);
|
||||
public:
|
||||
ArffFiles();
|
||||
void load(const std::string&, bool = true);
|
||||
void load(const std::string&, const std::string&);
|
||||
std::vector<std::string> getLines() const;
|
||||
unsigned long int getSize() const;
|
||||
std::string getClassName() const;
|
||||
std::string getClassType() const;
|
||||
static std::string trim(const std::string&);
|
||||
std::vector<std::vector<float>>& getX();
|
||||
std::vector<int>& getY();
|
||||
std::vector<std::pair<std::string, std::string>> getAttributes() const;
|
||||
static std::vector<int> factorize(const std::vector<std::string>& labels_t);
|
||||
};
|
||||
|
||||
#endif
|
@ -1 +0,0 @@
|
||||
add_library(ArffFiles ArffFiles.cc)
|
@ -1,11 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
project(mdlp)
|
||||
|
||||
if (POLICY CMP0135)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
endif ()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
add_library(mdlp CPPFImdlp.cpp Metrics.cpp)
|
||||
|
@ -1,222 +0,0 @@
|
||||
#include <numeric>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include "CPPFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
|
||||
namespace mdlp {
|
||||
|
||||
CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
|
||||
max_depth(max_depth_),
|
||||
proposed_cuts(proposed)
|
||||
{
|
||||
}
|
||||
|
||||
CPPFImdlp::CPPFImdlp() = default;
|
||||
|
||||
CPPFImdlp::~CPPFImdlp() = default;
|
||||
|
||||
size_t CPPFImdlp::compute_max_num_cut_points() const
|
||||
{
|
||||
// Set the actual maximum number of cut points as a number or as a percentage of the number of samples
|
||||
if (proposed_cuts == 0) {
|
||||
return numeric_limits<size_t>::max();
|
||||
}
|
||||
if (proposed_cuts < 0 || proposed_cuts > static_cast<float>(X.size())) {
|
||||
throw invalid_argument("wrong proposed num_cuts value");
|
||||
}
|
||||
if (proposed_cuts < 1)
|
||||
return static_cast<size_t>(round(static_cast<float>(X.size()) * proposed_cuts));
|
||||
return static_cast<size_t>(proposed_cuts);
|
||||
}
|
||||
|
||||
void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
X = X_;
|
||||
y = y_;
|
||||
num_cut_points = compute_max_num_cut_points();
|
||||
depth = 0;
|
||||
discretizedData.clear();
|
||||
cutPoints.clear();
|
||||
if (X.size() != y.size()) {
|
||||
throw invalid_argument("X and y must have the same size");
|
||||
}
|
||||
if (X.empty() || y.empty()) {
|
||||
throw invalid_argument("X and y must have at least one element");
|
||||
}
|
||||
if (min_length < 3) {
|
||||
throw invalid_argument("min_length must be greater than 2");
|
||||
}
|
||||
if (max_depth < 1) {
|
||||
throw invalid_argument("max_depth must be greater than 0");
|
||||
}
|
||||
indices = sortIndices(X_, y_);
|
||||
metrics.setData(y, indices);
|
||||
computeCutPoints(0, X.size(), 1);
|
||||
sort(cutPoints.begin(), cutPoints.end());
|
||||
if (num_cut_points > 0) {
|
||||
// Select the best (with lower entropy) cut points
|
||||
while (cutPoints.size() > num_cut_points) {
|
||||
resizeCutPoints();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
size_t n;
|
||||
size_t m;
|
||||
size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
|
||||
size_t idxNext = cut + 1 < end ? cut + 1 : cut;
|
||||
bool backWall; // true if duplicates reach beginning of the interval
|
||||
precision_t previous;
|
||||
precision_t actual;
|
||||
precision_t next;
|
||||
previous = X[indices[idxPrev]];
|
||||
actual = X[indices[cut]];
|
||||
next = X[indices[idxNext]];
|
||||
// definition 2 of the paper => X[t-1] < X[t]
|
||||
// get the first equal value of X in the interval
|
||||
while (idxPrev > start && actual == previous) {
|
||||
previous = X[indices[--idxPrev]];
|
||||
}
|
||||
backWall = idxPrev == start && actual == previous;
|
||||
// get the last equal value of X in the interval
|
||||
while (idxNext < end - 1 && actual == next) {
|
||||
next = X[indices[++idxNext]];
|
||||
}
|
||||
// # of duplicates before cutpoint
|
||||
n = cut - 1 - idxPrev;
|
||||
// # of duplicates after cutpoint
|
||||
m = idxNext - cut - 1;
|
||||
// Decide which values to use
|
||||
cut = cut + (backWall ? m + 1 : -n);
|
||||
actual = X[indices[cut]];
|
||||
return { (actual + previous) / 2, cut };
|
||||
}
|
||||
|
||||
void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
|
||||
{
|
||||
size_t cut;
|
||||
pair<precision_t, size_t> result;
|
||||
// Check if the interval length and the depth are Ok
|
||||
if (end - start < min_length || depth_ > max_depth)
|
||||
return;
|
||||
depth = depth_ > depth ? depth_ : depth;
|
||||
cut = getCandidate(start, end);
|
||||
if (cut == numeric_limits<size_t>::max())
|
||||
return;
|
||||
if (mdlp(start, cut, end)) {
|
||||
result = valueCutPoint(start, cut, end);
|
||||
cut = result.second;
|
||||
cutPoints.push_back(result.first);
|
||||
computeCutPoints(start, cut, depth_ + 1);
|
||||
computeCutPoints(cut, end, depth_ + 1);
|
||||
}
|
||||
}
|
||||
|
||||
size_t CPPFImdlp::getCandidate(size_t start, size_t end)
|
||||
{
|
||||
/* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
|
||||
E(A, TA; S) is minimal amongst all the candidate cut points. */
|
||||
size_t candidate = numeric_limits<size_t>::max();
|
||||
size_t elements = end - start;
|
||||
bool sameValues = true;
|
||||
precision_t entropy_left;
|
||||
precision_t entropy_right;
|
||||
precision_t minEntropy;
|
||||
// Check if all the values of the variable in the interval are the same
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
if (X[indices[idx]] != X[indices[start]]) {
|
||||
sameValues = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (sameValues)
|
||||
return candidate;
|
||||
minEntropy = metrics.entropy(start, end);
|
||||
for (size_t idx = start + 1; idx < end; idx++) {
|
||||
// Cutpoints are always on boundaries (definition 2)
|
||||
if (y[indices[idx]] == y[indices[idx - 1]])
|
||||
continue;
|
||||
entropy_left = precision_t(idx - start) / static_cast<precision_t>(elements) * metrics.entropy(start, idx);
|
||||
entropy_right = precision_t(end - idx) / static_cast<precision_t>(elements) * metrics.entropy(idx, end);
|
||||
if (entropy_left + entropy_right < minEntropy) {
|
||||
minEntropy = entropy_left + entropy_right;
|
||||
candidate = idx;
|
||||
}
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
|
||||
bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
int k;
|
||||
int k1;
|
||||
int k2;
|
||||
precision_t ig;
|
||||
precision_t delta;
|
||||
precision_t ent;
|
||||
precision_t ent1;
|
||||
precision_t ent2;
|
||||
auto N = precision_t(end - start);
|
||||
k = metrics.computeNumClasses(start, end);
|
||||
k1 = metrics.computeNumClasses(start, cut);
|
||||
k2 = metrics.computeNumClasses(cut, end);
|
||||
ent = metrics.entropy(start, end);
|
||||
ent1 = metrics.entropy(start, cut);
|
||||
ent2 = metrics.entropy(cut, end);
|
||||
ig = metrics.informationGain(start, cut, end);
|
||||
delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
|
||||
(precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
|
||||
precision_t term = 1 / N * (log2(N - 1) + delta);
|
||||
return ig > term;
|
||||
}
|
||||
|
||||
// Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
|
||||
indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
|
||||
{
|
||||
indices_t idx(X_.size());
|
||||
iota(idx.begin(), idx.end(), 0);
|
||||
stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
|
||||
if (X_[i1] == X_[i2])
|
||||
return y_[i1] < y_[i2];
|
||||
else
|
||||
return X_[i1] < X_[i2];
|
||||
});
|
||||
return idx;
|
||||
}
|
||||
|
||||
void CPPFImdlp::resizeCutPoints()
|
||||
{
|
||||
//Compute entropy of each of the whole cutpoint set and discards the biggest value
|
||||
precision_t maxEntropy = 0;
|
||||
precision_t entropy;
|
||||
size_t maxEntropyIdx = 0;
|
||||
size_t begin = 0;
|
||||
size_t end;
|
||||
for (size_t idx = 0; idx < cutPoints.size(); idx++) {
|
||||
end = begin;
|
||||
while (X[indices[end]] < cutPoints[idx] && end < X.size())
|
||||
end++;
|
||||
entropy = metrics.entropy(begin, end);
|
||||
if (entropy > maxEntropy) {
|
||||
maxEntropy = entropy;
|
||||
maxEntropyIdx = idx;
|
||||
}
|
||||
begin = end;
|
||||
}
|
||||
cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
|
||||
}
|
||||
labels_t& CPPFImdlp::transform(const samples_t& data)
|
||||
{
|
||||
discretizedData.clear();
|
||||
discretizedData.reserve(data.size());
|
||||
for (const precision_t& item : data) {
|
||||
auto upper = upper_bound(cutPoints.begin(), cutPoints.end(), item);
|
||||
discretizedData.push_back(upper - cutPoints.begin());
|
||||
}
|
||||
return discretizedData;
|
||||
}
|
||||
}
|
@ -1,51 +0,0 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CPPFIMDLP_H
|
||||
#define CPPFIMDLP_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
#include "Metrics.h"
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
|
||||
namespace mdlp {
|
||||
class CPPFImdlp {
|
||||
protected:
|
||||
size_t min_length = 3;
|
||||
int depth = 0;
|
||||
int max_depth = numeric_limits<int>::max();
|
||||
float proposed_cuts = 0;
|
||||
indices_t indices = indices_t();
|
||||
samples_t X = samples_t();
|
||||
labels_t y = labels_t();
|
||||
Metrics metrics = Metrics(y, indices);
|
||||
cutPoints_t cutPoints;
|
||||
size_t num_cut_points = numeric_limits<size_t>::max();
|
||||
labels_t discretizedData = labels_t();
|
||||
|
||||
static indices_t sortIndices(samples_t&, labels_t&);
|
||||
|
||||
void computeCutPoints(size_t, size_t, int);
|
||||
void resizeCutPoints();
|
||||
bool mdlp(size_t, size_t, size_t);
|
||||
size_t getCandidate(size_t, size_t);
|
||||
size_t compute_max_num_cut_points() const;
|
||||
pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
|
||||
|
||||
public:
|
||||
CPPFImdlp();
|
||||
CPPFImdlp(size_t, int, float);
|
||||
~CPPFImdlp();
|
||||
void fit(samples_t&, labels_t&);
|
||||
inline cutPoints_t getCutPoints() const { return cutPoints; };
|
||||
labels_t& transform(const samples_t&);
|
||||
inline int get_depth() const { return depth; };
|
||||
static inline string version() { return "1.1.2"; };
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 Ricardo Montañana Gómez
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -1,78 +0,0 @@
|
||||
#include "Metrics.h"
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_),
|
||||
numClasses(computeNumClasses(0, indices.size()))
|
||||
{
|
||||
}
|
||||
|
||||
int Metrics::computeNumClasses(size_t start, size_t end)
|
||||
{
|
||||
set<int> nClasses;
|
||||
for (auto i = start; i < end; ++i) {
|
||||
nClasses.insert(y[indices[i]]);
|
||||
}
|
||||
return static_cast<int>(nClasses.size());
|
||||
}
|
||||
|
||||
void Metrics::setData(const labels_t& y_, const indices_t& indices_)
|
||||
{
|
||||
indices = indices_;
|
||||
y = y_;
|
||||
numClasses = computeNumClasses(0, indices.size());
|
||||
entropyCache.clear();
|
||||
igCache.clear();
|
||||
}
|
||||
|
||||
precision_t Metrics::entropy(size_t start, size_t end)
|
||||
{
|
||||
precision_t p;
|
||||
precision_t ventropy = 0;
|
||||
int nElements = 0;
|
||||
labels_t counts(numClasses + 1, 0);
|
||||
if (end - start < 2)
|
||||
return 0;
|
||||
if (entropyCache.find({ start, end }) != entropyCache.end()) {
|
||||
return entropyCache[{start, end}];
|
||||
}
|
||||
for (auto i = &indices[start]; i != &indices[end]; ++i) {
|
||||
counts[y[*i]]++;
|
||||
nElements++;
|
||||
}
|
||||
for (auto count : counts) {
|
||||
if (count > 0) {
|
||||
p = static_cast<precision_t>(count) / static_cast<precision_t>(nElements);
|
||||
ventropy -= p * log2(p);
|
||||
}
|
||||
}
|
||||
entropyCache[{start, end}] = ventropy;
|
||||
return ventropy;
|
||||
}
|
||||
|
||||
precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
|
||||
{
|
||||
precision_t iGain;
|
||||
precision_t entropyInterval;
|
||||
precision_t entropyLeft;
|
||||
precision_t entropyRight;
|
||||
size_t nElementsLeft = cut - start;
|
||||
size_t nElementsRight = end - cut;
|
||||
size_t nElements = end - start;
|
||||
if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
|
||||
return igCache[make_tuple(start, cut, end)];
|
||||
}
|
||||
entropyInterval = entropy(start, end);
|
||||
entropyLeft = entropy(start, cut);
|
||||
entropyRight = entropy(cut, end);
|
||||
iGain = entropyInterval -
|
||||
(static_cast<precision_t>(nElementsLeft) * entropyLeft +
|
||||
static_cast<precision_t>(nElementsRight) * entropyRight) /
|
||||
static_cast<precision_t>(nElements);
|
||||
igCache[make_tuple(start, cut, end)] = iGain;
|
||||
return iGain;
|
||||
}
|
||||
|
||||
}
|
@ -1,28 +0,0 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef CCMETRICS_H
|
||||
#define CCMETRICS_H
|
||||
|
||||
#include "typesFImdlp.h"
|
||||
|
||||
namespace mdlp {
|
||||
class Metrics {
|
||||
protected:
|
||||
labels_t& y;
|
||||
indices_t& indices;
|
||||
int numClasses;
|
||||
cacheEnt_t entropyCache = cacheEnt_t();
|
||||
cacheIg_t igCache = cacheIg_t();
|
||||
public:
|
||||
Metrics(labels_t&, indices_t&);
|
||||
void setData(const labels_t&, const indices_t&);
|
||||
int computeNumClasses(size_t, size_t);
|
||||
precision_t entropy(size_t, size_t);
|
||||
precision_t informationGain(size_t, size_t, size_t);
|
||||
};
|
||||
}
|
||||
#endif
|
@ -1,41 +0,0 @@
|
||||
[![Build](https://github.com/rmontanana/mdlp/actions/workflows/build.yml/badge.svg)](https://github.com/rmontanana/mdlp/actions/workflows/build.yml)
|
||||
[![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_mdlp&metric=alert_status)](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||
[![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_mdlp&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_mdlp)
|
||||
|
||||
# mdlp
|
||||
|
||||
Discretization algorithm based on the paper by Fayyad & Irani [Multi-Interval Discretization of Continuous-Valued Attributes for Classification Learning](https://www.ijcai.org/Proceedings/93-2/Papers/022.pdf)
|
||||
|
||||
The implementation tries to mitigate the problem of different label values with the same value of the variable:
|
||||
|
||||
- Sorts the values of the variable using the label values as a tie-breaker
|
||||
- Once found a valid candidate for the split, it checks if the previous value is the same as actual one, and tries to get previous one, or next if the former is not possible.
|
||||
|
||||
Other features:
|
||||
|
||||
- Intervals with the same value of the variable are not taken into account for cutpoints.
|
||||
- Intervals have to have more than two examples to be evaluated.
|
||||
|
||||
The algorithm returns the cut points for the variable.
|
||||
|
||||
## Sample
|
||||
|
||||
To run the sample, just execute the following commands:
|
||||
|
||||
```bash
|
||||
cd sample
|
||||
cmake -B build
|
||||
cd build
|
||||
make
|
||||
./sample -f iris -m 2
|
||||
./sample -h
|
||||
```
|
||||
|
||||
## Test
|
||||
|
||||
To run the tests and see coverage (llvm & gcovr have to be installed), execute the following commands:
|
||||
|
||||
```bash
|
||||
cd tests
|
||||
./test
|
||||
```
|
@ -1,24 +0,0 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
||||
using namespace std;
|
||||
namespace mdlp {
|
||||
typedef float precision_t;
|
||||
typedef vector<precision_t> samples_t;
|
||||
typedef vector<int> labels_t;
|
||||
typedef vector<size_t> indices_t;
|
||||
typedef vector<precision_t> cutPoints_t;
|
||||
typedef map<pair<int, int>, precision_t> cacheEnt_t;
|
||||
typedef map<tuple<int, int, int>, precision_t> cacheIg_t;
|
||||
}
|
||||
#endif
|
@ -4,7 +4,7 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <ArffFiles.h>
|
||||
#include <ArffFiles.hpp>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <bayesnet/ensembles/BoostAODE.h>
|
||||
|
||||
@ -60,9 +60,9 @@ int main(int argc, char* argv[])
|
||||
auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
|
||||
std::cout << "Library version: " << clf.getVersion() << std::endl;
|
||||
tie(X, y, features, className, states) = loadDataset(file_name, true);
|
||||
clf.fit(X, y, features, className, states);
|
||||
clf.fit(X, y, features, className, states, bayesnet::Smoothing_t::LAPLACE);
|
||||
auto score = clf.score(X, y);
|
||||
std::cout << "File: " << file_name << " score: " << score << std::endl;
|
||||
std::cout << "File: " << file_name << " Model: BoostAODE score: " << score << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1,24 +1,28 @@
|
||||
if(ENABLE_TESTING)
|
||||
include_directories(
|
||||
${BayesNet_SOURCE_DIR}/lib/Files
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp
|
||||
${BayesNet_SOURCE_DIR}/tests/lib/Files
|
||||
${BayesNet_SOURCE_DIR}/lib/folding
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp/src
|
||||
${BayesNet_SOURCE_DIR}/lib/json/include
|
||||
${BayesNet_SOURCE_DIR}
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
|
||||
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc
|
||||
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc
|
||||
TestUtils.cc TestBayesEnsemble.cc ${BayesNet_SOURCES})
|
||||
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain )
|
||||
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestA2DE.cc
|
||||
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc ${BayesNet_SOURCES})
|
||||
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp PRIVATE Catch2::Catch2WithMain)
|
||||
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
|
||||
add_test(NAME Network COMMAND TestBayesNet "[Network]")
|
||||
add_test(NAME Node COMMAND TestBayesNet "[Node]")
|
||||
add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
|
||||
add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
|
||||
add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]")
|
||||
add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]")
|
||||
add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
|
||||
add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
|
||||
add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
|
||||
add_test(NAME FeatureSelection COMMAND TestBayesNet "[FeatureSelection]")
|
||||
add_test(NAME Metrics COMMAND TestBayesNet "[Metrics]")
|
||||
add_test(NAME Models COMMAND TestBayesNet "[Models]")
|
||||
add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
|
||||
add_test(NAME Modules COMMAND TestBayesNet "[Modules]")
|
||||
add_test(NAME Network COMMAND TestBayesNet "[Network]")
|
||||
add_test(NAME Node COMMAND TestBayesNet "[Node]")
|
||||
add_test(NAME MST COMMAND TestBayesNet "[MST]")
|
||||
endif(ENABLE_TESTING)
|
||||
|
49
tests/TestA2DE.cc
Normal file
49
tests/TestA2DE.cc
Normal file
@ -0,0 +1,49 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include "bayesnet/ensembles/A2DE.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
|
||||
TEST_CASE("Fit and Score", "[A2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::A2DE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.831776).epsilon(raw.epsilon));
|
||||
REQUIRE(clf.getNumberOfNodes() == 360);
|
||||
REQUIRE(clf.getNumberOfEdges() == 756);
|
||||
REQUIRE(clf.getNotes().size() == 0);
|
||||
}
|
||||
TEST_CASE("Test score with predict_voting", "[A2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::A2DE(true);
|
||||
auto hyperparameters = nlohmann::json{
|
||||
{"predict_voting", true},
|
||||
};
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.82243).epsilon(raw.epsilon));
|
||||
hyperparameters["predict_voting"] = false;
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.83178).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Test graph", "[A2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::A2DE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto graph = clf.graph();
|
||||
REQUIRE(graph.size() == 78);
|
||||
REQUIRE(graph[0] == "digraph BayesNet {\nlabel=<BayesNet A2DE_0>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");
|
||||
REQUIRE(graph[1] == "\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n");
|
||||
}
|
@ -18,47 +18,47 @@ TEST_CASE("Test Cannot build dataset with wrong data vector", "[Classifier]")
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
raw.yv.pop_back();
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
}
|
||||
TEST_CASE("Test Cannot build dataset with wrong data tensor", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto yshort = torch::zeros({ 149 }, torch::kInt32);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.featurest, raw.classNamet, raw.statest), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.featurest, raw.classNamet, raw.statest), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, yshort, raw.features, raw.className, raw.states, raw.smoothing), "* Error in X and y dimensions *\nX dimensions: [4, 150]\ny dimensions: [149]");
|
||||
}
|
||||
TEST_CASE("Invalid data type", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", false);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), "dataset (X, y) must be of type Integer");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "dataset (X, y) must be of type Integer");
|
||||
}
|
||||
TEST_CASE("Invalid number of features", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto Xt = torch::cat({ raw.Xt, torch::zeros({ 1, 150 }, torch::kInt32) }, 0);
|
||||
REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest), "Classifier: X 5 and features 4 must have the same number of features");
|
||||
REQUIRE_THROWS_AS(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing), "Classifier: X 5 and features 4 must have the same number of features");
|
||||
}
|
||||
TEST_CASE("Invalid class name", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, "duck", raw.statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, "duck", raw.statest), "class name not found in states");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, "duck", raw.states, raw.smoothing), "class name not found in states");
|
||||
}
|
||||
TEST_CASE("Invalid feature name", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto statest = raw.statest;
|
||||
auto statest = raw.states;
|
||||
statest.erase("petallength");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, statest), "feature [petallength] not found in states");
|
||||
REQUIRE_THROWS_AS(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(model.fit(raw.Xt, raw.yt, raw.features, raw.className, statest, raw.smoothing), "feature [petallength] not found in states");
|
||||
}
|
||||
TEST_CASE("Invalid hyperparameter", "[Classifier]")
|
||||
{
|
||||
@ -71,7 +71,7 @@ TEST_CASE("Topological order", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto order = model.topological_order();
|
||||
REQUIRE(order.size() == 4);
|
||||
REQUIRE(order[0] == "petallength");
|
||||
@ -83,9 +83,9 @@ TEST_CASE("Dump_cpt", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::TAN();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto cpt = model.dump_cpt();
|
||||
REQUIRE(cpt.size() == 1713);
|
||||
REQUIRE(cpt.size() == 1718);
|
||||
}
|
||||
TEST_CASE("Not fitted model", "[Classifier]")
|
||||
{
|
||||
@ -111,7 +111,7 @@ TEST_CASE("KDB Graph", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::KDB(2);
|
||||
auto raw = RawDatasets("iris", true);
|
||||
model.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
model.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto graph = model.graph();
|
||||
REQUIRE(graph.size() == 15);
|
||||
}
|
||||
@ -119,7 +119,7 @@ TEST_CASE("KDBLd Graph", "[Classifier]")
|
||||
{
|
||||
auto model = bayesnet::KDBLd(2);
|
||||
auto raw = RawDatasets("iris", false);
|
||||
model.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
model.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto graph = model.graph();
|
||||
REQUIRE(graph.size() == 15);
|
||||
}
|
@ -18,7 +18,7 @@ TEST_CASE("Topological Order", "[Ensemble]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto order = clf.topological_order();
|
||||
REQUIRE(order.size() == 0);
|
||||
}
|
||||
@ -26,7 +26,7 @@ TEST_CASE("Dump CPT", "[Ensemble]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto dump = clf.dump_cpt();
|
||||
REQUIRE(dump == "");
|
||||
}
|
||||
@ -34,7 +34,7 @@ TEST_CASE("Number of States", "[Ensemble]")
|
||||
{
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfStates() == 76);
|
||||
}
|
||||
TEST_CASE("Show", "[Ensemble]")
|
||||
@ -46,7 +46,7 @@ TEST_CASE("Show", "[Ensemble]")
|
||||
{"maxTolerance", 1},
|
||||
{"convergence", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
std::vector<std::string> expected = {
|
||||
"class -> sepallength, sepalwidth, petallength, petalwidth, ",
|
||||
"petallength -> sepallength, sepalwidth, petalwidth, ",
|
||||
@ -78,16 +78,16 @@ TEST_CASE("Graph", "[Ensemble]")
|
||||
{
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto graph = clf.graph();
|
||||
REQUIRE(graph.size() == 56);
|
||||
auto clf2 = bayesnet::AODE();
|
||||
clf2.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf2.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
graph = clf2.graph();
|
||||
REQUIRE(graph.size() == 56);
|
||||
raw = RawDatasets("glass", false);
|
||||
auto clf3 = bayesnet::AODELd();
|
||||
clf3.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
clf3.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
graph = clf3.graph();
|
||||
REQUIRE(graph.size() == 261);
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include "bayesnet/utils/BayesMetrics.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
#include "Timer.h"
|
||||
|
||||
TEST_CASE("Metrics Test", "[Metrics]")
|
||||
{
|
||||
@ -27,8 +27,8 @@ TEST_CASE("Metrics Test", "[Metrics]")
|
||||
{"diabetes", 0.0345470614}
|
||||
};
|
||||
map<pair<std::string, int>, std::vector<pair<int, int>>> resultsMST = {
|
||||
{ {"glass", 0}, { {0, 6}, {0, 5}, {0, 3}, {5, 1}, {5, 8}, {5, 4}, {6, 2}, {6, 7} } },
|
||||
{ {"glass", 1}, { {1, 5}, {5, 0}, {5, 8}, {5, 4}, {0, 6}, {0, 3}, {6, 2}, {6, 7} } },
|
||||
{ {"glass", 0}, { {0, 6}, {0, 5}, {0, 3}, {3, 4}, {5, 1}, {5, 8}, {6, 2}, {6, 7} } },
|
||||
{ {"glass", 1}, { {1, 5}, {5, 0}, {5, 8}, {0, 6}, {0, 3}, {3, 4}, {6, 2}, {6, 7} } },
|
||||
{ {"iris", 0}, { {0, 1}, {0, 2}, {1, 3} } },
|
||||
{ {"iris", 1}, { {1, 0}, {1, 3}, {0, 2} } },
|
||||
{ {"ecoli", 0}, { {0, 1}, {0, 2}, {1, 5}, {1, 3}, {5, 6}, {5, 4} } },
|
||||
@ -37,8 +37,8 @@ TEST_CASE("Metrics Test", "[Metrics]")
|
||||
{ {"diabetes", 1}, { {1, 4}, {4, 3}, {3, 2}, {3, 5}, {2, 0}, {0, 7}, {0, 6} } }
|
||||
};
|
||||
auto raw = RawDatasets(file_name, true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.featurest, raw.classNamet, raw.classNumStates);
|
||||
bayesnet::Metrics metricsv(raw.Xv, raw.yv, raw.featurest, raw.classNamet, raw.classNumStates);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
|
||||
bayesnet::Metrics metricsv(raw.Xv, raw.yv, raw.features, raw.className, raw.classNumStates);
|
||||
|
||||
SECTION("Test Constructor")
|
||||
{
|
||||
@ -69,10 +69,199 @@ TEST_CASE("Metrics Test", "[Metrics]")
|
||||
auto weights_matrix = metrics.conditionalEdge(raw.weights);
|
||||
auto weights_matrixv = metricsv.conditionalEdge(raw.weights);
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
auto result = metrics.maximumSpanningTree(raw.featurest, weights_matrix, i);
|
||||
auto resultv = metricsv.maximumSpanningTree(raw.featurest, weights_matrixv, i);
|
||||
auto result = metrics.maximumSpanningTree(raw.features, weights_matrix, i);
|
||||
auto resultv = metricsv.maximumSpanningTree(raw.features, weights_matrixv, i);
|
||||
REQUIRE(result == resultsMST.at({ file_name, i }));
|
||||
REQUIRE(resultv == resultsMST.at({ file_name, i }));
|
||||
}
|
||||
}
|
||||
}
|
||||
TEST_CASE("Select all features ordered by Mutual Information", "[Metrics]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
|
||||
auto kBest = metrics.SelectKBestWeighted(raw.weights, true, 0);
|
||||
REQUIRE(kBest.size() == raw.features.size());
|
||||
REQUIRE(kBest == std::vector<int>({ 1, 0, 3, 2 }));
|
||||
}
|
||||
TEST_CASE("Entropy Test", "[Metrics]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
|
||||
auto result = metrics.entropy(raw.dataset.index({ 0, "..." }), raw.weights);
|
||||
REQUIRE(result == Catch::Approx(0.9848175048828125).epsilon(raw.epsilon));
|
||||
auto data = torch::tensor({ 0, 0, 0, 0, 0, 0, 0, 1, 1, 1 }, torch::kInt32);
|
||||
auto weights = torch::tensor({ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, torch::kFloat32);
|
||||
result = metrics.entropy(data, weights);
|
||||
REQUIRE(result == Catch::Approx(0.61086434125900269).epsilon(raw.epsilon));
|
||||
data = torch::tensor({ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 }, torch::kInt32);
|
||||
result = metrics.entropy(data, weights);
|
||||
REQUIRE(result == Catch::Approx(0.693147180559945).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Conditional Entropy", "[Metrics]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
|
||||
auto expected = std::map<std::pair<int, int>, double>{
|
||||
{ { 0, 1 }, 1.32674 },
|
||||
{ { 0, 2 }, 0.236253 },
|
||||
{ { 0, 3 }, 0.1202 },
|
||||
{ { 1, 2 }, 0.252551 },
|
||||
{ { 1, 3 }, 0.10515 },
|
||||
{ { 2, 3 }, 0.108323 },
|
||||
};
|
||||
for (int i = 0; i < raw.features.size() - 1; ++i) {
|
||||
for (int j = i + 1; j < raw.features.size(); ++j) {
|
||||
double result = metrics.conditionalEntropy(raw.dataset.index({ i, "..." }), raw.dataset.index({ j, "..." }), raw.yt, raw.weights);
|
||||
REQUIRE(result == Catch::Approx(expected.at({ i, j })).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
}
|
||||
TEST_CASE("Conditional Mutual Information", "[Metrics]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
|
||||
auto expected = std::map<std::pair<int, int>, double>{
|
||||
{ { 0, 1 }, 0.0 },
|
||||
{ { 0, 2 }, 0.287696 },
|
||||
{ { 0, 3 }, 0.403749 },
|
||||
{ { 1, 2 }, 1.17112 },
|
||||
{ { 1, 3 }, 1.31852 },
|
||||
{ { 2, 3 }, 0.210068 },
|
||||
};
|
||||
for (int i = 0; i < raw.features.size() - 1; ++i) {
|
||||
for (int j = i + 1; j < raw.features.size(); ++j) {
|
||||
double result = metrics.conditionalMutualInformation(raw.dataset.index({ i, "..." }), raw.dataset.index({ j, "..." }), raw.yt, raw.weights);
|
||||
REQUIRE(result == Catch::Approx(expected.at({ i, j })).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
}
|
||||
TEST_CASE("Select K Pairs descending", "[Metrics]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
|
||||
std::vector<int> empty;
|
||||
auto results = metrics.SelectKPairs(raw.weights, empty, false);
|
||||
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
|
||||
{ { 1, 3 }, 1.31852 },
|
||||
{ { 1, 2 }, 1.17112 },
|
||||
{ { 0, 3 }, 0.403749 },
|
||||
{ { 0, 2 }, 0.287696 },
|
||||
{ { 2, 3 }, 0.210068 },
|
||||
{ { 0, 1 }, 0.0 },
|
||||
};
|
||||
auto scores = metrics.getScoresKPairs();
|
||||
for (int i = 0; i < results.size(); ++i) {
|
||||
auto result = results[i];
|
||||
auto expect = expected[i];
|
||||
auto score = scores[i];
|
||||
REQUIRE(result.first == expect.first.first);
|
||||
REQUIRE(result.second == expect.first.second);
|
||||
REQUIRE(score.first.first == expect.first.first);
|
||||
REQUIRE(score.first.second == expect.first.second);
|
||||
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
|
||||
}
|
||||
REQUIRE(results.size() == 6);
|
||||
REQUIRE(scores.size() == 6);
|
||||
}
|
||||
TEST_CASE("Select K Pairs ascending", "[Metrics]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
|
||||
std::vector<int> empty;
|
||||
auto results = metrics.SelectKPairs(raw.weights, empty, true);
|
||||
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
|
||||
{ { 0, 1 }, 0.0 },
|
||||
{ { 2, 3 }, 0.210068 },
|
||||
{ { 0, 2 }, 0.287696 },
|
||||
{ { 0, 3 }, 0.403749 },
|
||||
{ { 1, 2 }, 1.17112 },
|
||||
{ { 1, 3 }, 1.31852 },
|
||||
};
|
||||
auto scores = metrics.getScoresKPairs();
|
||||
for (int i = 0; i < results.size(); ++i) {
|
||||
auto result = results[i];
|
||||
auto expect = expected[i];
|
||||
auto score = scores[i];
|
||||
REQUIRE(result.first == expect.first.first);
|
||||
REQUIRE(result.second == expect.first.second);
|
||||
REQUIRE(score.first.first == expect.first.first);
|
||||
REQUIRE(score.first.second == expect.first.second);
|
||||
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
|
||||
}
|
||||
REQUIRE(results.size() == 6);
|
||||
REQUIRE(scores.size() == 6);
|
||||
}
|
||||
TEST_CASE("Select K Pairs with features excluded", "[Metrics]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
|
||||
std::vector<int> excluded = { 0, 3 };
|
||||
auto results = metrics.SelectKPairs(raw.weights, excluded, true);
|
||||
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
|
||||
{ { 1, 2 }, 1.17112 },
|
||||
};
|
||||
auto scores = metrics.getScoresKPairs();
|
||||
for (int i = 0; i < results.size(); ++i) {
|
||||
auto result = results[i];
|
||||
auto expect = expected[i];
|
||||
auto score = scores[i];
|
||||
REQUIRE(result.first == expect.first.first);
|
||||
REQUIRE(result.second == expect.first.second);
|
||||
REQUIRE(score.first.first == expect.first.first);
|
||||
REQUIRE(score.first.second == expect.first.second);
|
||||
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
|
||||
}
|
||||
REQUIRE(results.size() == 1);
|
||||
REQUIRE(scores.size() == 1);
|
||||
}
|
||||
TEST_CASE("Select K Pairs with number of pairs descending", "[Metrics]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
|
||||
std::vector<int> empty;
|
||||
auto results = metrics.SelectKPairs(raw.weights, empty, false, 3);
|
||||
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
|
||||
{ { 1, 3 }, 1.31852 },
|
||||
{ { 1, 2 }, 1.17112 },
|
||||
{ { 0, 3 }, 0.403749 }
|
||||
};
|
||||
auto scores = metrics.getScoresKPairs();
|
||||
REQUIRE(results.size() == 3);
|
||||
REQUIRE(scores.size() == 3);
|
||||
for (int i = 0; i < results.size(); ++i) {
|
||||
auto result = results[i];
|
||||
auto expect = expected[i];
|
||||
auto score = scores[i];
|
||||
REQUIRE(result.first == expect.first.first);
|
||||
REQUIRE(result.second == expect.first.second);
|
||||
REQUIRE(score.first.first == expect.first.first);
|
||||
REQUIRE(score.first.second == expect.first.second);
|
||||
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
TEST_CASE("Select K Pairs with number of pairs ascending", "[Metrics]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
bayesnet::Metrics metrics(raw.dataset, raw.features, raw.className, raw.classNumStates);
|
||||
std::vector<int> empty;
|
||||
auto results = metrics.SelectKPairs(raw.weights, empty, true, 3);
|
||||
auto expected = std::vector<std::pair<std::pair<int, int>, double>>{
|
||||
{ { 0, 3 }, 0.403749 },
|
||||
{ { 1, 2 }, 1.17112 },
|
||||
{ { 1, 3 }, 1.31852 }
|
||||
};
|
||||
auto scores = metrics.getScoresKPairs();
|
||||
REQUIRE(results.size() == 3);
|
||||
REQUIRE(scores.size() == 3);
|
||||
for (int i = 0; i < results.size(); ++i) {
|
||||
auto result = results[i];
|
||||
auto expect = expected[i];
|
||||
auto score = scores[i];
|
||||
REQUIRE(result.first == expect.first.first);
|
||||
REQUIRE(result.second == expect.first.second);
|
||||
REQUIRE(score.first.first == expect.first.first);
|
||||
REQUIRE(score.first.second == expect.first.second);
|
||||
REQUIRE(score.second == Catch::Approx(expect.second).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
@ -20,20 +20,20 @@
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
const std::string ACTUAL_VERSION = "1.0.4.1";
|
||||
const std::string ACTUAL_VERSION = "1.0.6";
|
||||
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
{
|
||||
map <pair<std::string, std::string>, float> scores{
|
||||
// Diabetes
|
||||
{{"diabetes", "AODE"}, 0.82161}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
|
||||
{{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f},
|
||||
{{"diabetes", "AODELd"}, 0.8125f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.7890625f}, {{"diabetes", "TANLd"}, 0.803385437f}, {{"diabetes", "BoostAODE"}, 0.83984f},
|
||||
// Ecoli
|
||||
{{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
|
||||
{{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f},
|
||||
{{"ecoli", "AODELd"}, 0.875f}, {{"ecoli", "KDBLd"}, 0.880952358f}, {{"ecoli", "SPODELd"}, 0.839285731f}, {{"ecoli", "TANLd"}, 0.848214269f}, {{"ecoli", "BoostAODE"}, 0.89583f},
|
||||
// Glass
|
||||
{{"glass", "AODE"}, 0.79439}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
|
||||
{{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f},
|
||||
{{"glass", "AODELd"}, 0.799065411f}, {{"glass", "KDBLd"}, 0.82710278f}, {{"glass", "SPODELd"}, 0.780373812f}, {{"glass", "TANLd"}, 0.869158864f}, {{"glass", "BoostAODE"}, 0.84579f},
|
||||
// Iris
|
||||
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
|
||||
{{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f}
|
||||
@ -54,16 +54,16 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
auto clf = models[name];
|
||||
auto discretize = name.substr(name.length() - 2) != "Ld";
|
||||
auto raw = RawDatasets(file_name, discretize);
|
||||
clf->fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf->score(raw.Xt, raw.yt);
|
||||
INFO("Classifier: " + name + " File: " + file_name);
|
||||
INFO("Classifier: " << name << " File: " << file_name);
|
||||
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
|
||||
REQUIRE(clf->getStatus() == bayesnet::NORMAL);
|
||||
}
|
||||
}
|
||||
SECTION("Library check version")
|
||||
{
|
||||
INFO("Checking version of " + name + " classifier");
|
||||
INFO("Checking version of " << name << " classifier");
|
||||
REQUIRE(clf->getVersion() == ACTUAL_VERSION);
|
||||
}
|
||||
delete clf;
|
||||
@ -71,17 +71,17 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
TEST_CASE("Models features & Graph", "[Models]")
|
||||
{
|
||||
auto graph = std::vector<std::string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
|
||||
"class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
|
||||
"class -> sepallength", "class -> sepalwidth", "class -> petallength", "class -> petalwidth", "petallength [shape=circle] \n",
|
||||
"petallength -> sepallength", "petalwidth [shape=circle] \n", "sepallength [shape=circle] \n",
|
||||
"sepallength -> sepalwidth", "sepalwidth [shape=circle] \n", "sepalwidth -> petalwidth", "}\n"
|
||||
"\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
|
||||
"\"class\" -> \"sepallength\"", "\"class\" -> \"sepalwidth\"", "\"class\" -> \"petallength\"", "\"class\" -> \"petalwidth\"", "\"petallength\" [shape=circle] \n",
|
||||
"\"petallength\" -> \"sepallength\"", "\"petalwidth\" [shape=circle] \n", "\"sepallength\" [shape=circle] \n",
|
||||
"\"sepallength\" -> \"sepalwidth\"", "\"sepalwidth\" [shape=circle] \n", "\"sepalwidth\" -> \"petalwidth\"", "}\n"
|
||||
}
|
||||
);
|
||||
SECTION("Test TAN")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::TAN();
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||
REQUIRE(clf.getNumberOfStates() == 19);
|
||||
@ -93,10 +93,10 @@ TEST_CASE("Models features & Graph", "[Models]")
|
||||
{
|
||||
auto clf = bayesnet::TANLd();
|
||||
auto raw = RawDatasets("iris", false);
|
||||
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||
REQUIRE(clf.getNumberOfStates() == 19);
|
||||
REQUIRE(clf.getNumberOfStates() == 27);
|
||||
REQUIRE(clf.getClassNumStates() == 3);
|
||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
|
||||
REQUIRE(clf.graph("Test") == graph);
|
||||
@ -106,7 +106,7 @@ TEST_CASE("Get num features & num edges", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::KDB(2);
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||
REQUIRE(clf.getNumberOfEdges() == 8);
|
||||
}
|
||||
@ -166,7 +166,7 @@ TEST_CASE("Model predict_proba", "[Models]")
|
||||
SECTION("Test " + model + " predict_proba")
|
||||
{
|
||||
auto clf = models[model];
|
||||
clf->fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf->fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto y_pred_proba = clf->predict_proba(raw.Xv);
|
||||
auto yt_pred_proba = clf->predict_proba(raw.Xt);
|
||||
auto y_pred = clf->predict(raw.Xv);
|
||||
@ -203,7 +203,7 @@ TEST_CASE("AODE voting-proba", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::AODE(false);
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_proba = clf.score(raw.Xv, raw.yv);
|
||||
auto pred_proba = clf.predict_proba(raw.Xv);
|
||||
clf.setHyperparameters({
|
||||
@ -222,9 +222,9 @@ TEST_CASE("SPODELd dataset", "[Models]")
|
||||
auto raw = RawDatasets("iris", false);
|
||||
auto clf = bayesnet::SPODELd(0);
|
||||
// raw.dataset.to(torch::kFloat32);
|
||||
clf.fit(raw.dataset, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf.score(raw.Xt, raw.yt);
|
||||
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(0.97333f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.97333f).epsilon(raw.epsilon));
|
||||
@ -233,13 +233,13 @@ TEST_CASE("KDB with hyperparameters", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::KDB(2);
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
clf.setHyperparameters({
|
||||
{"k", 3},
|
||||
{"theta", 0.7},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto scoret = clf.score(raw.Xv, raw.yv);
|
||||
REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
|
||||
@ -248,7 +248,7 @@ TEST_CASE("Incorrect type of data for SPODELd", "[Models]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::SPODELd(0);
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.featurest, raw.classNamet, raw.statest), std::runtime_error);
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
|
||||
}
|
||||
TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
|
||||
{
|
||||
|
@ -12,8 +12,10 @@
|
||||
#include <string>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/network/Network.h"
|
||||
#include "bayesnet/network/Node.h"
|
||||
#include "bayesnet/utils/bayesnetUtils.h"
|
||||
|
||||
const double threshold = 1e-4;
|
||||
void buildModel(bayesnet::Network& net, const std::vector<std::string>& features, const std::string& className)
|
||||
{
|
||||
std::vector<pair<int, int>> network = { {0, 1}, {0, 2}, {1, 3} };
|
||||
@ -28,13 +30,11 @@ void buildModel(bayesnet::Network& net, const std::vector<std::string>& features
|
||||
net.addEdge(className, feature);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
{
|
||||
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto net = bayesnet::Network();
|
||||
double threshold = 1e-4;
|
||||
|
||||
SECTION("Test get features")
|
||||
{
|
||||
@ -73,9 +73,9 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
net3.initialize();
|
||||
net2.initialize();
|
||||
net.initialize();
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
buildModel(net2, raw.featurest, raw.classNamet);
|
||||
buildModel(net3, raw.featurest, raw.classNamet);
|
||||
buildModel(net, raw.features, raw.className);
|
||||
buildModel(net2, raw.features, raw.className);
|
||||
buildModel(net3, raw.features, raw.className);
|
||||
std::vector<pair<std::string, std::string>> edges = {
|
||||
{"class", "sepallength"}, {"class", "sepalwidth"}, {"class", "petallength"},
|
||||
{"class", "petalwidth" }, {"sepallength", "sepalwidth"}, {"sepallength", "petallength"},
|
||||
@ -114,9 +114,9 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
REQUIRE(children == children3);
|
||||
}
|
||||
// Fit networks
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
net2.fit(raw.dataset, raw.weights, raw.featurest, raw.classNamet, raw.statest);
|
||||
net3.fit(raw.Xt, raw.yt, raw.weights, raw.featurest, raw.classNamet, raw.statest);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
net2.fit(raw.dataset, raw.weights, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
net3.fit(raw.Xt, raw.yt, raw.weights, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(net.getStates() == net2.getStates());
|
||||
REQUIRE(net.getStates() == net3.getStates());
|
||||
REQUIRE(net.getFeatures() == net2.getFeatures());
|
||||
@ -149,6 +149,7 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
}
|
||||
SECTION("Test show")
|
||||
{
|
||||
INFO("Test show");
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@ -162,6 +163,7 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
}
|
||||
SECTION("Test topological_sort")
|
||||
{
|
||||
INFO("Test topological sort");
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@ -175,6 +177,7 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
}
|
||||
SECTION("Test graph")
|
||||
{
|
||||
INFO("Test graph");
|
||||
net.addNode("A");
|
||||
net.addNode("B");
|
||||
net.addNode("C");
|
||||
@ -183,17 +186,18 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
auto str = net.graph("Test Graph");
|
||||
REQUIRE(str.size() == 7);
|
||||
REQUIRE(str[0] == "digraph BayesNet {\nlabel=<BayesNet Test Graph>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");
|
||||
REQUIRE(str[1] == "A [shape=circle] \n");
|
||||
REQUIRE(str[2] == "A -> B");
|
||||
REQUIRE(str[3] == "A -> C");
|
||||
REQUIRE(str[4] == "B [shape=circle] \n");
|
||||
REQUIRE(str[5] == "C [shape=circle] \n");
|
||||
REQUIRE(str[1] == "\"A\" [shape=circle] \n");
|
||||
REQUIRE(str[2] == "\"A\" -> \"B\"");
|
||||
REQUIRE(str[3] == "\"A\" -> \"C\"");
|
||||
REQUIRE(str[4] == "\"B\" [shape=circle] \n");
|
||||
REQUIRE(str[5] == "\"C\" [shape=circle] \n");
|
||||
REQUIRE(str[6] == "}\n");
|
||||
}
|
||||
SECTION("Test predict")
|
||||
{
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
INFO("Test predict");
|
||||
buildModel(net, raw.features, raw.className);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
std::vector<int> y_test = { 2, 2, 0, 2, 1 };
|
||||
auto y_pred = net.predict(test);
|
||||
@ -201,8 +205,9 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
}
|
||||
SECTION("Test predict_proba")
|
||||
{
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
INFO("Test predict_proba");
|
||||
buildModel(net, raw.features, raw.className);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
std::vector<std::vector<double>> y_test = {
|
||||
{0.450237, 0.0866621, 0.463101},
|
||||
@ -222,15 +227,17 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
}
|
||||
SECTION("Test score")
|
||||
{
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
INFO("Test score");
|
||||
buildModel(net, raw.features, raw.className);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = net.score(raw.Xv, raw.yv);
|
||||
REQUIRE(score == Catch::Approx(0.97333333).margin(threshold));
|
||||
}
|
||||
SECTION("Copy constructor")
|
||||
{
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
INFO("Test copy constructor");
|
||||
buildModel(net, raw.features, raw.className);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto net2 = bayesnet::Network(net);
|
||||
REQUIRE(net.getFeatures() == net2.getFeatures());
|
||||
REQUIRE(net.getEdges() == net2.getEdges());
|
||||
@ -250,9 +257,10 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
REQUIRE(node->getCPT().equal(node2->getCPT()));
|
||||
}
|
||||
}
|
||||
SECTION("Test oddities")
|
||||
SECTION("Network oddities")
|
||||
{
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
INFO("Network oddities");
|
||||
buildModel(net, raw.features, raw.className);
|
||||
// predict without fitting
|
||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
|
||||
auto test_tensor = bayesnet::vectorToTensor(test);
|
||||
@ -266,52 +274,69 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
||||
REQUIRE_THROWS_WITH(net.score(raw.Xv, raw.yv), "You must call fit() before calling predict()");
|
||||
// predict with wrong data
|
||||
auto netx = bayesnet::Network();
|
||||
buildModel(netx, raw.featuresv, raw.classNamev);
|
||||
netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
buildModel(netx, raw.features, raw.className);
|
||||
netx.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
std::vector<std::vector<int>> test2 = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1} };
|
||||
auto test_tensor2 = bayesnet::vectorToTensor(test2, false);
|
||||
REQUIRE_THROWS_AS(netx.predict(test2), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test2), "Sample size (3) does not match the number of features (4)");
|
||||
REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "Sample size (3) does not match the number of features (4)");
|
||||
REQUIRE_THROWS_AS(netx.predict(test2), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test2), "(V) Sample size (3) does not match the number of features (4)");
|
||||
REQUIRE_THROWS_AS(netx.predict(test_tensor2), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(netx.predict(test_tensor2), "(T) Sample size (3) does not match the number of features (4)");
|
||||
// fit with wrong data
|
||||
// Weights
|
||||
auto net2 = bayesnet::Network();
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.featuresv, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
std::string invalid_weights = "Weights (0) must have the same number of elements as samples (150) in Network::fit";
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.featuresv, raw.classNamev, raw.statesv), invalid_weights);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, std::vector<double>(), raw.features, raw.className, raw.states, raw.smoothing), invalid_weights);
|
||||
// X & y
|
||||
std::string invalid_labels = "X and y must have the same number of samples in Network::fit (150 != 0)";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv), invalid_labels);
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, std::vector<int>(), raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing), invalid_labels);
|
||||
// Features
|
||||
std::string invalid_features = "X and features must have the same number of features in Network::fit (4 != 0)";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.classNamev, raw.statesv), invalid_features);
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, std::vector<std::string>(), raw.className, raw.states, raw.smoothing), invalid_features);
|
||||
// Different number of features
|
||||
auto net3 = bayesnet::Network();
|
||||
auto test2y = { 1, 2, 3, 4, 5 };
|
||||
buildModel(net3, raw.featuresv, raw.classNamev);
|
||||
auto features3 = raw.featuresv;
|
||||
buildModel(net3, raw.features, raw.className);
|
||||
auto features3 = raw.features;
|
||||
features3.pop_back();
|
||||
std::string invalid_features2 = "X and local features must have the same number of features in Network::fit (3 != 4)";
|
||||
REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.classNamev, raw.statesv), invalid_features2);
|
||||
REQUIRE_THROWS_AS(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net3.fit(test2, test2y, std::vector<double>(5, 0), features3, raw.className, raw.states, raw.smoothing), invalid_features2);
|
||||
// Uninitialized network
|
||||
std::string network_invalid = "The network has not been initialized. You must call addNode() before calling fit()";
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), network_invalid);
|
||||
REQUIRE_THROWS_AS(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net2.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), network_invalid);
|
||||
// Classname
|
||||
std::string invalid_classname = "Class Name not found in Network::features";
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, "duck", raw.statesv), invalid_classname);
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, "duck", raw.states, raw.smoothing), invalid_classname);
|
||||
// Invalid feature
|
||||
auto features2 = raw.featuresv;
|
||||
auto features2 = raw.features;
|
||||
features2.pop_back();
|
||||
features2.push_back("duck");
|
||||
std::string invalid_feature = "Feature duck not found in Network::features";
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.classNamev, raw.statesv), invalid_feature);
|
||||
REQUIRE_THROWS_AS(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.fit(raw.Xv, raw.yv, raw.weightsv, features2, raw.className, raw.states, raw.smoothing), invalid_feature);
|
||||
// Add twice the same node name to the network => Nothing should happen
|
||||
net.addNode("A");
|
||||
net.addNode("A");
|
||||
// invalid state in checkfit
|
||||
auto net4 = bayesnet::Network();
|
||||
buildModel(net4, raw.features, raw.className);
|
||||
std::string invalid_state = "Feature sepallength not found in states";
|
||||
REQUIRE_THROWS_AS(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>(), raw.smoothing), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net4.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, std::map<std::string, std::vector<int>>(), raw.smoothing), invalid_state);
|
||||
// Try to add node or edge to a fitted network
|
||||
auto net5 = bayesnet::Network();
|
||||
buildModel(net5, raw.features, raw.className);
|
||||
net5.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE_THROWS_AS(net5.addNode("A"), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net5.addNode("A"), "Cannot add node to a fitted network. Initialize first.");
|
||||
REQUIRE_THROWS_AS(net5.addEdge("A", "B"), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(net5.addEdge("A", "B"), "Cannot add edge to a fitted network. Initialize first.");
|
||||
}
|
||||
|
||||
}
|
||||
@ -332,15 +357,6 @@ TEST_CASE("Cicle in Network", "[Network]")
|
||||
REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Adding this edge forms a cycle in the graph.");
|
||||
}
|
||||
TEST_CASE("Test max threads constructor", "[Network]")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
REQUIRE(net.getMaxThreads() == 0.95f);
|
||||
auto net2 = bayesnet::Network(4);
|
||||
REQUIRE(net2.getMaxThreads() == 4);
|
||||
auto net3 = bayesnet::Network(1.75);
|
||||
REQUIRE(net3.getMaxThreads() == 1.75);
|
||||
}
|
||||
TEST_CASE("Edges troubles", "[Network]")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
@ -350,19 +366,22 @@ TEST_CASE("Edges troubles", "[Network]")
|
||||
REQUIRE_THROWS_WITH(net.addEdge("A", "C"), "Child node C does not exist");
|
||||
REQUIRE_THROWS_AS(net.addEdge("C", "A"), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.addEdge("C", "A"), "Parent node C does not exist");
|
||||
net.addEdge("A", "B");
|
||||
REQUIRE_THROWS_AS(net.addEdge("A", "B"), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(net.addEdge("A", "B"), "Edge A -> B already exists");
|
||||
}
|
||||
TEST_CASE("Dump CPT", "[Network]")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
buildModel(net, raw.featuresv, raw.classNamev);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
buildModel(net, raw.features, raw.className);
|
||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto res = net.dump_cpt();
|
||||
std::string expected = R"(* class: (3) : [3]
|
||||
0.3333
|
||||
0.3333
|
||||
0.3333
|
||||
[ CPUFloatType{3} ]
|
||||
[ CPUDoubleType{3} ]
|
||||
* petallength: (4) : [4, 3, 3]
|
||||
(1,.,.) =
|
||||
0.9388 0.1000 0.2000
|
||||
@ -383,7 +402,7 @@ TEST_CASE("Dump CPT", "[Network]")
|
||||
0.0204 0.1000 0.2000
|
||||
0.1250 0.0526 0.1667
|
||||
0.2000 0.0606 0.8235
|
||||
[ CPUFloatType{4,3,3} ]
|
||||
[ CPUDoubleType{4,3,3} ]
|
||||
* petalwidth: (3) : [3, 6, 3]
|
||||
(1,.,.) =
|
||||
0.5000 0.0417 0.0714
|
||||
@ -408,12 +427,12 @@ TEST_CASE("Dump CPT", "[Network]")
|
||||
0.1111 0.0909 0.8000
|
||||
0.0667 0.2000 0.8667
|
||||
0.0303 0.2500 0.7500
|
||||
[ CPUFloatType{3,6,3} ]
|
||||
[ CPUDoubleType{3,6,3} ]
|
||||
* sepallength: (3) : [3, 3]
|
||||
0.8679 0.1321 0.0377
|
||||
0.0943 0.3019 0.0566
|
||||
0.0377 0.5660 0.9057
|
||||
[ CPUFloatType{3,3} ]
|
||||
[ CPUDoubleType{3,3} ]
|
||||
* sepalwidth: (6) : [6, 3, 3]
|
||||
(1,.,.) =
|
||||
0.0392 0.5000 0.2857
|
||||
@ -444,8 +463,136 @@ TEST_CASE("Dump CPT", "[Network]")
|
||||
0.5098 0.0833 0.1429
|
||||
0.5000 0.0476 0.1250
|
||||
0.2857 0.0571 0.1132
|
||||
[ CPUFloatType{6,3,3} ]
|
||||
[ CPUDoubleType{6,3,3} ]
|
||||
)";
|
||||
REQUIRE(res == expected);
|
||||
}
|
||||
|
||||
TEST_CASE("Test Smoothing A", "[Network]")
|
||||
{
|
||||
/*
|
||||
Tomando m = 1 Pa = 0.5
|
||||
Si estoy calculando P(A | C), con C en{ 0,1,2 } y tengo :
|
||||
AC = { 11, 12, 11, 10, 10, 12, 10, 01, 00, 02 }
|
||||
Entonces:
|
||||
P(A = 1 | C = 0) = (3 + 1 / 2 * 1) / (4 + 1) = 3.5 / 5
|
||||
P(A = 0 | C = 0) = (1 + 1 / 2 * 1) / (4 + 1) = 1.5 / 5
|
||||
Donde m aquí es el número de veces de C = 0 que es la que condiciona y la a priori vuelve a ser sobre A que es sobre las que estaríamos calculando esas marginales.
|
||||
P(A = 1 | C = 1) = (2 + 1 / 2 * 1) / (3 + 1) = 2.5 / 4
|
||||
P(A = 0 | C = 1) = (1 + 1 / 2 * 1) / (3 + 1) = 1.5 / 4
|
||||
P(A = 1 | C = 2) = (2 + 1 / 2 * 1) / (3 + 1) = 2.5 / 5
|
||||
P(A = 0 | C = 2) = (1 + 1 / 2 * 1) / (3 + 1) = 1.5 / 5
|
||||
En realidad es parecido a Laplace, que en este caso p.e.con C = 0 sería
|
||||
P(A = 1 | C = 0) = (3 + 1) / (4 + 2) = 4 / 6
|
||||
P(A = 0 | C = 0) = (1 + 1) / (4 + 2) = 2 / 6
|
||||
*/
|
||||
auto net = bayesnet::Network();
|
||||
net.addNode("A");
|
||||
net.addNode("C");
|
||||
net.addEdge("C", "A");
|
||||
std::vector<int> C = { 1, 2, 1, 0, 0, 2, 0, 1, 0, 2 };
|
||||
std::vector<std::vector<int>> A = { { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } };
|
||||
std::map<std::string, std::vector<int>> states = { { "A", {0, 1} }, { "C", {0, 1, 2} } };
|
||||
auto weights = std::vector<double>(C.size(), 1);
|
||||
//
|
||||
// Laplace
|
||||
//
|
||||
net.fit(A, C, weights, { "A" }, "C", states, bayesnet::Smoothing_t::LAPLACE);
|
||||
auto cpt_c_laplace = net.getNodes().at("C")->getCPT();
|
||||
REQUIRE(cpt_c_laplace.size(0) == 3);
|
||||
auto laplace_c = std::vector<float>({ 0.3846, 0.3077, 0.3077 });
|
||||
for (int i = 0; i < laplace_c.size(); ++i) {
|
||||
REQUIRE(cpt_c_laplace.index({ i }).item<float>() == Catch::Approx(laplace_c[i]).margin(threshold));
|
||||
}
|
||||
auto cpt_a_laplace = net.getNodes().at("A")->getCPT();
|
||||
REQUIRE(cpt_a_laplace.size(0) == 2);
|
||||
REQUIRE(cpt_a_laplace.size(1) == 3);
|
||||
auto laplace_a = std::vector<std::vector<float>>({ {0.3333, 0.4000,0.4000}, {0.6667, 0.6000, 0.6000} });
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
REQUIRE(cpt_a_laplace.index({ i, j }).item<float>() == Catch::Approx(laplace_a[i][j]).margin(threshold));
|
||||
}
|
||||
}
|
||||
//
|
||||
// Cestnik
|
||||
//
|
||||
net.fit(A, C, weights, { "A" }, "C", states, bayesnet::Smoothing_t::CESTNIK);
|
||||
auto cpt_c_cestnik = net.getNodes().at("C")->getCPT();
|
||||
REQUIRE(cpt_c_cestnik.size(0) == 3);
|
||||
auto cestnik_c = std::vector<float>({ 0.3939, 0.3030, 0.3030 });
|
||||
for (int i = 0; i < laplace_c.size(); ++i) {
|
||||
REQUIRE(cpt_c_cestnik.index({ i }).item<float>() == Catch::Approx(cestnik_c[i]).margin(threshold));
|
||||
}
|
||||
auto cpt_a_cestnik = net.getNodes().at("A")->getCPT();
|
||||
REQUIRE(cpt_a_cestnik.size(0) == 2);
|
||||
REQUIRE(cpt_a_cestnik.size(1) == 3);
|
||||
auto cestnik_a = std::vector<std::vector<float>>({ {0.3000, 0.3750, 0.3750}, {0.7000, 0.6250, 0.6250} });
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
REQUIRE(cpt_a_cestnik.index({ i, j }).item<float>() == Catch::Approx(cestnik_a[i][j]).margin(threshold));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Test Smoothing B", "[Network]")
|
||||
{
|
||||
auto net = bayesnet::Network();
|
||||
net.addNode("X");
|
||||
net.addNode("Y");
|
||||
net.addNode("Z");
|
||||
net.addNode("C");
|
||||
net.addEdge("C", "X");
|
||||
net.addEdge("C", "Y");
|
||||
net.addEdge("C", "Z");
|
||||
net.addEdge("Y", "Z");
|
||||
std::vector<int> C = { 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1 };
|
||||
std::vector<std::vector<int>> Data = {
|
||||
{ 0,0,1,1,0,1,0,1,0,1,0,0,0,1,0,1,0,0},
|
||||
{ 1,2,0,2,2,2,1,0,0,1,1,1,0,1,2,1,0,2},
|
||||
{ 2,1,3,3,2,0,0,1,3,2,1,2,2,3,0,0,1,2}
|
||||
};
|
||||
std::map<std::string, std::vector<int>> states = {
|
||||
{ "X", {0, 1} },
|
||||
{ "Y", {0, 1, 2} },
|
||||
{ "Z", {0, 1, 2, 3} },
|
||||
{ "C", {0, 1} }
|
||||
};
|
||||
auto weights = std::vector<double>(C.size(), 1);
|
||||
// See https://www.overleaf.com/read/tfnhpfysfkfx#2d576c example for calculations
|
||||
INFO("Test Smoothing B - Laplace");
|
||||
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::LAPLACE);
|
||||
auto laplace_values = std::vector<std::vector<float>>({ {0.377418, 0.622582}, {0.217821, 0.782179} });
|
||||
auto laplace_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
|
||||
for (auto i = 0; i < 2; ++i) {
|
||||
for (auto j = 0; j < 2; ++j) {
|
||||
REQUIRE(laplace_score.at(i).at(j) == Catch::Approx(laplace_values.at(i).at(j)).margin(threshold));
|
||||
}
|
||||
}
|
||||
INFO("Test Smoothing B - Original");
|
||||
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::ORIGINAL);
|
||||
auto original_values = std::vector<std::vector<float>>({ {0.344769, 0.655231}, {0.0421263, 0.957874} });
|
||||
auto original_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
|
||||
for (auto i = 0; i < 2; ++i) {
|
||||
for (auto j = 0; j < 2; ++j) {
|
||||
REQUIRE(original_score.at(i).at(j) == Catch::Approx(original_values.at(i).at(j)).margin(threshold));
|
||||
}
|
||||
}
|
||||
INFO("Test Smoothing B - Cestnik");
|
||||
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::CESTNIK);
|
||||
auto cestnik_values = std::vector<std::vector<float>>({ {0.353422, 0.646578}, {0.12364, 0.87636} });
|
||||
auto cestnik_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
|
||||
for (auto i = 0; i < 2; ++i) {
|
||||
for (auto j = 0; j < 2; ++j) {
|
||||
REQUIRE(cestnik_score.at(i).at(j) == Catch::Approx(cestnik_values.at(i).at(j)).margin(threshold));
|
||||
}
|
||||
}
|
||||
INFO("Test Smoothing B - No smoothing");
|
||||
net.fit(Data, C, weights, { "X", "Y", "Z" }, "C", states, bayesnet::Smoothing_t::NONE);
|
||||
auto nosmooth_values = std::vector<std::vector<float>>({ {0.342465753, 0.65753424}, {0.0, 1.0} });
|
||||
auto nosmooth_score = net.predict_proba({ {0, 1}, {1, 2}, {2, 3} });
|
||||
for (auto i = 0; i < 2; ++i) {
|
||||
for (auto j = 0; j < 2; ++j) {
|
||||
REQUIRE(nosmooth_score.at(i).at(j) == Catch::Approx(nosmooth_values.at(i).at(j)).margin(threshold));
|
||||
}
|
||||
}
|
||||
}
|
@ -7,7 +7,9 @@
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/network/Network.h"
|
||||
|
||||
@ -48,6 +50,75 @@ TEST_CASE("Test Node children and parents", "[Node]")
|
||||
REQUIRE(parents.size() == 0);
|
||||
REQUIRE(children.size() == 0);
|
||||
}
|
||||
TEST_CASE("Test Node computeCPT", "[Node]")
|
||||
{
|
||||
// Generate a test to test the computeCPT method of the Node class
|
||||
// Create a dataset with 3 features and 4 samples
|
||||
// The dataset is a 2D tensor with 4 rows and 4 columns
|
||||
auto dataset = torch::tensor({ {1, 0, 0, 1}, {1, 1, 2, 0}, {0, 1, 2, 1}, {0, 1, 0, 1} });
|
||||
auto states = std::vector<int>({ 2, 3, 3 });
|
||||
// Create a vector with the names of the features
|
||||
auto features = std::vector<std::string>{ "F1", "F2", "F3" };
|
||||
// Create a vector with the names of the classes
|
||||
auto className = std::string("Class");
|
||||
// weights
|
||||
auto weights = torch::tensor({ 1.0, 1.0, 1.0, 1.0 }, torch::kDouble);
|
||||
std::vector<bayesnet::Node> nodes;
|
||||
for (int i = 0; i < features.size(); i++) {
|
||||
auto node = bayesnet::Node(features[i]);
|
||||
node.setNumStates(states[i]);
|
||||
nodes.push_back(node);
|
||||
}
|
||||
// Create node class with 2 states
|
||||
nodes.push_back(bayesnet::Node(className));
|
||||
nodes[features.size()].setNumStates(2);
|
||||
// The network is c->f1, f2, f3 y f1->f2, f3
|
||||
for (int i = 0; i < features.size(); i++) {
|
||||
// Add class node as parent of all feature nodes
|
||||
nodes[i].addParent(&nodes[features.size()]);
|
||||
// Node[0] -> Node[1], Node[2]
|
||||
if (i > 0)
|
||||
nodes[i].addParent(&nodes[0]);
|
||||
}
|
||||
features.push_back(className);
|
||||
// Compute the conditional probability table
|
||||
nodes[1].computeCPT(dataset, features, 0.0, weights);
|
||||
// Get the conditional probability table
|
||||
auto cpTable = nodes[1].getCPT();
|
||||
// Get the dimensions of the conditional probability table
|
||||
auto dimensions = cpTable.sizes();
|
||||
// Check the dimensions of the conditional probability table
|
||||
REQUIRE(dimensions.size() == 3);
|
||||
REQUIRE(dimensions[0] == 3);
|
||||
REQUIRE(dimensions[1] == 2);
|
||||
REQUIRE(dimensions[2] == 2);
|
||||
// Check the values of the conditional probability table
|
||||
REQUIRE(cpTable[0][0][0].item<float>() == Catch::Approx(0));
|
||||
REQUIRE(cpTable[0][0][1].item<float>() == Catch::Approx(0));
|
||||
REQUIRE(cpTable[0][1][0].item<float>() == Catch::Approx(0));
|
||||
REQUIRE(cpTable[0][1][1].item<float>() == Catch::Approx(1));
|
||||
REQUIRE(cpTable[1][0][0].item<float>() == Catch::Approx(0));
|
||||
REQUIRE(cpTable[1][0][1].item<float>() == Catch::Approx(1));
|
||||
REQUIRE(cpTable[1][1][0].item<float>() == Catch::Approx(1));
|
||||
REQUIRE(cpTable[1][1][1].item<float>() == Catch::Approx(0));
|
||||
// Compute evidence
|
||||
for (auto& node : nodes) {
|
||||
node.computeCPT(dataset, features, 0.0, weights);
|
||||
}
|
||||
auto evidence = std::map<std::string, int>{ { "F1", 0 }, { "F2", 1 }, { "F3", 1 } };
|
||||
REQUIRE(nodes[3].getFactorValue(evidence) == 0.5);
|
||||
// Oddities
|
||||
auto features_back = features;
|
||||
// Remove a parent from features
|
||||
features.pop_back();
|
||||
REQUIRE_THROWS_AS(nodes[0].computeCPT(dataset, features, 0.0, weights), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(nodes[0].computeCPT(dataset, features, 0.0, weights), "Feature parent Class not found in dataset");
|
||||
// Remove a feature from features
|
||||
features = features_back;
|
||||
features.erase(features.begin());
|
||||
REQUIRE_THROWS_AS(nodes[0].computeCPT(dataset, features, 0.0, weights), std::logic_error);
|
||||
REQUIRE_THROWS_WITH(nodes[0].computeCPT(dataset, features, 0.0, weights), "Feature F1 not found in dataset");
|
||||
}
|
||||
TEST_CASE("TEST MinFill method", "[Node]")
|
||||
{
|
||||
// Generate a test to test the minFill method of the Node class
|
||||
|
218
tests/TestBoostA2DE.cc
Normal file
218
tests/TestBoostA2DE.cc
Normal file
@ -0,0 +1,218 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <type_traits>
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include "bayesnet/utils/BayesMetrics.h"
|
||||
#include "bayesnet/ensembles/BoostA2DE.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
|
||||
TEST_CASE("Build basic model", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 342);
|
||||
REQUIRE(clf.getNumberOfEdges() == 684);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[1] == "Pairs not used in train: 20");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 38");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
REQUIRE(score == Catch::Approx(0.919271).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Feature_select IWSS", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 140);
|
||||
REQUIRE(clf.getNumberOfEdges() == 294);
|
||||
REQUIRE(clf.getNotes().size() == 4);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
|
||||
REQUIRE(clf.getNotes()[1] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[2] == "Pairs not used in train: 2");
|
||||
REQUIRE(clf.getNotes()[3] == "Number of models: 14");
|
||||
}
|
||||
TEST_CASE("Feature_select FCBF", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 110);
|
||||
REQUIRE(clf.getNumberOfEdges() == 231);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
|
||||
REQUIRE(clf.getNotes()[1] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[2] == "Pairs not used in train: 2");
|
||||
REQUIRE(clf.getNotes()[3] == "Number of models: 11");
|
||||
}
|
||||
TEST_CASE("Test used features in train note and score", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
auto clf = bayesnet::BoostA2DE(true);
|
||||
clf.setHyperparameters({
|
||||
{"order", "asc"},
|
||||
{"convergence", true},
|
||||
{"select_features","CFS"},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 144);
|
||||
REQUIRE(clf.getNumberOfEdges() == 288);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 16");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(0.856771).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.856771).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Voting vs proba", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::BoostA2DE(false);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_proba = clf.score(raw.Xv, raw.yv);
|
||||
auto pred_proba = clf.predict_proba(raw.Xv);
|
||||
clf.setHyperparameters({
|
||||
{"predict_voting",true},
|
||||
});
|
||||
auto score_voting = clf.score(raw.Xv, raw.yv);
|
||||
auto pred_voting = clf.predict_proba(raw.Xv);
|
||||
REQUIRE(score_proba == Catch::Approx(0.98).epsilon(raw.epsilon));
|
||||
REQUIRE(score_voting == Catch::Approx(0.946667).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_voting[83][2] == Catch::Approx(0.53508).epsilon(raw.epsilon));
|
||||
REQUIRE(pred_proba[83][2] == Catch::Approx(0.48394).epsilon(raw.epsilon));
|
||||
REQUIRE(clf.dump_cpt() == "");
|
||||
REQUIRE(clf.topological_order() == std::vector<std::string>());
|
||||
}
|
||||
TEST_CASE("Order asc, desc & random", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("glass", true);
|
||||
std::map<std::string, double> scores{
|
||||
{"asc", 0.752336f }, { "desc", 0.813084f }, { "rand", 0.850467 }
|
||||
};
|
||||
for (const std::string& order : { "asc", "desc", "rand" }) {
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.setHyperparameters({
|
||||
{"order", order},
|
||||
{"bisection", false},
|
||||
{"maxTolerance", 1},
|
||||
{"convergence", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
INFO("BoostA2DE order: " + order);
|
||||
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
}
|
||||
}
|
||||
TEST_CASE("Oddities2", "[BoostA2DE]")
|
||||
{
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto bad_hyper = nlohmann::json{
|
||||
{ { "order", "duck" } },
|
||||
{ { "select_features", "duck" } },
|
||||
{ { "maxTolerance", 0 } },
|
||||
{ { "maxTolerance", 5 } },
|
||||
};
|
||||
for (const auto& hyper : bad_hyper.items()) {
|
||||
INFO("BoostA2DE hyper: " + hyper.value().dump());
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
||||
}
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0 } }), std::invalid_argument);
|
||||
auto bad_hyper_fit = nlohmann::json{
|
||||
{ { "select_features","IWSS" }, { "threshold", -0.01 } },
|
||||
{ { "select_features","IWSS" }, { "threshold", 0.51 } },
|
||||
{ { "select_features","FCBF" }, { "threshold", 1e-8 } },
|
||||
{ { "select_features","FCBF" }, { "threshold", 1.01 } },
|
||||
};
|
||||
for (const auto& hyper : bad_hyper_fit.items()) {
|
||||
INFO("BoostA2DE hyper: " + hyper.value().dump());
|
||||
clf.setHyperparameters(hyper.value());
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
}
|
||||
}
|
||||
TEST_CASE("No features selected", "[BoostA2DE]")
|
||||
{
|
||||
// Check that the note "No features selected in initialization" is added
|
||||
//
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.setHyperparameters({ {"select_features","FCBF"}, {"threshold", 1 } });
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNotes().size() == 1);
|
||||
REQUIRE(clf.getNotes()[0] == "No features selected in initialization");
|
||||
}
|
||||
TEST_CASE("Bisection Best", "[BoostA2DE]")
|
||||
{
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
{"block_update", false},
|
||||
{"convergence_best", false},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 480);
|
||||
REQUIRE(clf.getNumberOfEdges() == 1152);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes().at(0) == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes().at(1) == "Pairs not used in train: 83");
|
||||
REQUIRE(clf.getNotes().at(2) == "Number of models: 32");
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(0.966667f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.966667f).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Block Update", "[BoostA2DE]")
|
||||
{
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
auto raw = RawDatasets("spambase", true, 500);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"block_update", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
});
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 58);
|
||||
REQUIRE(clf.getNumberOfEdges() == 165);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[1] == "Pairs not used in train: 1588");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 1");
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
//
|
||||
// std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;
|
||||
// std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;
|
||||
// std::cout << "Notes size " << clf.getNotes().size() << std::endl;
|
||||
// for (auto note : clf.getNotes()) {
|
||||
// std::cout << note << std::endl;
|
||||
// }
|
||||
// std::cout << "Score " << score << std::endl;
|
||||
}
|
||||
TEST_CASE("Test graph b2a2de", "[BoostA2DE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::BoostA2DE();
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto graph = clf.graph();
|
||||
REQUIRE(graph.size() == 26);
|
||||
REQUIRE(graph[0] == "digraph BayesNet {\nlabel=<BayesNet BoostA2DE_0>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");
|
||||
REQUIRE(graph[1] == "\"class\" [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n");
|
||||
}
|
@ -8,6 +8,7 @@
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
@ -17,7 +18,7 @@ TEST_CASE("Feature_select CFS", "[BoostAODE]")
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({ {"select_features", "CFS"} });
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 153);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
@ -29,7 +30,7 @@ TEST_CASE("Feature_select IWSS", "[BoostAODE]")
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 153);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
@ -41,11 +42,11 @@ TEST_CASE("Feature_select FCBF", "[BoostAODE]")
|
||||
auto raw = RawDatasets("glass", true);
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||
REQUIRE(clf.getNumberOfEdges() == 153);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 5 of 9 with FCBF");
|
||||
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||
}
|
||||
TEST_CASE("Test used features in train note and score", "[BoostAODE]")
|
||||
@ -57,7 +58,7 @@ TEST_CASE("Test used features in train note and score", "[BoostAODE]")
|
||||
{"convergence", true},
|
||||
{"select_features","CFS"},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 72);
|
||||
REQUIRE(clf.getNumberOfEdges() == 120);
|
||||
REQUIRE(clf.getNotes().size() == 2);
|
||||
@ -65,14 +66,14 @@ TEST_CASE("Test used features in train note and score", "[BoostAODE]")
|
||||
REQUIRE(clf.getNotes()[1] == "Number of models: 8");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(0.80078).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.80078).epsilon(raw.epsilon));
|
||||
REQUIRE(score == Catch::Approx(0.809895813).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.809895813).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Voting vs proba", "[BoostAODE]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
auto clf = bayesnet::BoostAODE(false);
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_proba = clf.score(raw.Xv, raw.yv);
|
||||
auto pred_proba = clf.predict_proba(raw.Xv);
|
||||
clf.setHyperparameters({
|
||||
@ -101,10 +102,10 @@ TEST_CASE("Order asc, desc & random", "[BoostAODE]")
|
||||
{"maxTolerance", 1},
|
||||
{"convergence", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
INFO("BoostAODE order: " + order);
|
||||
INFO("BoostAODE order: " << order);
|
||||
REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||
}
|
||||
@ -120,7 +121,7 @@ TEST_CASE("Oddities", "[BoostAODE]")
|
||||
{ { "maxTolerance", 5 } },
|
||||
};
|
||||
for (const auto& hyper : bad_hyper.items()) {
|
||||
INFO("BoostAODE hyper: " + hyper.value().dump());
|
||||
INFO("BoostAODE hyper: " << hyper.value().dump());
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
||||
}
|
||||
REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0 } }), std::invalid_argument);
|
||||
@ -131,54 +132,82 @@ TEST_CASE("Oddities", "[BoostAODE]")
|
||||
{ { "select_features","FCBF" }, { "threshold", 1.01 } },
|
||||
};
|
||||
for (const auto& hyper : bad_hyper_fit.items()) {
|
||||
INFO("BoostAODE hyper: " + hyper.value().dump());
|
||||
INFO("BoostAODE hyper: " << hyper.value().dump());
|
||||
clf.setHyperparameters(hyper.value());
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv), std::invalid_argument);
|
||||
REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Bisection", "[BoostAODE]")
|
||||
TEST_CASE("Bisection Best", "[BoostAODE]")
|
||||
{
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("mfeat-factors", true);
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1200, true, false);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
{"block_update", false},
|
||||
{"convergence_best", false},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
REQUIRE(clf.getNumberOfNodes() == 217);
|
||||
REQUIRE(clf.getNumberOfEdges() == 431);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[1] == "Used features in train: 16 of 216");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 1");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 210);
|
||||
REQUIRE(clf.getNumberOfEdges() == 378);
|
||||
REQUIRE(clf.getNotes().size() == 1);
|
||||
REQUIRE(clf.getNotes().at(0) == "Number of models: 14");
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.991666675f).epsilon(raw.epsilon));
|
||||
}
|
||||
TEST_CASE("Bisection Best vs Last", "[BoostAODE]")
|
||||
{
|
||||
auto raw = RawDatasets("kdd_JapaneseVowels", true, 1500, true, false);
|
||||
auto clf = bayesnet::BoostAODE(true);
|
||||
auto hyperparameters = nlohmann::json{
|
||||
{"bisection", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
{"convergence_best", true},
|
||||
};
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_best = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon));
|
||||
// Now we will set the hyperparameter to use the last accuracy
|
||||
hyperparameters["convergence_best"] = false;
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
auto score_last = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
|
||||
}
|
||||
|
||||
TEST_CASE("Block Update", "[BoostAODE]")
|
||||
{
|
||||
auto clf = bayesnet::BoostAODE();
|
||||
auto raw = RawDatasets("mfeat-factors", true);
|
||||
auto raw = RawDatasets("mfeat-factors", true, 500);
|
||||
clf.setHyperparameters({
|
||||
{"bisection", true},
|
||||
{"block_update", true},
|
||||
{"maxTolerance", 3},
|
||||
{"convergence", true},
|
||||
});
|
||||
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
|
||||
REQUIRE(clf.getNumberOfNodes() == 217);
|
||||
REQUIRE(clf.getNumberOfEdges() == 431);
|
||||
clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
||||
REQUIRE(clf.getNumberOfNodes() == 868);
|
||||
REQUIRE(clf.getNumberOfEdges() == 1724);
|
||||
REQUIRE(clf.getNotes().size() == 3);
|
||||
REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
|
||||
REQUIRE(clf.getNotes()[1] == "Used features in train: 16 of 216");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 1");
|
||||
auto score = clf.score(raw.Xv, raw.yv);
|
||||
auto scoret = clf.score(raw.Xt, raw.yt);
|
||||
REQUIRE(score == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
||||
REQUIRE(clf.getNotes()[1] == "Used features in train: 19 of 216");
|
||||
REQUIRE(clf.getNotes()[2] == "Number of models: 4");
|
||||
auto score = clf.score(raw.X_test, raw.y_test);
|
||||
auto scoret = clf.score(raw.X_test, raw.y_test);
|
||||
REQUIRE(score == Catch::Approx(0.99f).epsilon(raw.epsilon));
|
||||
REQUIRE(scoret == Catch::Approx(0.99f).epsilon(raw.epsilon));
|
||||
//
|
||||
// std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;
|
||||
// std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;
|
||||
// std::cout << "Notes size " << clf.getNotes().size() << std::endl;
|
||||
// for (auto note : clf.getNotes()) {
|
||||
// std::cout << note << std::endl;
|
||||
// }
|
||||
// std::cout << "Score " << score << std::endl;
|
||||
}
|
@ -14,14 +14,15 @@
|
||||
#include "bayesnet/feature_selection/IWSS.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector, double threshold)
|
||||
bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector, double threshold, int max_features = 0)
|
||||
{
|
||||
max_features = max_features == 0 ? raw.features.size() : max_features;
|
||||
if (selector == "CFS") {
|
||||
return new bayesnet::CFS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights);
|
||||
return new bayesnet::CFS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights);
|
||||
} else if (selector == "FCBF") {
|
||||
return new bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, threshold);
|
||||
return new bayesnet::FCBF(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
|
||||
} else if (selector == "IWSS") {
|
||||
return new bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, threshold);
|
||||
return new bayesnet::IWSS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@ -80,10 +81,35 @@ TEST_CASE("Oddities", "[FeatureSelection]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
// FCBF Limits
|
||||
REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7");
|
||||
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]");
|
||||
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.featuresv, raw.classNamev, raw.featuresv.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]");
|
||||
REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7");
|
||||
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]");
|
||||
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]");
|
||||
// Not fitted error
|
||||
auto selector = build_selector(raw, "CFS", 0);
|
||||
const std::string message = "FeatureSelect not fitted";
|
||||
REQUIRE_THROWS_AS(selector->getFeatures(), std::runtime_error);
|
||||
REQUIRE_THROWS_AS(selector->getScores(), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(selector->getFeatures(), message);
|
||||
REQUIRE_THROWS_WITH(selector->getScores(), message);
|
||||
delete selector;
|
||||
}
|
||||
TEST_CASE("Test threshold limits", "[FeatureSelection]")
|
||||
{
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
// FCBF Limits
|
||||
auto selector = build_selector(raw, "FCBF", 0.051);
|
||||
selector->fit();
|
||||
REQUIRE(selector->getFeatures().size() == 2);
|
||||
delete selector;
|
||||
selector = build_selector(raw, "FCBF", 1e-7, 3);
|
||||
selector->fit();
|
||||
REQUIRE(selector->getFeatures().size() == 3);
|
||||
delete selector;
|
||||
selector = build_selector(raw, "IWSS", 0.5, 5);
|
||||
selector->fit();
|
||||
REQUIRE(selector->getFeatures().size() == 5);
|
||||
delete selector;
|
||||
}
|
72
tests/TestMST.cc
Normal file
72
tests/TestMST.cc
Normal file
@ -0,0 +1,72 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/catch_approx.hpp>
|
||||
#include <catch2/generators/catch_generators.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/utils/Mst.h"
|
||||
|
||||
|
||||
TEST_CASE("MST::insertElement tests", "[MST]")
|
||||
{
|
||||
bayesnet::MST mst({}, torch::tensor({}), 0);
|
||||
SECTION("Insert into an empty list")
|
||||
{
|
||||
std::list<int> variables;
|
||||
mst.insertElement(variables, 5);
|
||||
REQUIRE(variables == std::list<int>{5});
|
||||
}
|
||||
SECTION("Insert a non-duplicate element")
|
||||
{
|
||||
std::list<int> variables = { 1, 2, 3 };
|
||||
mst.insertElement(variables, 4);
|
||||
REQUIRE(variables == std::list<int>{4, 1, 2, 3});
|
||||
}
|
||||
SECTION("Insert a duplicate element")
|
||||
{
|
||||
std::list<int> variables = { 1, 2, 3 };
|
||||
mst.insertElement(variables, 2);
|
||||
REQUIRE(variables == std::list<int>{1, 2, 3});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("MST::reorder tests", "[MST]")
|
||||
{
|
||||
bayesnet::MST mst({}, torch::tensor({}), 0);
|
||||
SECTION("Reorder simple graph")
|
||||
{
|
||||
std::vector<std::pair<float, std::pair<int, int>>> T = { {2.0, {1, 2}}, {1.0, {0, 1}} };
|
||||
auto result = mst.reorder(T, 0);
|
||||
REQUIRE(result == std::vector<std::pair<int, int>>{{0, 1}, { 1, 2 }});
|
||||
}
|
||||
SECTION("Reorder with disconnected graph")
|
||||
{
|
||||
std::vector<std::pair<float, std::pair<int, int>>> T = { {2.0, {2, 3}}, {1.0, {0, 1}} };
|
||||
auto result = mst.reorder(T, 0);
|
||||
REQUIRE(result == std::vector<std::pair<int, int>>{{0, 1}, { 2, 3 }});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("MST::maximumSpanningTree tests", "[MST]")
|
||||
{
|
||||
std::vector<std::string> features = { "A", "B", "C" };
|
||||
auto weights = torch::tensor({
|
||||
{0.0, 1.0, 2.0},
|
||||
{1.0, 0.0, 3.0},
|
||||
{2.0, 3.0, 0.0}
|
||||
});
|
||||
bayesnet::MST mst(features, weights, 0);
|
||||
|
||||
SECTION("MST of a complete graph")
|
||||
{
|
||||
auto result = mst.maximumSpanningTree();
|
||||
REQUIRE(result.size() == 2); // Un MST para 3 nodos tiene 2 aristas
|
||||
}
|
||||
}
|
43
tests/TestModulesVersions.cc
Normal file
43
tests/TestModulesVersions.cc
Normal file
@ -0,0 +1,43 @@
|
||||
// ***************************************************************
|
||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||
// SPDX-FileType: SOURCE
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <catch2/matchers/catch_matchers.hpp>
|
||||
#include <string>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <folding.hpp>
|
||||
#include <nlohmann/json.hpp>
|
||||
#define TO_STR2(x) #x
|
||||
#define TO_STR(x) TO_STR2(x)
|
||||
#define JSON_VERSION (TO_STR(NLOHMANN_JSON_VERSION_MAJOR) "." TO_STR(NLOHMANN_JSON_VERSION_MINOR))
|
||||
#include "TestUtils.h"
|
||||
|
||||
std::map<std::string, std::string> modules = {
|
||||
{ "mdlp", "2.0.1" },
|
||||
{ "Folding", "1.1.0" },
|
||||
{ "json", "3.11" },
|
||||
{ "ArffFiles", "1.1.0" }
|
||||
};
|
||||
|
||||
TEST_CASE("MDLP", "[Modules]")
|
||||
{
|
||||
auto fimdlp = mdlp::CPPFImdlp();
|
||||
REQUIRE(fimdlp.version() == modules["mdlp"]);
|
||||
}
|
||||
TEST_CASE("Folding", "[Modules]")
|
||||
{
|
||||
auto folding = folding::KFold(5, 200);
|
||||
REQUIRE(folding.version() == modules["Folding"]);
|
||||
}
|
||||
TEST_CASE("NLOHMANN_JSON", "[Modules]")
|
||||
{
|
||||
REQUIRE(JSON_VERSION == modules["json"]);
|
||||
}
|
||||
TEST_CASE("ArffFiles", "[Modules]")
|
||||
{
|
||||
auto handler = ArffFiles();
|
||||
REQUIRE(handler.version() == modules["ArffFiles"]);
|
||||
}
|
@ -4,6 +4,7 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// ***************************************************************
|
||||
|
||||
#include <random>
|
||||
#include "TestUtils.h"
|
||||
#include "bayesnet/config.h"
|
||||
|
||||
@ -15,97 +16,110 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
pair<std::vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<std::string> features)
|
||||
class ShuffleArffFiles : public ArffFiles {
|
||||
public:
|
||||
ShuffleArffFiles(int num_samples = 0, bool shuffle = false) : ArffFiles(), num_samples(num_samples), shuffle(shuffle) {}
|
||||
void load(const std::string& file_name, bool class_last = true)
|
||||
{
|
||||
ArffFiles::load(file_name, class_last);
|
||||
if (num_samples > 0) {
|
||||
if (num_samples > getY().size()) {
|
||||
throw std::invalid_argument("num_lines must be less than the number of lines in the file");
|
||||
}
|
||||
auto indices = std::vector<int>(num_samples);
|
||||
std::iota(indices.begin(), indices.end(), 0);
|
||||
if (shuffle) {
|
||||
std::mt19937 g{ 173 };
|
||||
std::shuffle(indices.begin(), indices.end(), g);
|
||||
}
|
||||
auto XX = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(num_samples));
|
||||
auto yy = std::vector<int>(num_samples);
|
||||
for (int i = 0; i < num_samples; i++) {
|
||||
yy[i] = getY()[indices[i]];
|
||||
for (int j = 0; j < attributes.size(); j++) {
|
||||
XX[j][i] = X[j][indices[i]];
|
||||
}
|
||||
}
|
||||
X = XX;
|
||||
y = yy;
|
||||
}
|
||||
}
|
||||
private:
|
||||
int num_samples;
|
||||
bool shuffle;
|
||||
};
|
||||
|
||||
RawDatasets::RawDatasets(const std::string& file_name, bool discretize_, int num_samples_, bool shuffle_, bool class_last, bool debug)
|
||||
{
|
||||
std::vector<mdlp::labels_t> Xd;
|
||||
num_samples = num_samples_;
|
||||
shuffle = shuffle_;
|
||||
discretize = discretize_;
|
||||
// Xt can be either discretized or not
|
||||
// Xv is always discretized
|
||||
loadDataset(file_name, class_last);
|
||||
auto yresized = torch::transpose(yt.view({ yt.size(0), 1 }), 0, 1);
|
||||
dataset = torch::cat({ Xt, yresized }, 0);
|
||||
nSamples = dataset.size(1);
|
||||
weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble);
|
||||
weightsv = std::vector<double>(nSamples, 1.0 / nSamples);
|
||||
classNumStates = discretize ? states.at(className).size() : 0;
|
||||
auto fold = folding::StratifiedKFold(5, yt, 271);
|
||||
auto [train, test] = fold.getFold(0);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
// Get train and validation sets
|
||||
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
|
||||
y_train = dataset.index({ -1, train_t });
|
||||
X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
|
||||
y_test = dataset.index({ -1, test_t });
|
||||
if (debug)
|
||||
std::cout << to_string();
|
||||
}
|
||||
|
||||
map<std::string, int> RawDatasets::discretizeDataset(std::vector<mdlp::samples_t>& X)
|
||||
{
|
||||
|
||||
map<std::string, int> maxes;
|
||||
auto fimdlp = mdlp::CPPFImdlp();
|
||||
for (int i = 0; i < X.size(); i++) {
|
||||
fimdlp.fit(X[i], y);
|
||||
fimdlp.fit(X[i], yv);
|
||||
mdlp::labels_t& xd = fimdlp.transform(X[i]);
|
||||
maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
|
||||
Xd.push_back(xd);
|
||||
Xv.push_back(xd);
|
||||
}
|
||||
return { Xd, maxes };
|
||||
return maxes;
|
||||
}
|
||||
|
||||
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
|
||||
void RawDatasets::loadDataset(const std::string& name, bool class_last)
|
||||
{
|
||||
std::vector<mdlp::labels_t> Xd;
|
||||
auto fimdlp = mdlp::CPPFImdlp();
|
||||
for (int i = 0; i < X.size(); i++) {
|
||||
fimdlp.fit(X[i], y);
|
||||
mdlp::labels_t& xd = fimdlp.transform(X[i]);
|
||||
Xd.push_back(xd);
|
||||
}
|
||||
return Xd;
|
||||
}
|
||||
|
||||
bool file_exists(const std::string& name)
|
||||
{
|
||||
if (FILE* file = fopen(name.c_str(), "r")) {
|
||||
fclose(file);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last, bool discretize_dataset)
|
||||
{
|
||||
auto handler = ArffFiles();
|
||||
auto handler = ShuffleArffFiles(num_samples, shuffle);
|
||||
handler.load(Paths::datasets() + static_cast<std::string>(name) + ".arff", class_last);
|
||||
// Get Dataset X, y
|
||||
std::vector<mdlp::samples_t>& X = handler.getX();
|
||||
mdlp::labels_t& y = handler.getY();
|
||||
yv = handler.getY();
|
||||
// Get className & Features
|
||||
auto className = handler.getClassName();
|
||||
std::vector<std::string> features;
|
||||
auto attributes = handler.getAttributes();
|
||||
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
|
||||
torch::Tensor Xd;
|
||||
auto states = map<std::string, std::vector<int>>();
|
||||
if (discretize_dataset) {
|
||||
auto Xr = discretizeDataset(X, y);
|
||||
Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
|
||||
auto item = states.at(features[i]);
|
||||
iota(begin(item), end(item), 0);
|
||||
Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
|
||||
}
|
||||
states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
|
||||
iota(begin(states.at(className)), end(states.at(className)), 0);
|
||||
} else {
|
||||
Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
Xd.index_put_({ i, "..." }, torch::tensor(X[i]));
|
||||
}
|
||||
}
|
||||
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
|
||||
}
|
||||
|
||||
tuple<std::vector<std::vector<int>>, std::vector<int>, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadFile(const std::string& name)
|
||||
{
|
||||
auto handler = ArffFiles();
|
||||
handler.load(Paths::datasets() + static_cast<std::string>(name) + ".arff");
|
||||
// Get Dataset X, y
|
||||
std::vector<mdlp::samples_t>& X = handler.getX();
|
||||
mdlp::labels_t& y = handler.getY();
|
||||
// Get className & Features
|
||||
auto className = handler.getClassName();
|
||||
std::vector<std::string> features;
|
||||
className = handler.getClassName();
|
||||
auto attributes = handler.getAttributes();
|
||||
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
|
||||
// Discretize Dataset
|
||||
std::vector<mdlp::labels_t> Xd;
|
||||
map<std::string, int> maxes;
|
||||
tie(Xd, maxes) = discretize(X, y, features);
|
||||
maxes[className] = *max_element(y.begin(), y.end()) + 1;
|
||||
map<std::string, std::vector<int>> states;
|
||||
for (auto feature : features) {
|
||||
states[feature] = std::vector<int>(maxes[feature]);
|
||||
auto maxValues = discretizeDataset(X);
|
||||
maxValues[className] = *max_element(yv.begin(), yv.end()) + 1;
|
||||
if (discretize) {
|
||||
// discretize the tensor as well
|
||||
Xt = torch::zeros({ static_cast<int>(Xv.size()), static_cast<int>(Xv[0].size()) }, torch::kInt32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
states[features[i]] = std::vector<int>(maxValues[features[i]]);
|
||||
iota(begin(states.at(features[i])), end(states.at(features[i])), 0);
|
||||
Xt.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kInt32));
|
||||
}
|
||||
states[className] = std::vector<int>(maxValues[className]);
|
||||
iota(begin(states.at(className)), end(states.at(className)), 0);
|
||||
} else {
|
||||
Xt = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
Xt.index_put_({ i, "..." }, torch::tensor(X[i]));
|
||||
}
|
||||
}
|
||||
states[className] = std::vector<int>(maxes[className]);
|
||||
return { Xd, y, features, className, states };
|
||||
yt = torch::tensor(yv, torch::kInt32);
|
||||
}
|
||||
|
||||
|
@ -11,39 +11,62 @@
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <tuple>
|
||||
#include <ArffFiles.h>
|
||||
#include <ArffFiles.hpp>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <folding.hpp>
|
||||
#include <bayesnet/network/Network.h>
|
||||
|
||||
bool file_exists(const std::string& name);
|
||||
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);
|
||||
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y);
|
||||
std::tuple<vector<vector<int>>, std::vector<int>, std::vector<string>, std::string, map<std::string, std::vector<int>>> loadFile(const std::string& name);
|
||||
std::tuple<torch::Tensor, torch::Tensor, std::vector<string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last, bool discretize_dataset);
|
||||
|
||||
class RawDatasets {
|
||||
public:
|
||||
RawDatasets(const std::string& file_name, bool discretize)
|
||||
{
|
||||
// Xt can be either discretized or not
|
||||
tie(Xt, yt, featurest, classNamet, statest) = loadDataset(file_name, true, discretize);
|
||||
// Xv is always discretized
|
||||
tie(Xv, yv, featuresv, classNamev, statesv) = loadFile(file_name);
|
||||
auto yresized = torch::transpose(yt.view({ yt.size(0), 1 }), 0, 1);
|
||||
dataset = torch::cat({ Xt, yresized }, 0);
|
||||
nSamples = dataset.size(1);
|
||||
weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble);
|
||||
weightsv = std::vector<double>(nSamples, 1.0 / nSamples);
|
||||
classNumStates = discretize ? statest.at(classNamet).size() : 0;
|
||||
}
|
||||
RawDatasets(const std::string& file_name, bool discretize_, int num_samples_ = 0, bool shuffle_ = false, bool class_last = true, bool debug = false);
|
||||
torch::Tensor Xt, yt, dataset, weights;
|
||||
torch::Tensor X_train, y_train, X_test, y_test;
|
||||
std::vector<vector<int>> Xv;
|
||||
std::vector<double> weightsv;
|
||||
std::vector<int> yv;
|
||||
std::vector<string> featurest, featuresv;
|
||||
map<std::string, std::vector<int>> statest, statesv;
|
||||
std::string classNamet, classNamev;
|
||||
std::vector<double> weightsv;
|
||||
std::vector<string> features;
|
||||
std::string className;
|
||||
map<std::string, std::vector<int>> states;
|
||||
int nSamples, classNumStates;
|
||||
double epsilon = 1e-5;
|
||||
bool discretize;
|
||||
int num_samples = 0;
|
||||
bool shuffle = false;
|
||||
bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::ORIGINAL;
|
||||
private:
|
||||
std::string to_string()
|
||||
{
|
||||
std::string features_ = "";
|
||||
for (auto& f : features) {
|
||||
features_ += f + " ";
|
||||
}
|
||||
std::string states_ = "";
|
||||
for (auto& s : states) {
|
||||
states_ += s.first + " ";
|
||||
for (auto& v : s.second) {
|
||||
states_ += std::to_string(v) + " ";
|
||||
}
|
||||
states_ += "\n";
|
||||
}
|
||||
return "Xt dimensions: " + std::to_string(Xt.size(0)) + " " + std::to_string(Xt.size(1)) + "\n"
|
||||
"Xv dimensions: " + std::to_string(Xv.size()) + " " + std::to_string(Xv[0].size()) + "\n"
|
||||
+ "yt dimensions: " + std::to_string(yt.size(0)) + "\n"
|
||||
+ "yv dimensions: " + std::to_string(yv.size()) + "\n"
|
||||
+ "X_train dimensions: " + std::to_string(X_train.size(0)) + " " + std::to_string(X_train.size(1)) + "\n"
|
||||
+ "X_test dimensions: " + std::to_string(X_test.size(0)) + " " + std::to_string(X_test.size(1)) + "\n"
|
||||
+ "y_train dimensions: " + std::to_string(y_train.size(0)) + "\n"
|
||||
+ "y_test dimensions: " + std::to_string(y_test.size(0)) + "\n"
|
||||
+ "features: " + std::to_string(features.size()) + "\n"
|
||||
+ features_ + "\n"
|
||||
+ "className: " + className + "\n"
|
||||
+ "states: " + std::to_string(states.size()) + "\n"
|
||||
+ "nSamples: " + std::to_string(nSamples) + "\n"
|
||||
+ "classNumStates: " + std::to_string(classNumStates) + "\n"
|
||||
+ "states: " + states_ + "\n";
|
||||
}
|
||||
map<std::string, int> discretizeDataset(std::vector<mdlp::samples_t>& X);
|
||||
void loadDataset(const std::string& name, bool class_last);
|
||||
};
|
||||
|
||||
#endif //TEST_UTILS_H
|
41
tests/Timer.h
Normal file
41
tests/Timer.h
Normal file
@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
namespace platform {
|
||||
class Timer {
|
||||
private:
|
||||
std::chrono::high_resolution_clock::time_point begin;
|
||||
std::chrono::high_resolution_clock::time_point end;
|
||||
public:
|
||||
Timer() = default;
|
||||
~Timer() = default;
|
||||
void start() { begin = std::chrono::high_resolution_clock::now(); }
|
||||
void stop() { end = std::chrono::high_resolution_clock::now(); }
|
||||
double getDuration()
|
||||
{
|
||||
stop();
|
||||
std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double >> (end - begin);
|
||||
return time_span.count();
|
||||
}
|
||||
double getLapse()
|
||||
{
|
||||
std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double >> (std::chrono::high_resolution_clock::now() - begin);
|
||||
return time_span.count();
|
||||
}
|
||||
std::string getDurationString(bool lapse = false)
|
||||
{
|
||||
double duration = lapse ? getLapse() : getDuration();
|
||||
return translate2String(duration);
|
||||
}
|
||||
std::string translate2String(double duration)
|
||||
{
|
||||
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
|
||||
std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
|
||||
std::stringstream ss;
|
||||
ss << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit;
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
} /* namespace platform */
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user