Compare commits

...

121 Commits

Author SHA1 Message Date
06de13df98 Add date/time to header of report best 2023-09-25 10:04:53 +02:00
de4fa6a04f Add color to totals 2023-09-23 10:30:39 +02:00
3a7bf4e672 Fix ranking order mistake 2023-09-23 01:33:23 +02:00
cd0bc02a74 Add report/build all with totals and ranks 2023-09-23 01:14:02 +02:00
c8597a794e Begin report all models 2023-09-22 18:13:32 +02:00
b30416364d Fix mistake in best results file name 2023-09-22 14:14:39 +02:00
3a16589220 Add best config for debug in vscode 2023-09-22 01:04:36 +02:00
c4f9187e2a Complete best build and report 2023-09-22 01:03:55 +02:00
c4d0a5b4e6 Split Result from Results 2023-09-21 23:30:17 +02:00
7bfafe555f Begin BestResults build 2023-09-21 23:04:11 +02:00
337b6f7e79 Rename BestResult to BestScore 2023-09-21 19:30:07 +02:00
5fa0b957dd Fix mistake in idx range in manage 2023-09-20 19:12:07 +02:00
67252fc41d Fix CMakeLists libxlsxwriter for Linux 2023-09-20 19:02:53 +02:00
94ae9456a0 Fix libxslxwriter linking problem 2023-09-20 18:50:11 +02:00
781993e326 Resolve some warnings 2023-09-20 17:54:15 +02:00
8257a6ae39 Add message of not exist Best Results 2023-09-20 13:50:34 +02:00
fc81730dfc Merge pull request 'Exchange OpenXLSX to libxlsxwriter' (#8) from libxlsxwriter into main
Add multiple sheets to excel file
Add format and color to sheets
Add comparison with ZeroR
Add comparison with Best Results
Separate contextual menu from general in manage
2023-09-20 11:17:16 +00:00
d8734ff082 Separate contextual menu from general 2023-09-20 13:15:33 +02:00
03533461c8 Add compare to best results in manage 2023-09-20 12:51:19 +02:00
68f22a673d Add comparison to report console 2023-09-20 11:40:01 +02:00
b9bc0088f3 Add format to unique dataset results summary 2023-09-20 10:30:45 +02:00
c280e254ca Remove OpenXLSX submodule 2023-09-20 01:09:58 +02:00
3d0f29fda3 Remove .vscode/settings.json from repository 2023-09-20 01:01:40 +02:00
20a6ebab7c Support to add any number of sheets to excel 2023-09-20 00:58:01 +02:00
925f71166c Fix mistake in comparison 2023-09-19 23:46:49 +02:00
f69f415b92 Complete comparison with ZeroR 2023-09-19 17:55:03 +02:00
1bdfbd1620 Complete adding color to format 2023-09-19 14:07:41 +02:00
06fb135526 First approach 2023-09-18 23:26:22 +02:00
501ea0ab4e Fix CMakeList manage build with Linux 2023-09-18 19:27:40 +02:00
847c6761d7 Add Linux specific link library to cmake 2023-09-17 10:42:19 +02:00
6030885fc3 Add partial result filter to manage 2023-09-16 17:27:18 +02:00
89df7f4db0 Add library to manage link 2023-09-14 01:41:49 +02:00
41257ed566 If ! convergence don't predict test 2023-09-10 19:50:36 +02:00
506369e46b Add Convergence hyperparameter 2023-09-07 11:27:35 +02:00
d908f389f5 Begin using validation as finish condition 2023-09-06 10:51:07 +02:00
5a7c8f1818 Add status to classifier and Experiment 2023-09-05 13:39:43 +02:00
64fc7bd9dd Add show dataset detail in report 2023-09-05 09:26:49 +02:00
0b7beda78c Add threads without limit to network fit 2023-09-04 21:24:11 +02:00
05b670dfc0 Add detail to fold progress in main 2023-09-03 16:33:48 +02:00
de62d42b74 Fix make debug command 2023-09-03 14:13:10 +02:00
edb957d22e Add filter complete results to manage 2023-09-03 14:07:11 +02:00
4de5cb4c6c Merge pull request 'Solve Ensemble models exceptions on certain datasets' (#7) from solveexceptions into main
Reviewed-on: #7
2023-09-02 15:29:33 +00:00
c35030f137 Upgrade models version and Add class diagram 2023-09-02 14:39:43 +02:00
182b07ed90 Solve voting vector error 2023-09-02 13:58:12 +02:00
7806f961e2 Remove threads 2023-08-31 20:30:28 +02:00
7c3e315ae7 Add Linux specific options to compile 2023-08-29 18:20:55 +02:00
284ef6dfd1 Add significanceModels to AODELd 2023-08-24 12:58:53 +02:00
1c6af619b5 Exception if hyperparameters not valid 2023-08-24 12:09:35 +02:00
86ffdfd6f3 Add const feature and className to fit models 2023-08-23 23:15:39 +02:00
d82148079d Add KDB hyperparameters K and theta 2023-08-23 00:44:10 +02:00
067430fd1b Add xlsxopen submodule 2023-08-22 23:45:11 +02:00
f5d0d16365 Merge pull request 'Add excel report to manage results' (#6) from xlsx into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/6
2023-08-22 21:40:11 +00:00
97ca8ac084 Move check valid hyperparameters to Classifier 2023-08-22 22:12:20 +02:00
1c1385b768 Fix maxModels mistake in BoostAODE if !repeatSp
Throw exception if wrong hyperparmeter is supplied
2023-08-22 21:55:17 +02:00
35432b6294 Fix time std was not saved in experiment 2023-08-22 12:30:27 +02:00
c59dd30e53 Complete Excel Report with data 2023-08-22 11:55:15 +02:00
d2da0ddb88 Create ReportExcel eq to ReportConsole 2023-08-21 17:51:49 +02:00
8066701c3c Refactor Report class into ReportBase & ReportCons 2023-08-21 17:16:29 +02:00
0f66ac73d0 Revert "Refactor Report into ReportBase & ReportConsole"
This reverts commit 4370bf51d7.
2023-08-21 17:15:14 +02:00
4370bf51d7 Refactor Report into ReportBase & ReportConsole 2023-08-21 17:14:23 +02:00
2b7353b9e0 Add default sorting by date in manage 2023-08-21 16:30:10 +02:00
b686b3c9c3 Enhance copy in Makefile 2023-08-21 12:18:23 +02:00
2dd04a6c44 enhance saving results and add Makefile copy 2023-08-21 11:57:45 +02:00
1da83662d0 Always save results 2023-08-21 10:55:20 +02:00
3ac9593c65 Fix mistake in sample 2023-08-20 20:36:46 +02:00
6b317accf1 Add hyperparameters and processing order to Boost 2023-08-20 20:31:23 +02:00
4964aab722 Add hyperparameters management in experiments 2023-08-20 17:57:38 +02:00
7a6ec73d63 Merge pull request 'boostAode' (#5) from boostAode into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/5
Implement boostAODE
add list datasets
add manage results
2023-08-20 09:02:07 +00:00
1a534888d6 Fix report format 2023-08-19 23:30:44 +02:00
59ffd179f4 Fix report format 2023-08-19 21:26:48 +02:00
9972738deb Add list datasets and add locale format 2023-08-19 19:05:16 +02:00
bafcb26bb6 Add manage to build target 2023-08-18 13:43:53 +02:00
2d7999d5f2 Add manage to release targets 2023-08-18 13:43:13 +02:00
a6bb22dfb5 Complete first BoostAODE 2023-08-18 11:50:34 +02:00
704dc937be Remove FeatureSel, add SelectKBest to BayesMetrics 2023-08-16 19:05:18 +02:00
a3e665eed6 make weights double 2023-08-16 12:46:09 +02:00
918a7b4180 Remove unneeded output 2023-08-16 12:36:38 +02:00
80b20f35b4 Fix weights mistakes in computation 2023-08-16 12:32:51 +02:00
4d4780c1d5 Add BoostAODE model based on AODE 2023-08-15 16:16:04 +02:00
fa612c531e Complete Adding weights to Models 2023-08-15 15:59:56 +02:00
24b68f9ae2 Add weigths as parameter 2023-08-15 15:04:56 +02:00
a062ebf445 Merge pull request 'reports' (#4) from reports into boostAode
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/4
2023-08-14 16:58:48 +00:00
2a3fc9aa45 Add colors and enhace input control 2023-08-14 17:03:06 +02:00
55d21294d5 Add class Paths and enhance input 2023-08-14 00:40:31 +02:00
3691cb4a61 Add totals and filter by scoreName and model 2023-08-13 18:13:00 +02:00
054567c65a Add sorting capacity 2023-08-13 17:10:18 +02:00
2729b92f06 Summary list 2023-08-13 16:19:17 +02:00
f26ea1f0ac Add weights to BayesMetrics 2023-08-13 12:56:06 +02:00
af0419c9da First approx with const 1 weights 2023-08-13 00:59:02 +02:00
90c92e5c56 Merge pull request 'Add states as result in Proposal methods' (#3) from optimize_memory into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/3
2023-08-12 14:16:55 +00:00
182b52a887 Add states as result in Proposal methods 2023-08-12 16:16:17 +02:00
6679b90a82 Merge pull request 'optimize_memory' (#2) from optimize_memory into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/2
2023-08-12 14:15:03 +00:00
405887f833 Solved Ld poor results 2023-08-12 11:49:18 +02:00
3a85481a5a Redo pass states to Network Fit needed in crossval
fix mistake in headerline (report)
2023-08-12 11:10:53 +02:00
0ad5505c16 Spodeld working with poor accuracy 2023-08-10 02:06:18 +02:00
323444b74a const functions 2023-08-08 01:53:41 +02:00
ef1bffcac3 Fixed normal classifiers 2023-08-07 13:50:11 +02:00
06db8f51ce Refactor library and models to lighten data stored
Refactro Ensemble to inherit from Classifier insted of BaseClassifier
2023-08-07 12:49:37 +02:00
e74565ba01 update clang-tidy 2023-08-07 00:44:12 +02:00
2da0fb5d8f Merge branch 'main' into TANNew 2023-08-06 11:40:10 +02:00
14ea51648a Complete AODELd 2023-08-06 11:31:44 +02:00
9e94f4e140 Rename suffix of proposal classifier to Ld 2023-08-05 23:23:31 +02:00
1d0fd629c9 Add SPODENew to models 2023-08-05 23:11:36 +02:00
506ef34c6f Add report output to main 2023-08-05 20:29:05 +02:00
7f45495837 Refactor New classifiers to extract predict 2023-08-05 18:39:48 +02:00
1a09ccca4c Add KDBNew fix computeCPT error 2023-08-05 14:40:42 +02:00
a1c6ab18f3 TANNew restructured with poor results 2023-08-04 20:11:22 +02:00
64ac8fb4f2 TANNew as a TAN variant working 2023-08-04 19:42:18 +02:00
c568ba111d Add Proposal class 2023-08-04 13:05:12 +02:00
45c1d052ac Compile TANNew with poor accuracy 2023-08-04 01:35:45 +02:00
eb1cec58a3 Complete nxm 2023-08-03 20:22:33 +02:00
f520b40016 Almost complete proposal in TANNew 2023-08-02 02:21:55 +02:00
cdfb45d2cb Add topological order to Network 2023-08-02 00:56:52 +02:00
f63a9a64f9 Update Makefile to add Release & Debug build 2023-08-01 19:02:37 +02:00
285f0938a6 Update mdlp library 2023-08-01 17:33:01 +02:00
8f8f9773ce Make TANNew same as TAN with local discretization 2023-08-01 13:17:12 +02:00
a9ba21560d Add environment platform to experiment result 2023-08-01 10:55:53 +02:00
a18fbe5594 Begin implementation 2023-07-31 19:53:55 +02:00
adf650d257 Max threading 2023-07-31 18:49:18 +02:00
43bb017d5d Fix problem with tensors way 2023-07-30 19:00:02 +02:00
53697648e7 Merge branch 'aftermath' into main 2023-07-30 01:05:31 +02:00
88 changed files with 3314 additions and 711 deletions

View File

@@ -13,5 +13,4 @@ HeaderFilterRegex: 'src/*'
AnalyzeTemporaryDtors: false
WarningsAsErrors: ''
FormatStyle: file
FormatStyleOptions: ''
...

31
.clang-uml Normal file
View File

@@ -0,0 +1,31 @@
compilation_database_dir: build
output_directory: puml
diagrams:
BayesNet:
type: class
glob:
- src/BayesNet/*.cc
- src/Platform/*.cc
using_namespace: bayesnet
include:
namespaces:
- bayesnet
- platform
plantuml:
after:
- "note left of {{ alias(\"MyProjectMain\") }}: Main class of myproject library."
sequence:
type: sequence
glob:
- src/Platform/main.cc
combine_free_functions_into_file_participants: true
using_namespace:
- std
- bayesnet
- platform
include:
paths:
- src/BayesNet
- src/Platform
start_from:
- function: main(int,const char **)

2
.gitignore vendored
View File

@@ -35,3 +35,5 @@ build/
*.dSYM/**
cmake-build*/**
.idea
puml/**
.vscode/settings.json

3
.gitmodules vendored
View File

@@ -10,3 +10,6 @@
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
[submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git

54
.vscode/launch.json vendored
View File

@@ -10,12 +10,13 @@
"-d",
"iris",
"-m",
"TAN",
"TANLd",
"-s",
"271",
"-p",
"../../data/",
"--tensors"
"/Users/rmontanana/Code/discretizbench/datasets/",
],
"cwd": "${workspaceFolder}/build/sample/",
//"cwd": "${workspaceFolder}/build/sample/",
},
{
"type": "lldb",
@@ -24,17 +25,50 @@
"program": "${workspaceFolder}/build/src/Platform/main",
"args": [
"-m",
"TAN",
"BoostAODE",
"-p",
"/Users/rmontanana/Code/discretizbench/datasets",
"--discretize",
"--stratified",
"--title",
"Debug test",
"-d",
"ionosphere"
"mfeat-morphological",
"--discretize"
// "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "${workspaceFolder}/build/src/Platform",
"cwd": "/Users/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "best",
"program": "${workspaceFolder}/build/src/Platform/best",
"args": [
"-m",
"BoostAODE",
"-s",
"accuracy",
"--build",
],
"cwd": "/Users/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "manage",
"program": "${workspaceFolder}/build/src/Platform/manage",
"args": [
"-n",
"20"
],
"cwd": "/Users/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "list",
"program": "${workspaceFolder}/build/src/Platform/list",
"args": [],
"cwd": "/Users/rmontanana/Code/discretizbench",
},
{
"name": "Build & debug active file",

109
.vscode/settings.json vendored
View File

@@ -1,109 +0,0 @@
{
"files.associations": {
"*.rmd": "markdown",
"*.py": "python",
"vector": "cpp",
"__bit_reference": "cpp",
"__bits": "cpp",
"__config": "cpp",
"__debug": "cpp",
"__errc": "cpp",
"__hash_table": "cpp",
"__locale": "cpp",
"__mutex_base": "cpp",
"__node_handle": "cpp",
"__nullptr": "cpp",
"__split_buffer": "cpp",
"__string": "cpp",
"__threading_support": "cpp",
"__tuple": "cpp",
"array": "cpp",
"atomic": "cpp",
"bitset": "cpp",
"cctype": "cpp",
"chrono": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"compare": "cpp",
"complex": "cpp",
"concepts": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"exception": "cpp",
"initializer_list": "cpp",
"ios": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"locale": "cpp",
"memory": "cpp",
"mutex": "cpp",
"new": "cpp",
"optional": "cpp",
"ostream": "cpp",
"ratio": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"string": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"typeinfo": "cpp",
"unordered_map": "cpp",
"variant": "cpp",
"algorithm": "cpp",
"iostream": "cpp",
"iomanip": "cpp",
"numeric": "cpp",
"set": "cpp",
"__tree": "cpp",
"deque": "cpp",
"list": "cpp",
"map": "cpp",
"unordered_set": "cpp",
"any": "cpp",
"condition_variable": "cpp",
"forward_list": "cpp",
"fstream": "cpp",
"stack": "cpp",
"thread": "cpp",
"__memory": "cpp",
"filesystem": "cpp",
"*.toml": "toml",
"utility": "cpp",
"__verbose_abort": "cpp",
"bit": "cpp",
"random": "cpp",
"*.tcc": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory_resource": "cpp",
"format": "cpp",
"valarray": "cpp",
"regex": "cpp",
"span": "cpp",
"cfenv": "cpp",
"cinttypes": "cpp",
"csetjmp": "cpp",
"future": "cpp",
"queue": "cpp",
"typeindex": "cpp",
"shared_mutex": "cpp",
"*.ipp": "cpp",
"cassert": "cpp",
"charconv": "cpp",
"source_location": "cpp",
"ranges": "cpp"
},
"cmake.configureOnOpen": false,
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools"
}

23
.vscode/tasks.json vendored
View File

@@ -32,6 +32,29 @@
],
"group": "build",
"detail": "Task generated by Debugger."
},
{
"type": "cppbuild",
"label": "C/C++: g++ build active file",
"command": "/usr/bin/g++",
"args": [
"-fdiagnostics-color=always",
"-g",
"${file}",
"-o",
"${fileDirname}/${fileBasenameNoExtension}"
],
"options": {
"cwd": "${fileDirname}"
},
"problemMatcher": [
"$gcc"
],
"group": {
"kind": "build",
"isDefault": true
},
"detail": "Task generated by Debugger."
}
]
}

View File

@@ -1,16 +1,20 @@
cmake_minimum_required(VERSION 3.20)
project(BayesNet
VERSION 0.1.0
VERSION 0.2.0
DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX
)
if (CODE_COVERAGE AND NOT ENABLE_TESTING)
MESSAGE(FATAL_ERROR "Code coverage requires testing enabled")
endif (CODE_COVERAGE AND NOT ENABLE_TESTING)
find_package(Torch REQUIRED)
if (POLICY CMP0135)
cmake_policy(SET CMP0135 NEW)
cmake_policy(SET CMP0135 NEW)
endif ()
# Global CMake variables
@@ -24,26 +28,33 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
# Options
# -------
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" ON)
option(CODE_COVERAGE "Collect coverage from test library" ON)
set(CMAKE_BUILD_TYPE "Debug")
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# CMakes modules
# --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
include(AddGitSubmodule)
include(StaticAnalyzers) # clang-tidy
include(CodeCoverage)
if (CODE_COVERAGE)
enable_testing()
include(CodeCoverage)
MESSAGE("Code coverage enabled")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE)
if (ENABLE_CLANG_TIDY)
include(StaticAnalyzers) # clang-tidy
endif (ENABLE_CLANG_TIDY)
# External libraries - dependencies of BayesNet
# ---------------------------------------------
# include(FetchContent)
add_git_submodule("lib/mdlp")
add_git_submodule("lib/catch2")
add_git_submodule("lib/argparse")
add_git_submodule("lib/json")
find_library(XLSXWRITER_LIB libxlsxwriter.dylib PATHS /usr/local/lib)
# Subdirectories
# --------------
@@ -59,18 +70,10 @@ file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform
# Testing
# -------
if (ENABLE_TESTING)
MESSAGE("Testing enabled")
enable_testing()
if (CODE_COVERAGE)
#include(CodeCoverage)
MESSAGE("Code coverage enabled")
set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE)
#find_package(Catch2 3 REQUIRED)
add_git_submodule("lib/catch2")
include(CTest)
#include(Catch)
add_subdirectory(tests)
endif (ENABLE_TESTING)

View File

@@ -11,17 +11,45 @@ setup: ## Install dependencies for tests and coverage
pip install gcovr; \
fi
dest ?= ../discretizbench
copy: ## Copy binary files to selected folder
@echo "Destination folder: $(dest)"
make build
@echo ">>> Copying files to $(dest)"
@cp build/src/Platform/main $(dest)
@cp build/src/Platform/list $(dest)
@cp build/src/Platform/manage $(dest)
@cp build/src/Platform/best $(dest)
@echo ">>> Done"
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
build: ## Build the project
@echo ">>> Building BayesNet ...";
build: ## Build the main and BayesNetSample
cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32
clean: ## Clean the debug info
@echo ">>> Cleaning Debug BayesNet ...";
find . -name "*.gcda" -print0 | xargs -0 rm
@echo ">>> Done";
clang-uml: ## Create uml class and sequence diagrams
clang-uml -p --add-compile-flag -I /usr/lib/gcc/x86_64-redhat-linux/8/include/
debug: ## Build a debug version of the project
@echo ">>> Building Debug BayesNet ...";
@if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build;
cmake -S . -B build; \
cd build; \
make; \
cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \
cmake --build build -t main -t BayesNetSample -t manage -t list -t best -t unit_tests -j 32;
@echo ">>> Done";
release: ## Build a Release version of the project
@echo ">>> Building Release BayesNet ...";
@if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \
cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32;
@echo ">>> Done";
test: ## Run tests

View File

@@ -2,4 +2,36 @@
Bayesian Network Classifier with libtorch from scratch
## 0. Setup
### libxlswriter
Before compiling BayesNet.
```bash
cd lib/libxlsxwriter
make
sudo make install
```
It has to be installed in /usr/local/lib otherwise CMakeLists.txt has to be modified accordingly
Environment variable has to be set:
```bash
export LD_LIBRARY_PATH=/usr/local/lib
```
### Release
```bash
make release
```
### Debug & Tests
```bash
make debug
```
## 1. Introduction

View File

@@ -1 +0,0 @@
null

BIN
diagrams/BayesNet.pdf Executable file

Binary file not shown.

View File

@@ -1,5 +1,4 @@
filter = src/
exclude = external/
exclude = tests/
exclude-directories = build/lib/
print-summary = yes
sort-percentage = yes

View File

@@ -1,2 +1 @@
add_library(ArffFiles ArffFiles.cc)
#target_link_libraries(BayesNet "${TORCH_LIBRARIES}")

1
lib/libxlsxwriter Submodule

Submodule lib/libxlsxwriter added at 44e72c5862

View File

@@ -3,5 +3,6 @@ include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,16 +1,16 @@
#include <iostream>
#include <torch/torch.h>
#include <string>
#include <thread>
#include <map>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "ArffFiles.h"
#include "BayesMetrics.h"
#include "CPPFImdlp.h"
#include "Folding.h"
#include "Models.h"
#include "modelRegister.h"
#include <fstream>
using namespace std;
@@ -42,7 +42,7 @@ bool file_exists(const std::string& name)
}
pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vector<vector<int>> X, vector<int> y)
{
vector<vector<int>> Xr;
vector<vector<int>> Xr; // nxm
vector<int> yr;
for (int col = 0; col < X.size(); ++col) {
Xr.push_back(vector<int>());
@@ -58,6 +58,52 @@ pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vect
int main(int argc, char** argv)
{
torch::Tensor weights_ = torch::full({ 10 }, 1.0 / 10, torch::kFloat64);
torch::Tensor y_ = torch::tensor({ 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }, torch::kInt32);
torch::Tensor ypred = torch::tensor({ 1, 1, 1, 0, 0, 1, 1, 1, 1, 0 }, torch::kInt32);
cout << "Initial weights_: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << ", ";
}
cout << "end." << endl;
cout << "y_: " << endl;
for (int i = 0; i < 10; i++) {
cout << y_.index({ i }).item<int>() << ", ";
}
cout << "end." << endl;
cout << "ypred: " << endl;
for (int i = 0; i < 10; i++) {
cout << ypred.index({ i }).item<int>() << ", ";
}
cout << "end." << endl;
auto mask_wrong = ypred != y_;
auto mask_right = ypred == y_;
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
double epsilon_t = masked_weights.sum().item<double>();
cout << "epsilon_t: " << epsilon_t << endl;
double wt = (1 - epsilon_t) / epsilon_t;
cout << "wt: " << wt << endl;
double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
cout << "alpha_t: " << alpha_t << endl;
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
cout << "exp(alpha_t): " << exp(alpha_t) << endl;
cout << "exp(-alpha_t): " << exp(-alpha_t) << endl;
weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
// Step 3.2.2: Update weights of right samples
weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>();
cout << "totalWeights: " << totalWeights << endl;
cout << "Before normalization: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << endl;
}
weights_ = weights_ / totalWeights;
cout << "After normalization: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << endl;
}
map<string, bool> datasets = {
{"diabetes", true},
{"ecoli", true},
@@ -96,6 +142,7 @@ int main(int argc, char** argv)
}
);
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
@@ -113,7 +160,7 @@ int main(int argc, char** argv)
throw runtime_error("Number of folds must be an integer");
}});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors;
bool class_last, stratified, tensors, dump_cpt;
string model_name, file_name, path, complete_file_name;
int nFolds, seed;
try {
@@ -126,6 +173,7 @@ int main(int argc, char** argv)
tensors = program.get<bool>("tensors");
nFolds = program.get<int>("folds");
seed = program.get<int>("seed");
dump_cpt = program.get<bool>("dumpcpt");
class_last = datasets[file_name];
if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
@@ -161,13 +209,23 @@ int main(int argc, char** argv)
states[className] = vector<int>(maxes[className]);
auto clf = platform::Models::instance()->create(model_name);
clf->fit(Xd, y, features, className, states);
auto score = clf->score(Xd, y);
if (dump_cpt) {
cout << "--- CPT Tables ---" << endl;
clf->dump_cpt();
}
auto lines = clf->show();
auto graph = clf->graph();
for (auto line : lines) {
cout << line << endl;
}
cout << "--- Topological Order ---" << endl;
auto order = clf->topological_order();
for (auto name : order) {
cout << name << ", ";
}
cout << "end." << endl;
auto score = clf->score(Xd, y);
cout << "Score: " << score << endl;
auto graph = clf->graph();
auto dot_file = model_name + "_" + file_name;
ofstream file(dot_file + ".dot");
file << graph;
@@ -183,11 +241,11 @@ int main(int argc, char** argv)
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
}
float total_score = 0, total_score_train = 0, score_train, score_test;
Fold* fold;
platform::Fold* fold;
if (stratified)
fold = new StratifiedKFold(nFolds, y, seed);
fold = new platform::StratifiedKFold(nFolds, y, seed);
else
fold = new KFold(nFolds, y.size(), seed);
fold = new platform::KFold(nFolds, y.size(), seed);
for (auto i = 0; i < nFolds; ++i) {
auto [train, test] = fold->getFold(i);
cout << "Fold: " << i + 1 << endl;
@@ -199,6 +257,7 @@ int main(int argc, char** argv)
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
torch::Tensor ytestt = yt.index({ ttest });
clf->fit(Xtraint, ytraint, features, className, states);
auto temp = clf->predict(Xtraint);
score_train = clf->score(Xtraint, ytraint);
score_test = clf->score(Xtestt, ytestt);
} else {
@@ -208,6 +267,10 @@ int main(int argc, char** argv)
score_train = clf->score(Xtrain, ytrain);
score_test = clf->score(Xtest, ytest);
}
if (dump_cpt) {
cout << "--- CPT Tables ---" << endl;
clf->dump_cpt();
}
total_score_train += score_train;
total_score += score_test;
cout << "Score Train: " << score_train << endl;
@@ -216,6 +279,5 @@ int main(int argc, char** argv)
}
cout << "**********************************************************************************" << endl;
cout << "Average Score Train: " << total_score_train / nFolds << endl;
cout << "Average Score Test : " << total_score / nFolds << endl;
return 0;
cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
}

View File

@@ -2,14 +2,16 @@
namespace bayesnet {
AODE::AODE() : Ensemble() {}
void AODE::train()
void AODE::buildModel(const torch::Tensor& weights)
{
models.clear();
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODE>(i));
}
n_models = models.size();
significanceModels = vector<double>(n_models, 1.0);
}
vector<string> AODE::graph(string title)
vector<string> AODE::graph(const string& title) const
{
return Ensemble::graph(title);
}

View File

@@ -5,11 +5,11 @@
namespace bayesnet {
class AODE : public Ensemble {
protected:
void train() override;
void buildModel(const torch::Tensor& weights) override;
public:
AODE();
virtual ~AODE() {};
vector<string> graph(string title = "AODE") override;
vector<string> graph(const string& title = "AODE") const override;
};
}
#endif

41
src/BayesNet/AODELd.cc Normal file
View File

@@ -0,0 +1,41 @@
#include "AODELd.h"
#include "Models.h"
namespace bayesnet {
using namespace std;
AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
Ensemble::fit(dataset, features, className, states);
return *this;
}
void AODELd::buildModel(const torch::Tensor& weights)
{
models.clear();
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODELd>(i));
}
n_models = models.size();
significanceModels = vector<double>(n_models, 1.0);
}
void AODELd::trainModel(const torch::Tensor& weights)
{
for (const auto& model : models) {
model->fit(Xf, y, features, className, states);
}
}
vector<string> AODELd::graph(const string& name) const
{
return Ensemble::graph(name);
}
}

21
src/BayesNet/AODELd.h Normal file
View File

@@ -0,0 +1,21 @@
#ifndef AODELD_H
#define AODELD_H
#include "Ensemble.h"
#include "Proposal.h"
#include "SPODELd.h"
namespace bayesnet {
using namespace std;
class AODELd : public Ensemble, public Proposal {
protected:
void trainModel(const torch::Tensor& weights) override;
void buildModel(const torch::Tensor& weights) override;
public:
AODELd();
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_) override;
virtual ~AODELd() = default;
vector<string> graph(const string& name = "AODELd") const override;
static inline string version() { return "0.0.1"; };
};
}
#endif // !AODELD_H

View File

@@ -1,23 +1,36 @@
#ifndef BASE_H
#define BASE_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <vector>
namespace bayesnet {
using namespace std;
enum status_t { NORMAL, WARNING, ERROR };
class BaseClassifier {
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
public:
virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
// X is nxm vector, y is nx1 vector
virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
// X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) = 0;
virtual ~BaseClassifier() = default;
torch::Tensor virtual predict(torch::Tensor& X) = 0;
vector<int> virtual predict(vector<vector<int>>& X) = 0;
status_t virtual getStatus() const = 0;
float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes() = 0;
int virtual getNumberOfEdges() = 0;
int virtual getNumberOfStates() = 0;
vector<string> virtual show() = 0;
vector<string> virtual graph(string title = "") = 0;
virtual ~BaseClassifier() = default;
const string inline getVersion() const { return "0.1.0"; };
int virtual getNumberOfNodes()const = 0;
int virtual getNumberOfEdges()const = 0;
int virtual getNumberOfStates() const = 0;
vector<string> virtual show() const = 0;
vector<string> virtual graph(const string& title = "") const = 0;
const string inline getVersion() const { return "0.2.0"; };
vector<string> virtual topological_order() = 0;
void virtual dump_cpt()const = 0;
virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0;
};
}
#endif

View File

@@ -1,13 +1,15 @@
#include "BayesMetrics.h"
#include "Mst.h"
namespace bayesnet {
Metrics::Metrics(torch::Tensor& samples, vector<string>& features, string& className, int classNumStates)
//samples is nxm tensor used to fit the model
Metrics::Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates)
: samples(samples)
, features(features)
, className(className)
, classNumStates(classNumStates)
{
}
//samples is nxm vector used to fit the model
Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates)
: features(features)
, className(className)
@@ -15,9 +17,48 @@ namespace bayesnet {
, samples(torch::zeros({ static_cast<int>(vsamples[0].size()), static_cast<int>(vsamples.size() + 1) }, torch::kInt32))
{
for (int i = 0; i < vsamples.size(); ++i) {
samples.index_put_({ "...", i }, torch::tensor(vsamples[i], torch::kInt32));
samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32));
}
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32));
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
}
vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
{
// Return the K Best features
auto n = samples.size(0) - 1;
if (k == 0) {
k = n;
}
// compute scores
scoresKBest.clear();
featuresKBest.clear();
auto label = samples.index({ -1, "..." });
for (int i = 0; i < n; ++i) {
scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
featuresKBest.push_back(i);
}
// sort & reduce scores and features
if (ascending) {
sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
{ return scoresKBest[i] < scoresKBest[j]; });
sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
if (k < n) {
for (int i = 0; i < n - k; ++i) {
featuresKBest.erase(featuresKBest.begin());
scoresKBest.erase(scoresKBest.begin());
}
}
} else {
sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
{ return scoresKBest[i] > scoresKBest[j]; });
sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
featuresKBest.resize(k);
scoresKBest.resize(k);
}
return featuresKBest;
}
vector<double> Metrics::getScoresKBest() const
{
return scoresKBest;
}
vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
{
@@ -30,28 +71,29 @@ namespace bayesnet {
}
return result;
}
torch::Tensor Metrics::conditionalEdge()
torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
{
auto result = vector<double>();
auto source = vector<string>(features);
source.push_back(className);
auto combinations = doCombinations(source);
// Compute class prior
auto margin = torch::zeros({ classNumStates });
auto margin = torch::zeros({ classNumStates }, torch::kFloat);
for (int value = 0; value < classNumStates; ++value) {
auto mask = samples.index({ "...", -1 }) == value;
margin[value] = mask.sum().item<float>() / samples.sizes()[0];
auto mask = samples.index({ -1, "..." }) == value;
margin[value] = mask.sum().item<double>() / samples.size(1);
}
for (auto [first, second] : combinations) {
int index_first = find(features.begin(), features.end(), first) - features.begin();
int index_second = find(features.begin(), features.end(), second) - features.begin();
double accumulated = 0;
for (int value = 0; value < classNumStates; ++value) {
auto mask = samples.index({ "...", -1 }) == value;
auto first_dataset = samples.index({ mask, index_first });
auto second_dataset = samples.index({ mask, index_second });
auto mi = mutualInformation(first_dataset, second_dataset);
auto pb = margin[value].item<float>();
auto mask = samples.index({ -1, "..." }) == value;
auto first_dataset = samples.index({ index_first, mask });
auto second_dataset = samples.index({ index_second, mask });
auto weights_dataset = weights.index({ mask });
auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);
auto pb = margin[value].item<double>();
accumulated += pb * mi;
}
result.push_back(accumulated);
@@ -67,31 +109,33 @@ namespace bayesnet {
}
return matrix;
}
vector<float> Metrics::conditionalEdgeWeights()
// To use in Python
vector<float> Metrics::conditionalEdgeWeights(vector<float>& weights_)
{
auto matrix = conditionalEdge();
const torch::Tensor weights = torch::tensor(weights_);
auto matrix = conditionalEdge(weights);
std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
return v;
}
double Metrics::entropy(torch::Tensor& feature)
double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
{
torch::Tensor counts = feature.bincount();
int totalWeight = counts.sum().item<int>();
torch::Tensor counts = feature.bincount(weights);
double totalWeight = counts.sum().item<double>();
torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
torch::Tensor logProbs = torch::log(probs);
torch::Tensor entropy = -probs * logProbs;
return entropy.nansum().item<double>();
}
// H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
double Metrics::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
{
int numSamples = firstFeature.sizes()[0];
torch::Tensor featureCounts = secondFeature.bincount();
torch::Tensor featureCounts = secondFeature.bincount(weights);
unordered_map<int, unordered_map<int, double>> jointCounts;
double totalWeight = 0;
for (auto i = 0; i < numSamples; i++) {
jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1;
totalWeight += 1;
jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
totalWeight += weights[i].item<float>();
}
if (totalWeight == 0)
return 0;
@@ -112,16 +156,16 @@ namespace bayesnet {
return entropyValue;
}
// I(X;Y) = H(Y) - H(Y|X)
double Metrics::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
{
return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
}
/*
Compute the maximum spanning tree considering the weights as distances
and the indices of the weights as nodes of this square matrix using
Kruskal algorithm
*/
vector<pair<int, int>> Metrics::maximumSpanningTree(vector<string> features, Tensor& weights, int root)
vector<pair<int, int>> Metrics::maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root)
{
auto mst = MST(features, weights, root);
return mst.maximumSpanningTree();

View File

@@ -8,21 +8,25 @@ namespace bayesnet {
using namespace torch;
class Metrics {
private:
Tensor samples;
Tensor samples; // nxm tensor used to fit the model
vector<string> features;
string className;
int classNumStates = 0;
vector<double> scoresKBest;
vector<int> featuresKBest; // sorted indices of the features
double entropy(const Tensor& feature, const Tensor& weights);
double conditionalEntropy(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
vector<pair<string, string>> doCombinations(const vector<string>&);
public:
Metrics() = default;
Metrics(Tensor&, vector<string>&, string&, int);
Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int);
double entropy(Tensor&);
double conditionalEntropy(Tensor&, Tensor&);
double mutualInformation(Tensor&, Tensor&);
vector<float> conditionalEdgeWeights();
Tensor conditionalEdge();
vector<pair<string, string>> doCombinations(const vector<string>&);
vector<pair<int, int>> maximumSpanningTree(vector<string> features, Tensor& weights, int root);
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates);
vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending=false, unsigned k = 0);
vector<double> getScoresKBest() const;
double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
vector<float> conditionalEdgeWeights(vector<float>& weights); // To use in Python
Tensor conditionalEdge(const torch::Tensor& weights);
vector<pair<int, int>> maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root);
};
}
#endif

148
src/BayesNet/BoostAODE.cc Normal file
View File

@@ -0,0 +1,148 @@
#include "BoostAODE.h"
#include <set>
#include "BayesMetrics.h"
#include "Colors.h"
#include "Folding.h"
#include <limits.h>
namespace bayesnet {
BoostAODE::BoostAODE() : Ensemble() {}
void BoostAODE::buildModel(const torch::Tensor& weights)
{
// Models shall be built in trainModel
}
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence" };
checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"];
}
if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"];
}
if (hyperparameters.contains("ascending")) {
ascending = hyperparameters["ascending"];
}
if (hyperparameters.contains("convergence")) {
convergence = hyperparameters["convergence"];
}
}
void BoostAODE::validationInit()
{
auto y_ = dataset.index({ -1, "..." });
if (convergence) {
// Prepare train & validation sets from train data
auto fold = platform::StratifiedKFold(5, y_, 271);
dataset_ = torch::clone(dataset);
// save input dataset
auto [train, test] = fold.getFold(0);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
// Get train and validation sets
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
y_train = dataset.index({ -1, train_t });
X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
y_test = dataset.index({ -1, test_t });
dataset = X_train;
m = X_train.size(1);
auto n_classes = states.at(className).size();
metrics = Metrics(dataset, features, className, n_classes);
// Build dataset with train data
buildDataset(y_train);
} else {
// Use all data to train
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
y_train = y_;
}
}
void BoostAODE::trainModel(const torch::Tensor& weights)
{
models.clear();
n_models = 0;
if (maxModels == 0)
maxModels = .1 * n > 10 ? .1 * n : n;
validationInit();
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
bool exitCondition = false;
unordered_set<int> featuresUsed;
// Variables to control the accuracy finish condition
double priorAccuracy = 0.0;
double delta = 1.0;
double threshold = 1e-4;
int tolerance = 5; // number of times the accuracy can be lower than the threshold
int count = 0; // number of times the accuracy is lower than the threshold
fitted = true; // to enable predict
// Step 0: Set the finish condition
// if not repeatSparent a finish condition is run out of features
// n_models == maxModels
// epsiolon sub t > 0.5 => inverse the weights policy
// validation error is not decreasing
while (!exitCondition) {
// Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
unique_ptr<Classifier> model;
auto feature = featureSelection[0];
if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
bool found = false;
for (auto feat : featureSelection) {
if (find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
continue;
}
found = true;
feature = feat;
break;
}
if (!found) {
exitCondition = true;
continue;
}
}
featuresUsed.insert(feature);
model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_);
auto ypred = model->predict(X_train);
// Step 3.1: Compute the classifier amout of say
auto mask_wrong = ypred != y_train;
auto mask_right = ypred == y_train;
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
double epsilon_t = masked_weights.sum().item<double>();
double wt = (1 - epsilon_t) / epsilon_t;
double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
// Step 3.2.2: Update weights of right samples
weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>();
weights_ = weights_ / totalWeights;
// Step 3.4: Store classifier and its accuracy to weigh its future vote
models.push_back(std::move(model));
significanceModels.push_back(alpha_t);
n_models++;
if (convergence) {
auto y_val_predict = predict(X_test);
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
if (priorAccuracy == 0) {
priorAccuracy = accuracy;
} else {
delta = accuracy - priorAccuracy;
}
if (delta < threshold) {
count++;
}
}
exitCondition = n_models == maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance;
}
if (featuresUsed.size() != features.size()) {
status = WARNING;
}
}
vector<string> BoostAODE::graph(const string& title) const
{
return Ensemble::graph(title);
}
}

25
src/BayesNet/BoostAODE.h Normal file
View File

@@ -0,0 +1,25 @@
#ifndef BOOSTAODE_H
#define BOOSTAODE_H
#include "Ensemble.h"
#include "SPODE.h"
namespace bayesnet {
class BoostAODE : public Ensemble {
public:
BoostAODE();
virtual ~BoostAODE() {};
vector<string> graph(const string& title = "BoostAODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
protected:
void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override;
private:
torch::Tensor dataset_;
torch::Tensor X_train, y_train, X_test, y_test;
void validationInit();
bool repeatSparent = false;
int maxModels = 0;
bool ascending = false; //Process KBest features ascending or descending order
bool convergence = false; //if true, stop when the model does not improve
};
}
#endif

View File

@@ -1,2 +1,9 @@
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc Mst.cc)
target_link_libraries(BayesNet "${TORCH_LIBRARIES}")
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")

View File

@@ -5,56 +5,77 @@ namespace bayesnet {
using namespace torch;
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
Classifier& Classifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
Classifier& Classifier::build(const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights)
{
dataset = torch::cat({ X, y.view({y.size(0), 1}) }, 1);
this->features = features;
this->className = className;
this->states = states;
m = dataset.size(1);
n = dataset.size(0) - 1;
checkFitParameters();
auto n_classes = states[className].size();
auto n_classes = states.at(className).size();
metrics = Metrics(dataset, features, className, n_classes);
train();
if (Xv == vector<vector<int>>()) {
// fit with tensors
model.fit(X, y, features, className);
} else {
// fit with vectors
model.fit(Xv, yv, features, className);
}
model.initialize();
buildModel(weights);
trainModel(weights);
fitted = true;
return *this;
}
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
void Classifier::buildDataset(Tensor& ytmp)
{
this->X = torch::transpose(X, 0, 1);
this->y = y;
Xv = vector<vector<int>>();
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
return build(features, className, states);
}
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, kInt32);
Xv = X;
for (int i = 0; i < X.size(); ++i) {
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32));
try {
auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1);
dataset = torch::cat({ dataset, yresized }, 0);
}
this->y = torch::tensor(y, kInt32);
yv = y;
return build(features, className, states);
catch (const std::exception& e) {
std::cerr << e.what() << '\n';
cout << "X dimensions: " << dataset.sizes() << "\n";
cout << "y dimensions: " << ytmp.sizes() << "\n";
exit(1);
}
}
void Classifier::trainModel(const torch::Tensor& weights)
{
model.fit(dataset, weights, features, className, states);
}
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states)
{
dataset = X;
buildDataset(y);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states)
{
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, kInt32);
for (int i = 0; i < X.size(); ++i) {
dataset.index_put_({ i, "..." }, torch::tensor(X[i], kInt32));
}
auto ytmp = torch::tensor(y, kInt32);
buildDataset(ytmp);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states)
{
this->dataset = dataset;
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights)
{
this->dataset = dataset;
return build(features, className, states, weights);
}
void Classifier::checkFitParameters()
{
auto sizes = X.sizes();
m = sizes[0];
n = sizes[1];
if (m != y.size(0)) {
throw invalid_argument("X and y must have the same number of samples");
if (torch::is_floating_point(dataset)) {
throw invalid_argument("dataset (X, y) must be of type Integer");
}
if (n != features.size()) {
throw invalid_argument("X and features must have the same number of features");
throw invalid_argument("Classifier: X " + to_string(n) + " and features " + to_string(features.size()) + " must have the same number of features");
}
if (states.find(className) == states.end()) {
throw invalid_argument("className not found in states");
@@ -65,23 +86,12 @@ namespace bayesnet {
}
}
}
Tensor Classifier::predict(Tensor& X)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
}
auto m_ = X.size(0);
auto n_ = X.size(1);
//auto Xt = torch::transpose(X, 0, 1);
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
auto temp = X.index({ "...", i });
Xd[i] = vector<int>(temp.data_ptr<int>(), temp.data_ptr<int>() + temp.numel());
}
auto yp = model.predict(Xd);
auto ypred = torch::tensor(yp, torch::kInt32);
return ypred;
return model.predict(X);
}
vector<int> Classifier::predict(vector<vector<int>>& X)
{
@@ -102,8 +112,7 @@ namespace bayesnet {
if (!fitted) {
throw logic_error("Classifier has not been fitted");
}
auto Xt = torch::transpose(X, 0, 1);
Tensor y_pred = predict(Xt);
Tensor y_pred = predict(X);
return (y_pred == y).sum().item<float>() / y.size(0);
}
float Classifier::score(vector<vector<int>>& X, vector<int>& y)
@@ -111,37 +120,53 @@ namespace bayesnet {
if (!fitted) {
throw logic_error("Classifier has not been fitted");
}
auto m_ = X[0].size();
auto n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end());
}
return model.score(Xd, y);
return model.score(X, y);
}
vector<string> Classifier::show()
vector<string> Classifier::show() const
{
return model.show();
}
void Classifier::addNodes()
{
// Add all nodes to the network
for (auto feature : features) {
model.addNode(feature, states[feature].size());
for (const auto& feature : features) {
model.addNode(feature);
}
model.addNode(className, states[className].size());
model.addNode(className);
}
int Classifier::getNumberOfNodes()
int Classifier::getNumberOfNodes() const
{
// Features does not include class
return fitted ? model.getFeatures().size() + 1 : 0;
}
int Classifier::getNumberOfEdges()
int Classifier::getNumberOfEdges() const
{
return fitted ? model.getEdges().size() : 0;
return fitted ? model.getNumEdges() : 0;
}
int Classifier::getNumberOfStates()
int Classifier::getNumberOfStates() const
{
return fitted ? model.getStates() : 0;
}
vector<string> Classifier::topological_order()
{
return model.topological_sort();
}
void Classifier::dump_cpt() const
{
model.dump_cpt();
}
void Classifier::checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters)
{
for (const auto& item : hyperparameters.items()) {
if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
throw invalid_argument("Hyperparameter " + item.key() + " is not valid");
}
}
}
void Classifier::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid, default is no hyperparameters
const vector<string> validKeys = { };
checkHyperparameters(validKeys, hyperparameters);
}
}

View File

@@ -10,36 +10,42 @@ using namespace torch;
namespace bayesnet {
class Classifier : public BaseClassifier {
private:
bool fitted;
Classifier& build(vector<string>& features, string className, map<string, vector<int>>& states);
Classifier& build(const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights);
protected:
Network model;
bool fitted;
int m, n; // m: number of samples, n: number of features
Tensor X;
vector<vector<int>> Xv;
Tensor y;
vector<int> yv;
Tensor dataset;
Network model;
Metrics metrics;
vector<string> features;
string className;
map<string, vector<int>> states;
Tensor dataset; // (n+1)xm tensor
status_t status = NORMAL;
void checkFitParameters();
virtual void train() = 0;
virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override;
void checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters);
void buildDataset(torch::Tensor& y);
public:
Classifier(Network model);
virtual ~Classifier() = default;
Classifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Classifier& fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) override;
void addNodes();
int getNumberOfNodes() override;
int getNumberOfEdges() override;
int getNumberOfStates() override;
Tensor predict(Tensor& X);
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
Tensor predict(Tensor& X) override;
status_t getStatus() const override { return status; }
vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override;
vector<string> show() override;
vector<string> show() const override;
vector<string> topological_order() override;
void dump_cpt() const override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
};
}
#endif

View File

@@ -3,69 +3,55 @@
namespace bayesnet {
using namespace torch;
Ensemble::Ensemble() : m(0), n(0), n_models(0), metrics(Metrics()), fitted(false) {}
Ensemble& Ensemble::build(vector<string>& features, string className, map<string, vector<int>>& states)
Ensemble::Ensemble() : Classifier(Network()), n_models(0) {}
void Ensemble::trainModel(const torch::Tensor& weights)
{
dataset = cat({ X, y.view({y.size(0), 1}) }, 1);
this->features = features;
this->className = className;
this->states = states;
auto n_classes = states[className].size();
metrics = Metrics(dataset, features, className, n_classes);
// Build models
train();
// Train models
n_models = models.size();
for (auto i = 0; i < n_models; ++i) {
models[i]->fit(Xv, yv, features, className, states);
// fit with vectors
models[i]->fit(dataset, features, className, states);
}
fitted = true;
return *this;
}
Ensemble& Ensemble::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
vector<int> Ensemble::voting(Tensor& y_pred)
{
this->X = X;
this->y = y;
Xv = vector<vector<int>>();
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
return build(features, className, states);
}
Ensemble& Ensemble::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
{
this->X = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, kInt32);
Xv = X;
for (int i = 0; i < X.size(); ++i) {
this->X.index_put_({ "...", i }, torch::tensor(X[i], kInt32));
auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final;
int numClasses = states.at(className).size();
// y_pred is m x n_models with the prediction of every model for each sample
for (int i = 0; i < y_pred.size(0); ++i) {
// votes store in each index (value of class) the significance added by each model
// i.e. votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions
vector<double> votes(numClasses, 0.0);
for (int j = 0; j < n_models; ++j) {
votes[y_pred_[i][j]] += significanceModels.at(j);
}
// argsort in descending order
auto indices = argsort(votes);
y_pred_final.push_back(indices[0]);
}
this->y = torch::tensor(y, kInt32);
yv = y;
return build(features, className, states);
return y_pred_final;
}
Tensor Ensemble::predict(Tensor& X)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
}
Tensor y_pred = torch::zeros({ X.size(0), n_models }, kInt32);
Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32);
auto threads{ vector<thread>() };
mutex mtx;
for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, models[i]->predict(X));
threads.push_back(thread([&, i]() {
auto ypredict = models[i]->predict(X);
lock_guard<mutex> lock(mtx);
y_pred.index_put_({ "...", i }, ypredict);
}));
}
for (auto& thread : threads) {
thread.join();
}
return torch::tensor(voting(y_pred));
}
vector<int> Ensemble::voting(Tensor& y_pred)
{
auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final;
for (int i = 0; i < y_pred.size(0); ++i) {
vector<float> votes(states[className].size(), 0);
for (int j = 0; j < y_pred.size(1); ++j) {
votes[y_pred_[i][j]] += 1;
}
auto indices = argsort(votes);
y_pred_final.push_back(indices[0]);
}
return y_pred_final;
}
vector<int> Ensemble::predict(vector<vector<int>>& X)
{
if (!fitted) {
@@ -110,9 +96,8 @@ namespace bayesnet {
}
}
return (double)correct / y_pred.size();
}
vector<string> Ensemble::show()
vector<string> Ensemble::show() const
{
auto result = vector<string>();
for (auto i = 0; i < n_models; ++i) {
@@ -121,7 +106,7 @@ namespace bayesnet {
}
return result;
}
vector<string> Ensemble::graph(string title)
vector<string> Ensemble::graph(const string& title) const
{
auto result = vector<string>();
for (auto i = 0; i < n_models; ++i) {
@@ -130,7 +115,7 @@ namespace bayesnet {
}
return result;
}
int Ensemble::getNumberOfNodes()
int Ensemble::getNumberOfNodes() const
{
int nodes = 0;
for (auto i = 0; i < n_models; ++i) {
@@ -138,7 +123,7 @@ namespace bayesnet {
}
return nodes;
}
int Ensemble::getNumberOfEdges()
int Ensemble::getNumberOfEdges() const
{
int edges = 0;
for (auto i = 0; i < n_models; ++i) {
@@ -146,7 +131,7 @@ namespace bayesnet {
}
return edges;
}
int Ensemble::getNumberOfStates()
int Ensemble::getNumberOfStates() const
{
int nstates = 0;
for (auto i = 0; i < n_models; ++i) {

View File

@@ -8,39 +8,34 @@ using namespace std;
using namespace torch;
namespace bayesnet {
class Ensemble : public BaseClassifier {
class Ensemble : public Classifier {
private:
bool fitted;
long n_models;
Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states);
protected:
unsigned n_models;
vector<unique_ptr<Classifier>> models;
int m, n; // m: number of samples, n: number of features
Tensor X;
vector<vector<int>> Xv;
Tensor y;
vector<int> yv;
Tensor dataset;
Metrics metrics;
vector<string> features;
string className;
map<string, vector<int>> states;
void virtual train() = 0;
vector<double> significanceModels;
void trainModel(const torch::Tensor& weights) override;
vector<int> voting(Tensor& y_pred);
public:
Ensemble();
virtual ~Ensemble() = default;
Ensemble& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Ensemble& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
Tensor predict(Tensor& X);
Tensor predict(Tensor& X) override;
vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override;
int getNumberOfNodes() override;
int getNumberOfEdges() override;
int getNumberOfStates() override;
vector<string> show() override;
vector<string> graph(string title) override;
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
vector<string> show() const override;
vector<string> graph(const string& title) const override;
vector<string> topological_order() override
{
return vector<string>();
}
void dump_cpt() const override
{
}
};
}
#endif

View File

@@ -4,7 +4,19 @@ namespace bayesnet {
using namespace torch;
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {}
void KDB::train()
void KDB::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const vector<string> validKeys = { "k", "theta" };
checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("k")) {
k = hyperparameters["k"];
}
if (hyperparameters.contains("theta")) {
theta = hyperparameters["theta"];
}
}
void KDB::buildModel(const torch::Tensor& weights)
{
/*
1. For each feature Xi, compute mutual information, I(X;C),
@@ -27,25 +39,25 @@ namespace bayesnet {
*/
// 1. For each feature Xi, compute mutual information, I(X;C),
// where C is the class.
vector <float> mi;
addNodes();
const Tensor& y = dataset.index({ -1, "..." });
vector<double> mi;
for (auto i = 0; i < features.size(); i++) {
Tensor firstFeature = X.index({ "...", i });
mi.push_back(metrics.mutualInformation(firstFeature, y));
Tensor firstFeature = dataset.index({ i, "..." });
mi.push_back(metrics.mutualInformation(firstFeature, y, weights));
}
// 2. Compute class conditional mutual information I(Xi;XjIC), f or each
auto conditionalEdgeWeights = metrics.conditionalEdge();
auto conditionalEdgeWeights = metrics.conditionalEdge(weights);
// 3. Let the used variable list, S, be empty.
vector<int> S;
// 4. Let the DAG network being constructed, BN, begin with a single
// class node, C.
model.addNode(className, states[className].size());
// 5. Repeat until S includes all domain features
// 5.1. Select feature Xmax which is not in S and has the largest value
// I(Xmax;C).
auto order = argsort(mi);
for (auto idx : order) {
// 5.2. Add a node to BN representing Xmax.
model.addNode(features[idx], states[features[idx]].size());
// 5.3. Add an arc from C to Xmax in BN.
model.addEdge(className, features[idx]);
// 5.4. Add m = min(lSl,/c) arcs from m distinct features Xj in S with
@@ -79,11 +91,12 @@ namespace bayesnet {
exit_cond = num == n_edges || candidates.size(0) == 0;
}
}
vector<string> KDB::graph(string title)
vector<string> KDB::graph(const string& title) const
{
string header{ title };
if (title == "KDB") {
title += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")";
header += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")";
}
return model.graph(title);
return model.graph(header);
}
}

View File

@@ -1,5 +1,6 @@
#ifndef KDB_H
#define KDB_H
#include <torch/torch.h>
#include "Classifier.h"
#include "bayesnetUtils.h"
namespace bayesnet {
@@ -11,11 +12,12 @@ namespace bayesnet {
float theta;
void add_m_edges(int idx, vector<int>& S, Tensor& weights);
protected:
void train() override;
void buildModel(const torch::Tensor& weights) override;
public:
explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {};
vector<string> graph(string name = "KDB") override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
vector<string> graph(const string& name = "KDB") const override;
};
}
#endif

30
src/BayesNet/KDBLd.cc Normal file
View File

@@ -0,0 +1,30 @@
#include "KDBLd.h"
namespace bayesnet {
using namespace std;
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
KDB::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor KDBLd::predict(Tensor& X)
{
auto Xt = prepareX(X);
return KDB::predict(Xt);
}
vector<string> KDBLd::graph(const string& name) const
{
return KDB::graph(name);
}
}

19
src/BayesNet/KDBLd.h Normal file
View File

@@ -0,0 +1,19 @@
#ifndef KDBLD_H
#define KDBLD_H
#include "KDB.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class KDBLd : public KDB, public Proposal {
private:
public:
explicit KDBLd(int k);
virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "KDB") const override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
};
}
#endif // !KDBLD_H

View File

@@ -94,7 +94,7 @@ namespace bayesnet {
return result;
}
MST::MST(vector<string>& features, Tensor& weights, int root) : features(features), weights(weights), root(root) {}
MST::MST(const vector<string>& features, const Tensor& weights, const int root) : features(features), weights(weights), root(root) {}
vector<pair<int, int>> MST::maximumSpanningTree()
{
auto num_features = features.size();

View File

@@ -13,7 +13,7 @@ namespace bayesnet {
int root = 0;
public:
MST() = default;
MST(vector<string>& features, Tensor& weights, int root);
MST(const vector<string>& features, const Tensor& weights, const int root);
vector<pair<int, int>> maximumSpanningTree();
};
class Graph {

View File

@@ -3,15 +3,24 @@
#include "Network.h"
#include "bayesnetUtils.h"
namespace bayesnet {
Network::Network() : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(0.8), fitted(false) {}
Network::Network(float maxT) : laplaceSmoothing(1), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
Network::Network(float maxT, int smoothing) : laplaceSmoothing(smoothing), features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false) {}
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads()), fitted(other.fitted)
Network::Network() : features(vector<string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
Network::Network(float maxT) : features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.
getmaxThreads()), fitted(other.fitted)
{
for (const auto& pair : other.nodes) {
nodes[pair.first] = std::make_unique<Node>(*pair.second);
}
}
void Network::initialize()
{
features = vector<string>();
className = "";
classNumStates = 0;
fitted = false;
nodes.clear();
samples = torch::Tensor();
}
float Network::getmaxThreads()
{
return maxThreads;
@@ -20,28 +29,28 @@ namespace bayesnet {
{
return samples;
}
void Network::addNode(const string& name, int numStates)
void Network::addNode(const string& name)
{
if (name == "") {
throw invalid_argument("Node name cannot be empty");
}
if (nodes.find(name) != nodes.end()) {
return;
}
if (find(features.begin(), features.end(), name) == features.end()) {
features.push_back(name);
}
if (nodes.find(name) != nodes.end()) {
// if node exists update its number of states and remove parents, children and CPT
nodes[name]->clear();
nodes[name]->setNumStates(numStates);
return;
}
nodes[name] = std::make_unique<Node>(name, numStates);
nodes[name] = std::make_unique<Node>(name);
}
vector<string> Network::getFeatures()
vector<string> Network::getFeatures() const
{
return features;
}
int Network::getClassNumStates()
int Network::getClassNumStates() const
{
return classNumStates;
}
int Network::getStates()
int Network::getStates() const
{
int result = 0;
for (auto& node : nodes) {
@@ -49,7 +58,7 @@ namespace bayesnet {
}
return result;
}
string Network::getClassName()
string Network::getClassName() const
{
return className;
}
@@ -94,83 +103,121 @@ namespace bayesnet {
{
return nodes;
}
void Network::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& featureNames, const string& className)
void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights)
{
features = featureNames;
this->className = className;
dataset.clear();
// Specific part
classNumStates = torch::max(y).item<int>() + 1;
samples = torch::cat({ X, y.view({ y.size(0), 1 }) }, 1);
for (int i = 0; i < featureNames.size(); ++i) {
auto column = torch::flatten(X.index({ "...", i }));
auto k = vector<int>();
for (auto z = 0; z < X.size(0); ++z) {
k.push_back(column[z].item<int>());
if (weights.size(0) != n_samples) {
throw invalid_argument("Weights (" + to_string(weights.size(0)) + ") must have the same number of elements as samples (" + to_string(n_samples) + ") in Network::fit");
}
if (n_samples != n_samples_y) {
throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")");
}
if (n_features != featureNames.size()) {
throw invalid_argument("X and features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(featureNames.size()) + ")");
}
if (n_features != features.size() - 1) {
throw invalid_argument("X and local features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(features.size() - 1) + ")");
}
if (find(features.begin(), features.end(), className) == features.end()) {
throw invalid_argument("className not found in Network::features");
}
for (auto& feature : featureNames) {
if (find(features.begin(), features.end(), feature) == features.end()) {
throw invalid_argument("Feature " + feature + " not found in Network::features");
}
if (states.find(feature) == states.end()) {
throw invalid_argument("Feature " + feature + " not found in states");
}
dataset[featureNames[i]] = k;
}
dataset[className] = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
completeFit();
}
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<string>& featureNames, const string& className)
void Network::setStates(const map<string, vector<int>>& states)
{
features = featureNames;
// Set states to every Node in the network
for_each(features.begin(), features.end(), [this, &states](const string& feature) {
nodes.at(feature)->setNumStates(states.at(feature).size());
});
classNumStates = nodes.at(className)->getNumStates();
}
// X comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
{
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
this->className = className;
dataset.clear();
// Specific part
classNumStates = *max_element(labels.begin(), labels.end()) + 1;
// Build dataset & tensor of samples
samples = torch::zeros({ static_cast<int>(input_data[0].size()), static_cast<int>(input_data.size() + 1) }, torch::kInt32);
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X , ytmp }, 0);
for (int i = 0; i < featureNames.size(); ++i) {
dataset[featureNames[i]] = input_data[i];
samples.index_put_({ "...", i }, torch::tensor(input_data[i], torch::kInt32));
auto row_feature = X.index({ i, "..." });
}
dataset[className] = labels;
samples.index_put_({ "...", -1 }, torch::tensor(labels, torch::kInt32));
completeFit();
completeFit(states, weights);
}
void Network::completeFit()
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
{
int maxThreadsRunning = static_cast<int>(std::thread::hardware_concurrency() * maxThreads);
if (maxThreadsRunning < 1) {
maxThreadsRunning = 1;
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className;
this->samples = samples;
completeFit(states, weights);
}
// input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<float>& weights_, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
{
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
this->className = className;
// Build tensor of samples (nxm) (n+1 because of the class)
samples = torch::zeros({ static_cast<int>(input_data.size() + 1), static_cast<int>(input_data[0].size()) }, torch::kInt32);
for (int i = 0; i < featureNames.size(); ++i) {
samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
}
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(states, weights);
}
void Network::completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights)
{
setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
vector<thread> threads;
mutex mtx;
condition_variable cv;
int activeThreads = 0;
int nextNodeIndex = 0;
while (nextNodeIndex < nodes.size()) {
unique_lock<mutex> lock(mtx);
cv.wait(lock, [&activeThreads, &maxThreadsRunning]() { return activeThreads < maxThreadsRunning; });
threads.emplace_back([this, &nextNodeIndex, &mtx, &cv, &activeThreads]() {
while (true) {
unique_lock<mutex> lock(mtx);
if (nextNodeIndex >= nodes.size()) {
break; // No more work remaining
}
auto& pair = *std::next(nodes.begin(), nextNodeIndex);
++nextNodeIndex;
lock.unlock();
pair.second->computeCPT(dataset, laplaceSmoothing);
lock.lock();
nodes[pair.first] = std::move(pair.second);
lock.unlock();
}
lock_guard<mutex> lock(mtx);
--activeThreads;
cv.notify_one();
for (auto& node : nodes) {
threads.emplace_back([this, &node, &weights]() {
node.second->computeCPT(samples, features, laplaceSmoothing, weights);
});
++activeThreads;
}
for (auto& thread : threads) {
thread.join();
}
fitted = true;
}
torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict()");
}
torch::Tensor result;
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
for (int i = 0; i < samples.size(1); ++i) {
const Tensor sample = samples.index({ "...", i });
auto psample = predict_sample(sample);
auto temp = torch::tensor(psample, torch::kFloat64);
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64));
result.index_put_({ i, "..." }, temp);
}
if (proba)
return result;
else
return result.argmax(1);
}
// Return mxn tensor of probabilities
Tensor Network::predict_proba(const Tensor& samples)
{
return predict_tensor(samples, true);
}
// Return mxn tensor of probabilities
Tensor Network::predict(const Tensor& samples)
{
return predict_tensor(samples, false);
}
// Return mx1 vector of predictions
// tsamples is nxm vector of samples
vector<int> Network::predict(const vector<vector<int>>& tsamples)
{
if (!fitted) {
@@ -191,6 +238,7 @@ namespace bayesnet {
}
return predictions;
}
// Return mxn vector of probabilities
vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples)
{
if (!fitted) {
@@ -218,12 +266,13 @@ namespace bayesnet {
}
return (double)correct / y_pred.size();
}
// Return 1xn vector of probabilities
vector<double> Network::predict_sample(const vector<int>& sample)
{
// Ensure the sample size is equal to the number of features
if (sample.size() != features.size()) {
if (sample.size() != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size()) +
") does not match the number of features (" + to_string(features.size()) + ")");
") does not match the number of features (" + to_string(features.size() - 1) + ")");
}
map<string, int> evidence;
for (int i = 0; i < sample.size(); ++i) {
@@ -231,6 +280,20 @@ namespace bayesnet {
}
return exactInference(evidence);
}
// Return 1xn vector of probabilities
vector<double> Network::predict_sample(const Tensor& sample)
{
// Ensure the sample size is equal to the number of features
if (sample.size(0) != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size(0)) +
") does not match the number of features (" + to_string(features.size() - 1) + ")");
}
map<string, int> evidence;
for (int i = 0; i < sample.size(0); ++i) {
evidence[features[i]] = sample[i].item<int>();
}
return exactInference(evidence);
}
double Network::computeFactor(map<string, int>& completeEvidence)
{
double result = 1.0;
@@ -256,13 +319,12 @@ namespace bayesnet {
for (auto& thread : threads) {
thread.join();
}
// Normalize result
double sum = accumulate(result.begin(), result.end(), 0.0);
transform(result.begin(), result.end(), result.begin(), [sum](double& value) { return value / sum; });
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result;
}
vector<string> Network::show()
vector<string> Network::show() const
{
vector<string> result;
// Draw the network
@@ -275,7 +337,7 @@ namespace bayesnet {
}
return result;
}
vector<string> Network::graph(const string& title)
vector<string> Network::graph(const string& title) const
{
auto output = vector<string>();
auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
@@ -289,7 +351,7 @@ namespace bayesnet {
output.push_back("}\n");
return output;
}
vector<pair<string, string>> Network::getEdges()
vector<pair<string, string>> Network::getEdges() const
{
auto edges = vector<pair<string, string>>();
for (const auto& node : nodes) {
@@ -301,4 +363,52 @@ namespace bayesnet {
}
return edges;
}
int Network::getNumEdges() const
{
return getEdges().size();
}
vector<string> Network::topological_sort()
{
/* Check if al the fathers of every node are before the node */
auto result = features;
result.erase(remove(result.begin(), result.end(), className), result.end());
bool ending{ false };
while (!ending) {
ending = true;
for (auto feature : features) {
auto fathers = nodes[feature]->getParents();
for (const auto& father : fathers) {
auto fatherName = father->getName();
if (fatherName == className) {
continue;
}
// Check if father is placed before the actual feature
auto it = find(result.begin(), result.end(), fatherName);
if (it != result.end()) {
auto it2 = find(result.begin(), result.end(), feature);
if (it2 != result.end()) {
if (distance(it, it2) < 0) {
// if it is not, insert it before the feature
result.erase(remove(result.begin(), result.end(), fatherName), result.end());
result.insert(it2, fatherName);
ending = false;
}
} else {
throw logic_error("Error in topological sort because of node " + feature + " is not in result");
}
} else {
throw logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
}
}
}
}
return result;
}
void Network::dump_cpt() const
{
for (auto& node : nodes) {
cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl;
cout << node.second->getCPT() << endl;
}
}
}

View File

@@ -8,48 +8,52 @@ namespace bayesnet {
class Network {
private:
map<string, unique_ptr<Node>> nodes;
map<string, vector<int>> dataset;
bool fitted;
float maxThreads;
float maxThreads = 0.95;
int classNumStates;
vector<string> features;
vector<string> features; // Including classname
string className;
int laplaceSmoothing;
torch::Tensor samples;
double laplaceSmoothing;
torch::Tensor samples; // nxm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
vector<double> predict_sample(const vector<int>&);
vector<double> predict_sample(const torch::Tensor&);
vector<double> exactInference(map<string, int>&);
double computeFactor(map<string, int>&);
double mutual_info(torch::Tensor&, torch::Tensor&);
double entropy(torch::Tensor&);
double conditionalEntropy(torch::Tensor&, torch::Tensor&);
double mutualInformation(torch::Tensor&, torch::Tensor&);
void completeFit();
void completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights);
void checkFitData(int n_features, int n_samples, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights);
void setStates(const map<string, vector<int>>&);
public:
Network();
explicit Network(float, int);
explicit Network(float);
explicit Network(Network&);
~Network() = default;
torch::Tensor& getSamples();
float getmaxThreads();
void addNode(const string&, int);
void addNode(const string&);
void addEdge(const string&, const string&);
map<string, std::unique_ptr<Node>>& getNodes();
vector<string> getFeatures();
int getStates();
vector<pair<string, string>> getEdges();
int getClassNumStates();
string getClassName();
void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
void fit(torch::Tensor&, torch::Tensor&, const vector<string>&, const string&);
vector<int> predict(const vector<vector<int>>&);
//Computes the conditional edge weight of variable index u and v conditioned on class_node
torch::Tensor conditionalEdgeWeight();
vector<vector<double>> predict_proba(const vector<vector<int>>&);
vector<string> getFeatures() const;
int getStates() const;
vector<pair<string, string>> getEdges() const;
int getNumEdges() const;
int getClassNumStates() const;
string getClassName() const;
void fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<float>& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
vector<int> predict(const vector<vector<int>>&); // Return mx1 vector of predictions
torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
vector<vector<double>> predict_proba(const vector<vector<int>>&); // Return mxn vector of probabilities
torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
double score(const vector<vector<int>>&, const vector<int>&);
vector<string> show();
vector<string> graph(const string& title); // Returns a vector of strings representing the graph in graphviz format
inline string version() { return "0.1.0"; }
vector<string> topological_sort();
vector<string> show() const;
vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format
void initialize();
void dump_cpt() const;
inline string version() { return "0.2.0"; }
};
}
#endif

View File

@@ -2,8 +2,8 @@
namespace bayesnet {
Node::Node(const std::string& name, int numStates)
: name(name), numStates(numStates), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
Node::Node(const std::string& name)
: name(name), numStates(0), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
{
}
void Node::clear()
@@ -84,8 +84,9 @@ namespace bayesnet {
}
return result;
}
void Node::computeCPT(map<string, vector<int>>& dataset, const int laplaceSmoothing)
void Node::computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
{
dimensions.clear();
// Get dimensions of the CPT
dimensions.push_back(numStates);
transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
@@ -93,22 +94,34 @@ namespace bayesnet {
// Create a tensor of zeros with the dimensions of the CPT
cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
// Fill table with counts
for (int n_sample = 0; n_sample < dataset[name].size(); ++n_sample) {
torch::List<c10::optional<torch::Tensor>> coordinates;
coordinates.push_back(torch::tensor(dataset[name][n_sample]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&dataset, &n_sample](const auto& parent) { return torch::tensor(dataset[parent->getName()][n_sample]); });
auto pos = find(features.begin(), features.end(), name);
if (pos == features.end()) {
throw logic_error("Feature " + name + " not found in dataset");
}
int name_index = pos - features.begin();
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
c10::List<c10::optional<at::Tensor>> coordinates;
coordinates.push_back(dataset.index({ name_index, n_sample }));
for (auto parent : parents) {
pos = find(features.begin(), features.end(), parent->getName());
if (pos == features.end()) {
throw logic_error("Feature parent " + parent->getName() + " not found in dataset");
}
int parent_index = pos - features.begin();
coordinates.push_back(dataset.index({ parent_index, n_sample }));
}
// Increment the count of the corresponding coordinate
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + 1);
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item<double>());
}
// Normalize the counts
cpTable = cpTable / cpTable.sum(0);
}
float Node::getFactorValue(map<string, int>& evidence)
{
torch::List<c10::optional<torch::Tensor>> coordinates;
c10::List<c10::optional<at::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(torch::tensor(evidence[name]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return torch::tensor(evidence[parent->getName()]); });
coordinates.push_back(at::tensor(evidence[name]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
return cpTable.index({ coordinates }).item<float>();
}
vector<string> Node::graph(const string& className)

View File

@@ -16,7 +16,7 @@ namespace bayesnet {
vector<int64_t> dimensions; // dimensions of the cpTable
public:
vector<pair<string, string>> combinations(const vector<string>&);
Node(const string&, int);
explicit Node(const string&);
void clear();
void addParent(Node*);
void addChild(Node*);
@@ -26,7 +26,7 @@ namespace bayesnet {
vector<Node*>& getParents();
vector<Node*>& getChildren();
torch::Tensor& getCPT();
void computeCPT(map<string, vector<int>>&, const int);
void computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
int getNumStates() const;
void setNumStates(int);
unsigned minFill();

110
src/BayesNet/Proposal.cc Normal file
View File

@@ -0,0 +1,110 @@
#include "Proposal.h"
#include "ArffFiles.h"
namespace bayesnet {
Proposal::Proposal(torch::Tensor& dataset_, vector<string>& features_, string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {}
Proposal::~Proposal()
{
for (auto& [key, value] : discretizers) {
delete value;
}
}
void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y)
{
if (!torch::is_floating_point(X)) {
throw std::invalid_argument("X must be a floating point tensor");
}
if (torch::is_floating_point(y)) {
throw std::invalid_argument("y must be an integer tensor");
}
}
map<string, vector<int>> Proposal::localDiscretizationProposal(const map<string, vector<int>>& oldStates, Network& model)
{
// order of local discretization is important. no good 0, 1, 2...
// although we rediscretize features after the local discretization of every feature
auto order = model.topological_sort();
auto& nodes = model.getNodes();
map<string, vector<int>> states = oldStates;
vector<int> indicesToReDiscretize;
bool upgrade = false; // Flag to check if we need to upgrade the model
for (auto feature : order) {
auto nodeParents = nodes[feature]->getParents();
if (nodeParents.size() < 2) continue; // Only has class as parent
upgrade = true;
int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin();
indicesToReDiscretize.push_back(index); // We need to re-discretize this feature
vector<string> parents;
transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); });
// Remove class as parent as it will be added later
parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end());
// Get the indices of the parents
vector<int> indices;
indices.push_back(-1); // Add class index
transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); });
// Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y)
vector<string> yJoinParents(Xf.size(1));
for (auto idx : indices) {
for (int i = 0; i < Xf.size(1); ++i) {
yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
}
}
auto arff = ArffFiles();
auto yxv = arff.factorize(yJoinParents);
auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
auto xvf = vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
discretizers[feature]->fit(xvf, yxv);
}
if (upgrade) {
// Discretize again X (only the affected indices) with the new fitted discretizers
for (auto index : indicesToReDiscretize) {
auto Xt_ptr = Xf.index({ index }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt)));
auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
//Update new states of the feature/node
states[pFeatures[index]] = xStates;
}
const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
model.fit(pDataset, weights, pFeatures, pClassName, states);
}
return states;
}
map<string, vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
{
// Discretize the continuous input data and build pDataset (Classifier::dataset)
int m = Xf.size(1);
int n = Xf.size(0);
map<string, vector<int>> states;
pDataset = torch::zeros({ n + 1, m }, kInt32);
auto yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
// discretize input data by feature(row)
for (auto i = 0; i < pFeatures.size(); ++i) {
auto* discretizer = new mdlp::CPPFImdlp();
auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
discretizer->fit(Xt, yv);
pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
auto xStates = vector<int>(discretizer->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
states[pFeatures[i]] = xStates;
discretizers[pFeatures[i]] = discretizer;
}
int n_classes = torch::max(y).item<int>() + 1;
auto yStates = vector<int>(n_classes);
iota(yStates.begin(), yStates.end(), 0);
states[pClassName] = yStates;
pDataset.index_put_({ n, "..." }, y);
return states;
}
torch::Tensor Proposal::prepareX(torch::Tensor& X)
{
auto Xtd = torch::zeros_like(X, torch::kInt32);
for (int i = 0; i < X.size(0); ++i) {
auto Xt = vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
auto Xd = discretizers[pFeatures[i]]->transform(Xt);
Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
}
return Xtd;
}
}

30
src/BayesNet/Proposal.h Normal file
View File

@@ -0,0 +1,30 @@
#ifndef PROPOSAL_H
#define PROPOSAL_H
#include <string>
#include <map>
#include <torch/torch.h>
#include "Network.h"
#include "CPPFImdlp.h"
#include "Classifier.h"
namespace bayesnet {
class Proposal {
public:
Proposal(torch::Tensor& pDataset, vector<string>& features_, string& className_);
virtual ~Proposal();
protected:
void checkInput(const torch::Tensor& X, const torch::Tensor& y);
torch::Tensor prepareX(torch::Tensor& X);
map<string, vector<int>> localDiscretizationProposal(const map<string, vector<int>>& states, Network& model);
map<string, vector<int>> fit_local_discretization(const torch::Tensor& y);
torch::Tensor Xf; // X continuous nxm tensor
torch::Tensor y; // y discrete nx1 tensor
map<string, mdlp::CPPFImdlp*> discretizers;
private:
torch::Tensor& pDataset; // (n+1)xm tensor
vector<string>& pFeatures;
string& pClassName;
};
}
#endif

View File

@@ -4,7 +4,7 @@ namespace bayesnet {
SPODE::SPODE(int root) : Classifier(Network()), root(root) {}
void SPODE::train()
void SPODE::buildModel(const torch::Tensor& weights)
{
// 0. Add all nodes to the model
addNodes();
@@ -17,7 +17,7 @@ namespace bayesnet {
}
}
}
vector<string> SPODE::graph(string name )
vector<string> SPODE::graph(const string& name) const
{
return model.graph(name);
}

View File

@@ -7,11 +7,11 @@ namespace bayesnet {
private:
int root;
protected:
void train() override;
void buildModel(const torch::Tensor& weights) override;
public:
explicit SPODE(int root);
virtual ~SPODE() {};
vector<string> graph(string name = "SPODE") override;
vector<string> graph(const string& name = "SPODE") const override;
};
}
#endif

48
src/BayesNet/SPODELd.cc Normal file
View File

@@ -0,0 +1,48 @@
#include "SPODELd.h"
namespace bayesnet {
using namespace std;
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
SPODELd& SPODELd::fit(torch::Tensor& dataset, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
{
if (!torch::is_floating_point(dataset)) {
throw std::runtime_error("Dataset must be a floating point tensor");
}
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
y = dataset.index({ -1, "..." }).clone();
features = features_;
className = className_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor SPODELd::predict(Tensor& X)
{
auto Xt = prepareX(X);
return SPODE::predict(Xt);
}
vector<string> SPODELd::graph(const string& name) const
{
return SPODE::graph(name);
}
}

19
src/BayesNet/SPODELd.h Normal file
View File

@@ -0,0 +1,19 @@
#ifndef SPODELD_H
#define SPODELD_H
#include "SPODE.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class SPODELd : public SPODE, public Proposal {
public:
explicit SPODELd(int root);
virtual ~SPODELd() = default;
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
SPODELd& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "SPODE") const override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
};
}
#endif // !SPODELD_H

View File

@@ -5,25 +5,25 @@ namespace bayesnet {
TAN::TAN() : Classifier(Network()) {}
void TAN::train()
void TAN::buildModel(const torch::Tensor& weights)
{
// 0. Add all nodes to the model
addNodes();
// 1. Compute mutual information between each feature and the class and set the root node
// as the highest mutual information with the class
auto mi = vector <pair<int, float >>();
Tensor class_dataset = dataset.index({ "...", -1 });
Tensor class_dataset = dataset.index({ -1, "..." });
for (int i = 0; i < static_cast<int>(features.size()); ++i) {
Tensor feature_dataset = dataset.index({ "...", i });
auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset);
Tensor feature_dataset = dataset.index({ i, "..." });
auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights);
mi.push_back({ i, mi_value });
}
sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;});
auto root = mi[mi.size() - 1].first;
// 2. Compute mutual information between each feature and the class
auto weights = metrics.conditionalEdge();
auto weights_matrix = metrics.conditionalEdge(weights);
// 3. Compute the maximum spanning tree
auto mst = metrics.maximumSpanningTree(features, weights, root);
auto mst = metrics.maximumSpanningTree(features, weights_matrix, root);
// 4. Add edges from the maximum spanning tree to the model
for (auto i = 0; i < mst.size(); ++i) {
auto [from, to] = mst[i];
@@ -34,7 +34,7 @@ namespace bayesnet {
model.addEdge(className, feature);
}
}
vector<string> TAN::graph(string title)
vector<string> TAN::graph(const string& title) const
{
return model.graph(title);
}

View File

@@ -3,15 +3,14 @@
#include "Classifier.h"
namespace bayesnet {
using namespace std;
using namespace torch;
class TAN : public Classifier {
private:
protected:
void train() override;
void buildModel(const torch::Tensor& weights) override;
public:
TAN();
virtual ~TAN() {};
vector<string> graph(string name = "TAN") override;
vector<string> graph(const string& name = "TAN") const override;
};
}
#endif

31
src/BayesNet/TANLd.cc Normal file
View File

@@ -0,0 +1,31 @@
#include "TANLd.h"
namespace bayesnet {
using namespace std;
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
TAN::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor TANLd::predict(Tensor& X)
{
auto Xt = prepareX(X);
return TAN::predict(Xt);
}
vector<string> TANLd::graph(const string& name) const
{
return TAN::graph(name);
}
}

19
src/BayesNet/TANLd.h Normal file
View File

@@ -0,0 +1,19 @@
#ifndef TANLD_H
#define TANLD_H
#include "TAN.h"
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class TANLd : public TAN, public Proposal {
private:
public:
TANLd();
virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "TAN") const override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
};
}
#endif // !TANLD_H

View File

@@ -3,7 +3,8 @@
namespace bayesnet {
using namespace std;
using namespace torch;
vector<int> argsort(vector<float>& nums)
// Return the indices in descending order
vector<int> argsort(vector<double>& nums)
{
int n = nums.size();
vector<int> indices(n);

View File

@@ -5,7 +5,7 @@
namespace bayesnet {
using namespace std;
using namespace torch;
vector<int> argsort(vector<float>& nums);
vector<int> argsort(vector<double>& nums);
vector<vector<int>> tensorToVector(Tensor& tensor);
}
#endif //BAYESNET_UTILS_H

305
src/Platform/BestResults.cc Normal file
View File

@@ -0,0 +1,305 @@
#include <filesystem>
#include <fstream>
#include <iostream>
#include <sstream>
#include <set>
#include "BestResults.h"
#include "Result.h"
#include "Colors.h"
namespace fs = std::filesystem;
// function ftime_to_string, Code taken from
// https://stackoverflow.com/a/58237530/1389271
template <typename TP>
std::string ftime_to_string(TP tp)
{
using namespace std::chrono;
auto sctp = time_point_cast<system_clock::duration>(tp - TP::clock::now()
+ system_clock::now());
auto tt = system_clock::to_time_t(sctp);
std::tm* gmt = std::gmtime(&tt);
std::stringstream buffer;
buffer << std::put_time(gmt, "%Y-%m-%d %H:%M");
return buffer.str();
}
namespace platform {
string BestResults::build()
{
auto files = loadResultFiles();
if (files.size() == 0) {
cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
exit(1);
}
json bests;
for (const auto& file : files) {
auto result = Result(path, file);
auto data = result.load();
for (auto const& item : data.at("results")) {
bool update = false;
if (bests.contains(item.at("dataset").get<string>())) {
if (item.at("score").get<double>() > bests[item.at("dataset").get<string>()].at(0).get<double>()) {
update = true;
}
} else {
update = true;
}
if (update) {
bests[item.at("dataset").get<string>()] = { item.at("score").get<double>(), item.at("hyperparameters"), file };
}
}
}
string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << endl;
}
ofstream file(bestFileName);
file << bests;
file.close();
return bestFileName;
}
string BestResults::bestResultFile()
{
return "best_results_" + score + "_" + model + ".json";
}
pair<string, string> getModelScore(string name)
{
// results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
int i = 0;
auto pos = name.find("_");
auto pos2 = name.find("_", pos + 1);
string score = name.substr(pos + 1, pos2 - pos - 1);
pos = name.find("_", pos2 + 1);
string model = name.substr(pos2 + 1, pos - pos2 - 1);
return { model, score };
}
vector<string> BestResults::loadResultFiles()
{
vector<string> files;
using std::filesystem::directory_iterator;
string fileModel, fileScore;
for (const auto& file : directory_iterator(path)) {
auto fileName = file.path().filename().string();
if (fileName.find(".json") != string::npos && fileName.find("results_") == 0) {
tie(fileModel, fileScore) = getModelScore(fileName);
if (score == fileScore && (model == fileModel || model == "any")) {
files.push_back(fileName);
}
}
}
return files;
}
json BestResults::loadFile(const string& fileName)
{
ifstream resultData(fileName);
if (resultData.is_open()) {
json data = json::parse(resultData);
return data;
}
throw invalid_argument("Unable to open result file. [" + fileName + "]");
}
set<string> BestResults::getModels()
{
set<string> models;
auto files = loadResultFiles();
if (files.size() == 0) {
cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
exit(1);
}
string fileModel, fileScore;
for (const auto& file : files) {
// extract the model from the file name
tie(fileModel, fileScore) = getModelScore(file);
// add the model to the vector of models
models.insert(fileModel);
}
return models;
}
void BestResults::buildAll()
{
auto models = getModels();
for (const auto& model : models) {
cout << "Building best results for model: " << model << endl;
this->model = model;
build();
}
model = "any";
}
void BestResults::reportSingle()
{
string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
} else {
cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
exit(1);
}
auto date = ftime_to_string(filesystem::last_write_time(bestFileName));
auto data = loadFile(bestFileName);
cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl;
cout << "--------------------------------------------------------" << endl;
cout << Colors::GREEN() << " # Dataset Score File Hyperparameters" << endl;
cout << "=== ========================= =========== ================================================================== ================================================= " << endl;
auto i = 0;
bool odd = true;
for (auto const& item : data.items()) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
cout << color << setw(3) << fixed << right << i++ << " ";
cout << setw(25) << left << item.key() << " ";
cout << setw(11) << setprecision(9) << fixed << item.value().at(0).get<double>() << " ";
cout << setw(66) << item.value().at(2).get<string>() << " ";
cout << item.value().at(1) << " ";
cout << endl;
odd = !odd;
}
}
json BestResults::buildTableResults(set<string> models)
{
int numberOfDatasets = 0;
bool first = true;
json origin;
json table;
auto maxDate = filesystem::file_time_type::max();
for (const auto& model : models) {
this->model = model;
string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
} else {
cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
exit(1);
}
auto dateWrite = filesystem::last_write_time(bestFileName);
if (dateWrite < maxDate) {
maxDate = dateWrite;
}
auto data = loadFile(bestFileName);
if (first) {
// Get the number of datasets of the first file and check that is the same for all the models
first = false;
numberOfDatasets = data.size();
origin = data;
} else {
if (numberOfDatasets != data.size()) {
cerr << Colors::MAGENTA() << "The number of datasets in the best results files is not the same for all the models." << Colors::RESET() << endl;
exit(1);
}
}
table[model] = data;
}
table["dateTable"] = ftime_to_string(maxDate);
return table;
}
void BestResults::printTableResults(set<string> models, json table)
{
cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
cout << "------------------------------------------------" << endl;
cout << Colors::GREEN() << " # Dataset ";
for (const auto& model : models) {
cout << setw(12) << left << model << " ";
}
cout << endl;
cout << "=== ========================= ";
for (const auto& model : models) {
cout << "============ ";
}
cout << endl;
auto i = 0;
bool odd = true;
map<string, double> totals;
map<string, int> ranks;
for (const auto& model : models) {
totals[model] = 0.0;
}
json origin = table.begin().value();
for (auto const& item : origin.items()) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
cout << color << setw(3) << fixed << right << i++ << " ";
cout << setw(25) << left << item.key() << " ";
double maxValue = 0;
vector<pair<string, double>> ranksOrder;
// Find out the max value for this dataset
for (const auto& model : models) {
double value = table[model].at(item.key()).at(0).get<double>();
if (value > maxValue) {
maxValue = value;
}
ranksOrder.push_back({ model, value });
}
// sort the ranksOrder vector by value
sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& a, const pair<string, double>& b) {
return a.second > b.second;
});
// Assign the ranks
for (int i = 0; i < ranksOrder.size(); i++) {
ranks[ranksOrder[i].first] = i + 1;
}
// Print the row with red colors on max values
for (const auto& model : models) {
string efectiveColor = color;
double value = table[model].at(item.key()).at(0).get<double>();
if (value == maxValue) {
efectiveColor = Colors::RED();
}
totals[model] += value;
cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
}
cout << endl;
odd = !odd;
}
cout << Colors::GREEN() << "=== ========================= ";
for (const auto& model : models) {
cout << "============ ";
}
cout << endl;
cout << Colors::GREEN() << setw(30) << " Totals...................";
double max = 0.0;
for (const auto& total : totals) {
if (total.second > max) {
max = total.second;
}
}
for (const auto& model : models) {
string efectiveColor = Colors::GREEN();
if (totals[model] == max) {
efectiveColor = Colors::RED();
}
cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " ";
}
// Output the averaged ranks
cout << endl;
int min = 1;
for (const auto& rank : ranks) {
if (rank.second < min) {
min = rank.second;
}
}
cout << Colors::GREEN() << setw(30) << " Averaged ranks...........";
for (const auto& model : models) {
string efectiveColor = Colors::GREEN();
if (ranks[model] == min) {
efectiveColor = Colors::RED();
}
cout << efectiveColor << setw(12) << setprecision(10) << fixed << (double)ranks[model] / (double)origin.size() << " ";
}
cout << endl;
}
void BestResults::reportAll()
{
auto models = getModels();
// Build the table of results
json table = buildTableResults(models);
// Print the table of results
printTableResults(models, table);
}
}

View File

@@ -0,0 +1,28 @@
#ifndef BESTRESULTS_H
#define BESTRESULTS_H
#include <string>
#include <set>
#include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json;
namespace platform {
class BestResults {
public:
explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {}
string build();
void reportSingle();
void reportAll();
void buildAll();
private:
set<string> getModels();
vector<string> loadResultFiles();
json buildTableResults(set<string> models);
void printTableResults(set<string> models, json table);
string bestResultFile();
json loadFile(const string& fileName);
string path;
string score;
string model;
};
}
#endif //BESTRESULTS_H

10
src/Platform/BestScore.h Normal file
View File

@@ -0,0 +1,10 @@
#ifndef BESTSCORE_H
#define BESTSCORE_H
#include <string>
class BestScore {
public:
static std::string title() { return "STree_default (linear-ovo)"; }
static double score() { return 22.109799; }
static std::string scoreName() { return "accuracy"; }
};
#endif

View File

@@ -4,5 +4,16 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc)
include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
add_executable(list list.cc platformUtils Datasets.cc)
add_executable(best best.cc BestResults.cc Result.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
target_link_libraries(best stdc++fs)
else()
target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
endif()
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")

14
src/Platform/Colors.h Normal file
View File

@@ -0,0 +1,14 @@
#ifndef COLORS_H
#define COLORS_H
class Colors {
public:
static std::string MAGENTA() { return "\033[1;35m"; }
static std::string BLUE() { return "\033[1;34m"; }
static std::string CYAN() { return "\033[1;36m"; }
static std::string GREEN() { return "\033[1;32m"; }
static std::string YELLOW() { return "\033[1;33m"; }
static std::string RED() { return "\033[1;31m"; }
static std::string WHITE() { return "\033[1;37m"; }
static std::string RESET() { return "\033[0m"; }
};
#endif // COLORS_H

View File

@@ -1,6 +1,7 @@
#include "Datasets.h"
#include "platformUtils.h"
#include "ArffFiles.h"
#include <fstream>
namespace platform {
void Datasets::load()
{
@@ -24,75 +25,110 @@ namespace platform {
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
return result;
}
vector<string> Datasets::getFeatures(string name)
vector<string> Datasets::getFeatures(const string& name) const
{
if (datasets[name]->isLoaded()) {
return datasets[name]->getFeatures();
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getFeatures();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
map<string, vector<int>> Datasets::getStates(string name)
map<string, vector<int>> Datasets::getStates(const string& name) const
{
if (datasets[name]->isLoaded()) {
return datasets[name]->getStates();
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getStates();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
string Datasets::getClassName(string name)
void Datasets::loadDataset(const string& name) const
{
if (datasets[name]->isLoaded()) {
return datasets[name]->getClassName();
if (datasets.at(name)->isLoaded()) {
return;
} else {
datasets.at(name)->load();
}
}
string Datasets::getClassName(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getClassName();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNSamples(string name)
int Datasets::getNSamples(const string& name) const
{
if (datasets[name]->isLoaded()) {
return datasets[name]->getNSamples();
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getNSamples();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(string name)
int Datasets::getNClasses(const string& name)
{
if (datasets.at(name)->isLoaded()) {
auto className = datasets.at(name)->getClassName();
if (discretize) {
auto states = getStates(name);
return states.at(className).size();
}
auto [Xv, yv] = getVectors(name);
return *max_element(yv.begin(), yv.end()) + 1;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
vector<int> Datasets::getClassesCounts(const string& name) const
{
if (datasets.at(name)->isLoaded()) {
auto [Xv, yv] = datasets.at(name)->getVectors();
vector<int> counts(*max_element(yv.begin(), yv.end()) + 1);
for (auto y : yv) {
counts[y]++;
}
return counts;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(const string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectors();
}
pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(string name)
pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(const string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectorsDiscretized();
}
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(string name)
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getTensors();
}
bool Datasets::isDataset(const string& name)
bool Datasets::isDataset(const string& name) const
{
return datasets.find(name) != datasets.end();
}
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
{
}
string Dataset::getName()
string Dataset::getName() const
{
return name;
}
string Dataset::getClassName()
string Dataset::getClassName() const
{
return className;
}
vector<string> Dataset::getFeatures()
vector<string> Dataset::getFeatures() const
{
if (loaded) {
return features;
@@ -100,7 +136,7 @@ namespace platform {
throw invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNFeatures()
int Dataset::getNFeatures() const
{
if (loaded) {
return n_features;
@@ -108,7 +144,7 @@ namespace platform {
throw invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNSamples()
int Dataset::getNSamples() const
{
if (loaded) {
return n_samples;
@@ -116,7 +152,7 @@ namespace platform {
throw invalid_argument("Dataset not loaded.");
}
}
map<string, vector<int>> Dataset::getStates()
map<string, vector<int>> Dataset::getStates() const
{
if (loaded) {
return states;
@@ -177,10 +213,11 @@ namespace platform {
{
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
}
states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
iota(begin(states[className]), end(states[className]), 0);
iota(begin(states.at(className)), end(states.at(className)), 0);
}
void Dataset::load_arff()
{
@@ -207,9 +244,9 @@ namespace platform {
if (discretize) {
Xd = discretizeDataset(Xv, yv);
computeStates();
n_samples = Xd[0].size();
n_features = Xd.size();
}
n_samples = Xv[0].size();
n_features = Xv.size();
loaded = true;
}
void Dataset::buildTensors()

View File

@@ -29,15 +29,15 @@ namespace platform {
public:
Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
explicit Dataset(const Dataset&);
string getName();
string getClassName();
vector<string> getFeatures();
map<string, vector<int>> getStates();
string getName() const;
string getClassName() const;
vector<string> getFeatures() const;
map<string, vector<int>> getStates() const;
pair<vector<vector<float>>&, vector<int>&> getVectors();
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized();
pair<torch::Tensor&, torch::Tensor&> getTensors();
int getNFeatures();
int getNSamples();
int getNFeatures() const;
int getNSamples() const;
void load();
const bool inline isLoaded() const { return loaded; };
};
@@ -51,14 +51,17 @@ namespace platform {
public:
explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); };
vector<string> getNames();
vector<string> getFeatures(string name);
int getNSamples(string name);
string getClassName(string name);
map<string, vector<int>> getStates(string name);
pair<vector<vector<float>>&, vector<int>&> getVectors(string name);
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(string name);
pair<torch::Tensor&, torch::Tensor&> getTensors(string name);
bool isDataset(const string& name);
vector<string> getFeatures(const string& name) const;
int getNSamples(const string& name) const;
string getClassName(const string& name) const;
int getNClasses(const string& name);
vector<int> getClassesCounts(const string& name) const;
map<string, vector<int>> getStates(const string& name) const;
pair<vector<vector<float>>&, vector<int>&> getVectors(const string& name);
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(const string& name);
pair<torch::Tensor&, torch::Tensor&> getTensors(const string& name);
bool isDataset(const string& name) const;
void loadDataset(const string& name) const;
};
};

View File

@@ -1,7 +1,8 @@
#include "Experiment.h"
#include "Datasets.h"
#include "Models.h"
#include "ReportConsole.h"
#include <fstream>
namespace platform {
using json = nlohmann::json;
string get_date()
@@ -24,6 +25,7 @@ namespace platform {
oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str();
}
Experiment::Experiment() : hyperparameters(json::parse("{}")) {}
string Experiment::get_file_name()
{
string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
@@ -86,6 +88,13 @@ namespace platform {
file.close();
}
void Experiment::report()
{
json data = build_json();
ReportConsole report(data);
report.show();
}
void Experiment::show()
{
json data = build_json();
@@ -102,9 +111,29 @@ namespace platform {
}
}
string getColor(bayesnet::status_t status)
{
switch (status) {
case bayesnet::NORMAL:
return Colors::GREEN();
case bayesnet::WARNING:
return Colors::YELLOW();
case bayesnet::ERROR:
return Colors::RED();
default:
return Colors::RESET();
}
}
void showProgress(int fold, const string& color, const string& phase)
{
string prefix = phase == "a" ? "" : "\b\b\b\b";
cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
}
void Experiment::cross_validation(const string& path, const string& fileName)
{
auto datasets = platform::Datasets(path, true, platform::ARFF);
auto datasets = platform::Datasets(path, discretized, platform::ARFF);
// Get dataset
auto [X, y] = datasets.getTensors(fileName);
auto states = datasets.getStates(fileName);
@@ -114,8 +143,10 @@ namespace platform {
cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
// Prepare Result
auto result = Result();
auto [values, counts] = at::_unique(y);;
auto [values, counts] = at::_unique(y);
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
result.setHyperparameters(hyperparameters);
// Initialize results vectors
int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
@@ -136,6 +167,10 @@ namespace platform {
for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(model);
setModelVersion(clf->getVersion());
if (hyperparameters.size() != 0) {
clf->setHyperparameters(hyperparameters);
}
// Split train - test dataset
train_timer.start();
auto [train, test] = fold->getFold(nfold);
auto train_t = torch::tensor(train);
@@ -144,24 +179,31 @@ namespace platform {
auto y_train = y.index({ train_t });
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
cout << nfold + 1 << ", " << flush;
showProgress(nfold + 1, getColor(clf->getStatus()), "a");
// Train model
clf->fit(X_train, y_train, features, className, states);
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates();
train_time[item] = train_timer.getDuration();
// Score train
auto accuracy_train_value = clf->score(X_train, y_train);
// Test model
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
test_timer.start();
auto accuracy_test_value = clf->score(X_test, y_test);
test_time[item] = test_timer.getDuration();
accuracy_train[item] = accuracy_train_value;
accuracy_test[item] = accuracy_test_value;
cout << "\b\b\b, " << flush;
// Store results and times in vector
result.addScoreTrain(accuracy_train_value);
result.addScoreTest(accuracy_test_value);
result.addTimeTrain(train_time[item].item<double>());
result.addTimeTest(test_time[item].item<double>());
item++;
clf.reset();
}
cout << "end. " << flush;
delete fold;
@@ -169,6 +211,7 @@ namespace platform {
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
result.setDataset(fileName);
addResult(result);

View File

@@ -29,7 +29,8 @@ namespace platform {
};
class Result {
private:
string dataset, hyperparameters, model_version;
string dataset, model_version;
json hyperparameters;
int samples{ 0 }, features{ 0 }, classes{ 0 };
double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
@@ -37,7 +38,7 @@ namespace platform {
public:
Result() = default;
Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; }
Result& setHyperparameters(const string& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Result& setSamples(int samples) { this->samples = samples; return *this; }
Result& setFeatures(int features) { this->features = features; return *this; }
Result& setClasses(int classes) { this->classes = classes; return *this; }
@@ -59,7 +60,7 @@ namespace platform {
const float get_score_train() const { return score_train; }
float get_score_test() { return score_test; }
const string& getDataset() const { return dataset; }
const string& getHyperparameters() const { return hyperparameters; }
const json& getHyperparameters() const { return hyperparameters; }
const int getSamples() const { return samples; }
const int getFeatures() const { return features; }
const int getClasses() const { return classes; }
@@ -85,11 +86,12 @@ namespace platform {
bool discretized{ false }, stratified{ false };
vector<Result> results;
vector<int> randomSeeds;
json hyperparameters = "{}";
int nfolds{ 0 };
float duration{ 0 };
json build_json();
public:
Experiment() = default;
Experiment();
Experiment& setTitle(const string& title) { this->title = title; return *this; }
Experiment& setModel(const string& model) { this->model = model; return *this; }
Experiment& setPlatform(const string& platform) { this->platform = platform; return *this; }
@@ -103,11 +105,13 @@ namespace platform {
Experiment& addResult(Result result) { results.push_back(result); return *this; }
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->duration = duration; return *this; }
Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
string get_file_name();
void save(const string& path);
void cross_validation(const string& path, const string& fileName);
void go(vector<string> filesToProcess, const string& path);
void show();
void report();
};
}
#endif

View File

@@ -1,95 +1,97 @@
#include "Folding.h"
#include <algorithm>
#include <map>
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
random_device rd;
random_seed = default_random_engine(seed == -1 ? rd() : seed);
srand(seed == -1 ? time(0) : seed);
}
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(vector<int>(n))
{
iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
shuffle(indices.begin(), indices.end(), random_seed);
}
pair<vector<int>, vector<int>> KFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
namespace platform {
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
random_device rd;
random_seed = default_random_engine(seed == -1 ? rd() : seed);
srand(seed == -1 ? time(0) : seed);
}
int nTest = n / k;
auto train = vector<int>();
auto test = vector<int>();
for (int i = 0; i < n; i++) {
if (i >= nTest * nFold && i < nTest * (nFold + 1)) {
test.push_back(indices[i]);
} else {
train.push_back(indices[i]);
}
}
return { train, test };
}
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
n = y.numel();
this->y = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
build();
}
StratifiedKFold::StratifiedKFold(int k, const vector<int>& y, int seed)
: Fold(k, y.size(), seed)
{
this->y = y;
n = y.size();
build();
}
void StratifiedKFold::build()
{
stratified_indices = vector<vector<int>>(k);
int fold_size = n / k;
// Compute class counts and indices
auto class_indices = map<int, vector<int>>();
vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
for (auto i = 0; i < n; ++i) {
class_counts[y[i]]++;
class_indices[y[i]].push_back(i);
}
// Shuffle class indices
for (auto& [cls, indices] : class_indices) {
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(vector<int>(n))
{
iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
shuffle(indices.begin(), indices.end(), random_seed);
}
// Assign indices to folds
for (auto label = 0; label < class_counts.size(); ++label) {
auto num_samples_to_take = class_counts[label] / k;
if (num_samples_to_take == 0)
continue;
auto remainder_samples_to_take = class_counts[label] % k;
for (auto fold = 0; fold < k; ++fold) {
auto it = next(class_indices[label].begin(), num_samples_to_take);
move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ##
class_indices[label].erase(class_indices[label].begin(), it);
pair<vector<int>, vector<int>> KFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
}
while (remainder_samples_to_take > 0) {
int fold = (rand() % static_cast<int>(k));
if (stratified_indices[fold].size() == fold_size + 1) {
continue;
int nTest = n / k;
auto train = vector<int>();
auto test = vector<int>();
for (int i = 0; i < n; i++) {
if (i >= nTest * nFold && i < nTest * (nFold + 1)) {
test.push_back(indices[i]);
} else {
train.push_back(indices[i]);
}
}
return { train, test };
}
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
n = y.numel();
this->y = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
build();
}
StratifiedKFold::StratifiedKFold(int k, const vector<int>& y, int seed)
: Fold(k, y.size(), seed)
{
this->y = y;
n = y.size();
build();
}
void StratifiedKFold::build()
{
stratified_indices = vector<vector<int>>(k);
int fold_size = n / k;
// Compute class counts and indices
auto class_indices = map<int, vector<int>>();
vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
for (auto i = 0; i < n; ++i) {
class_counts[y[i]]++;
class_indices[y[i]].push_back(i);
}
// Shuffle class indices
for (auto& [cls, indices] : class_indices) {
shuffle(indices.begin(), indices.end(), random_seed);
}
// Assign indices to folds
for (auto label = 0; label < class_counts.size(); ++label) {
auto num_samples_to_take = class_counts[label] / k;
if (num_samples_to_take == 0)
continue;
auto remainder_samples_to_take = class_counts[label] % k;
for (auto fold = 0; fold < k; ++fold) {
auto it = next(class_indices[label].begin(), num_samples_to_take);
move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ##
class_indices[label].erase(class_indices[label].begin(), it);
}
while (remainder_samples_to_take > 0) {
int fold = (rand() % static_cast<int>(k));
if (stratified_indices[fold].size() == fold_size + 1) {
continue;
}
auto it = next(class_indices[label].begin(), 1);
stratified_indices[fold].push_back(*class_indices[label].begin());
class_indices[label].erase(class_indices[label].begin(), it);
remainder_samples_to_take--;
}
auto it = next(class_indices[label].begin(), 1);
stratified_indices[fold].push_back(*class_indices[label].begin());
class_indices[label].erase(class_indices[label].begin(), it);
remainder_samples_to_take--;
}
}
}
pair<vector<int>, vector<int>> StratifiedKFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
pair<vector<int>, vector<int>> StratifiedKFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
}
vector<int> test_indices = stratified_indices[nFold];
vector<int> train_indices;
for (int i = 0; i < k; ++i) {
if (i == nFold) continue;
train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end());
}
return { train_indices, test_indices };
}
vector<int> test_indices = stratified_indices[nFold];
vector<int> train_indices;
for (int i = 0; i < k; ++i) {
if (i == nFold) continue;
train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end());
}
return { train_indices, test_indices };
}

View File

@@ -4,34 +4,35 @@
#include <vector>
#include <random>
using namespace std;
class Fold {
protected:
int k;
int n;
int seed;
default_random_engine random_seed;
public:
Fold(int k, int n, int seed = -1);
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default;
int getNumberOfFolds() { return k; }
};
class KFold : public Fold {
private:
vector<int> indices;
public:
KFold(int k, int n, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override;
};
class StratifiedKFold : public Fold {
private:
vector<int> y;
vector<vector<int>> stratified_indices;
void build();
public:
StratifiedKFold(int k, const vector<int>& y, int seed = -1);
StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override;
};
namespace platform {
class Fold {
protected:
int k;
int n;
int seed;
default_random_engine random_seed;
public:
Fold(int k, int n, int seed = -1);
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default;
int getNumberOfFolds() { return k; }
};
class KFold : public Fold {
private:
vector<int> indices;
public:
KFold(int k, int n, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override;
};
class StratifiedKFold : public Fold {
private:
vector<int> y;
vector<vector<int>> stratified_indices;
void build();
public:
StratifiedKFold(int k, const vector<int>& y, int seed = -1);
StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override;
};
}
#endif

View File

@@ -26,7 +26,7 @@ namespace platform {
instance = it->second();
// wrap instance in a shared ptr and return
if (instance != nullptr)
return shared_ptr<bayesnet::BaseClassifier>(instance);
return unique_ptr<bayesnet::BaseClassifier>(instance);
else
return nullptr;
}

View File

@@ -6,6 +6,11 @@
#include "TAN.h"
#include "KDB.h"
#include "SPODE.h"
#include "TANLd.h"
#include "KDBLd.h"
#include "SPODELd.h"
#include "AODELd.h"
#include "BoostAODE.h"
namespace platform {
class Models {
private:

12
src/Platform/Paths.h Normal file
View File

@@ -0,0 +1,12 @@
#ifndef PATHS_H
#define PATHS_H
#include <string>
namespace platform {
class Paths {
public:
static std::string datasets() { return "datasets/"; }
static std::string results() { return "results/"; }
static std::string excel() { return "excel/"; }
};
}
#endif

114
src/Platform/ReportBase.cc Normal file
View File

@@ -0,0 +1,114 @@
#include <sstream>
#include <locale>
#include "Datasets.h"
#include "ReportBase.h"
#include "BestScore.h"
namespace platform {
ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1)
{
stringstream oss;
oss << "Better than ZeroR + " << setprecision(1) << fixed << margin * 100 << "%";
meaning = {
{Symbols::equal_best, "Equal to best"},
{Symbols::better_best, "Better than best"},
{Symbols::cross, "Less than or equal to ZeroR"},
{Symbols::upward_arrow, oss.str()}
};
}
string ReportBase::fromVector(const string& key)
{
stringstream oss;
string sep = "";
oss << "[";
for (auto& item : data[key]) {
oss << sep << item.get<double>();
sep = ", ";
}
oss << "]";
return oss.str();
}
string ReportBase::fVector(const string& title, const json& data, const int width, const int precision)
{
stringstream oss;
string sep = "";
oss << title << "[";
for (const auto& item : data) {
oss << sep << fixed << setw(width) << setprecision(precision) << item.get<double>();
sep = ", ";
}
oss << "]";
return oss.str();
}
void ReportBase::show()
{
header();
body();
}
string ReportBase::compareResult(const string& dataset, double result)
{
string status = " ";
if (compare) {
double best = bestResult(dataset, data["model"].get<string>());
if (result == best) {
status = Symbols::equal_best;
} else if (result > best) {
status = Symbols::better_best;
}
} else {
if (data["score_name"].get<string>() == "accuracy") {
auto dt = Datasets(Paths::datasets(), false);
dt.loadDataset(dataset);
auto numClasses = dt.getNClasses(dataset);
if (numClasses == 2) {
vector<int> distribution = dt.getClassesCounts(dataset);
double nSamples = dt.getNSamples(dataset);
vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
double mark = *maxValue / nSamples * (1 + margin);
if (mark > 1) {
mark = 0.9995;
}
status = result < mark ? Symbols::cross : result > mark ? Symbols::upward_arrow : "=";
}
}
}
if (status != " ") {
auto item = summary.find(status);
if (item != summary.end()) {
summary[status]++;
} else {
summary[status] = 1;
}
}
return status;
}
double ReportBase::bestResult(const string& dataset, const string& model)
{
double value = 0.0;
if (bestResults.size() == 0) {
// try to load the best results
string score = data["score_name"];
replace(score.begin(), score.end(), '_', '-');
string fileName = "best_results_" + score + "_" + model + ".json";
ifstream resultData(Paths::results() + "/" + fileName);
if (resultData.is_open()) {
bestResults = json::parse(resultData);
} else {
existBestFile = false;
}
}
try {
value = bestResults.at(dataset).at(0);
}
catch (exception) {
value = 1.0;
}
return value;
}
bool ReportBase::getExistBestFile()
{
return existBestFile;
}
}

46
src/Platform/ReportBase.h Normal file
View File

@@ -0,0 +1,46 @@
#ifndef REPORTBASE_H
#define REPORTBASE_H
#include <string>
#include <iostream>
#include "Paths.h"
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
using namespace std;
class Symbols {
public:
inline static const string check_mark{ "\u2714" };
inline static const string exclamation{ "\u2757" };
inline static const string black_star{ "\u2605" };
inline static const string cross{ "\u2717" };
inline static const string upward_arrow{ "\u27B6" };
inline static const string down_arrow{ "\u27B4" };
inline static const string equal_best{ check_mark };
inline static const string better_best{ black_star };
};
class ReportBase {
public:
explicit ReportBase(json data_, bool compare);
virtual ~ReportBase() = default;
void show();
protected:
json data;
string fromVector(const string& key);
string fVector(const string& title, const json& data, const int width, const int precision);
bool getExistBestFile();
virtual void header() = 0;
virtual void body() = 0;
virtual void showSummary() = 0;
string compareResult(const string& dataset, double result);
map<string, int> summary;
double margin;
map<string, string> meaning;
bool compare;
private:
double bestResult(const string& dataset, const string& model);
json bestResults;
bool existBestFile = true;
};
};
#endif

View File

@@ -0,0 +1,112 @@
#include <sstream>
#include <locale>
#include "ReportConsole.h"
#include "BestScore.h"
namespace platform {
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; }
};
string ReportConsole::headerLine(const string& text, int utf = 0)
{
int n = MAXL - text.length() - 3;
n = n < 0 ? 0 : n;
return "* " + text + string(n + utf, ' ') + "*\n";
}
void ReportConsole::header()
{
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
stringstream oss;
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
cout << headerLine("Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>());
cout << headerLine(data["title"].get<string>());
cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
oss << "Execution took " << setprecision(2) << fixed << data["duration"].get<float>() << " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<string>();
cout << headerLine(oss.str());
cout << headerLine("Score is " + data["score_name"].get<string>());
cout << string(MAXL, '*') << endl;
cout << endl;
}
void ReportConsole::body()
{
cout << Colors::GREEN() << " # Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl;
cout << "=== ========================= ====== ===== === ========= ========= ========= =============== =================== ====================" << endl;
json lastResult;
double totalScore = 0.0;
bool odd = true;
int index = 0;
for (const auto& r : data["results"]) {
if (selectedIndex != -1 && index != selectedIndex) {
index++;
continue;
}
auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color;
cout << setw(3) << index++ << " ";
cout << setw(25) << left << r["dataset"].get<string>() << " ";
cout << setw(6) << right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["nodes"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["leaves"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get<double>();
const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>());
cout << status;
cout << setw(12) << right << setprecision(6) << fixed << r["time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get<double>() << " ";
try {
cout << r["hyperparameters"].get<string>();
}
catch (const exception& err) {
cout << r["hyperparameters"];
}
cout << endl;
lastResult = r;
totalScore += r["score"].get<double>();
odd = !odd;
}
if (data["results"].size() == 1 || selectedIndex != -1) {
cout << string(MAXL, '*') << endl;
cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
cout << string(MAXL, '*') << endl;
} else {
footer(totalScore);
}
}
void ReportConsole::showSummary()
{
for (const auto& item : summary) {
stringstream oss;
oss << setw(3) << left << item.first;
oss << setw(3) << right << item.second << " ";
oss << left << meaning.at(item.first);
cout << headerLine(oss.str(), 2);
}
}
void ReportConsole::footer(double totalScore)
{
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
showSummary();
auto score = data["score_name"].get<string>();
if (score == BestScore::scoreName()) {
stringstream oss;
oss << score << " compared to " << BestScore::title() << " .: " << totalScore / BestScore::score();
cout << headerLine(oss.str());
}
if (!getExistBestFile() && compare) {
cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
}
cout << string(MAXL, '*') << endl << Colors::RESET();
}
}

View File

@@ -0,0 +1,24 @@
#ifndef REPORTCONSOLE_H
#define REPORTCONSOLE_H
#include <string>
#include <iostream>
#include "ReportBase.h"
#include "Colors.h"
namespace platform {
using namespace std;
const int MAXL = 133;
class ReportConsole : public ReportBase {
public:
explicit ReportConsole(json data_, bool compare = false, int index = -1) : ReportBase(data_, compare), selectedIndex(index) {};
virtual ~ReportConsole() = default;
private:
int selectedIndex;
string headerLine(const string& text, int utf);
void header() override;
void body() override;
void footer(double totalScore);
void showSummary() override;
};
};
#endif

333
src/Platform/ReportExcel.cc Normal file
View File

@@ -0,0 +1,333 @@
#include <sstream>
#include <locale>
#include "ReportExcel.h"
#include "BestScore.h"
namespace platform {
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; }
};
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook) : ReportBase(data_, compare), row(0), workbook(workbook)
{
normalSize = 14; //font size for report body
colorTitle = 0xB1A0C7;
colorOdd = 0xDCE6F1;
colorEven = 0xFDE9D9;
createFile();
}
lxw_workbook* ReportExcel::getWorkbook()
{
return workbook;
}
lxw_format* ReportExcel::efectiveStyle(const string& style)
{
lxw_format* efectiveStyle;
if (style == "") {
efectiveStyle = NULL;
} else {
string suffix = row % 2 ? "_odd" : "_even";
efectiveStyle = styles.at(style + suffix);
}
return efectiveStyle;
}
void ReportExcel::writeString(int row, int col, const string& text, const string& style)
{
worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style));
}
void ReportExcel::writeInt(int row, int col, const int number, const string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ReportExcel::writeDouble(int row, int col, const double number, const string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ReportExcel::formatColumns()
{
worksheet_freeze_panes(worksheet, 6, 1);
vector<int> columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 };
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
void ReportExcel::addColor(lxw_format* style, bool odd)
{
uint32_t efectiveColor = odd ? colorEven : colorOdd;
format_set_bg_color(style, lxw_color_t(efectiveColor));
}
void ReportExcel::createStyle(const string& name, lxw_format* style, bool odd)
{
addColor(style, odd);
if (name == "textCentered") {
format_set_align(style, LXW_ALIGN_CENTER);
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "text") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "bodyHeader") {
format_set_bold(style);
format_set_font_size(style, normalSize);
format_set_align(style, LXW_ALIGN_CENTER);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(style, LXW_BORDER_THIN);
format_set_bg_color(style, lxw_color_t(colorTitle));
} else if (name == "result") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "0.0000000");
} else if (name == "time") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "#,##0.000000");
} else if (name == "ints") {
format_set_font_size(style, normalSize);
format_set_num_format(style, "###,##0");
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "floats") {
format_set_border(style, LXW_BORDER_THIN);
format_set_font_size(style, normalSize);
format_set_num_format(style, "#,##0.00");
}
}
void ReportExcel::createFormats()
{
auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" };
lxw_format* style;
for (string name : styleNames) {
lxw_format* style = workbook_add_format(workbook);
style = workbook_add_format(workbook);
createStyle(name, style, true);
styles[name + "_odd"] = style;
style = workbook_add_format(workbook);
createStyle(name, style, false);
styles[name + "_even"] = style;
}
// Header 1st line
lxw_format* headerFirst = workbook_add_format(workbook);
format_set_bold(headerFirst);
format_set_font_size(headerFirst, 18);
format_set_align(headerFirst, LXW_ALIGN_CENTER);
format_set_align(headerFirst, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerFirst, LXW_BORDER_THIN);
format_set_bg_color(headerFirst, lxw_color_t(colorTitle));
// Header rest
lxw_format* headerRest = workbook_add_format(workbook);
format_set_bold(headerRest);
format_set_align(headerRest, LXW_ALIGN_CENTER);
format_set_font_size(headerRest, 16);
format_set_align(headerRest, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerRest, LXW_BORDER_THIN);
format_set_bg_color(headerRest, lxw_color_t(colorOdd));
// Header small
lxw_format* headerSmall = workbook_add_format(workbook);
format_set_bold(headerSmall);
format_set_align(headerSmall, LXW_ALIGN_LEFT);
format_set_font_size(headerSmall, 12);
format_set_border(headerSmall, LXW_BORDER_THIN);
format_set_align(headerSmall, LXW_ALIGN_VERTICAL_CENTER);
format_set_bg_color(headerSmall, lxw_color_t(colorOdd));
// Summary style
lxw_format* summaryStyle = workbook_add_format(workbook);
format_set_bold(summaryStyle);
format_set_font_size(summaryStyle, 16);
format_set_border(summaryStyle, LXW_BORDER_THIN);
format_set_align(summaryStyle, LXW_ALIGN_VERTICAL_CENTER);
styles["headerFirst"] = headerFirst;
styles["headerRest"] = headerRest;
styles["headerSmall"] = headerSmall;
styles["summaryStyle"] = summaryStyle;
}
void ReportExcel::setProperties()
{
char line[data["title"].get<string>().size() + 1];
strcpy(line, data["title"].get<string>().c_str());
lxw_doc_properties properties = {
.title = line,
.subject = (char*)"Machine learning results",
.author = (char*)"Ricardo Montañana Gómez",
.manager = (char*)"Dr. J. A. Gámez, Dr. J. M. Puerta",
.company = (char*)"UCLM",
.comments = (char*)"Created with libxlsxwriter and c++",
};
workbook_set_properties(workbook, &properties);
}
void ReportExcel::createFile()
{
if (workbook == NULL) {
workbook = workbook_new((Paths::excel() + fileName).c_str());
}
const string name = data["model"].get<string>();
string suffix = "";
string efectiveName;
int num = 1;
// Create a sheet with the name of the model
while (true) {
efectiveName = name + suffix;
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
suffix = to_string(++num);
} else {
worksheet = workbook_add_worksheet(workbook, efectiveName.c_str());
break;
}
if (num > 100) {
throw invalid_argument("Couldn't create sheet " + efectiveName);
}
}
cout << "Adding sheet " << efectiveName << " to " << Paths::excel() + fileName << endl;
setProperties();
createFormats();
formatColumns();
}
void ReportExcel::closeFile()
{
workbook_close(workbook);
}
void ReportExcel::header()
{
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
stringstream oss;
string message = data["model"].get<string>() + " ver. " + data["version"].get<string>() + " " +
data["language"].get<string>() + " ver. " + data["language_version"].get<string>() +
" with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) +
" random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>();
worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<string>()).c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]);
oss << setprecision(2) << fixed << data["duration"].get<float>() << " s";
worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
oss.str("");
oss.clear();
oss << setprecision(2) << fixed << data["duration"].get<float>() / 3600 << " h";
worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Stratified: " << (data["stratified"].get<bool>() ? "True" : "False");
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False");
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
}
void ReportExcel::body()
{
auto head = vector<string>(
{ "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time",
"Time Std.", "Hyperparameters" });
int col = 0;
for (const auto& item : head) {
writeString(5, col++, item, "bodyHeader");
}
row = 6;
col = 0;
int hypSize = 22;
json lastResult;
double totalScore = 0.0;
string hyperparameters;
for (const auto& r : data["results"]) {
writeString(row, col, r["dataset"].get<string>(), "text");
writeInt(row, col + 1, r["samples"].get<int>(), "ints");
writeInt(row, col + 2, r["features"].get<int>(), "ints");
writeInt(row, col + 3, r["classes"].get<int>(), "ints");
writeDouble(row, col + 4, r["nodes"].get<float>(), "floats");
writeDouble(row, col + 5, r["leaves"].get<float>(), "floats");
writeDouble(row, col + 6, r["depth"].get<double>(), "floats");
writeDouble(row, col + 7, r["score"].get<double>(), "result");
writeDouble(row, col + 8, r["score_std"].get<double>(), "result");
const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>());
writeString(row, col + 9, status, "textCentered");
writeDouble(row, col + 10, r["time"].get<double>(), "time");
writeDouble(row, col + 11, r["time_std"].get<double>(), "time");
try {
hyperparameters = r["hyperparameters"].get<string>();
}
catch (const exception& err) {
stringstream oss;
oss << r["hyperparameters"];
hyperparameters = oss.str();
}
if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
writeString(row, col + 12, hyperparameters, "text");
lastResult = r;
totalScore += r["score"].get<double>();
row++;
}
// Set the right column width of hyperparameters with the maximum length
worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL);
// Show totals if only one dataset is present in the result
if (data["results"].size() == 1) {
for (const string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
row++;
col = 1;
writeString(row, col, group, "text");
for (double item : lastResult[group]) {
string style = group.find("scores") != string::npos ? "result" : "time";
writeDouble(row, ++col, item, style);
}
}
// Set with of columns to show those totals completely
worksheet_set_column(worksheet, 1, 1, 12, NULL);
for (int i = 2; i < 7; ++i) {
// doesn't work with from col to col, so...
worksheet_set_column(worksheet, i, i, 15, NULL);
}
} else {
footer(totalScore, row);
}
}
void ReportExcel::showSummary()
{
for (const auto& item : summary) {
worksheet_write_string(worksheet, row + 2, 1, item.first.c_str(), styles["summaryStyle"]);
worksheet_write_number(worksheet, row + 2, 2, item.second, styles["summaryStyle"]);
worksheet_merge_range(worksheet, row + 2, 3, row + 2, 5, meaning.at(item.first).c_str(), styles["summaryStyle"]);
row += 1;
}
}
void ReportExcel::footer(double totalScore, int row)
{
showSummary();
row += 4 + summary.size();
auto score = data["score_name"].get<string>();
if (score == BestScore::scoreName()) {
worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestScore::title() + " .:").c_str(), efectiveStyle("text"));
writeDouble(row, 6, totalScore / BestScore::score(), "result");
}
if (!getExistBestFile() && compare) {
worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]);
}
}
}

View File

@@ -0,0 +1,42 @@
#ifndef REPORTEXCEL_H
#define REPORTEXCEL_H
#include<map>
#include "xlsxwriter.h"
#include "ReportBase.h"
#include "Colors.h"
namespace platform {
using namespace std;
const int MAXLL = 128;
class ReportExcel : public ReportBase {
public:
explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook);
lxw_workbook* getWorkbook();
private:
void writeString(int row, int col, const string& text, const string& style = "");
void writeInt(int row, int col, const int number, const string& style = "");
void writeDouble(int row, int col, const double number, const string& style = "");
void formatColumns();
void createFormats();
void setProperties();
void createFile();
void closeFile();
lxw_workbook* workbook;
lxw_worksheet* worksheet;
map<string, lxw_format*> styles;
int row;
int normalSize; //font size for report body
uint32_t colorTitle;
uint32_t colorOdd;
uint32_t colorEven;
const string fileName = "some_results.xlsx";
void header() override;
void body() override;
void showSummary() override;
void footer(double totalScore, int row);
void createStyle(const string& name, lxw_format* style, bool odd);
void addColor(lxw_format* style, bool odd);
lxw_format* efectiveStyle(const string& name);
};
};
#endif // !REPORTEXCEL_H

51
src/Platform/Result.cc Normal file
View File

@@ -0,0 +1,51 @@
#include <filesystem>
#include <fstream>
#include <sstream>
#include "Result.h"
#include "Colors.h"
#include "BestScore.h"
namespace platform {
Result::Result(const string& path, const string& filename)
: path(path)
, filename(filename)
{
auto data = load();
date = data["date"];
score = 0;
for (const auto& result : data["results"]) {
score += result["score"].get<double>();
}
scoreName = data["score_name"];
if (scoreName == BestScore::scoreName()) {
score /= BestScore::score();
}
title = data["title"];
duration = data["duration"];
model = data["model"];
complete = data["results"].size() > 1;
}
json Result::load() const
{
ifstream resultData(path + "/" + filename);
if (resultData.is_open()) {
json data = json::parse(resultData);
return data;
}
throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
}
string Result::to_string() const
{
stringstream oss;
oss << date << " ";
oss << setw(12) << left << model << " ";
oss << setw(11) << left << scoreName << " ";
oss << right << setw(11) << setprecision(7) << fixed << score << " ";
auto completeString = isComplete() ? "C" : "P";
oss << setw(1) << " " << completeString << " ";
oss << setw(9) << setprecision(3) << fixed << duration << " ";
oss << setw(50) << left << title << " ";
return oss.str();
}
}

37
src/Platform/Result.h Normal file
View File

@@ -0,0 +1,37 @@
#ifndef RESULT_H
#define RESULT_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
namespace platform {
using namespace std;
using json = nlohmann::json;
class Result {
public:
Result(const string& path, const string& filename);
json load() const;
string to_string() const;
string getFilename() const { return filename; };
string getDate() const { return date; };
double getScore() const { return score; };
string getTitle() const { return title; };
double getDuration() const { return duration; };
string getModel() const { return model; };
string getScoreName() const { return scoreName; };
bool isComplete() const { return complete; };
private:
string path;
string filename;
string date;
double score;
string title;
double duration;
string model;
string scoreName;
bool complete;
};
};
#endif

268
src/Platform/Results.cc Normal file
View File

@@ -0,0 +1,268 @@
#include <filesystem>
#include "platformUtils.h"
#include "Results.h"
#include "ReportConsole.h"
#include "ReportExcel.h"
#include "BestScore.h"
#include "Colors.h"
namespace platform {
void Results::load()
{
using std::filesystem::directory_iterator;
for (const auto& file : directory_iterator(path)) {
auto filename = file.path().filename().string();
if (filename.find(".json") != string::npos && filename.find("results_") == 0) {
auto result = Result(path, filename);
bool addResult = true;
if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName() || complete && !result.isComplete() || partial && result.isComplete())
addResult = false;
if (addResult)
files.push_back(result);
}
}
if (max == 0) {
max = files.size();
}
}
void Results::show() const
{
cout << Colors::GREEN() << "Results found: " << files.size() << endl;
cout << "-------------------" << endl;
if (complete) {
cout << Colors::MAGENTA() << "Only listing complete results" << endl;
}
if (partial) {
cout << Colors::MAGENTA() << "Only listing partial results" << endl;
}
auto i = 0;
cout << Colors::GREEN() << " # Date Model Score Name Score C/P Duration Title" << endl;
cout << "=== ========== ============ =========== =========== === ========= =============================================================" << endl;
bool odd = true;
for (const auto& result : files) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
cout << color << setw(3) << fixed << right << i++ << " ";
cout << result.to_string() << endl;
if (i == max && max != 0) {
break;
}
odd = !odd;
}
}
int Results::getIndex(const string& intent) const
{
string color;
if (intent == "delete") {
color = Colors::RED();
} else {
color = Colors::YELLOW();
}
cout << color << "Choose result to " << intent << " (cancel=-1): ";
string line;
getline(cin, line);
int index = stoi(line);
if (index >= -1 && index < static_cast<int>(files.size())) {
return index;
}
cout << "Invalid index" << endl;
return -1;
}
void Results::report(const int index, const bool excelReport)
{
cout << Colors::YELLOW() << "Reporting " << files.at(index).getFilename() << endl;
auto data = files.at(index).load();
if (excelReport) {
ReportExcel reporter(data, compare, workbook);
reporter.show();
openExcel = true;
workbook = reporter.getWorkbook();
} else {
ReportConsole reporter(data, compare);
reporter.show();
}
}
void Results::showIndex(const int index, const int idx) const
{
auto data = files.at(index).load();
if (idx < 0 or idx >= static_cast<int>(data["results"].size())) {
cout << "Invalid index" << endl;
return;
}
cout << Colors::YELLOW() << "Showing " << files.at(index).getFilename() << endl;
ReportConsole reporter(data, compare, idx);
reporter.show();
}
void Results::menu()
{
char option;
int index;
bool finished = false;
string color, context;
string filename, line, options = "qldhsre";
while (!finished) {
if (indexList) {
color = Colors::GREEN();
context = " (quit='q', list='l', delete='d', hide='h', sort='s', report='r', excel='e'): ";
options = "qldhsre";
} else {
color = Colors::MAGENTA();
context = " (quit='q', list='l'): ";
options = "ql";
}
cout << Colors::RESET() << color;
cout << "Choose option " << context;
getline(cin, line);
if (line.size() == 0)
continue;
if (options.find(line[0]) != string::npos) {
if (line.size() > 1) {
cout << "Invalid option" << endl;
continue;
}
option = line[0];
} else {
if (all_of(line.begin(), line.end(), ::isdigit)) {
int idx = stoi(line);
if (indexList) {
// The value is about the files list
index = idx;
if (index >= 0 && index < max) {
report(index, false);
indexList = false;
continue;
}
} else {
// The value is about the result showed on screen
showIndex(index, idx);
continue;
}
}
cout << "Invalid option" << endl;
continue;
}
switch (option) {
case 'q':
finished = true;
break;
case 'l':
show();
indexList = true;
break;
case 'd':
index = getIndex("delete");
if (index == -1)
break;
filename = files[index].getFilename();
cout << "Deleting " << filename << endl;
remove((path + "/" + filename).c_str());
files.erase(files.begin() + index);
cout << "File: " + filename + " deleted!" << endl;
show();
indexList = true;
break;
case 'h':
index = getIndex("hide");
if (index == -1)
break;
filename = files[index].getFilename();
cout << "Hiding " << filename << endl;
rename((path + "/" + filename).c_str(), (path + "/." + filename).c_str());
files.erase(files.begin() + index);
show();
menu();
indexList = true;
break;
case 's':
sortList();
indexList = true;
show();
break;
case 'r':
index = getIndex("report");
if (index == -1)
break;
indexList = false;
report(index, false);
break;
case 'e':
index = getIndex("excel");
if (index == -1)
break;
indexList = true;
report(index, true);
break;
default:
cout << "Invalid option" << endl;
}
}
}
void Results::sortList()
{
cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): ";
string line;
char option;
getline(cin, line);
if (line.size() == 0)
return;
if (line.size() > 1) {
cout << "Invalid option" << endl;
return;
}
option = line[0];
switch (option) {
case 'd':
sortDate();
break;
case 's':
sortScore();
break;
case 'u':
sortDuration();
break;
case 'm':
sortModel();
break;
default:
cout << "Invalid option" << endl;
}
}
void Results::sortDate()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getDate() > b.getDate();
});
}
void Results::sortModel()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getModel() > b.getModel();
});
}
void Results::sortDuration()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getDuration() > b.getDuration();
});
}
void Results::sortScore()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getScore() > b.getScore();
});
}
void Results::manage()
{
if (files.size() == 0) {
cout << "No results found!" << endl;
exit(0);
}
sortDate();
show();
menu();
if (openExcel) {
workbook_close(workbook);
}
cout << Colors::RESET() << "Done!" << endl;
}
}

47
src/Platform/Results.h Normal file
View File

@@ -0,0 +1,47 @@
#ifndef RESULTS_H
#define RESULTS_H
#include "xlsxwriter.h"
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
#include "Result.h"
namespace platform {
using namespace std;
using json = nlohmann::json;
class Results {
public:
Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial, bool compare) :
path(path), max(max), model(model), scoreName(score), complete(complete), partial(partial), compare(compare)
{
load();
};
void manage();
private:
string path;
int max;
string model;
string scoreName;
bool complete;
bool partial;
bool indexList = true;
bool openExcel = false;
bool compare;
lxw_workbook* workbook = NULL;
vector<Result> files;
void load(); // Loads the list of results
void show() const;
void report(const int index, const bool excelReport);
void showIndex(const int index, const int idx) const;
int getIndex(const string& intent) const;
void menu();
void sortList();
void sortDate();
void sortScore();
void sortModel();
void sortDuration();
};
};
#endif

63
src/Platform/best.cc Normal file
View File

@@ -0,0 +1,63 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "Paths.h"
#include "BestResults.h"
#include "Colors.h"
using namespace std;
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
argparse::ArgumentParser program("best");
program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model) (any for all models)");
program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
try {
program.parse_args(argc, argv);
auto model = program.get<string>("model");
auto score = program.get<string>("score");
auto build = program.get<bool>("build");
auto report = program.get<bool>("report");
if (model == "" || score == "") {
throw runtime_error("Model and score name must be supplied");
}
}
catch (const exception& err) {
cerr << err.what() << endl;
cerr << program;
exit(1);
}
return program;
}
int main(int argc, char** argv)
{
auto program = manageArguments(argc, argv);
auto model = program.get<string>("model");
auto score = program.get<string>("score");
auto build = program.get<bool>("build");
auto report = program.get<bool>("report");
if (!report && !build) {
cerr << "Either build, report or both, have to be selected to do anything!" << endl;
cerr << program;
exit(1);
}
auto results = platform::BestResults(platform::Paths::results(), score, model);
if (build) {
if (model == "any") {
results.buildAll();
} else {
string fileName = results.build();
cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl;
}
}
if (report) {
if (model == "any") {
results.reportAll();
} else {
results.reportSingle();
}
}
return 0;
}

57
src/Platform/list.cc Normal file
View File

@@ -0,0 +1,57 @@
#include <iostream>
#include <locale>
#include "Paths.h"
#include "Colors.h"
#include "Datasets.h"
using namespace std;
const int BALANCE_LENGTH = 75;
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; }
};
void outputBalance(const string& balance)
{
auto temp = string(balance);
while (temp.size() > BALANCE_LENGTH - 1) {
auto part = temp.substr(0, BALANCE_LENGTH);
cout << part << endl;
cout << setw(48) << " ";
temp = temp.substr(BALANCE_LENGTH);
}
cout << temp << endl;
}
int main(int argc, char** argv)
{
auto data = platform::Datasets(platform::Paths().datasets(), false);
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << endl;
string balanceBars = string(BALANCE_LENGTH, '=');
cout << "============================== ====== ===== === " << balanceBars << endl;
bool odd = true;
for (const auto& dataset : data.getNames()) {
auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color << setw(30) << left << dataset << " ";
data.loadDataset(dataset);
auto nSamples = data.getNSamples(dataset);
cout << setw(6) << right << nSamples << " ";
cout << setw(5) << right << data.getFeatures(dataset).size() << " ";
cout << setw(3) << right << data.getNClasses(dataset) << " ";
stringstream oss;
string sep = "";
for (auto number : data.getClassesCounts(dataset)) {
oss << sep << setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")";
sep = " / ";
}
outputBalance(oss.str());
odd = !odd;
}
cout << Colors::RESET() << endl;
return 0;
}

View File

@@ -1,25 +1,27 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "platformUtils.h"
#include "Experiment.h"
#include "Datasets.h"
#include "DotEnv.h"
#include "Models.h"
#include "modelRegister.h"
#include "Paths.h"
using namespace std;
const string PATH_RESULTS = "results";
const string PATH_DATASETS = "datasets";
using json = nlohmann::json;
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
auto env = platform::DotEnv();
argparse::ArgumentParser program("BayesNetSample");
argparse::ArgumentParser program("main");
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment");
program.add_argument("-p", "--path")
.help("folder where the data files are located, default")
.default_value(string{ PATH_DATASETS }
);
.default_value(string{ platform::Paths::datasets() });
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->toString())
.action([](const std::string& value) {
@@ -32,6 +34,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
);
program.add_argument("--title").default_value("").help("Experiment title");
program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true);
program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const string& value) {
try {
@@ -60,6 +63,8 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
auto seeds = program.get<vector<int>>("seeds");
auto complete_file_name = path + file_name + ".arff";
auto title = program.get<string>("title");
auto hyperparameters = program.get<string>("hyperparameters");
auto saveResults = program.get<bool>("save");
if (title == "" && file_name == "") {
throw runtime_error("title is mandatory if dataset is not provided");
}
@@ -75,7 +80,6 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
int main(int argc, char** argv)
{
auto program = manageArguments(argc, argv);
bool saveResults = false;
auto file_name = program.get<string>("dataset");
auto path = program.get<string>("path");
auto model_name = program.get<string>("model");
@@ -83,9 +87,11 @@ int main(int argc, char** argv)
auto stratified = program.get<bool>("stratified");
auto n_folds = program.get<int>("folds");
auto seeds = program.get<vector<int>>("seeds");
auto hyperparameters = program.get<string>("hyperparameters");
vector<string> filesToTest;
auto datasets = platform::Datasets(path, true, platform::ARFF);
auto title = program.get<string>("title");
auto saveResults = program.get<bool>("save");
if (file_name != "") {
if (!datasets.isDataset(file_name)) {
cerr << "Dataset " << file_name << " not found" << endl;
@@ -96,17 +102,18 @@ int main(int argc, char** argv)
}
filesToTest.push_back(file_name);
} else {
filesToTest = platform::Datasets(path, true, platform::ARFF).getNames();
filesToTest = datasets.getNames();
saveResults = true;
}
/*
* Begin Processing
*/
auto env = platform::DotEnv();
auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("1.0.0");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform("BayesNet");
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
experiment.setHyperparameters(json::parse(hyperparameters));
for (auto seed : seeds) {
experiment.addRandomSeed(seed);
}
@@ -114,10 +121,10 @@ int main(int argc, char** argv)
timer.start();
experiment.go(filesToTest, path);
experiment.setDuration(timer.getDuration());
if (saveResults)
experiment.save(PATH_RESULTS);
else
experiment.show();
if (saveResults) {
experiment.save(platform::Paths::results());
}
experiment.report();
cout << "Done!" << endl;
return 0;
}

52
src/Platform/manage.cc Normal file
View File

@@ -0,0 +1,52 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "platformUtils.h"
#include "Paths.h"
#include "Results.h"
using namespace std;
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
argparse::ArgumentParser program("manage");
program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>();
program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)");
program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true);
program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true);
program.add_argument("--compare").help("Compare with best results").default_value(false).implicit_value(true);
try {
program.parse_args(argc, argv);
auto number = program.get<int>("number");
if (number < 0) {
throw runtime_error("Number of results must be greater than or equal to 0");
}
auto model = program.get<string>("model");
auto score = program.get<string>("score");
auto complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare");
}
catch (const exception& err) {
cerr << err.what() << endl;
cerr << program;
exit(1);
}
return program;
}
int main(int argc, char** argv)
{
auto program = manageArguments(argc, argv);
auto number = program.get<int>("number");
auto model = program.get<string>("model");
auto score = program.get<string>("score");
auto complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare");
if (complete)
partial = false;
auto results = platform::Results(platform::Paths::results(), number, model, score, complete, partial, compare);
results.manage();
return 0;
}

View File

@@ -2,10 +2,20 @@
#define MODEL_REGISTER_H
static platform::Registrar registrarT("TAN",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();});
static platform::Registrar registrarTLD("TANLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANLd();});
static platform::Registrar registrarS("SPODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);});
static platform::Registrar registrarSLD("SPODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2);});
static platform::Registrar registrarK("KDB",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);});
static platform::Registrar registrarKLD("KDBLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2);});
static platform::Registrar registrarA("AODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();});
static platform::Registrar registrarALD("AODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
static platform::Registrar registrarBA("BoostAODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();});
#endif

View File

@@ -1,4 +1,5 @@
#include "platformUtils.h"
#include "Paths.h"
using namespace torch;
@@ -68,11 +69,12 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
}
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states[className]), end(states[className]), 0);
iota(begin(states.at(className)), end(states.at(className)), 0);
} else {
Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) {
@@ -85,7 +87,7 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name)
{
auto handler = ArffFiles();
handler.load(PATH + static_cast<string>(name) + ".arff");
handler.load(platform::Paths::datasets() + static_cast<string>(name) + ".arff");
// Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();

View File

@@ -8,7 +8,6 @@
#include "ArffFiles.h"
#include "CPPFImdlp.h"
using namespace std;
const string PATH = "../../data/";
bool file_exists(const std::string& name);
vector<string> split(const string& text, char delimiter);

View File

@@ -9,29 +9,21 @@ TEST_CASE("Test Bayesian Network")
{
auto [Xd, y, features, className, states] = loadFile("iris");
SECTION("Test Update Nodes")
{
auto net = bayesnet::Network();
net.addNode("A", 3);
REQUIRE(net.getStates() == 3);
net.addNode("A", 5);
REQUIRE(net.getStates() == 5);
}
SECTION("Test get features")
{
auto net = bayesnet::Network();
net.addNode("A", 3);
net.addNode("B", 5);
net.addNode("A");
net.addNode("B");
REQUIRE(net.getFeatures() == vector<string>{"A", "B"});
net.addNode("C", 2);
net.addNode("C");
REQUIRE(net.getFeatures() == vector<string>{"A", "B", "C"});
}
SECTION("Test get edges")
{
auto net = bayesnet::Network();
net.addNode("A", 3);
net.addNode("B", 5);
net.addNode("C", 2);
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("B", "C");
REQUIRE(net.getEdges() == vector<pair<string, string>>{ {"A", "B"}, { "B", "C" } });

View File

@@ -4,6 +4,7 @@ if(ENABLE_TESTING)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
set(TEST_SOURCES BayesModels.cc BayesNetwork.cc ${BayesNet_SOURCE_DIR}/src/Platform/platformUtils.cc ${BayesNet_SOURCES})
add_executable(${TEST_MAIN} ${TEST_SOURCES})
target_link_libraries(${TEST_MAIN} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain)