Compare commits
53 Commits
mpi_grid
...
199ffc95d2
Author | SHA1 | Date | |
---|---|---|---|
199ffc95d2
|
|||
cbe15e317d
|
|||
debd890519
|
|||
46e929ff4d | |||
d858e26e4b
|
|||
0ee3eaed53
|
|||
093c197f0a
|
|||
78d7ea7c4d
|
|||
d6af1ffe8e
|
|||
20669dd161
|
|||
272dbad4f3
|
|||
8bccc3e4bc
|
|||
903b143338
|
|||
f10d0daf2e
|
|||
d39a17089e
|
|||
2e325cd114 | |||
fc3d63b7db
|
|||
43dc79a345
|
|||
b8589bcd0a | |||
3007e22a7d
|
|||
02e456befb
|
|||
8477698d8d
|
|||
52abd2d670
|
|||
3116eaa763
|
|||
443e5cc882
|
|||
e1c4221c11
|
|||
a63a35df3f
|
|||
c7555dac3f
|
|||
f3b8150e2c
|
|||
03f8b8653b
|
|||
2163e95c4a
|
|||
b33da34655
|
|||
e17aee7bdb
|
|||
37c31ee4c2
|
|||
80afdc06f7
|
|||
|
666782217e | ||
55af0714cd
|
|||
6ef5ca541a
|
|||
4364317411 | |||
65a96851ef
|
|||
722da7f781
|
|||
b1833a5feb
|
|||
41a0bd4ddd
|
|||
9ab4fc7d76
|
|||
beadb7465f
|
|||
652e5f623f
|
|||
b7fef9a99d
|
|||
343269d48c
|
|||
21c4c6df51
|
|||
702f086706
|
|||
981bc8f98b
|
|||
e0b7b2d316
|
|||
9b9e91e856 |
@@ -5,6 +5,7 @@ Checks: '-*,
|
||||
cppcoreguidelines-*,
|
||||
modernize-*,
|
||||
performance-*,
|
||||
-modernize-use-nodiscard,
|
||||
-cppcoreguidelines-pro-type-vararg,
|
||||
-modernize-use-trailing-return-type,
|
||||
-bugprone-exception-escape'
|
||||
|
31
.clang-uml
31
.clang-uml
@@ -1,31 +0,0 @@
|
||||
compilation_database_dir: build
|
||||
output_directory: puml
|
||||
diagrams:
|
||||
BayesNet:
|
||||
type: class
|
||||
glob:
|
||||
- src/BayesNet/*.cc
|
||||
- src/Platform/*.cc
|
||||
using_namespace: bayesnet
|
||||
include:
|
||||
namespaces:
|
||||
- bayesnet
|
||||
- platform
|
||||
plantuml:
|
||||
after:
|
||||
- "note left of {{ alias(\"MyProjectMain\") }}: Main class of myproject library."
|
||||
sequence:
|
||||
type: sequence
|
||||
glob:
|
||||
- src/Platform/main.cc
|
||||
combine_free_functions_into_file_participants: true
|
||||
using_namespace:
|
||||
- std
|
||||
- bayesnet
|
||||
- platform
|
||||
include:
|
||||
paths:
|
||||
- src/BayesNet
|
||||
- src/Platform
|
||||
start_from:
|
||||
- function: main(int,const char **)
|
17
.gitmodules
vendored
17
.gitmodules
vendored
@@ -5,21 +5,16 @@
|
||||
update = merge
|
||||
[submodule "lib/catch2"]
|
||||
path = lib/catch2
|
||||
main = v2.x
|
||||
main = v2.x
|
||||
update = merge
|
||||
url = https://github.com/catchorg/Catch2.git
|
||||
[submodule "lib/argparse"]
|
||||
path = lib/argparse
|
||||
url = https://github.com/p-ranav/argparse
|
||||
master = master
|
||||
update = merge
|
||||
[submodule "lib/json"]
|
||||
path = lib/json
|
||||
url = https://github.com/nlohmann/json.git
|
||||
master = master
|
||||
master = master
|
||||
update = merge
|
||||
[submodule "lib/libxlsxwriter"]
|
||||
path = lib/libxlsxwriter
|
||||
url = https://github.com/jmcnamara/libxlsxwriter.git
|
||||
main = main
|
||||
[submodule "lib/folding"]
|
||||
path = lib/folding
|
||||
url = https://github.com/rmontanana/folding
|
||||
main = main
|
||||
update = merge
|
||||
|
115
.vscode/launch.json
vendored
115
.vscode/launch.json
vendored
@@ -5,126 +5,21 @@
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "sample",
|
||||
"program": "${workspaceFolder}/build_debug/sample/BayesNetSample",
|
||||
"program": "${workspaceFolder}/build_release/sample/bayesnet_sample",
|
||||
"args": [
|
||||
"-d",
|
||||
"iris",
|
||||
"-m",
|
||||
"TANLd",
|
||||
"-s",
|
||||
"271",
|
||||
"-p",
|
||||
"/Users/rmontanana/Code/discretizbench/datasets/",
|
||||
"${workspaceFolder}/tests/data/glass.arff"
|
||||
],
|
||||
//"cwd": "${workspaceFolder}/build/sample/",
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "experimentPy",
|
||||
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
|
||||
"args": [
|
||||
"-m",
|
||||
"STree",
|
||||
"--stratified",
|
||||
"-d",
|
||||
"iris",
|
||||
//"--discretize"
|
||||
// "--hyperparameters",
|
||||
// "{\"repeatSparent\": true, \"maxModels\": 12}"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/../discretizbench",
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "gridsearch",
|
||||
"program": "${workspaceFolder}/build_debug/src/Platform/b_grid",
|
||||
"args": [
|
||||
"-m",
|
||||
"KDB",
|
||||
"--discretize",
|
||||
"--continue",
|
||||
"glass",
|
||||
"--only",
|
||||
"--compute"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/../discretizbench",
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "experimentBayes",
|
||||
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
|
||||
"args": [
|
||||
"-m",
|
||||
"TAN",
|
||||
"--stratified",
|
||||
"--discretize",
|
||||
"-d",
|
||||
"iris",
|
||||
"--hyperparameters",
|
||||
"{\"repeatSparent\": true, \"maxModels\": 12}"
|
||||
],
|
||||
"cwd": "/home/rmontanana/Code/discretizbench",
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "best",
|
||||
"program": "${workspaceFolder}/build_debug/src/Platform/b_best",
|
||||
"args": [
|
||||
"-m",
|
||||
"BoostAODE",
|
||||
"-s",
|
||||
"accuracy",
|
||||
"--build",
|
||||
],
|
||||
"cwd": "${workspaceFolder}/../discretizbench",
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "manage",
|
||||
"program": "${workspaceFolder}/build_debug/src/Platform/b_manage",
|
||||
"args": [
|
||||
"-n",
|
||||
"20"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/../discretizbench",
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "list",
|
||||
"program": "${workspaceFolder}/build_debug/src/Platform/b_list",
|
||||
"args": [],
|
||||
//"cwd": "/Users/rmontanana/Code/discretizbench",
|
||||
"cwd": "${workspaceFolder}/../discretizbench",
|
||||
},
|
||||
{
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"name": "test",
|
||||
"program": "${workspaceFolder}/build_debug/tests/unit_tests",
|
||||
"program": "${workspaceFolder}/build_debug/tests/unit_tests_bayesnet",
|
||||
"args": [
|
||||
"-c=\"Metrics Test\"",
|
||||
//"-c=\"Metrics Test\"",
|
||||
// "-s",
|
||||
],
|
||||
"cwd": "${workspaceFolder}/build/tests",
|
||||
},
|
||||
{
|
||||
"name": "Build & debug active file",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/build_debug/bayesnet",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"MIMode": "lldb",
|
||||
"preLaunchTask": "CMake: build"
|
||||
"cwd": "${workspaceFolder}/build_debug/tests",
|
||||
}
|
||||
]
|
||||
}
|
54
CHANGELOG.md
Normal file
54
CHANGELOG.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [1.0.4] - 2024-03-06
|
||||
|
||||
### Added
|
||||
|
||||
- Change _ascending_ hyperparameter to _order_ with these possible values _{"asc", "desc", "rand"}_, Default is _"desc"_.
|
||||
- Add the _predict_single_ hyperparameter to control if only the last model created is used to predict in boost training or the whole ensemble (all the models built so far). Default is true.
|
||||
- sample app to show how to use the library (make sample)
|
||||
|
||||
### Changed
|
||||
|
||||
- Change the library structure adding folders for each group of classes (classifiers, ensembles, etc).
|
||||
- The significances of the models generated under the feature selection algorithm are now computed after all the models have been generated and an α<sub>t</sub> value is computed and assigned to each model.
|
||||
|
||||
## [1.0.3] - 2024-02-25
|
||||
|
||||
### Added
|
||||
|
||||
- Voting / probability aggregation in Ensemble classes
|
||||
- predict_proba method in Classifier
|
||||
- predict_proba method in BoostAODE
|
||||
- predict_voting parameter in BoostAODE constructor to use voting or probability to predict (default is voting)
|
||||
- hyperparameter predict_voting to AODE, AODELd and BoostAODE (Ensemble child classes)
|
||||
- tests to check predict & predict_proba coherence
|
||||
|
||||
## [1.0.2] - 2024-02-20
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fix bug in BoostAODE: do not include the model if epsilon sub t is greater than 0.5
|
||||
- Fix bug in BoostAODE: compare accuracy with previous accuracy instead of the first of the ensemble if convergence true
|
||||
|
||||
## [1.0.1] - 2024-02-12
|
||||
|
||||
### Added
|
||||
|
||||
- Notes in Classifier class
|
||||
- BoostAODE: Add note with used features in initialization with feature selection
|
||||
- BoostAODE: Add note with the number of models
|
||||
- BoostAODE: Add note with the number of features used to create models if not all features are used
|
||||
- Test version number in TestBayesModels
|
||||
- Add tests with feature_select and notes on BoostAODE
|
||||
|
||||
### Fixed
|
||||
|
||||
- Network predict test
|
||||
- Network predict_proba test
|
||||
- Network score test
|
@@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(BayesNet
|
||||
VERSION 0.2.0
|
||||
VERSION 1.0.4
|
||||
DESCRIPTION "Bayesian Network and basic classifiers Library."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
|
||||
LANGUAGES CXX
|
||||
@@ -30,39 +30,25 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
|
||||
option(ENABLE_TESTING "Unit testing build" OFF)
|
||||
option(CODE_COVERAGE "Collect coverage from test library" OFF)
|
||||
option(MPI_ENABLED "Enable MPI options" ON)
|
||||
|
||||
if (MPI_ENABLED)
|
||||
find_package(MPI REQUIRED)
|
||||
message("MPI_CXX_LIBRARIES=${MPI_CXX_LIBRARIES}")
|
||||
message("MPI_CXX_INCLUDE_DIRS=${MPI_CXX_INCLUDE_DIRS}")
|
||||
endif (MPI_ENABLED)
|
||||
|
||||
# Boost Library
|
||||
set(Boost_USE_STATIC_LIBS OFF)
|
||||
set(Boost_USE_MULTITHREADED ON)
|
||||
set(Boost_USE_STATIC_RUNTIME OFF)
|
||||
find_package(Boost 1.66.0 REQUIRED COMPONENTS python3 numpy3)
|
||||
if(Boost_FOUND)
|
||||
message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
|
||||
include_directories(${Boost_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
# Python
|
||||
find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED)
|
||||
message("Python3_LIBRARIES=${Python3_LIBRARIES}")
|
||||
|
||||
# CMake modules
|
||||
# --------------
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
|
||||
include(AddGitSubmodule)
|
||||
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
MESSAGE("Debug mode")
|
||||
set(ENABLE_TESTING ON)
|
||||
set(CODE_COVERAGE ON)
|
||||
endif (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
|
||||
|
||||
if (CODE_COVERAGE)
|
||||
enable_testing()
|
||||
include(CodeCoverage)
|
||||
MESSAGE("Code coverage enabled")
|
||||
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
|
||||
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
enable_testing()
|
||||
include(CodeCoverage)
|
||||
MESSAGE("Code coverage enabled")
|
||||
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
|
||||
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
endif (CODE_COVERAGE)
|
||||
|
||||
if (ENABLE_CLANG_TIDY)
|
||||
@@ -73,30 +59,17 @@ endif (ENABLE_CLANG_TIDY)
|
||||
# ---------------------------------------------
|
||||
# include(FetchContent)
|
||||
add_git_submodule("lib/mdlp")
|
||||
add_git_submodule("lib/argparse")
|
||||
add_git_submodule("lib/json")
|
||||
|
||||
|
||||
find_library(XLSXWRITER_LIB NAMES libxlsxwriter.dylib libxlsxwriter.so PATHS ${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/lib)
|
||||
message("XLSXWRITER_LIB=${XLSXWRITER_LIB}")
|
||||
|
||||
|
||||
# Subdirectories
|
||||
# --------------
|
||||
add_subdirectory(config)
|
||||
add_subdirectory(lib/Files)
|
||||
add_subdirectory(src/BayesNet)
|
||||
add_subdirectory(src/Platform)
|
||||
add_subdirectory(src/PyClassifiers)
|
||||
add_subdirectory(sample)
|
||||
|
||||
file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h)
|
||||
file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cc ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cpp)
|
||||
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform/*.cc ${BayesNet_SOURCE_DIR}/src/Platform/*.cpp)
|
||||
add_subdirectory(src)
|
||||
|
||||
# Testing
|
||||
# -------
|
||||
|
||||
if (ENABLE_TESTING)
|
||||
MESSAGE("Testing enabled")
|
||||
add_git_submodule("lib/catch2")
|
||||
|
53
Makefile
53
Makefile
@@ -1,11 +1,11 @@
|
||||
SHELL := /bin/bash
|
||||
.DEFAULT_GOAL := help
|
||||
.PHONY: coverage setup help build test clean debug release
|
||||
.PHONY: coverage setup help buildr buildd test clean debug release sample
|
||||
|
||||
f_release = build_release
|
||||
f_debug = build_debug
|
||||
app_targets = b_best b_list b_main b_manage b_grid
|
||||
test_targets = unit_tests_bayesnet unit_tests_platform
|
||||
app_targets = BayesNet
|
||||
test_targets = unit_tests_bayesnet
|
||||
n_procs = -j 16
|
||||
|
||||
define ClearTests
|
||||
@@ -31,37 +31,22 @@ setup: ## Install dependencies for tests and coverage
|
||||
pip install gcovr; \
|
||||
fi
|
||||
|
||||
dest ?= ${HOME}/bin
|
||||
install: ## Copy binary files to bin folder
|
||||
@echo "Destination folder: $(dest)"
|
||||
make buildr
|
||||
@echo "*******************************************"
|
||||
@echo ">>> Copying files to $(dest)"
|
||||
@echo "*******************************************"
|
||||
@for item in $(app_targets); do \
|
||||
echo ">>> Copying $$item" ; \
|
||||
cp $(f_release)/src/Platform/$$item $(dest) ; \
|
||||
done
|
||||
|
||||
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
|
||||
@echo ">>> Creating dependency graph diagram of the project...";
|
||||
$(MAKE) debug
|
||||
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
|
||||
|
||||
buildd: ## Build the debug targets
|
||||
cmake --build $(f_debug) -t $(app_targets) BayesNetSample $(n_procs)
|
||||
cmake --build $(f_debug) -t $(app_targets) $(n_procs)
|
||||
|
||||
buildr: ## Build the release targets
|
||||
cmake --build $(f_release) -t $(app_targets) BayesNetSample $(n_procs)
|
||||
cmake --build $(f_release) -t $(app_targets) $(n_procs)
|
||||
|
||||
clean: ## Clean the tests info
|
||||
@echo ">>> Cleaning Debug BayesNet tests...";
|
||||
$(call ClearTests)
|
||||
@echo ">>> Done";
|
||||
|
||||
clang-uml: ## Create uml class and sequence diagrams
|
||||
clang-uml -p --add-compile-flag -I /usr/lib/gcc/x86_64-redhat-linux/8/include/
|
||||
|
||||
debug: ## Build a debug version of the project
|
||||
@echo ">>> Building Debug BayesNet...";
|
||||
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
|
||||
@@ -74,6 +59,13 @@ release: ## Build a Release version of the project
|
||||
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
|
||||
@mkdir $(f_release);
|
||||
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
|
||||
@echo ">>> Done";
|
||||
|
||||
fname = "tests/data/iris.arff"
|
||||
sample: ## Build sample
|
||||
@echo ">>> Building Sample...";
|
||||
cmake --build $(f_release) -t bayesnet_sample $(n_procs)
|
||||
$(f_release)/sample/bayesnet_sample $(fname)
|
||||
@echo ">>> Done";
|
||||
|
||||
opt = ""
|
||||
@@ -89,27 +81,10 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
|
||||
done
|
||||
@echo ">>> Done";
|
||||
|
||||
opt = ""
|
||||
testp: ## Run platform tests (opt="-s") to verbose output the tests, (opt="-c='Stratified Fold Test'") to run only that section
|
||||
@echo ">>> Running Platform tests...";
|
||||
@$(MAKE) clean
|
||||
@cmake --build $(f_debug) --target unit_tests_platform $(n_procs)
|
||||
@if [ -f $(f_debug)/tests/unit_tests_platform ]; then cd $(f_debug)/tests ; ./unit_tests_platform $(opt) ; fi ;
|
||||
@echo ">>> Done";
|
||||
|
||||
opt = ""
|
||||
testb: ## Run BayesNet tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
|
||||
@echo ">>> Running BayesNet tests...";
|
||||
@$(MAKE) clean
|
||||
@cmake --build $(f_debug) --target unit_tests_bayesnet $(n_procs)
|
||||
@if [ -f $(f_debug)/tests/unit_tests_bayesnet ]; then cd $(f_debug)/tests ; ./unit_tests_bayesnet $(opt) ; fi ;
|
||||
@echo ">>> Done";
|
||||
|
||||
coverage: ## Run tests and generate coverage report (build/index.html)
|
||||
@echo ">>> Building tests with coverage...";
|
||||
@echo ">>> Building tests with coverage..."
|
||||
@$(MAKE) test
|
||||
@cd $(f_debug) ; \
|
||||
gcovr --config ../gcovr.cfg tests ;
|
||||
@gcovr $(f_debug)/tests
|
||||
@echo ">>> Done";
|
||||
|
||||
|
||||
|
77
README.md
77
README.md
@@ -2,78 +2,31 @@
|
||||
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
|
||||
Bayesian Network Classifier with libtorch from scratch
|
||||
|
||||
## 0. Setup
|
||||
|
||||
Complete the following steps before compiling BayesNet.
|
||||
|
||||
### MPI
|
||||
|
||||
On Linux, just install the openmpi and openmpi-devel packages. If cmake can't find the openmpi installation (as on Oracle Linux), set the following variable:
|
||||
|
||||
```bash
|
||||
export MPI_HOME="/usr/lib64/openmpi"
|
||||
```
|
||||
|
||||
On macOS, install mpich with brew; if cmake doesn't find it, edit the mpicxx wrapper to remove ",-commons,use_dylibs" from final_ldflags:
|
||||
|
||||
```bash
|
||||
vi /opt/homebrew/bin/mpicxx
|
||||
```
|
||||
|
||||
### boost library
|
||||
|
||||
[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
|
||||
|
||||
The best option is to install the packages that the Linux distribution has in its repository. If this is the case:
|
||||
|
||||
```bash
|
||||
sudo dnf install boost-devel
|
||||
```
|
||||
|
||||
If this is not possible and the compressed package is installed instead, the following environment variable has to be set, pointing to the folder where it was unzipped:
|
||||
|
||||
```bash
|
||||
export BOOST_ROOT=/path/to/library/
|
||||
```
|
||||
|
||||
In some cases the library needs to be built; to do so:
|
||||
|
||||
```bash
|
||||
cd /path/to/library
|
||||
mkdir own
|
||||
./bootstrap.sh --prefix=/path/to/library/own
|
||||
./b2 install
|
||||
export BOOST_ROOT=/path/to/library/own/
|
||||
```
|
||||
|
||||
Don't forget to add the export BOOST_ROOT statement to .bashrc or wherever it is meant to be.
|
||||
|
||||
### libxlsxwriter
|
||||
|
||||
```bash
|
||||
cd lib/libxlsxwriter
|
||||
make
|
||||
make install DESTDIR=/home/rmontanana/Code PREFIX=
|
||||
```
|
||||
|
||||
The following environment variable has to be set:
|
||||
|
||||
```bash
|
||||
export LD_LIBRARY_PATH=/usr/local/lib
|
||||
```
|
||||
Bayesian Network Classifiers using libtorch from scratch
|
||||
|
||||
### Release
|
||||
|
||||
```bash
|
||||
make release
|
||||
make buildr
|
||||
```
|
||||
|
||||
### Debug & Tests
|
||||
|
||||
```bash
|
||||
make debug
|
||||
make test
|
||||
make coverage
|
||||
```
|
||||
|
||||
## 1. Introduction
|
||||
### Sample app
|
||||
|
||||
```bash
|
||||
make release
|
||||
make sample
|
||||
make sample fname=tests/data/glass.arff
|
||||
```
|
||||
|
||||
## Models
|
||||
|
||||
### [BoostAODE](docs/BoostAODE.md)
|
||||
|
@@ -7,7 +7,8 @@
|
||||
#define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR @
|
||||
#define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH @
|
||||
|
||||
static constexpr std::string_view project_name = " @PROJECT_NAME@ ";
|
||||
static constexpr std::string_view project_name = "@PROJECT_NAME@";
|
||||
static constexpr std::string_view project_version = "@PROJECT_VERSION@";
|
||||
static constexpr std::string_view project_description = "@PROJECT_DESCRIPTION@";
|
||||
static constexpr std::string_view git_sha = "@GIT_SHA@";
|
||||
static constexpr std::string_view data_path = "@BayesNet_SOURCE_DIR@/tests/data/";
|
@@ -1,25 +0,0 @@
|
||||
Type Si
|
||||
Type Fe
|
||||
Type RI
|
||||
Type Na
|
||||
Type Ba
|
||||
Type Ca
|
||||
Type Al
|
||||
Type K
|
||||
Type Mg
|
||||
Fe RI
|
||||
Fe Ba
|
||||
Fe Ca
|
||||
RI Na
|
||||
RI Ba
|
||||
RI Ca
|
||||
RI Al
|
||||
RI K
|
||||
RI Mg
|
||||
Ba Ca
|
||||
Ba Al
|
||||
Ca Al
|
||||
Ca K
|
||||
Ca Mg
|
||||
Al K
|
||||
K Mg
|
@@ -1,645 +0,0 @@
|
||||
class att215
|
||||
class att25
|
||||
class att131
|
||||
class att95
|
||||
class att122
|
||||
class att17
|
||||
class att28
|
||||
class att5
|
||||
class att121
|
||||
class att214
|
||||
class att197
|
||||
class att116
|
||||
class att182
|
||||
class att60
|
||||
class att168
|
||||
class att178
|
||||
class att206
|
||||
class att89
|
||||
class att77
|
||||
class att209
|
||||
class att73
|
||||
class att126
|
||||
class att16
|
||||
class att74
|
||||
class att27
|
||||
class att61
|
||||
class att20
|
||||
class att101
|
||||
class att85
|
||||
class att76
|
||||
class att137
|
||||
class att211
|
||||
class att143
|
||||
class att14
|
||||
class att40
|
||||
class att210
|
||||
class att155
|
||||
class att170
|
||||
class att160
|
||||
class att23
|
||||
class att162
|
||||
class att203
|
||||
class att164
|
||||
class att107
|
||||
class att62
|
||||
class att42
|
||||
class att71
|
||||
class att128
|
||||
class att138
|
||||
class att83
|
||||
class att171
|
||||
class att92
|
||||
class att163
|
||||
class att49
|
||||
class att161
|
||||
class att158
|
||||
class att176
|
||||
class att11
|
||||
class att145
|
||||
class att4
|
||||
class att172
|
||||
class att196
|
||||
class att58
|
||||
class att68
|
||||
class att169
|
||||
class att80
|
||||
class att32
|
||||
class att175
|
||||
class att87
|
||||
class att88
|
||||
class att159
|
||||
class att18
|
||||
class att52
|
||||
class att98
|
||||
class att136
|
||||
class att150
|
||||
class att156
|
||||
class att110
|
||||
class att100
|
||||
class att63
|
||||
class att148
|
||||
class att90
|
||||
class att167
|
||||
class att35
|
||||
class att205
|
||||
class att51
|
||||
class att21
|
||||
class att142
|
||||
class att46
|
||||
class att134
|
||||
class att39
|
||||
class att102
|
||||
class att208
|
||||
class att130
|
||||
class att149
|
||||
class att96
|
||||
class att75
|
||||
class att118
|
||||
class att78
|
||||
class att213
|
||||
class att112
|
||||
class att38
|
||||
class att174
|
||||
class att189
|
||||
class att70
|
||||
class att179
|
||||
class att59
|
||||
class att79
|
||||
class att15
|
||||
class att47
|
||||
class att124
|
||||
class att34
|
||||
class att54
|
||||
class att191
|
||||
class att86
|
||||
class att56
|
||||
class att151
|
||||
class att66
|
||||
class att173
|
||||
class att44
|
||||
class att198
|
||||
class att139
|
||||
class att216
|
||||
class att129
|
||||
class att152
|
||||
class att69
|
||||
class att81
|
||||
class att50
|
||||
class att153
|
||||
class att41
|
||||
class att204
|
||||
class att188
|
||||
class att26
|
||||
class att13
|
||||
class att117
|
||||
class att114
|
||||
class att10
|
||||
class att64
|
||||
class att200
|
||||
class att9
|
||||
class att3
|
||||
class att119
|
||||
class att45
|
||||
class att104
|
||||
class att140
|
||||
class att30
|
||||
class att183
|
||||
class att146
|
||||
class att141
|
||||
class att202
|
||||
class att194
|
||||
class att24
|
||||
class att147
|
||||
class att8
|
||||
class att212
|
||||
class att123
|
||||
class att166
|
||||
class att187
|
||||
class att127
|
||||
class att190
|
||||
class att105
|
||||
class att106
|
||||
class att184
|
||||
class att82
|
||||
class att2
|
||||
class att135
|
||||
class att154
|
||||
class att111
|
||||
class att115
|
||||
class att99
|
||||
class att22
|
||||
class att84
|
||||
class att207
|
||||
class att94
|
||||
class att177
|
||||
class att103
|
||||
class att93
|
||||
class att201
|
||||
class att43
|
||||
class att36
|
||||
class att12
|
||||
class att125
|
||||
class att165
|
||||
class att180
|
||||
class att195
|
||||
class att157
|
||||
class att48
|
||||
class att6
|
||||
class att113
|
||||
class att193
|
||||
class att91
|
||||
class att72
|
||||
class att31
|
||||
class att132
|
||||
class att33
|
||||
class att57
|
||||
class att144
|
||||
class att192
|
||||
class att185
|
||||
class att37
|
||||
class att53
|
||||
class att120
|
||||
class att186
|
||||
class att199
|
||||
class att65
|
||||
class att108
|
||||
class att133
|
||||
class att29
|
||||
class att19
|
||||
class att7
|
||||
class att97
|
||||
class att67
|
||||
class att55
|
||||
class att1
|
||||
class att109
|
||||
class att181
|
||||
att215 att25
|
||||
att215 att131
|
||||
att215 att95
|
||||
att25 att131
|
||||
att25 att121
|
||||
att25 att73
|
||||
att25 att61
|
||||
att25 att85
|
||||
att25 att169
|
||||
att25 att13
|
||||
att131 att95
|
||||
att131 att122
|
||||
att131 att17
|
||||
att131 att28
|
||||
att131 att121
|
||||
att131 att214
|
||||
att131 att116
|
||||
att131 att126
|
||||
att131 att143
|
||||
att95 att122
|
||||
att95 att17
|
||||
att95 att28
|
||||
att95 att5
|
||||
att95 att214
|
||||
att95 att116
|
||||
att95 att60
|
||||
att95 att143
|
||||
att95 att155
|
||||
att95 att71
|
||||
att122 att182
|
||||
att122 att170
|
||||
att17 att5
|
||||
att17 att197
|
||||
att17 att89
|
||||
att17 att77
|
||||
att17 att161
|
||||
att28 att206
|
||||
att28 att16
|
||||
att28 att76
|
||||
att28 att172
|
||||
att28 att124
|
||||
att28 att64
|
||||
att5 att197
|
||||
att5 att89
|
||||
att5 att209
|
||||
att121 att73
|
||||
att214 att178
|
||||
att214 att58
|
||||
att214 att142
|
||||
att197 att209
|
||||
att197 att101
|
||||
att116 att182
|
||||
att116 att60
|
||||
att116 att168
|
||||
att116 att178
|
||||
att116 att206
|
||||
att116 att126
|
||||
att116 att16
|
||||
att116 att27
|
||||
att116 att20
|
||||
att116 att211
|
||||
att116 att164
|
||||
att116 att128
|
||||
att182 att27
|
||||
att182 att14
|
||||
att60 att168
|
||||
att60 att156
|
||||
att168 att156
|
||||
att168 att96
|
||||
att178 att20
|
||||
att178 att58
|
||||
att178 att142
|
||||
att178 att130
|
||||
att206 att74
|
||||
att206 att170
|
||||
att206 att158
|
||||
att89 att77
|
||||
att89 att137
|
||||
att89 att149
|
||||
att89 att173
|
||||
att77 att137
|
||||
att77 att161
|
||||
att209 att101
|
||||
att209 att41
|
||||
att73 att61
|
||||
att73 att157
|
||||
att126 att162
|
||||
att126 att138
|
||||
att126 att150
|
||||
att16 att74
|
||||
att16 att76
|
||||
att16 att40
|
||||
att16 att4
|
||||
att74 att14
|
||||
att74 att62
|
||||
att27 att171
|
||||
att61 att85
|
||||
att61 att169
|
||||
att20 att211
|
||||
att20 att210
|
||||
att20 att164
|
||||
att20 att176
|
||||
att101 att41
|
||||
att85 att13
|
||||
att76 att40
|
||||
att76 att160
|
||||
att137 att149
|
||||
att211 att210
|
||||
att211 att162
|
||||
att211 att171
|
||||
att211 att163
|
||||
att211 att175
|
||||
att211 att79
|
||||
att143 att155
|
||||
att143 att23
|
||||
att143 att71
|
||||
att143 att83
|
||||
att143 att11
|
||||
att14 att98
|
||||
att40 att160
|
||||
att40 att4
|
||||
att40 att196
|
||||
att40 att52
|
||||
att210 att42
|
||||
att210 att114
|
||||
att155 att23
|
||||
att155 att203
|
||||
att155 att107
|
||||
att155 att11
|
||||
att170 att158
|
||||
att160 att52
|
||||
att23 att203
|
||||
att162 att138
|
||||
att162 att18
|
||||
att162 att150
|
||||
att162 att90
|
||||
att162 att174
|
||||
att203 att107
|
||||
att203 att49
|
||||
att203 att59
|
||||
att203 att191
|
||||
att203 att119
|
||||
att164 att62
|
||||
att164 att42
|
||||
att164 att128
|
||||
att164 att92
|
||||
att164 att163
|
||||
att164 att176
|
||||
att164 att145
|
||||
att164 att68
|
||||
att164 att80
|
||||
att164 att98
|
||||
att164 att110
|
||||
att164 att205
|
||||
att164 att21
|
||||
att164 att213
|
||||
att164 att112
|
||||
att164 att38
|
||||
att164 att56
|
||||
att164 att44
|
||||
att107 att59
|
||||
att107 att47
|
||||
att107 att191
|
||||
att71 att83
|
||||
att71 att167
|
||||
att71 att35
|
||||
att128 att92
|
||||
att138 att18
|
||||
att83 att167
|
||||
att171 att87
|
||||
att171 att159
|
||||
att171 att63
|
||||
att171 att51
|
||||
att171 att39
|
||||
att171 att75
|
||||
att163 att49
|
||||
att163 att175
|
||||
att163 att87
|
||||
att163 att79
|
||||
att163 att151
|
||||
att163 att139
|
||||
att163 att187
|
||||
att163 att91
|
||||
att161 att173
|
||||
att176 att145
|
||||
att176 att172
|
||||
att176 att68
|
||||
att176 att80
|
||||
att176 att32
|
||||
att176 att110
|
||||
att176 att205
|
||||
att176 att21
|
||||
att176 att134
|
||||
att176 att56
|
||||
att4 att196
|
||||
att4 att88
|
||||
att4 att136
|
||||
att4 att100
|
||||
att4 att148
|
||||
att4 att208
|
||||
att172 att112
|
||||
att172 att184
|
||||
att196 att88
|
||||
att196 att136
|
||||
att196 att100
|
||||
att196 att208
|
||||
att58 att46
|
||||
att68 att32
|
||||
att32 att200
|
||||
att87 att159
|
||||
att87 att63
|
||||
att87 att75
|
||||
att87 att15
|
||||
att87 att99
|
||||
att159 att195
|
||||
att18 att90
|
||||
att18 att102
|
||||
att18 att78
|
||||
att18 att198
|
||||
att52 att124
|
||||
att98 att86
|
||||
att150 att174
|
||||
att150 att66
|
||||
att156 att96
|
||||
att156 att216
|
||||
att156 att204
|
||||
att156 att24
|
||||
att156 att84
|
||||
att100 att148
|
||||
att63 att51
|
||||
att63 att3
|
||||
att63 att183
|
||||
att90 att102
|
||||
att90 att78
|
||||
att167 att35
|
||||
att167 att179
|
||||
att35 att179
|
||||
att51 att39
|
||||
att51 att3
|
||||
att21 att134
|
||||
att21 att213
|
||||
att21 att38
|
||||
att21 att189
|
||||
att21 att129
|
||||
att21 att81
|
||||
att21 att117
|
||||
att21 att9
|
||||
att142 att46
|
||||
att142 att130
|
||||
att142 att118
|
||||
att142 att10
|
||||
att142 att202
|
||||
att142 att190
|
||||
att142 att106
|
||||
att46 att70
|
||||
att46 att34
|
||||
att46 att166
|
||||
att134 att2
|
||||
att102 att54
|
||||
att130 att118
|
||||
att130 att10
|
||||
att130 att202
|
||||
att149 att125
|
||||
att96 att216
|
||||
att96 att24
|
||||
att75 att15
|
||||
att75 att99
|
||||
att118 att70
|
||||
att78 att198
|
||||
att213 att189
|
||||
att38 att50
|
||||
att38 att26
|
||||
att174 att54
|
||||
att174 att66
|
||||
att174 att30
|
||||
att189 att86
|
||||
att189 att129
|
||||
att189 att69
|
||||
att189 att81
|
||||
att189 att153
|
||||
att189 att117
|
||||
att189 att9
|
||||
att189 att45
|
||||
att189 att105
|
||||
att70 att34
|
||||
att59 att47
|
||||
att79 att151
|
||||
att79 att139
|
||||
att79 att187
|
||||
att79 att127
|
||||
att79 att103
|
||||
att79 att43
|
||||
att79 att91
|
||||
att79 att19
|
||||
att124 att64
|
||||
att54 att114
|
||||
att54 att30
|
||||
att191 att119
|
||||
att86 att194
|
||||
att56 att44
|
||||
att56 att152
|
||||
att56 att50
|
||||
att56 att188
|
||||
att56 att26
|
||||
att56 att104
|
||||
att56 att140
|
||||
att56 att146
|
||||
att56 att194
|
||||
att56 att8
|
||||
att56 att2
|
||||
att56 att133
|
||||
att56 att1
|
||||
att173 att125
|
||||
att173 att113
|
||||
att44 att152
|
||||
att44 att188
|
||||
att44 att200
|
||||
att44 att212
|
||||
att44 att1
|
||||
att139 att103
|
||||
att139 att43
|
||||
att139 att31
|
||||
att139 att199
|
||||
att139 att7
|
||||
att216 att204
|
||||
att216 att36
|
||||
att216 att12
|
||||
att216 att180
|
||||
att216 att108
|
||||
att129 att69
|
||||
att152 att140
|
||||
att69 att153
|
||||
att81 att45
|
||||
att153 att141
|
||||
att41 att53
|
||||
att204 att12
|
||||
att13 att157
|
||||
att114 att6
|
||||
att114 att186
|
||||
att10 att190
|
||||
att64 att184
|
||||
att200 att104
|
||||
att9 att146
|
||||
att9 att141
|
||||
att9 att177
|
||||
att9 att37
|
||||
att9 att133
|
||||
att9 att109
|
||||
att9 att181
|
||||
att3 att183
|
||||
att3 att147
|
||||
att3 att123
|
||||
att3 att135
|
||||
att3 att111
|
||||
att45 att105
|
||||
att45 att177
|
||||
att45 att93
|
||||
att45 att201
|
||||
att45 att193
|
||||
att45 att37
|
||||
att45 att97
|
||||
att140 att8
|
||||
att30 att6
|
||||
att183 att147
|
||||
att183 att123
|
||||
att202 att166
|
||||
att202 att106
|
||||
att202 att82
|
||||
att24 att84
|
||||
att24 att36
|
||||
att147 att135
|
||||
att8 att212
|
||||
att166 att82
|
||||
att187 att127
|
||||
att187 att115
|
||||
att127 att115
|
||||
att105 att93
|
||||
att106 att154
|
||||
att82 att154
|
||||
att82 att22
|
||||
att135 att111
|
||||
att135 att207
|
||||
att154 att22
|
||||
att154 att94
|
||||
att111 att207
|
||||
att22 att94
|
||||
att84 att48
|
||||
att177 att165
|
||||
att103 att195
|
||||
att103 att109
|
||||
att93 att201
|
||||
att93 att165
|
||||
att93 att193
|
||||
att93 att33
|
||||
att201 att33
|
||||
att201 att57
|
||||
att36 att180
|
||||
att36 att72
|
||||
att36 att132
|
||||
att36 att144
|
||||
att125 att113
|
||||
att125 att185
|
||||
att125 att65
|
||||
att125 att29
|
||||
att180 att48
|
||||
att180 att72
|
||||
att180 att192
|
||||
att180 att108
|
||||
att6 att186
|
||||
att113 att185
|
||||
att113 att53
|
||||
att193 att97
|
||||
att91 att31
|
||||
att91 att19
|
||||
att72 att132
|
||||
att72 att192
|
||||
att31 att199
|
||||
att31 att67
|
||||
att132 att144
|
||||
att132 att120
|
||||
att33 att57
|
||||
att144 att120
|
||||
att185 att65
|
||||
att199 att7
|
||||
att199 att67
|
||||
att199 att55
|
||||
att65 att29
|
||||
att67 att55
|
||||
att109 att181
|
@@ -1,859 +0,0 @@
|
||||
class att215
|
||||
class att25
|
||||
class att131
|
||||
class att95
|
||||
class att122
|
||||
class att17
|
||||
class att28
|
||||
class att5
|
||||
class att121
|
||||
class att214
|
||||
class att197
|
||||
class att116
|
||||
class att182
|
||||
class att60
|
||||
class att168
|
||||
class att178
|
||||
class att206
|
||||
class att89
|
||||
class att77
|
||||
class att209
|
||||
class att73
|
||||
class att126
|
||||
class att16
|
||||
class att74
|
||||
class att27
|
||||
class att61
|
||||
class att20
|
||||
class att101
|
||||
class att85
|
||||
class att76
|
||||
class att137
|
||||
class att211
|
||||
class att143
|
||||
class att14
|
||||
class att40
|
||||
class att210
|
||||
class att155
|
||||
class att170
|
||||
class att160
|
||||
class att23
|
||||
class att162
|
||||
class att203
|
||||
class att164
|
||||
class att107
|
||||
class att62
|
||||
class att42
|
||||
class att71
|
||||
class att128
|
||||
class att138
|
||||
class att83
|
||||
class att171
|
||||
class att92
|
||||
class att163
|
||||
class att49
|
||||
class att161
|
||||
class att158
|
||||
class att176
|
||||
class att11
|
||||
class att145
|
||||
class att4
|
||||
class att172
|
||||
class att196
|
||||
class att58
|
||||
class att68
|
||||
class att169
|
||||
class att80
|
||||
class att32
|
||||
class att175
|
||||
class att87
|
||||
class att88
|
||||
class att159
|
||||
class att18
|
||||
class att52
|
||||
class att98
|
||||
class att136
|
||||
class att150
|
||||
class att156
|
||||
class att110
|
||||
class att100
|
||||
class att63
|
||||
class att148
|
||||
class att90
|
||||
class att167
|
||||
class att35
|
||||
class att205
|
||||
class att51
|
||||
class att21
|
||||
class att142
|
||||
class att46
|
||||
class att134
|
||||
class att39
|
||||
class att102
|
||||
class att208
|
||||
class att130
|
||||
class att149
|
||||
class att96
|
||||
class att75
|
||||
class att118
|
||||
class att78
|
||||
class att213
|
||||
class att112
|
||||
class att38
|
||||
class att174
|
||||
class att189
|
||||
class att70
|
||||
class att179
|
||||
class att59
|
||||
class att79
|
||||
class att15
|
||||
class att47
|
||||
class att124
|
||||
class att34
|
||||
class att54
|
||||
class att191
|
||||
class att86
|
||||
class att56
|
||||
class att151
|
||||
class att66
|
||||
class att173
|
||||
class att44
|
||||
class att198
|
||||
class att139
|
||||
class att216
|
||||
class att129
|
||||
class att152
|
||||
class att69
|
||||
class att81
|
||||
class att50
|
||||
class att153
|
||||
class att41
|
||||
class att204
|
||||
class att188
|
||||
class att26
|
||||
class att13
|
||||
class att117
|
||||
class att114
|
||||
class att10
|
||||
class att64
|
||||
class att200
|
||||
class att9
|
||||
class att3
|
||||
class att119
|
||||
class att45
|
||||
class att104
|
||||
class att140
|
||||
class att30
|
||||
class att183
|
||||
class att146
|
||||
class att141
|
||||
class att202
|
||||
class att194
|
||||
class att24
|
||||
class att147
|
||||
class att8
|
||||
class att212
|
||||
class att123
|
||||
class att166
|
||||
class att187
|
||||
class att127
|
||||
class att190
|
||||
class att105
|
||||
class att106
|
||||
class att184
|
||||
class att82
|
||||
class att2
|
||||
class att135
|
||||
class att154
|
||||
class att111
|
||||
class att115
|
||||
class att99
|
||||
class att22
|
||||
class att84
|
||||
class att207
|
||||
class att94
|
||||
class att177
|
||||
class att103
|
||||
class att93
|
||||
class att201
|
||||
class att43
|
||||
class att36
|
||||
class att12
|
||||
class att125
|
||||
class att165
|
||||
class att180
|
||||
class att195
|
||||
class att157
|
||||
class att48
|
||||
class att6
|
||||
class att113
|
||||
class att193
|
||||
class att91
|
||||
class att72
|
||||
class att31
|
||||
class att132
|
||||
class att33
|
||||
class att57
|
||||
class att144
|
||||
class att192
|
||||
class att185
|
||||
class att37
|
||||
class att53
|
||||
class att120
|
||||
class att186
|
||||
class att199
|
||||
class att65
|
||||
class att108
|
||||
class att133
|
||||
class att29
|
||||
class att19
|
||||
class att7
|
||||
class att97
|
||||
class att67
|
||||
class att55
|
||||
class att1
|
||||
class att109
|
||||
class att181
|
||||
att215 att25
|
||||
att215 att131
|
||||
att215 att95
|
||||
att215 att17
|
||||
att215 att214
|
||||
att215 att143
|
||||
att25 att131
|
||||
att25 att95
|
||||
att25 att122
|
||||
att25 att121
|
||||
att25 att73
|
||||
att25 att61
|
||||
att25 att85
|
||||
att25 att169
|
||||
att25 att13
|
||||
att25 att157
|
||||
att131 att95
|
||||
att131 att122
|
||||
att131 att17
|
||||
att131 att28
|
||||
att131 att5
|
||||
att131 att121
|
||||
att131 att214
|
||||
att131 att116
|
||||
att131 att182
|
||||
att131 att60
|
||||
att131 att126
|
||||
att131 att16
|
||||
att131 att27
|
||||
att131 att20
|
||||
att131 att143
|
||||
att131 att155
|
||||
att95 att122
|
||||
att95 att17
|
||||
att95 att28
|
||||
att95 att5
|
||||
att95 att121
|
||||
att95 att214
|
||||
att95 att197
|
||||
att95 att116
|
||||
att95 att60
|
||||
att95 att168
|
||||
att95 att178
|
||||
att95 att143
|
||||
att95 att155
|
||||
att95 att23
|
||||
att95 att71
|
||||
att95 att167
|
||||
att122 att28
|
||||
att122 att182
|
||||
att122 att170
|
||||
att17 att5
|
||||
att17 att197
|
||||
att17 att89
|
||||
att17 att77
|
||||
att17 att209
|
||||
att17 att137
|
||||
att17 att161
|
||||
att17 att41
|
||||
att28 att206
|
||||
att28 att16
|
||||
att28 att76
|
||||
att28 att40
|
||||
att28 att210
|
||||
att28 att160
|
||||
att28 att172
|
||||
att28 att124
|
||||
att28 att64
|
||||
att5 att197
|
||||
att5 att89
|
||||
att5 att77
|
||||
att5 att209
|
||||
att5 att101
|
||||
att121 att73
|
||||
att121 att61
|
||||
att214 att116
|
||||
att214 att178
|
||||
att214 att206
|
||||
att214 att58
|
||||
att214 att142
|
||||
att214 att46
|
||||
att197 att89
|
||||
att197 att209
|
||||
att197 att101
|
||||
att116 att182
|
||||
att116 att60
|
||||
att116 att168
|
||||
att116 att178
|
||||
att116 att206
|
||||
att116 att73
|
||||
att116 att126
|
||||
att116 att16
|
||||
att116 att74
|
||||
att116 att27
|
||||
att116 att20
|
||||
att116 att211
|
||||
att116 att164
|
||||
att116 att128
|
||||
att116 att92
|
||||
att116 att176
|
||||
att116 att68
|
||||
att182 att27
|
||||
att182 att14
|
||||
att60 att168
|
||||
att60 att156
|
||||
att60 att96
|
||||
att168 att126
|
||||
att168 att156
|
||||
att168 att96
|
||||
att168 att216
|
||||
att178 att20
|
||||
att178 att211
|
||||
att178 att58
|
||||
att178 att142
|
||||
att178 att130
|
||||
att178 att166
|
||||
att206 att74
|
||||
att206 att170
|
||||
att206 att158
|
||||
att89 att77
|
||||
att89 att137
|
||||
att89 att149
|
||||
att89 att173
|
||||
att77 att137
|
||||
att77 att161
|
||||
att77 att149
|
||||
att209 att101
|
||||
att209 att41
|
||||
att73 att61
|
||||
att73 att85
|
||||
att73 att13
|
||||
att73 att157
|
||||
att126 att162
|
||||
att126 att138
|
||||
att126 att18
|
||||
att126 att150
|
||||
att16 att74
|
||||
att16 att76
|
||||
att16 att40
|
||||
att16 att4
|
||||
att16 att196
|
||||
att16 att136
|
||||
att74 att14
|
||||
att74 att62
|
||||
att27 att171
|
||||
att27 att63
|
||||
att61 att85
|
||||
att61 att169
|
||||
att20 att76
|
||||
att20 att211
|
||||
att20 att210
|
||||
att20 att170
|
||||
att20 att164
|
||||
att20 att128
|
||||
att20 att176
|
||||
att20 att80
|
||||
att101 att41
|
||||
att85 att169
|
||||
att85 att13
|
||||
att76 att14
|
||||
att76 att40
|
||||
att76 att160
|
||||
att76 att4
|
||||
att76 att52
|
||||
att137 att161
|
||||
att137 att149
|
||||
att137 att173
|
||||
att137 att125
|
||||
att211 att210
|
||||
att211 att162
|
||||
att211 att164
|
||||
att211 att62
|
||||
att211 att42
|
||||
att211 att171
|
||||
att211 att163
|
||||
att211 att175
|
||||
att211 att79
|
||||
att211 att151
|
||||
att211 att43
|
||||
att143 att155
|
||||
att143 att23
|
||||
att143 att203
|
||||
att143 att71
|
||||
att143 att83
|
||||
att143 att11
|
||||
att14 att98
|
||||
att40 att160
|
||||
att40 att4
|
||||
att40 att196
|
||||
att40 att88
|
||||
att40 att52
|
||||
att210 att162
|
||||
att210 att42
|
||||
att210 att114
|
||||
att155 att23
|
||||
att155 att203
|
||||
att155 att107
|
||||
att155 att11
|
||||
att170 att158
|
||||
att160 att52
|
||||
att160 att124
|
||||
att23 att203
|
||||
att23 att107
|
||||
att23 att71
|
||||
att23 att11
|
||||
att162 att138
|
||||
att162 att18
|
||||
att162 att150
|
||||
att162 att90
|
||||
att162 att102
|
||||
att162 att174
|
||||
att162 att66
|
||||
att203 att107
|
||||
att203 att49
|
||||
att203 att59
|
||||
att203 att47
|
||||
att203 att191
|
||||
att203 att119
|
||||
att164 att62
|
||||
att164 att42
|
||||
att164 att128
|
||||
att164 att171
|
||||
att164 att92
|
||||
att164 att163
|
||||
att164 att158
|
||||
att164 att176
|
||||
att164 att145
|
||||
att164 att172
|
||||
att164 att58
|
||||
att164 att68
|
||||
att164 att80
|
||||
att164 att32
|
||||
att164 att98
|
||||
att164 att156
|
||||
att164 att110
|
||||
att164 att205
|
||||
att164 att21
|
||||
att164 att134
|
||||
att164 att213
|
||||
att164 att112
|
||||
att164 att38
|
||||
att164 att189
|
||||
att164 att56
|
||||
att164 att44
|
||||
att164 att152
|
||||
att164 att8
|
||||
att107 att83
|
||||
att107 att49
|
||||
att107 att59
|
||||
att107 att47
|
||||
att107 att191
|
||||
att42 att138
|
||||
att42 att54
|
||||
att42 att114
|
||||
att71 att83
|
||||
att71 att167
|
||||
att71 att35
|
||||
att71 att179
|
||||
att128 att92
|
||||
att128 att112
|
||||
att138 att18
|
||||
att138 att150
|
||||
att83 att167
|
||||
att83 att35
|
||||
att171 att87
|
||||
att171 att159
|
||||
att171 att63
|
||||
att171 att51
|
||||
att171 att39
|
||||
att171 att75
|
||||
att92 att163
|
||||
att92 att145
|
||||
att92 att56
|
||||
att163 att49
|
||||
att163 att175
|
||||
att163 att87
|
||||
att163 att79
|
||||
att163 att151
|
||||
att163 att139
|
||||
att163 att187
|
||||
att163 att127
|
||||
att163 att103
|
||||
att163 att91
|
||||
att49 att37
|
||||
att161 att173
|
||||
att161 att113
|
||||
att176 att145
|
||||
att176 att172
|
||||
att176 att68
|
||||
att176 att80
|
||||
att176 att32
|
||||
att176 att175
|
||||
att176 att98
|
||||
att176 att110
|
||||
att176 att205
|
||||
att176 att21
|
||||
att176 att134
|
||||
att176 att213
|
||||
att176 att56
|
||||
att4 att196
|
||||
att4 att88
|
||||
att4 att136
|
||||
att4 att100
|
||||
att4 att148
|
||||
att4 att208
|
||||
att172 att112
|
||||
att172 att184
|
||||
att196 att88
|
||||
att196 att136
|
||||
att196 att100
|
||||
att196 att148
|
||||
att196 att208
|
||||
att58 att142
|
||||
att58 att46
|
||||
att58 att34
|
||||
att68 att32
|
||||
att80 att38
|
||||
att32 att110
|
||||
att32 att21
|
||||
att32 att44
|
||||
att32 att200
|
||||
att175 att87
|
||||
att175 att159
|
||||
att175 att79
|
||||
att175 att187
|
||||
att175 att115
|
||||
att87 att159
|
||||
att87 att63
|
||||
att87 att51
|
||||
att87 att75
|
||||
att87 att15
|
||||
att87 att99
|
||||
att159 att75
|
||||
att159 att15
|
||||
att159 att195
|
||||
att18 att90
|
||||
att18 att102
|
||||
att18 att78
|
||||
att18 att198
|
||||
att52 att124
|
||||
att52 att64
|
||||
att98 att86
|
||||
att136 att100
|
||||
att136 att208
|
||||
att150 att90
|
||||
att150 att174
|
||||
att150 att66
|
||||
att156 att205
|
||||
att156 att96
|
||||
att156 att216
|
||||
att156 att204
|
||||
att156 att24
|
||||
att156 att84
|
||||
att156 att36
|
||||
att156 att12
|
||||
att156 att108
|
||||
att100 att148
|
||||
att63 att51
|
||||
att63 att39
|
||||
att63 att3
|
||||
att63 att183
|
||||
att63 att147
|
||||
att90 att102
|
||||
att90 att78
|
||||
att167 att35
|
||||
att167 att179
|
||||
att35 att179
|
||||
att51 att39
|
||||
att51 att3
|
||||
att51 att183
|
||||
att21 att134
|
||||
att21 att213
|
||||
att21 att38
|
||||
att21 att189
|
||||
att21 att129
|
||||
att21 att81
|
||||
att21 att153
|
||||
att21 att117
|
||||
att21 att9
|
||||
att142 att46
|
||||
att142 att130
|
||||
att142 att118
|
||||
att142 att70
|
||||
att142 att10
|
||||
att142 att202
|
||||
att142 att190
|
||||
att142 att106
|
||||
att46 att130
|
||||
att46 att118
|
||||
att46 att70
|
||||
att46 att34
|
||||
att46 att166
|
||||
att46 att82
|
||||
att134 att2
|
||||
att39 att3
|
||||
att102 att78
|
||||
att102 att174
|
||||
att102 att54
|
||||
att102 att198
|
||||
att130 att118
|
||||
att130 att10
|
||||
att130 att202
|
||||
att130 att190
|
||||
att130 att106
|
||||
att149 att125
|
||||
att96 att216
|
||||
att96 att204
|
||||
att96 att24
|
||||
att75 att15
|
||||
att75 att99
|
||||
att118 att70
|
||||
att118 att10
|
||||
att118 att202
|
||||
att78 att198
|
||||
att213 att189
|
||||
att213 att129
|
||||
att213 att69
|
||||
att213 att81
|
||||
att38 att50
|
||||
att38 att26
|
||||
att174 att54
|
||||
att174 att66
|
||||
att174 att30
|
||||
att189 att86
|
||||
att189 att129
|
||||
att189 att69
|
||||
att189 att81
|
||||
att189 att153
|
||||
att189 att117
|
||||
att189 att9
|
||||
att189 att45
|
||||
att189 att141
|
||||
att189 att105
|
||||
att70 att34
|
||||
att70 att154
|
||||
att179 att59
|
||||
att59 att47
|
||||
att59 att191
|
||||
att59 att119
|
||||
att79 att86
|
||||
att79 att151
|
||||
att79 att139
|
||||
att79 att187
|
||||
att79 att127
|
||||
att79 att103
|
||||
att79 att43
|
||||
att79 att193
|
||||
att79 att91
|
||||
att79 att19
|
||||
att124 att64
|
||||
att54 att114
|
||||
att54 att30
|
||||
att54 att6
|
||||
att191 att119
|
||||
att86 att194
|
||||
att56 att44
|
||||
att56 att152
|
||||
att56 att50
|
||||
att56 att188
|
||||
att56 att26
|
||||
att56 att200
|
||||
att56 att104
|
||||
att56 att140
|
||||
att56 att146
|
||||
att56 att194
|
||||
att56 att8
|
||||
att56 att2
|
||||
att56 att133
|
||||
att56 att1
|
||||
att151 att139
|
||||
att66 att30
|
||||
att173 att125
|
||||
att173 att113
|
||||
att173 att185
|
||||
att44 att152
|
||||
att44 att50
|
||||
att44 att188
|
||||
att44 att200
|
||||
att44 att104
|
||||
att44 att140
|
||||
att44 att194
|
||||
att44 att212
|
||||
att44 att1
|
||||
att139 att26
|
||||
att139 att99
|
||||
att139 att103
|
||||
att139 att43
|
||||
att139 att91
|
||||
att139 att31
|
||||
att139 att199
|
||||
att139 att7
|
||||
att216 att204
|
||||
att216 att24
|
||||
att216 att84
|
||||
att216 att36
|
||||
att216 att12
|
||||
att216 att180
|
||||
att216 att108
|
||||
att129 att69
|
||||
att152 att188
|
||||
att152 att140
|
||||
att69 att153
|
||||
att69 att9
|
||||
att69 att177
|
||||
att81 att45
|
||||
att81 att105
|
||||
att153 att117
|
||||
att153 att141
|
||||
att41 att53
|
||||
att204 att12
|
||||
att204 att180
|
||||
att188 att146
|
||||
att188 att212
|
||||
att13 att157
|
||||
att114 att6
|
||||
att114 att186
|
||||
att10 att190
|
||||
att64 att184
|
||||
att200 att104
|
||||
att9 att45
|
||||
att9 att146
|
||||
att9 att141
|
||||
att9 att177
|
||||
att9 att37
|
||||
att9 att133
|
||||
att9 att109
|
||||
att9 att181
|
||||
att3 att183
|
||||
att3 att147
|
||||
att3 att123
|
||||
att3 att135
|
||||
att3 att111
|
||||
att45 att105
|
||||
att45 att177
|
||||
att45 att93
|
||||
att45 att201
|
||||
att45 att165
|
||||
att45 att193
|
||||
att45 att33
|
||||
att45 att37
|
||||
att45 att133
|
||||
att45 att97
|
||||
att140 att8
|
||||
att30 att6
|
||||
att30 att186
|
||||
att183 att147
|
||||
att183 att123
|
||||
att183 att135
|
||||
att146 att2
|
||||
att202 att166
|
||||
att202 att106
|
||||
att202 att82
|
||||
att24 att84
|
||||
att24 att36
|
||||
att24 att132
|
||||
att147 att123
|
||||
att147 att135
|
||||
att147 att111
|
||||
att147 att207
|
||||
att8 att212
|
||||
att166 att82
|
||||
att166 att22
|
||||
att166 att94
|
||||
att187 att127
|
||||
att187 att115
|
||||
att127 att115
|
||||
att105 att184
|
||||
att105 att93
|
||||
att105 att201
|
||||
att106 att154
|
||||
att82 att154
|
||||
att82 att22
|
||||
att135 att111
|
||||
att135 att207
|
||||
att154 att22
|
||||
att154 att94
|
||||
att111 att207
|
||||
att99 att195
|
||||
att22 att94
|
||||
att84 att48
|
||||
att177 att93
|
||||
att177 att165
|
||||
att177 att181
|
||||
att103 att195
|
||||
att103 att97
|
||||
att103 att109
|
||||
att93 att201
|
||||
att93 att165
|
||||
att93 att193
|
||||
att93 att33
|
||||
att93 att57
|
||||
att201 att33
|
||||
att201 att57
|
||||
att43 att31
|
||||
att36 att180
|
||||
att36 att48
|
||||
att36 att72
|
||||
att36 att132
|
||||
att36 att144
|
||||
att125 att113
|
||||
att125 att185
|
||||
att125 att65
|
||||
att125 att29
|
||||
att180 att48
|
||||
att180 att72
|
||||
att180 att192
|
||||
att180 att108
|
||||
att48 att72
|
||||
att6 att186
|
||||
att113 att185
|
||||
att113 att53
|
||||
att113 att65
|
||||
att193 att97
|
||||
att91 att31
|
||||
att91 att199
|
||||
att91 att19
|
||||
att72 att132
|
||||
att72 att144
|
||||
att72 att192
|
||||
att72 att120
|
||||
att31 att199
|
||||
att31 att7
|
||||
att31 att67
|
||||
att31 att55
|
||||
att31 att1
|
||||
att132 att144
|
||||
att132 att120
|
||||
att33 att57
|
||||
att144 att192
|
||||
att144 att120
|
||||
att185 att53
|
||||
att185 att65
|
||||
att185 att29
|
||||
att199 att19
|
||||
att199 att7
|
||||
att199 att67
|
||||
att199 att55
|
||||
att199 att109
|
||||
att65 att29
|
||||
att7 att67
|
||||
att67 att55
|
||||
att109 att181
|
||||
|
@@ -1,859 +0,0 @@
|
||||
class att215
|
||||
class att25
|
||||
class att131
|
||||
class att95
|
||||
class att122
|
||||
class att17
|
||||
class att28
|
||||
class att5
|
||||
class att121
|
||||
class att214
|
||||
class att197
|
||||
class att116
|
||||
class att182
|
||||
class att60
|
||||
class att168
|
||||
class att178
|
||||
class att206
|
||||
class att89
|
||||
class att77
|
||||
class att209
|
||||
class att73
|
||||
class att126
|
||||
class att16
|
||||
class att74
|
||||
class att27
|
||||
class att61
|
||||
class att20
|
||||
class att101
|
||||
class att85
|
||||
class att76
|
||||
class att137
|
||||
class att211
|
||||
class att143
|
||||
class att14
|
||||
class att40
|
||||
class att210
|
||||
class att155
|
||||
class att170
|
||||
class att160
|
||||
class att23
|
||||
class att162
|
||||
class att203
|
||||
class att164
|
||||
class att107
|
||||
class att62
|
||||
class att42
|
||||
class att71
|
||||
class att128
|
||||
class att138
|
||||
class att83
|
||||
class att171
|
||||
class att92
|
||||
class att163
|
||||
class att49
|
||||
class att161
|
||||
class att158
|
||||
class att176
|
||||
class att11
|
||||
class att145
|
||||
class att4
|
||||
class att172
|
||||
class att196
|
||||
class att58
|
||||
class att68
|
||||
class att169
|
||||
class att80
|
||||
class att32
|
||||
class att175
|
||||
class att87
|
||||
class att88
|
||||
class att159
|
||||
class att18
|
||||
class att52
|
||||
class att98
|
||||
class att136
|
||||
class att150
|
||||
class att156
|
||||
class att110
|
||||
class att100
|
||||
class att63
|
||||
class att148
|
||||
class att90
|
||||
class att167
|
||||
class att35
|
||||
class att205
|
||||
class att51
|
||||
class att21
|
||||
class att142
|
||||
class att46
|
||||
class att134
|
||||
class att39
|
||||
class att102
|
||||
class att208
|
||||
class att130
|
||||
class att149
|
||||
class att96
|
||||
class att75
|
||||
class att118
|
||||
class att78
|
||||
class att213
|
||||
class att112
|
||||
class att38
|
||||
class att174
|
||||
class att189
|
||||
class att70
|
||||
class att179
|
||||
class att59
|
||||
class att79
|
||||
class att15
|
||||
class att47
|
||||
class att124
|
||||
class att34
|
||||
class att54
|
||||
class att191
|
||||
class att86
|
||||
class att56
|
||||
class att151
|
||||
class att66
|
||||
class att173
|
||||
class att44
|
||||
class att198
|
||||
class att139
|
||||
class att216
|
||||
class att129
|
||||
class att152
|
||||
class att69
|
||||
class att81
|
||||
class att50
|
||||
class att153
|
||||
class att41
|
||||
class att204
|
||||
class att188
|
||||
class att26
|
||||
class att13
|
||||
class att117
|
||||
class att114
|
||||
class att10
|
||||
class att64
|
||||
class att200
|
||||
class att9
|
||||
class att3
|
||||
class att119
|
||||
class att45
|
||||
class att104
|
||||
class att140
|
||||
class att30
|
||||
class att183
|
||||
class att146
|
||||
class att141
|
||||
class att202
|
||||
class att194
|
||||
class att24
|
||||
class att147
|
||||
class att8
|
||||
class att212
|
||||
class att123
|
||||
class att166
|
||||
class att187
|
||||
class att127
|
||||
class att190
|
||||
class att105
|
||||
class att106
|
||||
class att184
|
||||
class att82
|
||||
class att2
|
||||
class att135
|
||||
class att154
|
||||
class att111
|
||||
class att115
|
||||
class att99
|
||||
class att22
|
||||
class att84
|
||||
class att207
|
||||
class att94
|
||||
class att177
|
||||
class att103
|
||||
class att93
|
||||
class att201
|
||||
class att43
|
||||
class att36
|
||||
class att12
|
||||
class att125
|
||||
class att165
|
||||
class att180
|
||||
class att195
|
||||
class att157
|
||||
class att48
|
||||
class att6
|
||||
class att113
|
||||
class att193
|
||||
class att91
|
||||
class att72
|
||||
class att31
|
||||
class att132
|
||||
class att33
|
||||
class att57
|
||||
class att144
|
||||
class att192
|
||||
class att185
|
||||
class att37
|
||||
class att53
|
||||
class att120
|
||||
class att186
|
||||
class att199
|
||||
class att65
|
||||
class att108
|
||||
class att133
|
||||
class att29
|
||||
class att19
|
||||
class att7
|
||||
class att97
|
||||
class att67
|
||||
class att55
|
||||
class att1
|
||||
class att109
|
||||
class att181
|
||||
att215 att25
|
||||
att215 att131
|
||||
att215 att95
|
||||
att215 att17
|
||||
att215 att214
|
||||
att215 att143
|
||||
att25 att131
|
||||
att25 att95
|
||||
att25 att122
|
||||
att25 att121
|
||||
att25 att73
|
||||
att25 att61
|
||||
att25 att85
|
||||
att25 att169
|
||||
att25 att13
|
||||
att25 att157
|
||||
att131 att95
|
||||
att131 att122
|
||||
att131 att17
|
||||
att131 att28
|
||||
att131 att5
|
||||
att131 att121
|
||||
att131 att214
|
||||
att131 att116
|
||||
att131 att182
|
||||
att131 att60
|
||||
att131 att126
|
||||
att131 att16
|
||||
att131 att27
|
||||
att131 att20
|
||||
att131 att143
|
||||
att131 att155
|
||||
att95 att122
|
||||
att95 att17
|
||||
att95 att28
|
||||
att95 att5
|
||||
att95 att121
|
||||
att95 att214
|
||||
att95 att197
|
||||
att95 att116
|
||||
att95 att60
|
||||
att95 att168
|
||||
att95 att178
|
||||
att95 att143
|
||||
att95 att155
|
||||
att95 att23
|
||||
att95 att71
|
||||
att95 att167
|
||||
att122 att28
|
||||
att122 att182
|
||||
att122 att170
|
||||
att17 att5
|
||||
att17 att197
|
||||
att17 att89
|
||||
att17 att77
|
||||
att17 att209
|
||||
att17 att137
|
||||
att17 att161
|
||||
att17 att41
|
||||
att28 att206
|
||||
att28 att16
|
||||
att28 att76
|
||||
att28 att40
|
||||
att28 att210
|
||||
att28 att160
|
||||
att28 att172
|
||||
att28 att124
|
||||
att28 att64
|
||||
att5 att197
|
||||
att5 att89
|
||||
att5 att77
|
||||
att5 att209
|
||||
att5 att101
|
||||
att121 att73
|
||||
att121 att61
|
||||
att214 att116
|
||||
att214 att178
|
||||
att214 att206
|
||||
att214 att58
|
||||
att214 att142
|
||||
att214 att46
|
||||
att197 att89
|
||||
att197 att209
|
||||
att197 att101
|
||||
att116 att182
|
||||
att116 att60
|
||||
att116 att168
|
||||
att116 att178
|
||||
att116 att206
|
||||
att116 att73
|
||||
att116 att126
|
||||
att116 att16
|
||||
att116 att74
|
||||
att116 att27
|
||||
att116 att20
|
||||
att116 att211
|
||||
att116 att164
|
||||
att116 att128
|
||||
att116 att92
|
||||
att116 att176
|
||||
att116 att68
|
||||
att182 att27
|
||||
att182 att14
|
||||
att60 att168
|
||||
att60 att156
|
||||
att60 att96
|
||||
att168 att126
|
||||
att168 att156
|
||||
att168 att96
|
||||
att168 att216
|
||||
att178 att20
|
||||
att178 att211
|
||||
att178 att58
|
||||
att178 att142
|
||||
att178 att130
|
||||
att178 att166
|
||||
att206 att74
|
||||
att206 att170
|
||||
att206 att158
|
||||
att89 att77
|
||||
att89 att137
|
||||
att89 att149
|
||||
att89 att173
|
||||
att77 att137
|
||||
att77 att161
|
||||
att77 att149
|
||||
att209 att101
|
||||
att209 att41
|
||||
att73 att61
|
||||
att73 att85
|
||||
att73 att13
|
||||
att73 att157
|
||||
att126 att162
|
||||
att126 att138
|
||||
att126 att18
|
||||
att126 att150
|
||||
att16 att74
|
||||
att16 att76
|
||||
att16 att40
|
||||
att16 att4
|
||||
att16 att196
|
||||
att16 att136
|
||||
att74 att14
|
||||
att74 att62
|
||||
att27 att171
|
||||
att27 att63
|
||||
att61 att85
|
||||
att61 att169
|
||||
att20 att76
|
||||
att20 att211
|
||||
att20 att210
|
||||
att20 att170
|
||||
att20 att164
|
||||
att20 att128
|
||||
att20 att176
|
||||
att20 att80
|
||||
att101 att41
|
||||
att85 att169
|
||||
att85 att13
|
||||
att76 att14
|
||||
att76 att40
|
||||
att76 att160
|
||||
att76 att4
|
||||
att76 att52
|
||||
att137 att161
|
||||
att137 att149
|
||||
att137 att173
|
||||
att137 att125
|
||||
att211 att210
|
||||
att211 att162
|
||||
att211 att164
|
||||
att211 att62
|
||||
att211 att42
|
||||
att211 att171
|
||||
att211 att163
|
||||
att211 att175
|
||||
att211 att79
|
||||
att211 att151
|
||||
att211 att43
|
||||
att143 att155
|
||||
att143 att23
|
||||
att143 att203
|
||||
att143 att71
|
||||
att143 att83
|
||||
att143 att11
|
||||
att14 att98
|
||||
att40 att160
|
||||
att40 att4
|
||||
att40 att196
|
||||
att40 att88
|
||||
att40 att52
|
||||
att210 att162
|
||||
att210 att42
|
||||
att210 att114
|
||||
att155 att23
|
||||
att155 att203
|
||||
att155 att107
|
||||
att155 att11
|
||||
att170 att158
|
||||
att160 att52
|
||||
att160 att124
|
||||
att23 att203
|
||||
att23 att107
|
||||
att23 att71
|
||||
att23 att11
|
||||
att162 att138
|
||||
att162 att18
|
||||
att162 att150
|
||||
att162 att90
|
||||
att162 att102
|
||||
att162 att174
|
||||
att162 att66
|
||||
att203 att107
|
||||
att203 att49
|
||||
att203 att59
|
||||
att203 att47
|
||||
att203 att191
|
||||
att203 att119
|
||||
att164 att62
|
||||
att164 att42
|
||||
att164 att128
|
||||
att164 att171
|
||||
att164 att92
|
||||
att164 att163
|
||||
att164 att158
|
||||
att164 att176
|
||||
att164 att145
|
||||
att164 att172
|
||||
att164 att58
|
||||
att164 att68
|
||||
att164 att80
|
||||
att164 att32
|
||||
att164 att98
|
||||
att164 att156
|
||||
att164 att110
|
||||
att164 att205
|
||||
att164 att21
|
||||
att164 att134
|
||||
att164 att213
|
||||
att164 att112
|
||||
att164 att38
|
||||
att164 att189
|
||||
att164 att56
|
||||
att164 att44
|
||||
att164 att152
|
||||
att164 att8
|
||||
att107 att83
|
||||
att107 att49
|
||||
att107 att59
|
||||
att107 att47
|
||||
att107 att191
|
||||
att42 att138
|
||||
att42 att54
|
||||
att42 att114
|
||||
att71 att83
|
||||
att71 att167
|
||||
att71 att35
|
||||
att71 att179
|
||||
att128 att92
|
||||
att128 att112
|
||||
att138 att18
|
||||
att138 att150
|
||||
att83 att167
|
||||
att83 att35
|
||||
att171 att87
|
||||
att171 att159
|
||||
att171 att63
|
||||
att171 att51
|
||||
att171 att39
|
||||
att171 att75
|
||||
att92 att163
|
||||
att92 att145
|
||||
att92 att56
|
||||
att163 att49
|
||||
att163 att175
|
||||
att163 att87
|
||||
att163 att79
|
||||
att163 att151
|
||||
att163 att139
|
||||
att163 att187
|
||||
att163 att127
|
||||
att163 att103
|
||||
att163 att91
|
||||
att49 att37
|
||||
att161 att173
|
||||
att161 att113
|
||||
att176 att145
|
||||
att176 att172
|
||||
att176 att68
|
||||
att176 att80
|
||||
att176 att32
|
||||
att176 att175
|
||||
att176 att98
|
||||
att176 att110
|
||||
att176 att205
|
||||
att176 att21
|
||||
att176 att134
|
||||
att176 att213
|
||||
att176 att56
|
||||
att4 att196
|
||||
att4 att88
|
||||
att4 att136
|
||||
att4 att100
|
||||
att4 att148
|
||||
att4 att208
|
||||
att172 att112
|
||||
att172 att184
|
||||
att196 att88
|
||||
att196 att136
|
||||
att196 att100
|
||||
att196 att148
|
||||
att196 att208
|
||||
att58 att142
|
||||
att58 att46
|
||||
att58 att34
|
||||
att68 att32
|
||||
att80 att38
|
||||
att32 att110
|
||||
att32 att21
|
||||
att32 att44
|
||||
att32 att200
|
||||
att175 att87
|
||||
att175 att159
|
||||
att175 att79
|
||||
att175 att187
|
||||
att175 att115
|
||||
att87 att159
|
||||
att87 att63
|
||||
att87 att51
|
||||
att87 att75
|
||||
att87 att15
|
||||
att87 att99
|
||||
att159 att75
|
||||
att159 att15
|
||||
att159 att195
|
||||
att18 att90
|
||||
att18 att102
|
||||
att18 att78
|
||||
att18 att198
|
||||
att52 att124
|
||||
att52 att64
|
||||
att98 att86
|
||||
att136 att100
|
||||
att136 att208
|
||||
att150 att90
|
||||
att150 att174
|
||||
att150 att66
|
||||
att156 att205
|
||||
att156 att96
|
||||
att156 att216
|
||||
att156 att204
|
||||
att156 att24
|
||||
att156 att84
|
||||
att156 att36
|
||||
att156 att12
|
||||
att156 att108
|
||||
att100 att148
|
||||
att63 att51
|
||||
att63 att39
|
||||
att63 att3
|
||||
att63 att183
|
||||
att63 att147
|
||||
att90 att102
|
||||
att90 att78
|
||||
att167 att35
|
||||
att167 att179
|
||||
att35 att179
|
||||
att51 att39
|
||||
att51 att3
|
||||
att51 att183
|
||||
att21 att134
|
||||
att21 att213
|
||||
att21 att38
|
||||
att21 att189
|
||||
att21 att129
|
||||
att21 att81
|
||||
att21 att153
|
||||
att21 att117
|
||||
att21 att9
|
||||
att142 att46
|
||||
att142 att130
|
||||
att142 att118
|
||||
att142 att70
|
||||
att142 att10
|
||||
att142 att202
|
||||
att142 att190
|
||||
att142 att106
|
||||
att46 att130
|
||||
att46 att118
|
||||
att46 att70
|
||||
att46 att34
|
||||
att46 att166
|
||||
att46 att82
|
||||
att134 att2
|
||||
att39 att3
|
||||
att102 att78
|
||||
att102 att174
|
||||
att102 att54
|
||||
att102 att198
|
||||
att130 att118
|
||||
att130 att10
|
||||
att130 att202
|
||||
att130 att190
|
||||
att130 att106
|
||||
att149 att125
|
||||
att96 att216
|
||||
att96 att204
|
||||
att96 att24
|
||||
att75 att15
|
||||
att75 att99
|
||||
att118 att70
|
||||
att118 att10
|
||||
att118 att202
|
||||
att78 att198
|
||||
att213 att189
|
||||
att213 att129
|
||||
att213 att69
|
||||
att213 att81
|
||||
att38 att50
|
||||
att38 att26
|
||||
att174 att54
|
||||
att174 att66
|
||||
att174 att30
|
||||
att189 att86
|
||||
att189 att129
|
||||
att189 att69
|
||||
att189 att81
|
||||
att189 att153
|
||||
att189 att117
|
||||
att189 att9
|
||||
att189 att45
|
||||
att189 att141
|
||||
att189 att105
|
||||
att70 att34
|
||||
att70 att154
|
||||
att179 att59
|
||||
att59 att47
|
||||
att59 att191
|
||||
att59 att119
|
||||
att79 att86
|
||||
att79 att151
|
||||
att79 att139
|
||||
att79 att187
|
||||
att79 att127
|
||||
att79 att103
|
||||
att79 att43
|
||||
att79 att193
|
||||
att79 att91
|
||||
att79 att19
|
||||
att124 att64
|
||||
att54 att114
|
||||
att54 att30
|
||||
att54 att6
|
||||
att191 att119
|
||||
att86 att194
|
||||
att56 att44
|
||||
att56 att152
|
||||
att56 att50
|
||||
att56 att188
|
||||
att56 att26
|
||||
att56 att200
|
||||
att56 att104
|
||||
att56 att140
|
||||
att56 att146
|
||||
att56 att194
|
||||
att56 att8
|
||||
att56 att2
|
||||
att56 att133
|
||||
att56 att1
|
||||
att151 att139
|
||||
att66 att30
|
||||
att173 att125
|
||||
att173 att113
|
||||
att173 att185
|
||||
att44 att152
|
||||
att44 att50
|
||||
att44 att188
|
||||
att44 att200
|
||||
att44 att104
|
||||
att44 att140
|
||||
att44 att194
|
||||
att44 att212
|
||||
att44 att1
|
||||
att139 att26
|
||||
att139 att99
|
||||
att139 att103
|
||||
att139 att43
|
||||
att139 att91
|
||||
att139 att31
|
||||
att139 att199
|
||||
att139 att7
|
||||
att216 att204
|
||||
att216 att24
|
||||
att216 att84
|
||||
att216 att36
|
||||
att216 att12
|
||||
att216 att180
|
||||
att216 att108
|
||||
att129 att69
|
||||
att152 att188
|
||||
att152 att140
|
||||
att69 att153
|
||||
att69 att9
|
||||
att69 att177
|
||||
att81 att45
|
||||
att81 att105
|
||||
att153 att117
|
||||
att153 att141
|
||||
att41 att53
|
||||
att204 att12
|
||||
att204 att180
|
||||
att188 att146
|
||||
att188 att212
|
||||
att13 att157
|
||||
att114 att6
|
||||
att114 att186
|
||||
att10 att190
|
||||
att64 att184
|
||||
att200 att104
|
||||
att9 att45
|
||||
att9 att146
|
||||
att9 att141
|
||||
att9 att177
|
||||
att9 att37
|
||||
att9 att133
|
||||
att9 att109
|
||||
att9 att181
|
||||
att3 att183
|
||||
att3 att147
|
||||
att3 att123
|
||||
att3 att135
|
||||
att3 att111
|
||||
att45 att105
|
||||
att45 att177
|
||||
att45 att93
|
||||
att45 att201
|
||||
att45 att165
|
||||
att45 att193
|
||||
att45 att33
|
||||
att45 att37
|
||||
att45 att133
|
||||
att45 att97
|
||||
att140 att8
|
||||
att30 att6
|
||||
att30 att186
|
||||
att183 att147
|
||||
att183 att123
|
||||
att183 att135
|
||||
att146 att2
|
||||
att202 att166
|
||||
att202 att106
|
||||
att202 att82
|
||||
att24 att84
|
||||
att24 att36
|
||||
att24 att132
|
||||
att147 att123
|
||||
att147 att135
|
||||
att147 att111
|
||||
att147 att207
|
||||
att8 att212
|
||||
att166 att82
|
||||
att166 att22
|
||||
att166 att94
|
||||
att187 att127
|
||||
att187 att115
|
||||
att127 att115
|
||||
att105 att184
|
||||
att105 att93
|
||||
att105 att201
|
||||
att106 att154
|
||||
att82 att154
|
||||
att82 att22
|
||||
att135 att111
|
||||
att135 att207
|
||||
att154 att22
|
||||
att154 att94
|
||||
att111 att207
|
||||
att99 att195
|
||||
att22 att94
|
||||
att84 att48
|
||||
att177 att93
|
||||
att177 att165
|
||||
att177 att181
|
||||
att103 att195
|
||||
att103 att97
|
||||
att103 att109
|
||||
att93 att201
|
||||
att93 att165
|
||||
att93 att193
|
||||
att93 att33
|
||||
att93 att57
|
||||
att201 att33
|
||||
att201 att57
|
||||
att43 att31
|
||||
att36 att180
|
||||
att36 att48
|
||||
att36 att72
|
||||
att36 att132
|
||||
att36 att144
|
||||
att125 att113
|
||||
att125 att185
|
||||
att125 att65
|
||||
att125 att29
|
||||
att180 att48
|
||||
att180 att72
|
||||
att180 att192
|
||||
att180 att108
|
||||
att48 att72
|
||||
att6 att186
|
||||
att113 att185
|
||||
att113 att53
|
||||
att113 att65
|
||||
att193 att97
|
||||
att91 att31
|
||||
att91 att199
|
||||
att91 att19
|
||||
att72 att132
|
||||
att72 att144
|
||||
att72 att192
|
||||
att72 att120
|
||||
att31 att199
|
||||
att31 att7
|
||||
att31 att67
|
||||
att31 att55
|
||||
att31 att1
|
||||
att132 att144
|
||||
att132 att120
|
||||
att33 att57
|
||||
att144 att192
|
||||
att144 att120
|
||||
att185 att53
|
||||
att185 att65
|
||||
att185 att29
|
||||
att199 att19
|
||||
att199 att7
|
||||
att199 att67
|
||||
att199 att55
|
||||
att199 att109
|
||||
att65 att29
|
||||
att7 att67
|
||||
att67 att55
|
||||
att109 att181
|
||||
|
71
docs/BoostAODE.md
Normal file
71
docs/BoostAODE.md
Normal file
@@ -0,0 +1,71 @@
|
||||
# BoostAODE Algorithm Operation
|
||||
|
||||
The algorithm is based on the AdaBoost algorithm with some new proposals that can be activated using the following hyperparameters.
|
||||
|
||||
## Hyperparameters
|
||||
|
||||
The hyperparameters defined in the algorithm are:
|
||||
|
||||
- ***repeatSparent*** (*boolean*): Allows dataset variables to be repeated as parents of an *SPODE*. Default value: *false*.
|
||||
|
||||
- ***maxModels*** (*int*): Maximum number of models (*SPODEs*) to build. This hyperparameter is only taken into account if ***repeatSparent*** is set to *true*. Default value: *0*.
|
||||
|
||||
- ***order*** (*{"asc", "desc", "rand"}*): Sets the order (ascending/descending/random) in which dataset variables will be processed to choose the parents of the *SPODEs*. Default value: *"desc"*.
|
||||
|
||||
- ***convergence*** (*boolean*): Sets whether the convergence of the result will be used as a termination condition. If this hyperparameter is set to *true*, the training dataset passed to the model is divided into two sets, one serving as training data and the other as a test set (so the original test partition becomes a validation partition in this case). The split is the first fold of a stratified 5-fold partition generated with a predetermined seed. The check used for *convergence* is whether the difference between the accuracy obtained by the current model and that obtained by the previous model is greater than *1e-4*; if it is not, one is added to the number of models that worsen the result (see next hyperparameter). Default value: *false*.
|
||||
|
||||
- ***tolerance*** (*int*): Sets the maximum number of models that can worsen the result without constituting a termination condition. Default value: *0*.
|
||||
|
||||
- ***select_features*** (*{"IWSS", "FCBF", "CFS", ""}*): Selects the variable selection method to be used to build initial models for the ensemble that will be included without considering any of the other exit conditions. Once the models of the selected variables are built, the algorithm will update the weights using the ensemble and set the significance of all the models built with the same α<sub>t</sub>. Default value: *""*.
|
||||
|
||||
- ***threshold*** (*double*): Sets the necessary value for the IWSS and FCBF algorithms to function. Accepted values are:
|
||||
- IWSS: $threshold \in [0, 0.5]$
|
||||
- FCBF: $threshold \in [10^{-7}, 1]$
|
||||
|
||||
The default value is *-1*, which lies outside both accepted ranges, so the threshold must be set explicitly to the desired value whenever one of these algorithms is used.
|
||||
|
||||
- ***predict_voting*** (*boolean*): Sets whether the algorithm will use *model voting* to predict the result. If set to false, the weighted average of the probabilities of each model's prediction will be used. Default value: *true*.
|
||||
|
||||
- ***predict_single*** (*boolean*): Sets whether the algorithm will use single-model prediction in the learning process. If set to *false*, all models trained up to that point will be used to calculate the prediction necessary to update the weights in the learning process. Default value: *true*.
|
||||
|
||||
## Operation
|
||||
|
||||
The algorithm performs the following steps:
|
||||
|
||||
1. **Initialization**
|
||||
|
||||
- If ***select_features*** is set, as many *SPODEs* are created as variables selected by the corresponding feature selection algorithm, and these variables are marked as used.
|
||||
|
||||
- Initial weights of the examples are set to *1/m*.
|
||||
|
||||
1. **Main Training Loop:**
|
||||
|
||||
- Variables are sorted by mutual information order with the class variable and processed in ascending, descending or random order, according to the value of the *order* hyperparameter. If it is random, the variables are shuffled.
|
||||
|
||||
- If ***repeatSparent*** is not set, the variable is marked as used.
|
||||
|
||||
- A *SPODE* is created using the selected variable as the parent.
|
||||
|
||||
- The model is trained, and the class variable corresponding to the training dataset is calculated. The calculation can be done using the last trained model or the set of models trained up to that point, according to the value of the *predict_single* hyperparameter.
|
||||
|
||||
- The weights associated with the examples are updated using this expression:
|
||||
|
||||
- w<sub>i</sub> · e<sup>α<sub>t</sub></sup> (if the example has been misclassified)
|
||||
|
||||
- w<sub>i</sub> · e<sup>-α<sub>t</sub></sup> (if the example has been correctly classified)
|
||||
|
||||
- The model significance is set to α<sub>t</sub>.
|
||||
|
||||
- If the ***convergence*** hyperparameter is set, the accuracy value on the test dataset that we separated in an initial step is calculated.
|
||||
|
||||
1. **Exit Conditions:**
|
||||
|
||||
- ε<sub>t</sub> > 0.5 => misclassified examples are penalized.
|
||||
|
||||
- The number of models with worse accuracy is greater than ***tolerance*** and ***convergence*** is set.
|
||||
|
||||
- There are no more variables to create models, and ***repeatSparent*** is not set.
|
||||
|
||||
- Number of models > ***maxModels*** if ***repeatSparent*** is set.
|
||||
|
||||
### [Proposal for *predict_single = false*](./BoostAODE_train_predict.pdf)
|
BIN
docs/BoostAODE_train_predict.odp
Normal file
BIN
docs/BoostAODE_train_predict.odp
Normal file
Binary file not shown.
BIN
docs/BoostAODE_train_predict.pdf
Normal file
BIN
docs/BoostAODE_train_predict.pdf
Normal file
Binary file not shown.
@@ -1,4 +1,4 @@
|
||||
filter = src/
|
||||
exclude-directories = build/lib/
|
||||
exclude-directories = build_debug/lib/
|
||||
print-summary = yes
|
||||
sort-percentage = yes
|
||||
sort = uncovered-percent
|
||||
|
162
grid_stree.json
162
grid_stree.json
@@ -1,162 +0,0 @@
|
||||
{
|
||||
"balance-scale": {
|
||||
"C": 10000.0,
|
||||
"gamma": 0.1,
|
||||
"kernel": "rbf",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"balloons": {
|
||||
"C": 7,
|
||||
"gamma": 0.1,
|
||||
"kernel": "rbf",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"breast-cancer-wisc-diag": {
|
||||
"C": 0.2,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"breast-cancer-wisc-prog": {
|
||||
"C": 0.2,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"breast-cancer-wisc": {},
|
||||
"breast-cancer": {},
|
||||
"cardiotocography-10clases": {},
|
||||
"cardiotocography-3clases": {},
|
||||
"conn-bench-sonar-mines-rocks": {},
|
||||
"cylinder-bands": {},
|
||||
"dermatology": {
|
||||
"C": 55,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"echocardiogram": {
|
||||
"C": 7,
|
||||
"gamma": 0.1,
|
||||
"kernel": "poly",
|
||||
"max_features": "auto",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"fertility": {
|
||||
"C": 0.05,
|
||||
"max_features": "auto",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"haberman-survival": {},
|
||||
"heart-hungarian": {
|
||||
"C": 0.05,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"hepatitis": {
|
||||
"C": 7,
|
||||
"gamma": 0.1,
|
||||
"kernel": "rbf",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"ilpd-indian-liver": {},
|
||||
"ionosphere": {
|
||||
"C": 7,
|
||||
"gamma": 0.1,
|
||||
"kernel": "rbf",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"iris": {},
|
||||
"led-display": {},
|
||||
"libras": {
|
||||
"C": 0.08,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"low-res-spect": {
|
||||
"C": 0.05,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"lymphography": {
|
||||
"C": 0.05,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"mammographic": {},
|
||||
"molec-biol-promoter": {
|
||||
"C": 0.05,
|
||||
"gamma": 0.1,
|
||||
"kernel": "poly",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"musk-1": {
|
||||
"C": 0.05,
|
||||
"gamma": 0.1,
|
||||
"kernel": "poly",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"oocytes_merluccius_nucleus_4d": {
|
||||
"C": 8.25,
|
||||
"gamma": 0.1,
|
||||
"kernel": "poly"
|
||||
},
|
||||
"oocytes_merluccius_states_2f": {},
|
||||
"oocytes_trisopterus_nucleus_2f": {},
|
||||
"oocytes_trisopterus_states_5b": {
|
||||
"C": 0.11,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"parkinsons": {},
|
||||
"pima": {},
|
||||
"pittsburg-bridges-MATERIAL": {
|
||||
"C": 7,
|
||||
"gamma": 0.1,
|
||||
"kernel": "rbf",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"pittsburg-bridges-REL-L": {},
|
||||
"pittsburg-bridges-SPAN": {
|
||||
"C": 0.05,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"pittsburg-bridges-T-OR-D": {},
|
||||
"planning": {
|
||||
"C": 7,
|
||||
"gamma": 10.0,
|
||||
"kernel": "rbf",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"post-operative": {
|
||||
"C": 55,
|
||||
"degree": 5,
|
||||
"gamma": 0.1,
|
||||
"kernel": "poly",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"seeds": {
|
||||
"C": 10000.0,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"statlog-australian-credit": {
|
||||
"C": 0.05,
|
||||
"max_features": "auto",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"statlog-german-credit": {},
|
||||
"statlog-heart": {},
|
||||
"statlog-image": {
|
||||
"C": 7,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"statlog-vehicle": {},
|
||||
"synthetic-control": {
|
||||
"C": 0.55,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"tic-tac-toe": {
|
||||
"C": 0.2,
|
||||
"gamma": 0.1,
|
||||
"kernel": "poly",
|
||||
"max_iter": 10000
|
||||
},
|
||||
"vertebral-column-2clases": {},
|
||||
"wine": {
|
||||
"C": 0.55,
|
||||
"max_iter": 10000
|
||||
},
|
||||
"zoo": {
|
||||
"C": 0.1,
|
||||
"max_iter": 10000
|
||||
}
|
||||
}
|
Submodule lib/argparse deleted from 69dabd88a8
Submodule lib/catch2 updated: 766541d12d...ed6ac8a629
1
lib/folding
Submodule
1
lib/folding
Submodule
Submodule lib/folding added at 37316a54e0
2
lib/json
2
lib/json
Submodule lib/json updated: edffad036d...0457de21cf
Submodule lib/libxlsxwriter deleted from 29355a0887
@@ -1,10 +1,14 @@
|
||||
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
|
||||
include_directories(${Python3_INCLUDE_DIRS})
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
|
||||
add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
|
||||
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap)
|
||||
include_directories(
|
||||
${BayesNet_SOURCE_DIR}/src
|
||||
${BayesNet_SOURCE_DIR}/src/classifiers
|
||||
${BayesNet_SOURCE_DIR}/src/ensembles
|
||||
${BayesNet_SOURCE_DIR}/src/bayesian_network
|
||||
${BayesNet_SOURCE_DIR}/src/utils
|
||||
${BayesNet_SOURCE_DIR}/src/feature_selection
|
||||
${BayesNet_SOURCE_DIR}/lib/Files
|
||||
${BayesNet_SOURCE_DIR}/lib/mdlp
|
||||
${BayesNet_SOURCE_DIR}/lib/json/include
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
)
|
||||
add_executable(bayesnet_sample sample.cc)
|
||||
target_link_libraries(bayesnet_sample ArffFiles BayesNet)
|
251
sample/sample.cc
251
sample/sample.cc
@@ -1,147 +1,22 @@
|
||||
#include <iostream>
|
||||
#include <torch/torch.h>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <argparse/argparse.hpp>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "ArffFiles.h"
|
||||
#include "BayesMetrics.h"
|
||||
#include "CPPFImdlp.h"
|
||||
#include "Folding.h"
|
||||
#include "Models.h"
|
||||
#include "modelRegister.h"
|
||||
#include <fstream>
|
||||
#include "BoostAODE.h"
|
||||
|
||||
const std::string PATH = "../../data/";
|
||||
|
||||
pair<std::vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<std::string> features)
|
||||
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
|
||||
{
|
||||
std::vector<mdlp::labels_t>Xd;
|
||||
map<std::string, int> maxes;
|
||||
|
||||
std::vector<mdlp::labels_t> Xd;
|
||||
auto fimdlp = mdlp::CPPFImdlp();
|
||||
for (int i = 0; i < X.size(); i++) {
|
||||
fimdlp.fit(X[i], y);
|
||||
mdlp::labels_t& xd = fimdlp.transform(X[i]);
|
||||
maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
|
||||
Xd.push_back(xd);
|
||||
}
|
||||
return { Xd, maxes };
|
||||
return Xd;
|
||||
}
|
||||
|
||||
bool file_exists(const std::string& name)
|
||||
tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last)
|
||||
{
|
||||
if (FILE* file = fopen(name.c_str(), "r")) {
|
||||
fclose(file);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
/**
 * Select a subset of samples from a feature-major dataset.
 *
 * @param indices positions of the samples to extract, in the order they must appear in the result
 * @param X       dataset stored as one vector per feature, indexed as X[feature][sample]
 * @param y       label of every sample, indexed by sample
 * @return pair {Xr, yr}: Xr has one row per row of X holding only the selected samples,
 *         yr holds the labels of the selected samples
 *
 * The arguments are taken by const reference so the whole dataset is not copied on every call.
 * Indices are not range-checked.
 */
std::pair<std::vector<std::vector<int>>, std::vector<int>> extract_indices(const std::vector<int>& indices, const std::vector<std::vector<int>>& X, const std::vector<int>& y)
{
    std::pair<std::vector<std::vector<int>>, std::vector<int>> selected;
    selected.first.resize(X.size());
    // Fill one feature row at a time; `source` walks X in step with the output rows.
    auto source = X.begin();
    for (auto& column : selected.first) {
        column.reserve(indices.size());
        for (const int index : indices) {
            column.push_back((*source)[index]);
        }
        ++source;
    }
    selected.second.reserve(indices.size());
    for (const int index : indices) {
        selected.second.push_back(y[index]);
    }
    return selected;
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
map<std::string, bool> datasets = {
|
||||
{"diabetes", true},
|
||||
{"ecoli", true},
|
||||
{"glass", true},
|
||||
{"iris", true},
|
||||
{"kdd_JapaneseVowels", false},
|
||||
{"letter", true},
|
||||
{"liver-disorders", true},
|
||||
{"mfeat-factors", true},
|
||||
};
|
||||
auto valid_datasets = std::vector<std::string>();
|
||||
transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
|
||||
[](const pair<std::string, bool>& pair) { return pair.first; });
|
||||
argparse::ArgumentParser program("BayesNetSample");
|
||||
program.add_argument("-d", "--dataset")
|
||||
.help("Dataset file name")
|
||||
.action([valid_datasets](const std::string& value) {
|
||||
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
|
||||
return value;
|
||||
}
|
||||
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
|
||||
}
|
||||
);
|
||||
program.add_argument("-p", "--path")
|
||||
.help(" folder where the data files are located, default")
|
||||
.default_value(std::string{ PATH }
|
||||
);
|
||||
program.add_argument("-m", "--model")
|
||||
.help("Model to use " + platform::Models::instance()->tostring())
|
||||
.action([](const std::string& value) {
|
||||
static const std::vector<std::string> choices = platform::Models::instance()->getNames();
|
||||
if (find(choices.begin(), choices.end(), value) != choices.end()) {
|
||||
return value;
|
||||
}
|
||||
throw runtime_error("Model must be one of " + platform::Models::instance()->tostring());
|
||||
}
|
||||
);
|
||||
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
|
||||
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
|
||||
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
|
||||
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
|
||||
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const std::string& value) {
|
||||
try {
|
||||
auto k = stoi(value);
|
||||
if (k < 2) {
|
||||
throw runtime_error("Number of folds must be greater than 1");
|
||||
}
|
||||
return k;
|
||||
}
|
||||
catch (const runtime_error& err) {
|
||||
throw runtime_error(err.what());
|
||||
}
|
||||
catch (...) {
|
||||
throw runtime_error("Number of folds must be an integer");
|
||||
}});
|
||||
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
|
||||
bool class_last, stratified, tensors, dump_cpt;
|
||||
std::string model_name, file_name, path, complete_file_name;
|
||||
int nFolds, seed;
|
||||
try {
|
||||
program.parse_args(argc, argv);
|
||||
file_name = program.get<std::string>("dataset");
|
||||
path = program.get<std::string>("path");
|
||||
model_name = program.get<std::string>("model");
|
||||
complete_file_name = path + file_name + ".arff";
|
||||
stratified = program.get<bool>("stratified");
|
||||
tensors = program.get<bool>("tensors");
|
||||
nFolds = program.get<int>("folds");
|
||||
seed = program.get<int>("seed");
|
||||
dump_cpt = program.get<bool>("dumpcpt");
|
||||
class_last = datasets[file_name];
|
||||
if (!file_exists(complete_file_name)) {
|
||||
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
|
||||
}
|
||||
}
|
||||
catch (const exception& err) {
|
||||
cerr << err.what() << std::endl;
|
||||
cerr << program;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Begin Processing
|
||||
*/
|
||||
auto handler = ArffFiles();
|
||||
handler.load(complete_file_name, class_last);
|
||||
handler.load(name, class_last);
|
||||
// Get Dataset X, y
|
||||
std::vector<mdlp::samples_t>& X = handler.getX();
|
||||
mdlp::labels_t& y = handler.getY();
|
||||
@@ -149,87 +24,39 @@ int main(int argc, char** argv)
|
||||
auto className = handler.getClassName();
|
||||
std::vector<std::string> features;
|
||||
auto attributes = handler.getAttributes();
|
||||
transform(attributes.begin(), attributes.end(), back_inserter(features),
|
||||
[](const pair<std::string, std::string>& item) { return item.first; });
|
||||
// Discretize Dataset
|
||||
auto [Xd, maxes] = discretize(X, y, features);
|
||||
maxes[className] = *max_element(y.begin(), y.end()) + 1;
|
||||
map<std::string, std::vector<int>> states;
|
||||
for (auto feature : features) {
|
||||
states[feature] = std::vector<int>(maxes[feature]);
|
||||
}
|
||||
states[className] = std::vector<int>(maxes[className]);
|
||||
auto clf = platform::Models::instance()->create(model_name);
|
||||
clf->fit(Xd, y, features, className, states);
|
||||
if (dump_cpt) {
|
||||
std::cout << "--- CPT Tables ---" << std::endl;
|
||||
clf->dump_cpt();
|
||||
}
|
||||
auto lines = clf->show();
|
||||
for (auto line : lines) {
|
||||
std::cout << line << std::endl;
|
||||
}
|
||||
std::cout << "--- Topological Order ---" << std::endl;
|
||||
auto order = clf->topological_order();
|
||||
for (auto name : order) {
|
||||
std::cout << name << ", ";
|
||||
}
|
||||
std::cout << "end." << std::endl;
|
||||
auto score = clf->score(Xd, y);
|
||||
std::cout << "Score: " << score << std::endl;
|
||||
auto graph = clf->graph();
|
||||
auto dot_file = model_name + "_" + file_name;
|
||||
ofstream file(dot_file + ".dot");
|
||||
file << graph;
|
||||
file.close();
|
||||
std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
|
||||
std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
|
||||
std::string stratified_string = stratified ? " Stratified" : "";
|
||||
std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
|
||||
std::cout << "==========================================" << std::endl;
|
||||
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
|
||||
torch::Tensor yt = torch::tensor(y, torch::kInt32);
|
||||
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
|
||||
torch::Tensor Xd;
|
||||
auto states = map<std::string, std::vector<int>>();
|
||||
auto Xr = discretizeDataset(X, y);
|
||||
Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
|
||||
states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
|
||||
auto item = states.at(features[i]);
|
||||
iota(begin(item), end(item), 0);
|
||||
Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
|
||||
}
|
||||
float total_score = 0, total_score_train = 0, score_train, score_test;
|
||||
platform::Fold* fold;
|
||||
if (stratified)
|
||||
fold = new platform::StratifiedKFold(nFolds, y, seed);
|
||||
else
|
||||
fold = new platform::KFold(nFolds, y.size(), seed);
|
||||
for (auto i = 0; i < nFolds; ++i) {
|
||||
auto [train, test] = fold->getFold(i);
|
||||
std::cout << "Fold: " << i + 1 << std::endl;
|
||||
if (tensors) {
|
||||
auto ttrain = torch::tensor(train, torch::kInt64);
|
||||
auto ttest = torch::tensor(test, torch::kInt64);
|
||||
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
|
||||
torch::Tensor ytraint = yt.index({ ttrain });
|
||||
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
|
||||
torch::Tensor ytestt = yt.index({ ttest });
|
||||
clf->fit(Xtraint, ytraint, features, className, states);
|
||||
auto temp = clf->predict(Xtraint);
|
||||
score_train = clf->score(Xtraint, ytraint);
|
||||
score_test = clf->score(Xtestt, ytestt);
|
||||
} else {
|
||||
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
|
||||
auto [Xtest, ytest] = extract_indices(test, Xd, y);
|
||||
clf->fit(Xtrain, ytrain, features, className, states);
|
||||
score_train = clf->score(Xtrain, ytrain);
|
||||
score_test = clf->score(Xtest, ytest);
|
||||
}
|
||||
if (dump_cpt) {
|
||||
std::cout << "--- CPT Tables ---" << std::endl;
|
||||
clf->dump_cpt();
|
||||
}
|
||||
total_score_train += score_train;
|
||||
total_score += score_test;
|
||||
std::cout << "Score Train: " << score_train << std::endl;
|
||||
std::cout << "Score Test : " << score_test << std::endl;
|
||||
std::cout << "-------------------------------------------------------------------------------" << std::endl;
|
||||
states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
|
||||
iota(begin(states.at(className)), end(states.at(className)), 0);
|
||||
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
if (argc < 2) {
|
||||
std::cerr << "Usage: " << argv[0] << " <file_name>" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::cout << "**********************************************************************************" << std::endl;
|
||||
std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl;
|
||||
std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0;
|
||||
}
|
||||
std::string file_name = argv[1];
|
||||
torch::Tensor X, y;
|
||||
std::vector<std::string> features;
|
||||
std::string className;
|
||||
map<std::string, std::vector<int>> states;
|
||||
auto clf = bayesnet::BoostAODE(false); // false for not using voting in predict
|
||||
std::cout << "Library version: " << clf.getVersion() << std::endl;
|
||||
tie(X, y, features, className, states) = loadDataset(file_name, true);
|
||||
clf.fit(X, y, features, className, states);
|
||||
auto score = clf.score(X, y);
|
||||
std::cout << "File: " << file_name << " score: " << score << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -16,16 +16,20 @@ namespace bayesnet {
|
||||
virtual ~BaseClassifier() = default;
|
||||
torch::Tensor virtual predict(torch::Tensor& X) = 0;
|
||||
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
|
||||
torch::Tensor virtual predict_proba(torch::Tensor& X) = 0;
|
||||
std::vector<std::vector<double>> virtual predict_proba(std::vector<std::vector<int >>& X) = 0;
|
||||
status_t virtual getStatus() const = 0;
|
||||
float virtual score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0;
|
||||
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
|
||||
int virtual getNumberOfNodes()const = 0;
|
||||
int virtual getNumberOfEdges()const = 0;
|
||||
int virtual getNumberOfStates() const = 0;
|
||||
int virtual getClassNumStates() const = 0;
|
||||
std::vector<std::string> virtual show() const = 0;
|
||||
std::vector<std::string> virtual graph(const std::string& title = "") const = 0;
|
||||
virtual std::string getVersion() = 0;
|
||||
std::vector<std::string> virtual topological_order() = 0;
|
||||
std::vector<std::string> virtual getNotes() const = 0;
|
||||
void virtual dump_cpt()const = 0;
|
||||
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
|
||||
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
|
@@ -1,18 +0,0 @@
|
||||
#include "AODE.h"
|
||||
|
||||
namespace bayesnet {
|
||||
AODE::AODE() : Ensemble() {}
|
||||
void AODE::buildModel(const torch::Tensor& weights)
|
||||
{
|
||||
models.clear();
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
models.push_back(std::make_unique<SPODE>(i));
|
||||
}
|
||||
n_models = models.size();
|
||||
significanceModels = std::vector<double>(n_models, 1.0);
|
||||
}
|
||||
std::vector<std::string> AODE::graph(const std::string& title) const
|
||||
{
|
||||
return Ensemble::graph(title);
|
||||
}
|
||||
}
|
@@ -1,12 +0,0 @@
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
|
||||
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
|
||||
include_directories(${Python3_INCLUDE_DIRS})
|
||||
|
||||
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
|
||||
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
|
||||
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
|
||||
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
|
@@ -1,141 +0,0 @@
|
||||
#include "Ensemble.h"
|
||||
|
||||
namespace bayesnet {
|
||||
|
||||
// Build an empty ensemble: the Classifier base receives a default-constructed
// Network and the model counter starts at zero.
Ensemble::Ensemble()
    : Classifier(Network()),
      n_models(0)
{
}
|
||||
|
||||
// Refresh n_models from the current model list and fit every model with the
// ensemble's dataset, features, className and states.
// The `weights` argument is not used by this implementation.
void Ensemble::trainModel(const torch::Tensor& weights)
{
    n_models = models.size();
    for (auto& model : models) {
        model->fit(dataset, features, className, states);
    }
}
|
||||
std::vector<int> Ensemble::voting(torch::Tensor& y_pred)
|
||||
{
|
||||
auto y_pred_ = y_pred.accessor<int, 2>();
|
||||
std::vector<int> y_pred_final;
|
||||
int numClasses = states.at(className).size();
|
||||
// y_pred is m x n_models with the prediction of every model for each sample
|
||||
for (int i = 0; i < y_pred.size(0); ++i) {
|
||||
// votes store in each index (value of class) the significance added by each model
|
||||
// i.e. votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions
|
||||
std::vector<double> votes(numClasses, 0.0);
|
||||
for (int j = 0; j < n_models; ++j) {
|
||||
votes[y_pred_[i][j]] += significanceModels.at(j);
|
||||
}
|
||||
// argsort in descending order
|
||||
auto indices = argsort(votes);
|
||||
y_pred_final.push_back(indices[0]);
|
||||
}
|
||||
return y_pred_final;
|
||||
}
|
||||
// Predict the class of every sample in X (X.size(1) is used as the number of samples).
// One thread per model computes that model's predictions; each result is written
// into its own column of y_pred under a mutex, and the columns are then combined by voting().
// Throws std::logic_error if the ensemble has not been fitted.
torch::Tensor Ensemble::predict(torch::Tensor& X)
{
    if (!fitted) {
        throw std::logic_error("Ensemble has not been fitted");
    }
    torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
    std::mutex mtx;
    std::vector<std::thread> workers;
    for (auto i = 0; i < n_models; ++i) {
        workers.emplace_back([&, i]() {
            auto ypredict = models[i]->predict(X);
            // serialise the writes into the shared result tensor
            std::lock_guard<std::mutex> lock(mtx);
            y_pred.index_put_({ "...", i }, ypredict);
            });
    }
    for (auto& worker : workers) {
        worker.join();
    }
    return torch::tensor(voting(y_pred));
}
|
||||
std::vector<int> Ensemble::predict(std::vector<std::vector<int>>& X)
|
||||
{
|
||||
if (!fitted) {
|
||||
throw std::logic_error("Ensemble has not been fitted");
|
||||
}
|
||||
long m_ = X[0].size();
|
||||
long n_ = X.size();
|
||||
std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
|
||||
for (auto i = 0; i < n_; i++) {
|
||||
Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
|
||||
}
|
||||
torch::Tensor y_pred = torch::zeros({ m_, n_models }, torch::kInt32);
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), torch::kInt32));
|
||||
}
|
||||
return voting(y_pred);
|
||||
}
|
||||
// Accuracy of the ensemble on (X, y): the fraction of positions where predict(X)
// equals y, compared element by element as ints.
// Throws std::logic_error if the ensemble has not been fitted.
float Ensemble::score(torch::Tensor& X, torch::Tensor& y)
{
    if (!fitted) {
        throw std::logic_error("Ensemble has not been fitted");
    }
    auto y_pred = predict(X);
    int hits = 0;
    for (int i = 0; i < y_pred.size(0); ++i) {
        const bool same = y_pred[i].item<int>() == y[i].item<int>();
        if (same) {
            ++hits;
        }
    }
    return static_cast<double>(hits) / y_pred.size(0);
}
|
||||
float Ensemble::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
|
||||
{
|
||||
if (!fitted) {
|
||||
throw std::logic_error("Ensemble has not been fitted");
|
||||
}
|
||||
auto y_pred = predict(X);
|
||||
int correct = 0;
|
||||
for (int i = 0; i < y_pred.size(); ++i) {
|
||||
if (y_pred[i] == y[i]) {
|
||||
correct++;
|
||||
}
|
||||
}
|
||||
return (double)correct / y_pred.size();
|
||||
}
|
||||
std::vector<std::string> Ensemble::show() const
|
||||
{
|
||||
auto result = std::vector<std::string>();
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
auto res = models[i]->show();
|
||||
result.insert(result.end(), res.begin(), res.end());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
std::vector<std::string> Ensemble::graph(const std::string& title) const
|
||||
{
|
||||
auto result = std::vector<std::string>();
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
auto res = models[i]->graph(title + "_" + std::to_string(i));
|
||||
result.insert(result.end(), res.begin(), res.end());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
int Ensemble::getNumberOfNodes() const
|
||||
{
|
||||
int nodes = 0;
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
nodes += models[i]->getNumberOfNodes();
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
int Ensemble::getNumberOfEdges() const
|
||||
{
|
||||
int edges = 0;
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
edges += models[i]->getNumberOfEdges();
|
||||
}
|
||||
return edges;
|
||||
}
|
||||
int Ensemble::getNumberOfStates() const
|
||||
{
|
||||
int nstates = 0;
|
||||
for (auto i = 0; i < n_models; ++i) {
|
||||
nstates += models[i]->getNumberOfStates();
|
||||
}
|
||||
return nstates;
|
||||
}
|
||||
}
|
@@ -1,25 +0,0 @@
|
||||
|
||||
#include "bayesnetUtils.h"
|
||||
namespace bayesnet {
|
||||
// Return the indices of nums ordered by descending value.
// The sort is stable, so equal values keep their original relative order:
// among tied entries the lowest index comes first, which makes the result
// deterministic (a plain std::sort leaves the order of ties unspecified).
std::vector<int> argsort(std::vector<double>& nums)
{
    std::vector<int> indices(nums.size());
    std::iota(indices.begin(), indices.end(), 0);
    std::stable_sort(indices.begin(), indices.end(),
        [&nums](int lhs, int rhs) { return nums[lhs] > nums[rhs]; });
    return indices;
}
|
||||
std::vector<std::vector<int>> tensorToVector(torch::Tensor& tensor)
|
||||
{
|
||||
// convert mxn tensor to nxm std::vector
|
||||
std::vector<std::vector<int>> result;
|
||||
// Iterate over cols
|
||||
for (int i = 0; i < tensor.size(1); ++i) {
|
||||
auto col_tensor = tensor.index({ "...", i });
|
||||
auto col = std::vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + tensor.size(0));
|
||||
result.push_back(col);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
18
src/CMakeLists.txt
Normal file
18
src/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
# Header search paths: bundled third-party libraries first ...
include_directories(
    ${BayesNet_SOURCE_DIR}/lib/mdlp
    ${BayesNet_SOURCE_DIR}/lib/Files
    ${BayesNet_SOURCE_DIR}/lib/folding
    ${BayesNet_SOURCE_DIR}/lib/json/include
)
# ... then the library's own source folders ...
include_directories(
    ${BayesNet_SOURCE_DIR}/src
    ${BayesNet_SOURCE_DIR}/src/feature_selection
    ${BayesNet_SOURCE_DIR}/src/bayesian_network
    ${BayesNet_SOURCE_DIR}/src/classifiers
    ${BayesNet_SOURCE_DIR}/src/ensembles
    ${BayesNet_SOURCE_DIR}/src/utils
)
# ... and finally the headers generated at configure time.
include_directories(${CMAKE_BINARY_DIR}/configured_files/include)

# Every .cc file found below this directory is compiled into the library.
file(GLOB_RECURSE Sources "*.cc")

add_library(BayesNet ${Sources})
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
|
@@ -1,343 +0,0 @@
|
||||
#include <filesystem>
|
||||
#include <set>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "BestResults.h"
|
||||
#include "Result.h"
|
||||
#include "Colors.h"
|
||||
#include "Statistics.h"
|
||||
#include "BestResultsExcel.h"
|
||||
#include "CLocale.h"
|
||||
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
// Function ftime_to_string, code taken from
// https://stackoverflow.com/a/58237530/1389271
// Formats a time point of any clock as "YYYY-MM-DD HH:MM" (UTC). The time point
// is first re-expressed on std::chrono::system_clock by shifting it by the
// difference between the two clocks' current times.
template <typename TP>
std::string ftime_to_string(TP tp)
{
    using system_clock = std::chrono::system_clock;
    auto systemPoint = std::chrono::time_point_cast<system_clock::duration>(tp - TP::clock::now()
        + system_clock::now());
    auto seconds = system_clock::to_time_t(systemPoint);
    std::stringstream formatted;
    formatted << std::put_time(std::gmtime(&seconds), "%Y-%m-%d %H:%M");
    return formatted.str();
}
|
||||
namespace platform {
|
||||
std::string BestResults::build()
|
||||
{
|
||||
auto files = loadResultFiles();
|
||||
if (files.size() == 0) {
|
||||
std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
json bests;
|
||||
for (const auto& file : files) {
|
||||
auto result = Result(path, file);
|
||||
auto data = result.load();
|
||||
for (auto const& item : data.at("results")) {
|
||||
bool update = false;
|
||||
// Check if results file contains only one dataset
|
||||
auto datasetName = item.at("dataset").get<std::string>();
|
||||
if (bests.contains(datasetName)) {
|
||||
if (item.at("score").get<double>() > bests[datasetName].at(0).get<double>()) {
|
||||
update = true;
|
||||
}
|
||||
} else {
|
||||
update = true;
|
||||
}
|
||||
if (update) {
|
||||
bests[datasetName] = { item.at("score").get<double>(), item.at("hyperparameters"), file };
|
||||
}
|
||||
}
|
||||
}
|
||||
std::string bestFileName = path + bestResultFile();
|
||||
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
|
||||
fclose(fileTest);
|
||||
std::cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << std::endl;
|
||||
}
|
||||
std::ofstream file(bestFileName);
|
||||
file << bests;
|
||||
file.close();
|
||||
return bestFileName;
|
||||
}
|
||||
std::string BestResults::bestResultFile()
|
||||
{
|
||||
return "best_results_" + score + "_" + model + ".json";
|
||||
}
|
||||
// Extracts {model, score} from a result file name. The score is the text
// between the first and second underscore and the model the text between the
// second and third, e.g.
// results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
// yields {"BoostAODE", "accuracy"}.
std::pair<std::string, std::string> getModelScore(std::string name)
{
    const auto first = name.find('_');
    const auto second = name.find('_', first + 1);
    const auto third = name.find('_', second + 1);
    std::string score = name.substr(first + 1, second - first - 1);
    std::string model = name.substr(second + 1, third - second - 1);
    return { model, score };
}
|
||||
std::vector<std::string> BestResults::loadResultFiles()
|
||||
{
|
||||
std::vector<std::string> files;
|
||||
using std::filesystem::directory_iterator;
|
||||
std::string fileModel, fileScore;
|
||||
for (const auto& file : directory_iterator(path)) {
|
||||
auto fileName = file.path().filename().string();
|
||||
if (fileName.find(".json") != std::string::npos && fileName.find("results_") == 0) {
|
||||
tie(fileModel, fileScore) = getModelScore(fileName);
|
||||
if (score == fileScore && (model == fileModel || model == "any")) {
|
||||
files.push_back(fileName);
|
||||
}
|
||||
}
|
||||
}
|
||||
return files;
|
||||
}
|
||||
// Parses the JSON document stored in fileName.
// Throws std::invalid_argument when the file cannot be opened.
json BestResults::loadFile(const std::string& fileName)
{
    std::ifstream input(fileName);
    if (!input.is_open()) {
        throw std::invalid_argument("Unable to open result file. [" + fileName + "]");
    }
    return json::parse(input);
}
|
||||
std::vector<std::string> BestResults::getModels()
|
||||
{
|
||||
std::set<std::string> models;
|
||||
std::vector<std::string> result;
|
||||
auto files = loadResultFiles();
|
||||
if (files.size() == 0) {
|
||||
std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
std::string fileModel, fileScore;
|
||||
for (const auto& file : files) {
|
||||
// extract the model from the file name
|
||||
tie(fileModel, fileScore) = getModelScore(file);
|
||||
// add the model to the std::vector of models
|
||||
models.insert(fileModel);
|
||||
}
|
||||
result = std::vector<std::string>(models.begin(), models.end());
|
||||
return result;
|
||||
}
|
||||
std::vector<std::string> BestResults::getDatasets(json table)
|
||||
{
|
||||
std::vector<std::string> datasets;
|
||||
for (const auto& dataset : table.items()) {
|
||||
datasets.push_back(dataset.key());
|
||||
}
|
||||
return datasets;
|
||||
}
|
||||
void BestResults::buildAll()
|
||||
{
|
||||
auto models = getModels();
|
||||
for (const auto& model : models) {
|
||||
std::cout << "Building best results for model: " << model << std::endl;
|
||||
this->model = model;
|
||||
build();
|
||||
}
|
||||
model = "any";
|
||||
}
|
||||
void BestResults::listFile()
|
||||
{
|
||||
std::string bestFileName = path + bestResultFile();
|
||||
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
|
||||
fclose(fileTest);
|
||||
} else {
|
||||
std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
auto temp = ConfigLocale();
|
||||
auto date = ftime_to_string(std::filesystem::last_write_time(bestFileName));
|
||||
auto data = loadFile(bestFileName);
|
||||
auto datasets = getDatasets(data);
|
||||
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
|
||||
int maxFileName = 0;
|
||||
int maxHyper = 15;
|
||||
for (auto const& item : data.items()) {
|
||||
maxHyper = std::max(maxHyper, (int)item.value().at(1).dump().size());
|
||||
maxFileName = std::max(maxFileName, (int)item.value().at(2).get<std::string>().size());
|
||||
}
|
||||
std::stringstream oss;
|
||||
oss << Colors::GREEN() << "Best results for " << model << " as of " << date << std::endl;
|
||||
std::cout << oss.str();
|
||||
std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
|
||||
std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << "Dataset" << "Score " << std::setw(maxFileName) << "File" << " Hyperparameters" << std::endl;
|
||||
std::cout << "=== " << std::string(maxDatasetName, '=') << " =========== " << std::string(maxFileName, '=') << " " << std::string(maxHyper, '=') << std::endl;
|
||||
auto i = 0;
|
||||
bool odd = true;
|
||||
double total = 0;
|
||||
for (auto const& item : data.items()) {
|
||||
auto color = odd ? Colors::BLUE() : Colors::CYAN();
|
||||
double value = item.value().at(0).get<double>();
|
||||
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
|
||||
std::cout << std::setw(maxDatasetName) << std::left << item.key() << " ";
|
||||
std::cout << std::setw(11) << std::setprecision(9) << std::fixed << value << " ";
|
||||
std::cout << std::setw(maxFileName) << item.value().at(2).get<std::string>() << " ";
|
||||
std::cout << item.value().at(1) << " ";
|
||||
std::cout << std::endl;
|
||||
total += value;
|
||||
odd = !odd;
|
||||
}
|
||||
std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ===========" << std::endl;
|
||||
std::cout << std::setw(5 + maxDatasetName) << "Total.................. " << std::setw(11) << std::setprecision(8) << std::fixed << total << std::endl;
|
||||
}
|
||||
// Loads the best-results file of every model into one JSON object keyed by
// model name. The extra key "dateTable" holds the formatted write time of the
// least recently written of those files.
// Side effect: the model member is left set to the last model processed.
// Terminates the process with exit(1) when a best-results file is missing.
json BestResults::buildTableResults(std::vector<std::string> models)
{
    json table;
    // Starts at the largest representable time and only ever moves earlier
    auto oldestWrite = std::filesystem::file_time_type::max();
    for (const auto& current : models) {
        model = current;
        std::string bestFileName = path + bestResultFile();
        FILE* probe = fopen(bestFileName.c_str(), "r");
        if (probe == nullptr) {
            std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl;
            exit(1);
        }
        fclose(probe);
        auto written = std::filesystem::last_write_time(bestFileName);
        if (written < oldestWrite) {
            oldestWrite = written;
        }
        table[current] = loadFile(bestFileName);
    }
    table["dateTable"] = ftime_to_string(oldestWrite);
    return table;
}
|
||||
void BestResults::printTableResults(std::vector<std::string> models, json table)
|
||||
{
|
||||
std::stringstream oss;
|
||||
oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<std::string>() << std::endl;
|
||||
std::cout << oss.str();
|
||||
std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
|
||||
std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset");
|
||||
for (const auto& model : models) {
|
||||
std::cout << std::setw(maxModelName) << std::left << model << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
std::cout << "=== " << std::string(maxDatasetName, '=') << " ";
|
||||
for (const auto& model : models) {
|
||||
std::cout << std::string(maxModelName, '=') << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
auto i = 0;
|
||||
bool odd = true;
|
||||
std::map<std::string, double> totals;
|
||||
int nDatasets = table.begin().value().size();
|
||||
for (const auto& model : models) {
|
||||
totals[model] = 0.0;
|
||||
}
|
||||
auto datasets = getDatasets(table.begin().value());
|
||||
for (auto const& dataset : datasets) {
|
||||
auto color = odd ? Colors::BLUE() : Colors::CYAN();
|
||||
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
|
||||
std::cout << std::setw(maxDatasetName) << std::left << dataset << " ";
|
||||
double maxValue = 0;
|
||||
// Find out the max value for this dataset
|
||||
for (const auto& model : models) {
|
||||
double value = table[model].at(dataset).at(0).get<double>();
|
||||
if (value > maxValue) {
|
||||
maxValue = value;
|
||||
}
|
||||
}
|
||||
// Print the row with red colors on max values
|
||||
for (const auto& model : models) {
|
||||
std::string efectiveColor = color;
|
||||
double value = table[model].at(dataset).at(0).get<double>();
|
||||
if (value == maxValue) {
|
||||
efectiveColor = Colors::RED();
|
||||
}
|
||||
totals[model] += value;
|
||||
std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
odd = !odd;
|
||||
}
|
||||
std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ";
|
||||
for (const auto& model : models) {
|
||||
std::cout << std::string(maxModelName, '=') << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
std::cout << Colors::GREEN() << std::setw(5 + maxDatasetName) << " Totals...................";
|
||||
double max = 0.0;
|
||||
for (const auto& total : totals) {
|
||||
if (total.second > max) {
|
||||
max = total.second;
|
||||
}
|
||||
}
|
||||
for (const auto& model : models) {
|
||||
std::string efectiveColor = Colors::GREEN();
|
||||
if (totals[model] == max) {
|
||||
efectiveColor = Colors::RED();
|
||||
}
|
||||
std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << totals[model] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
void BestResults::reportSingle(bool excel)
|
||||
{
|
||||
listFile();
|
||||
if (excel) {
|
||||
auto models = getModels();
|
||||
// Build the table of results
|
||||
json table = buildTableResults(models);
|
||||
std::vector<std::string> datasets = getDatasets(table.begin().value());
|
||||
BestResultsExcel excel(score, datasets);
|
||||
excel.reportSingle(model, path + bestResultFile());
|
||||
messageExcelFile(excel.getFileName());
|
||||
}
|
||||
}
|
||||
void BestResults::reportAll(bool excel)
|
||||
{
|
||||
auto models = getModels();
|
||||
// Build the table of results
|
||||
json table = buildTableResults(models);
|
||||
std::vector<std::string> datasets = getDatasets(table.begin().value());
|
||||
maxModelName = (*max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
|
||||
maxModelName = std::max(12, maxModelName);
|
||||
maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
|
||||
maxDatasetName = std::max(25, maxDatasetName);
|
||||
// Print the table of results
|
||||
printTableResults(models, table);
|
||||
// Compute the Friedman test
|
||||
std::map<std::string, std::map<std::string, float>> ranksModels;
|
||||
if (friedman) {
|
||||
Statistics stats(models, datasets, table, significance);
|
||||
auto result = stats.friedmanTest();
|
||||
stats.postHocHolmTest(result);
|
||||
ranksModels = stats.getRanks();
|
||||
}
|
||||
if (excel) {
|
||||
BestResultsExcel excel(score, datasets);
|
||||
excel.reportAll(models, table, ranksModels, friedman, significance);
|
||||
if (friedman) {
|
||||
int idx = -1;
|
||||
double min = 2000;
|
||||
// Find out the control model
|
||||
auto totals = std::vector<double>(models.size(), 0.0);
|
||||
for (const auto& dataset : datasets) {
|
||||
for (int i = 0; i < models.size(); ++i) {
|
||||
totals[i] += ranksModels[dataset][models[i]];
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < models.size(); ++i) {
|
||||
if (totals[i] < min) {
|
||||
min = totals[i];
|
||||
idx = i;
|
||||
}
|
||||
}
|
||||
model = models.at(idx);
|
||||
excel.reportSingle(model, path + bestResultFile());
|
||||
}
|
||||
messageExcelFile(excel.getFileName());
|
||||
}
|
||||
}
|
||||
void BestResults::messageExcelFile(const std::string& fileName)
|
||||
{
|
||||
std::cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << std::endl;
|
||||
}
|
||||
}
|
@@ -1,36 +0,0 @@
|
||||
#ifndef BESTRESULTS_H
|
||||
#define BESTRESULTS_H
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
using json = nlohmann::json;
|
||||
namespace platform {
|
||||
class BestResults {
|
||||
public:
|
||||
explicit BestResults(const std::string& path, const std::string& score, const std::string& model, bool friedman, double significance = 0.05)
|
||||
: path(path), score(score), model(model), friedman(friedman), significance(significance)
|
||||
{
|
||||
}
|
||||
std::string build();
|
||||
void reportSingle(bool excel);
|
||||
void reportAll(bool excel);
|
||||
void buildAll();
|
||||
private:
|
||||
std::vector<std::string> getModels();
|
||||
std::vector<std::string> getDatasets(json table);
|
||||
std::vector<std::string> loadResultFiles();
|
||||
void messageExcelFile(const std::string& fileName);
|
||||
json buildTableResults(std::vector<std::string> models);
|
||||
void printTableResults(std::vector<std::string> models, json table);
|
||||
std::string bestResultFile();
|
||||
json loadFile(const std::string& fileName);
|
||||
void listFile();
|
||||
std::string path;
|
||||
std::string score;
|
||||
std::string model;
|
||||
bool friedman;
|
||||
double significance;
|
||||
int maxModelName = 0;
|
||||
int maxDatasetName = 0;
|
||||
};
|
||||
}
|
||||
#endif //BESTRESULTS_H
|
@@ -1,300 +0,0 @@
|
||||
#include <sstream>
|
||||
#include "BestResultsExcel.h"
|
||||
#include "Paths.h"
|
||||
#include <map>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "Statistics.h"
|
||||
#include "ReportExcel.h"
|
||||
|
||||
namespace platform {
|
||||
// Parses the JSON document stored in fileName.
// Throws std::invalid_argument when the file cannot be opened.
json loadResultData(const std::string& fileName)
{
    std::ifstream input(fileName);
    if (!input.is_open()) {
        throw std::invalid_argument("Unable to open result file. [" + fileName + "]");
    }
    return json::parse(input);
}
|
||||
// Converts a zero-based column index into its spreadsheet column letters:
// 0 -> "A", 25 -> "Z", 26 -> "AA", 27 -> "AB", 701 -> "ZZ", 702 -> "AAA".
// A negative index yields an empty string.
std::string getColumnName(int colNum)
{
    std::string columnName;
    // Column letters form a bijective base-26 numeral (there is no zero
    // digit), hence the "- 1" when moving on to the next more significant
    // letter. The previous plain base-26 loop returned "BA" for column 26.
    for (int remaining = colNum; remaining >= 0; remaining = remaining / 26 - 1) {
        columnName.insert(columnName.begin(), static_cast<char>('A' + remaining % 26));
    }
    return columnName;
}
|
||||
// Creates the workbook Paths::excel() + fileName, sets its properties and
// formats, and widens the dataset column to fit the longest dataset name.
BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets) : score(score), datasets(datasets)
{
    workbook = workbook_new((Paths::excel() + fileName).c_str());
    setProperties("Best Results");
    // datasetNameSize starts at its minimum width; a plain loop keeps this
    // well defined for an empty list (max_element would return end()).
    for (const auto& dataset : datasets) {
        datasetNameSize = std::max(datasetNameSize, static_cast<int>(dataset.size()));
    }
    createFormats();
}
|
||||
void BestResultsExcel::reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance)
|
||||
{
|
||||
this->table = table;
|
||||
this->models = models;
|
||||
ranksModels = ranks;
|
||||
this->friedman = friedman;
|
||||
this->significance = significance;
|
||||
worksheet = workbook_add_worksheet(workbook, "Best Results");
|
||||
int maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
|
||||
modelNameSize = std::max(modelNameSize, maxModelName);
|
||||
formatColumns();
|
||||
build();
|
||||
}
|
||||
void BestResultsExcel::reportSingle(const std::string& model, const std::string& fileName)
|
||||
{
|
||||
worksheet = workbook_add_worksheet(workbook, "Report");
|
||||
if (FILE* fileTest = fopen(fileName.c_str(), "r")) {
|
||||
fclose(fileTest);
|
||||
} else {
|
||||
std::cerr << "File " << fileName << " doesn't exist." << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
json data = loadResultData(fileName);
|
||||
|
||||
std::string title = "Best results for " + model;
|
||||
worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]);
|
||||
// Body header
|
||||
row = 3;
|
||||
int col = 1;
|
||||
writeString(row, 0, "Nº", "bodyHeader");
|
||||
writeString(row, 1, "Dataset", "bodyHeader");
|
||||
writeString(row, 2, "Score", "bodyHeader");
|
||||
writeString(row, 3, "File", "bodyHeader");
|
||||
writeString(row, 4, "Hyperparameters", "bodyHeader");
|
||||
auto i = 0;
|
||||
std::string hyperparameters;
|
||||
int hypSize = 22;
|
||||
std::map<std::string, std::string> files; // map of files imported and their tabs
|
||||
for (auto const& item : data.items()) {
|
||||
row++;
|
||||
writeInt(row, 0, i++, "ints");
|
||||
writeString(row, 1, item.key().c_str(), "text");
|
||||
writeDouble(row, 2, item.value().at(0).get<double>(), "result");
|
||||
auto fileName = item.value().at(2).get<std::string>();
|
||||
std::string hyperlink = "";
|
||||
try {
|
||||
hyperlink = files.at(fileName);
|
||||
}
|
||||
catch (const std::out_of_range& oor) {
|
||||
auto tabName = "table_" + std::to_string(i);
|
||||
auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str());
|
||||
json data = loadResultData(Paths::results() + fileName);
|
||||
auto report = ReportExcel(data, false, workbook, worksheetNew);
|
||||
report.show();
|
||||
hyperlink = "#table_" + std::to_string(i);
|
||||
files[fileName] = hyperlink;
|
||||
}
|
||||
hyperlink += "!H" + std::to_string(i + 6);
|
||||
std::string fileNameText = "=HYPERLINK(\"" + hyperlink + "\",\"" + fileName + "\")";
|
||||
worksheet_write_formula(worksheet, row, 3, fileNameText.c_str(), efectiveStyle("text"));
|
||||
hyperparameters = item.value().at(1).dump();
|
||||
if (hyperparameters.size() > hypSize) {
|
||||
hypSize = hyperparameters.size();
|
||||
}
|
||||
writeString(row, 4, hyperparameters, "text");
|
||||
}
|
||||
row++;
|
||||
// Set Totals
|
||||
writeString(row, 1, "Total", "bodyHeader");
|
||||
std::stringstream oss;
|
||||
auto colName = getColumnName(2);
|
||||
oss << "=sum(" << colName << "5:" << colName << row << ")";
|
||||
worksheet_write_formula(worksheet, row, 2, oss.str().c_str(), styles["bodyHeader_odd"]);
|
||||
// Set format
|
||||
worksheet_freeze_panes(worksheet, 4, 2);
|
||||
std::vector<int> columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 };
|
||||
for (int i = 0; i < columns_sizes.size(); ++i) {
|
||||
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
|
||||
}
|
||||
}
|
||||
// Closes the workbook when the report object goes away.
BestResultsExcel::~BestResultsExcel()
{
    workbook_close(workbook);
}
|
||||
void BestResultsExcel::formatColumns()
|
||||
{
|
||||
worksheet_freeze_panes(worksheet, 4, 2);
|
||||
std::vector<int> columns_sizes = { 5, datasetNameSize };
|
||||
for (int i = 0; i < models.size(); ++i) {
|
||||
columns_sizes.push_back(modelNameSize);
|
||||
}
|
||||
for (int i = 0; i < columns_sizes.size(); ++i) {
|
||||
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
|
||||
}
|
||||
}
|
||||
void BestResultsExcel::addConditionalFormat(std::string formula)
|
||||
{
|
||||
// Add conditional format for max/min values in scores/ranks sheets
|
||||
lxw_format* custom_format = workbook_add_format(workbook);
|
||||
format_set_bg_color(custom_format, 0xFFC7CE);
|
||||
format_set_font_color(custom_format, 0x9C0006);
|
||||
// Create a conditional format object. A static object would also work.
|
||||
lxw_conditional_format* conditional_format = (lxw_conditional_format*)calloc(1, sizeof(lxw_conditional_format));
|
||||
conditional_format->type = LXW_CONDITIONAL_TYPE_FORMULA;
|
||||
std::string col = getColumnName(models.size() + 1);
|
||||
std::stringstream oss;
|
||||
oss << "=C5=" << formula << "($C5:$" << col << "5)";
|
||||
auto formulaValue = oss.str();
|
||||
conditional_format->value_string = formulaValue.c_str();
|
||||
conditional_format->format = custom_format;
|
||||
worksheet_conditional_format_range(worksheet, 4, 2, datasets.size() + 3, models.size() + 1, conditional_format);
|
||||
}
|
||||
void BestResultsExcel::build()
|
||||
{
|
||||
// Create Sheet with scores
|
||||
header(false);
|
||||
body(false);
|
||||
// Add conditional format for max values
|
||||
addConditionalFormat("max");
|
||||
footer(false);
|
||||
if (friedman) {
|
||||
// Create Sheet with ranks
|
||||
worksheet = workbook_add_worksheet(workbook, "Ranks");
|
||||
formatColumns();
|
||||
header(true);
|
||||
body(true);
|
||||
addConditionalFormat("min");
|
||||
footer(true);
|
||||
// Create Sheet with Friedman Test
|
||||
doFriedman();
|
||||
}
|
||||
}
|
||||
std::string BestResultsExcel::getFileName()
|
||||
{
|
||||
return Paths::excel() + fileName;
|
||||
}
|
||||
void BestResultsExcel::header(bool ranks)
|
||||
{
|
||||
row = 0;
|
||||
std::string message = ranks ? "Ranks for score " + score : "Best results for " + score;
|
||||
worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), message.c_str(), styles["headerFirst"]);
|
||||
// Body header
|
||||
row = 3;
|
||||
int col = 1;
|
||||
writeString(row, 0, "Nº", "bodyHeader");
|
||||
writeString(row, 1, "Dataset", "bodyHeader");
|
||||
for (const auto& model : models) {
|
||||
writeString(row, ++col, model.c_str(), "bodyHeader");
|
||||
}
|
||||
}
|
||||
void BestResultsExcel::body(bool ranks)
|
||||
{
|
||||
row = 4;
|
||||
int i = 0;
|
||||
json origin = table.begin().value();
|
||||
for (auto const& item : origin.items()) {
|
||||
writeInt(row, 0, i++, "ints");
|
||||
writeString(row, 1, item.key().c_str(), "text");
|
||||
int col = 1;
|
||||
for (const auto& model : models) {
|
||||
double value = ranks ? ranksModels[item.key()][model] : table[model].at(item.key()).at(0).get<double>();
|
||||
writeDouble(row, ++col, value, "result");
|
||||
}
|
||||
++row;
|
||||
}
|
||||
}
|
||||
void BestResultsExcel::footer(bool ranks)
|
||||
{
|
||||
// Set Totals
|
||||
writeString(row, 1, "Total", "bodyHeader");
|
||||
int col = 1;
|
||||
for (const auto& model : models) {
|
||||
std::stringstream oss;
|
||||
auto colName = getColumnName(col + 1);
|
||||
oss << "=SUM(" << colName << "5:" << colName << row << ")";
|
||||
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
|
||||
}
|
||||
if (ranks) {
|
||||
row++;
|
||||
writeString(row, 1, "Average ranks", "bodyHeader");
|
||||
int col = 1;
|
||||
for (const auto& model : models) {
|
||||
auto colName = getColumnName(col + 1);
|
||||
std::stringstream oss;
|
||||
oss << "=SUM(" << colName << "5:" << colName << row - 1 << ")/" << datasets.size();
|
||||
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
|
||||
}
|
||||
}
|
||||
}
|
||||
void BestResultsExcel::doFriedman()
|
||||
{
|
||||
worksheet = workbook_add_worksheet(workbook, "Friedman");
|
||||
std::vector<int> columns_sizes = { 5, datasetNameSize };
|
||||
for (int i = 0; i < models.size(); ++i) {
|
||||
columns_sizes.push_back(modelNameSize);
|
||||
}
|
||||
for (int i = 0; i < columns_sizes.size(); ++i) {
|
||||
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
|
||||
}
|
||||
worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), "Friedman Test", styles["headerFirst"]);
|
||||
row = 2;
|
||||
Statistics stats(models, datasets, table, significance, false);
|
||||
auto result = stats.friedmanTest();
|
||||
stats.postHocHolmTest(result);
|
||||
auto friedmanResult = stats.getFriedmanResult();
|
||||
auto holmResult = stats.getHolmResult();
|
||||
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between all the classifiers.'", styles["headerSmall"]);
|
||||
row += 2;
|
||||
writeString(row, 1, "Friedman Q", "bodyHeader");
|
||||
writeDouble(row, 2, friedmanResult.statistic, "bodyHeader");
|
||||
row++;
|
||||
writeString(row, 1, "Critical χ2 value", "bodyHeader");
|
||||
writeDouble(row, 2, friedmanResult.criticalValue, "bodyHeader");
|
||||
row++;
|
||||
writeString(row, 1, "p-value", "bodyHeader");
|
||||
writeDouble(row, 2, friedmanResult.pvalue, "bodyHeader");
|
||||
writeString(row, 3, friedmanResult.reject ? "<" : ">", "bodyHeader");
|
||||
writeDouble(row, 4, significance, "bodyHeader");
|
||||
writeString(row, 5, friedmanResult.reject ? "Reject H0" : "Accept H0", "bodyHeader");
|
||||
row += 3;
|
||||
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Holm Test", styles["headerFirst"]);
|
||||
row += 2;
|
||||
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between the control model and the other models.'", styles["headerSmall"]);
|
||||
row += 2;
|
||||
std::string controlModel = "Control Model: " + holmResult.model;
|
||||
worksheet_merge_range(worksheet, row, 1, row, 7, controlModel.c_str(), styles["bodyHeader_odd"]);
|
||||
row++;
|
||||
writeString(row, 1, "Model", "bodyHeader");
|
||||
writeString(row, 2, "p-value", "bodyHeader");
|
||||
writeString(row, 3, "Rank", "bodyHeader");
|
||||
writeString(row, 4, "Win", "bodyHeader");
|
||||
writeString(row, 5, "Tie", "bodyHeader");
|
||||
writeString(row, 6, "Loss", "bodyHeader");
|
||||
writeString(row, 7, "Reject H0", "bodyHeader");
|
||||
row++;
|
||||
bool first = true;
|
||||
for (const auto& item : holmResult.holmLines) {
|
||||
writeString(row, 1, item.model, "text");
|
||||
if (first) {
|
||||
// Control model info
|
||||
first = false;
|
||||
writeString(row, 2, "", "text");
|
||||
writeDouble(row, 3, item.rank, "result");
|
||||
writeString(row, 4, "", "text");
|
||||
writeString(row, 5, "", "text");
|
||||
writeString(row, 6, "", "text");
|
||||
writeString(row, 7, "", "textCentered");
|
||||
} else {
|
||||
// Rest of the models info
|
||||
writeDouble(row, 2, item.pvalue, "result");
|
||||
writeDouble(row, 3, item.rank, "result");
|
||||
writeInt(row, 4, item.wtl.win, "ints");
|
||||
writeInt(row, 5, item.wtl.tie, "ints");
|
||||
writeInt(row, 6, item.wtl.loss, "ints");
|
||||
writeString(row, 7, item.reject ? "Yes" : "No", "textCentered");
|
||||
}
|
||||
row++;
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,39 +0,0 @@
|
||||
#ifndef BESTRESULTS_EXCEL_H
|
||||
#define BESTRESULTS_EXCEL_H
|
||||
#include "ExcelFile.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace platform {
|
||||
|
||||
class BestResultsExcel : ExcelFile {
|
||||
public:
|
||||
BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets);
|
||||
~BestResultsExcel();
|
||||
void reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance);
|
||||
void reportSingle(const std::string& model, const std::string& fileName);
|
||||
std::string getFileName();
|
||||
private:
|
||||
void build();
|
||||
void header(bool ranks);
|
||||
void body(bool ranks);
|
||||
void footer(bool ranks);
|
||||
void formatColumns();
|
||||
void doFriedman();
|
||||
void addConditionalFormat(std::string formula);
|
||||
const std::string fileName = "BestResults.xlsx";
|
||||
std::string score;
|
||||
std::vector<std::string> models;
|
||||
std::vector<std::string> datasets;
|
||||
json table;
|
||||
std::map<std::string, std::map<std::string, float>> ranksModels;
|
||||
bool friedman;
|
||||
double significance;
|
||||
int modelNameSize = 12; // Min size of the column
|
||||
int datasetNameSize = 25; // Min size of the column
|
||||
};
|
||||
}
|
||||
#endif //BESTRESULTS_EXCEL_H
|
@@ -1,28 +0,0 @@
|
||||
#ifndef BESTSCORE_H
|
||||
#define BESTSCORE_H
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include "DotEnv.h"
|
||||
namespace platform {
|
||||
class BestScore {
|
||||
public:
|
||||
static std::pair<std::string, double> getScore(const std::string& metric)
|
||||
{
|
||||
static std::map<std::pair<std::string, std::string>, std::pair<std::string, double>> data = {
|
||||
{{"discretiz", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}},
|
||||
{{"odte", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}},
|
||||
};
|
||||
auto env = platform::DotEnv();
|
||||
std::string experiment = env.get("experiment");
|
||||
try {
|
||||
return data[{experiment, metric}];
|
||||
}
|
||||
catch (...) {
|
||||
return { "", 0.0 };
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
@@ -1,22 +0,0 @@
|
||||
#ifndef LOCALE_H
|
||||
#define LOCALE_H
|
||||
#include <locale>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
namespace platform {
|
||||
// Numeric punctuation facet: ',' as decimal point and '.' as thousands
// separator, with digits grouped in threes (1234567.5 -> 1.234.567,5).
struct separation : std::numpunct<char> {
    // `override` makes the compiler verify that these really replace the
    // virtual functions of std::numpunct<char>.
    char do_decimal_point() const override { return ','; }
    char do_thousands_sep() const override { return '.'; }
    std::string do_grouping() const override { return "\03"; }
};
|
||||
class ConfigLocale {
|
||||
public:
|
||||
explicit ConfigLocale()
|
||||
{
|
||||
std::locale mylocale(std::cout.getloc(), new separation);
|
||||
std::locale::global(mylocale);
|
||||
std::cout.imbue(mylocale);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,22 +0,0 @@
|
||||
# Header search paths shared by every executable declared below
include_directories(
    ${BayesNet_SOURCE_DIR}/src/BayesNet
    ${BayesNet_SOURCE_DIR}/src/Platform
    ${BayesNet_SOURCE_DIR}/src/PyClassifiers
    ${BayesNet_SOURCE_DIR}/lib/Files
    ${BayesNet_SOURCE_DIR}/lib/mdlp
    ${BayesNet_SOURCE_DIR}/lib/argparse/include
    ${BayesNet_SOURCE_DIR}/lib/json/include
    ${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include
    ${Python3_INCLUDE_DIRS}
    ${MPI_CXX_INCLUDE_DIRS}
)

# Executables
add_executable(b_best
    b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc
    ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
add_executable(b_grid
    b_grid.cc GridSearch.cc GridData.cc HyperParameters.cc Folding.cc
    Datasets.cc Dataset.cc)
add_executable(b_list
    b_list.cc Datasets.cc Dataset.cc)
add_executable(b_main
    b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc
    HyperParameters.cc ReportConsole.cc ReportBase.cc)
add_executable(b_manage
    b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc
    ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)

# Link dependencies of each executable
target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp)
target_link_libraries(b_grid BayesNet PyWrap ${MPI_CXX_LIBRARIES})
target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap)
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
|
@@ -1,15 +0,0 @@
|
||||
#ifndef COLORS_H
|
||||
#define COLORS_H
|
||||
/// Escape sequences (ESC '[' <code> 'm') returned as strings, one accessor per colour.
class Colors {
public:
    static std::string MAGENTA() { return ansi("1;35"); }
    static std::string BLUE() { return ansi("1;34"); }
    static std::string CYAN() { return ansi("1;36"); }
    static std::string GREEN() { return ansi("1;32"); }
    static std::string YELLOW() { return ansi("1;33"); }
    static std::string RED() { return ansi("1;31"); }
    static std::string WHITE() { return ansi("1;37"); }
    static std::string IBLUE() { return ansi("0;94"); }
    static std::string RESET() { return ansi("0"); }
private:
    // Wraps a code in the common "\033[" ... "m" envelope
    static std::string ansi(const char* code)
    {
        return std::string("\033[") + code + "m";
    }
};
|
||||
#endif // COLORS_H
|
@@ -1,87 +0,0 @@
|
||||
#include "CommandParser.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "Colors.h"
|
||||
#include "Utils.h"
|
||||
|
||||
namespace platform {
|
||||
void CommandParser::messageError(const std::string& message)
|
||||
{
|
||||
std::cout << Colors::RED() << message << Colors::RESET() << std::endl;
|
||||
}
|
||||
std::pair<char, int> CommandParser::parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int maxIndex)
|
||||
{
|
||||
bool finished = false;
|
||||
while (!finished) {
|
||||
std::stringstream oss;
|
||||
std::string line;
|
||||
oss << color << "Choose option (";
|
||||
bool first = true;
|
||||
for (auto& option : options) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
oss << ", ";
|
||||
}
|
||||
oss << std::get<char>(option) << "=" << std::get<std::string>(option);
|
||||
}
|
||||
oss << "): ";
|
||||
std::cout << oss.str();
|
||||
getline(std::cin, line);
|
||||
std::cout << Colors::RESET();
|
||||
line = trim(line);
|
||||
if (line.size() == 0)
|
||||
continue;
|
||||
if (all_of(line.begin(), line.end(), ::isdigit)) {
|
||||
command = defaultCommand;
|
||||
index = stoi(line);
|
||||
if (index > maxIndex || index < 0) {
|
||||
messageError("Index out of range");
|
||||
continue;
|
||||
}
|
||||
finished = true;
|
||||
break;
|
||||
}
|
||||
bool found = false;
|
||||
for (auto& option : options) {
|
||||
if (line[0] == std::get<char>(option)) {
|
||||
found = true;
|
||||
// it's a match
|
||||
line.erase(line.begin());
|
||||
line = trim(line);
|
||||
if (std::get<bool>(option)) {
|
||||
// The option requires a value
|
||||
if (line.size() == 0) {
|
||||
messageError("Option " + std::get<std::string>(option) + " requires a value");
|
||||
break;
|
||||
}
|
||||
try {
|
||||
index = stoi(line);
|
||||
if (index > maxIndex || index < 0) {
|
||||
messageError("Index out of range");
|
||||
break;
|
||||
}
|
||||
}
|
||||
catch (const std::invalid_argument& ia) {
|
||||
messageError("Invalid value: " + line);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (line.size() > 0) {
|
||||
messageError("option " + std::get<std::string>(option) + " doesn't accept values");
|
||||
break;
|
||||
}
|
||||
}
|
||||
command = std::get<char>(option);
|
||||
finished = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
messageError("I don't know " + line);
|
||||
}
|
||||
}
|
||||
return { command, index };
|
||||
}
|
||||
} /* namespace platform */
|
@@ -1,20 +0,0 @@
|
||||
#ifndef COMMAND_PARSER_H
|
||||
#define COMMAND_PARSER_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
|
||||
namespace platform {
|
||||
/// Interactive command reader; parse() stores the last command and index read.
class CommandParser {
public:
    CommandParser() = default;
    /// Prompts until a valid option is typed and returns {command, index}.
    std::pair<char, int> parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int maxIndex);
    /// Last command stored by parse() ('\0' before any call).
    char getCommand() const { return command; }
    /// Last index stored by parse() (0 before any call).
    int getIndex() const { return index; }
private:
    void messageError(const std::string& message);
    // Initialised so the getters, and parse() for options without a value,
    // never read indeterminate values.
    char command{ '\0' };
    int index{ 0 };
};
|
||||
} /* namespace platform */
|
||||
#endif /* COMMAND_PARSER_H */
|
@@ -1,215 +0,0 @@
|
||||
#include "Dataset.h"
|
||||
#include "ArffFiles.h"
|
||||
#include <fstream>
|
||||
namespace platform {
|
||||
// Member-wise copy; initialisers are listed in the members' declaration order.
Dataset::Dataset(const Dataset& dataset)
    : path(dataset.path),
    name(dataset.name),
    fileType(dataset.fileType),
    className(dataset.className),
    n_samples(dataset.n_samples),
    n_features(dataset.n_features),
    features(dataset.features),
    states(dataset.states),
    loaded(dataset.loaded),
    discretize(dataset.discretize),
    X(dataset.X),
    y(dataset.y),
    Xv(dataset.Xv),
    Xd(dataset.Xd),
    yv(dataset.yv)
{
}
|
||||
std::string Dataset::getName() const
|
||||
{
|
||||
return name;
|
||||
}
|
||||
std::string Dataset::getClassName() const
|
||||
{
|
||||
return className;
|
||||
}
|
||||
std::vector<std::string> Dataset::getFeatures() const
|
||||
{
|
||||
if (loaded) {
|
||||
return features;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
int Dataset::getNFeatures() const
|
||||
{
|
||||
if (loaded) {
|
||||
return n_features;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
int Dataset::getNSamples() const
|
||||
{
|
||||
if (loaded) {
|
||||
return n_samples;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
std::map<std::string, std::vector<int>> Dataset::getStates() const
|
||||
{
|
||||
if (loaded) {
|
||||
return states;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
// Returns references to the raw feature vectors (Xv) and the labels (yv).
// Throws std::invalid_argument if the dataset is not loaded.
pair<std::vector<std::vector<float>>&, std::vector<int>&> Dataset::getVectors()
{
    if (!loaded) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    return { Xv, yv };
}
|
||||
// Returns references to the discretized feature vectors (Xd) and the labels (yv).
// Throws std::invalid_argument if the dataset is not loaded.
pair<std::vector<std::vector<int>>&, std::vector<int>&> Dataset::getVectorsDiscretized()
{
    if (!loaded) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    return { Xd, yv };
}
|
||||
// Rebuilds the X and y tensors on every call and returns references to them.
// Throws std::invalid_argument if the dataset is not loaded.
pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
{
    if (!loaded) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    buildTensors();
    return { X, y };
}
|
||||
void Dataset::load_csv()
|
||||
{
|
||||
ifstream file(path + "/" + name + ".csv");
|
||||
if (file.is_open()) {
|
||||
std::string line;
|
||||
getline(file, line);
|
||||
std::vector<std::string> tokens = split(line, ',');
|
||||
features = std::vector<std::string>(tokens.begin(), tokens.end() - 1);
|
||||
if (className == "-1") {
|
||||
className = tokens.back();
|
||||
}
|
||||
for (auto i = 0; i < features.size(); ++i) {
|
||||
Xv.push_back(std::vector<float>());
|
||||
}
|
||||
while (getline(file, line)) {
|
||||
tokens = split(line, ',');
|
||||
for (auto i = 0; i < features.size(); ++i) {
|
||||
Xv[i].push_back(stof(tokens[i]));
|
||||
}
|
||||
yv.push_back(stoi(tokens.back()));
|
||||
}
|
||||
file.close();
|
||||
} else {
|
||||
throw std::invalid_argument("Unable to open dataset file.");
|
||||
}
|
||||
}
|
||||
void Dataset::computeStates()
|
||||
{
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
states[features[i]] = std::vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
|
||||
auto item = states.at(features[i]);
|
||||
iota(begin(item), end(item), 0);
|
||||
}
|
||||
states[className] = std::vector<int>(*max_element(yv.begin(), yv.end()) + 1);
|
||||
iota(begin(states.at(className)), end(states.at(className)), 0);
|
||||
}
|
||||
void Dataset::load_arff()
|
||||
{
|
||||
auto arff = ArffFiles();
|
||||
arff.load(path + "/" + name + ".arff", className);
|
||||
// Get Dataset X, y
|
||||
Xv = arff.getX();
|
||||
yv = arff.getY();
|
||||
// Get className & Features
|
||||
className = arff.getClassName();
|
||||
auto attributes = arff.getAttributes();
|
||||
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
|
||||
}
|
||||
/**
 * Splits a line on runs of blanks (' ', '\t', '\n').
 *
 * Single linear scan that does not modify the input. A line that starts with a
 * blank yields an empty first token, as the previous implementation did;
 * trailing blanks produce no token.
 *
 * @param line text to split
 * @return the tokens in order of appearance
 */
std::vector<std::string> tokenize(std::string line)
{
    static const char* const blanks = " \t\n";
    std::vector<std::string> tokens;
    std::string::size_type start = 0;
    while (start < line.size()) {
        const std::string::size_type stop = line.find_first_of(blanks, start);
        if (stop == std::string::npos) {
            // Last token runs to the end of the line
            tokens.push_back(line.substr(start));
            break;
        }
        tokens.push_back(line.substr(start, stop - start));
        // Skip the whole run of blanks; npos here ends the loop
        start = line.find_first_not_of(blanks, stop);
    }
    return tokens;
}
|
||||
void Dataset::load_rdata()
|
||||
{
|
||||
ifstream file(path + "/" + name + "_R.dat");
|
||||
if (file.is_open()) {
|
||||
std::string line;
|
||||
getline(file, line);
|
||||
line = ArffFiles::trim(line);
|
||||
std::vector<std::string> tokens = tokenize(line);
|
||||
transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
|
||||
if (className == "-1") {
|
||||
className = ArffFiles::trim(tokens.back());
|
||||
}
|
||||
for (auto i = 0; i < features.size(); ++i) {
|
||||
Xv.push_back(std::vector<float>());
|
||||
}
|
||||
while (getline(file, line)) {
|
||||
tokens = tokenize(line);
|
||||
// We have to skip the first token, which is the instance number.
|
||||
for (auto i = 1; i < features.size() + 1; ++i) {
|
||||
const float value = stof(tokens[i]);
|
||||
Xv[i - 1].push_back(value);
|
||||
}
|
||||
yv.push_back(stoi(tokens.back()));
|
||||
}
|
||||
file.close();
|
||||
} else {
|
||||
throw std::invalid_argument("Unable to open dataset file.");
|
||||
}
|
||||
}
|
||||
void Dataset::load()
|
||||
{
|
||||
if (loaded) {
|
||||
return;
|
||||
}
|
||||
if (fileType == CSV) {
|
||||
load_csv();
|
||||
} else if (fileType == ARFF) {
|
||||
load_arff();
|
||||
} else if (fileType == RDATA) {
|
||||
load_rdata();
|
||||
}
|
||||
if (discretize) {
|
||||
Xd = discretizeDataset(Xv, yv);
|
||||
computeStates();
|
||||
}
|
||||
n_samples = Xv[0].size();
|
||||
n_features = Xv.size();
|
||||
loaded = true;
|
||||
}
|
||||
void Dataset::buildTensors()
|
||||
{
|
||||
if (discretize) {
|
||||
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
|
||||
} else {
|
||||
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32);
|
||||
}
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
if (discretize) {
|
||||
X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
|
||||
} else {
|
||||
X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
|
||||
}
|
||||
}
|
||||
y = torch::tensor(yv, torch::kInt32);
|
||||
}
|
||||
// Fits one mdlp::CPPFImdlp per feature column of X against y and returns the
// transformed columns in the same order.
std::vector<mdlp::labels_t> Dataset::discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
    std::vector<mdlp::labels_t> discretized;
    auto discretizer = mdlp::CPPFImdlp();
    for (auto& column : X) {
        discretizer.fit(column, y);
        // transform() hands back a reference; push_back stores a copy of it
        mdlp::labels_t& labels = discretizer.transform(column);
        discretized.push_back(labels);
    }
    return discretized;
}
|
||||
}
|
@@ -1,78 +0,0 @@
|
||||
#ifndef DATASET_H
|
||||
#define DATASET_H
|
||||
#include <torch/torch.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "CPPFImdlp.h"
|
||||
#include "Utils.h"
|
||||
namespace platform {
|
||||
/// File formats a dataset can be stored in.
enum fileType_t { CSV, ARFF, RDATA };

/// Maps a source name to the directory and file format of its datasets.
class SourceData {
public:
    /**
     * @param source one of "Surcov", "Arff" or "Tanveer"
     * @throws std::invalid_argument for any other name
     */
    SourceData(std::string source)
    {
        if (source == "Surcov") {
            path = "datasets/";
            fileType = CSV;
        } else if (source == "Arff") {
            path = "datasets/";
            fileType = ARFF;
        } else if (source == "Tanveer") {
            path = "data/";
            fileType = RDATA;
        } else {
            throw std::invalid_argument("Unknown source.");
        }
    }
    /// Directory (with trailing '/') holding the datasets of this source.
    std::string getPath() const
    {
        return path;
    }
    /// File format used by this source.
    fileType_t getFileType() const
    {
        return fileType;
    }
private:
    std::string path;
    fileType_t fileType;
};
|
||||
class Dataset {
|
||||
private:
|
||||
std::string path;
|
||||
std::string name;
|
||||
fileType_t fileType;
|
||||
std::string className;
|
||||
int n_samples{ 0 }, n_features{ 0 };
|
||||
std::vector<std::string> features;
|
||||
std::map<std::string, std::vector<int>> states;
|
||||
bool loaded;
|
||||
bool discretize;
|
||||
torch::Tensor X, y;
|
||||
std::vector<std::vector<float>> Xv;
|
||||
std::vector<std::vector<int>> Xd;
|
||||
std::vector<int> yv;
|
||||
void buildTensors();
|
||||
void load_csv();
|
||||
void load_arff();
|
||||
void load_rdata();
|
||||
void computeStates();
|
||||
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y);
|
||||
public:
|
||||
Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
|
||||
explicit Dataset(const Dataset&);
|
||||
std::string getName() const;
|
||||
std::string getClassName() const;
|
||||
std::vector<string> getFeatures() const;
|
||||
std::map<std::string, std::vector<int>> getStates() const;
|
||||
std::pair<vector<std::vector<float>>&, std::vector<int>&> getVectors();
|
||||
std::pair<vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized();
|
||||
std::pair<torch::Tensor&, torch::Tensor&> getTensors();
|
||||
int getNFeatures() const;
|
||||
int getNSamples() const;
|
||||
void load();
|
||||
const bool inline isLoaded() const { return loaded; };
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
@@ -1,129 +0,0 @@
|
||||
#include "Datasets.h"
|
||||
#include <fstream>
|
||||
namespace platform {
|
||||
void Datasets::load()
|
||||
{
|
||||
auto sd = SourceData(sfileType);
|
||||
fileType = sd.getFileType();
|
||||
path = sd.getPath();
|
||||
ifstream catalog(path + "all.txt");
|
||||
if (catalog.is_open()) {
|
||||
std::string line;
|
||||
while (getline(catalog, line)) {
|
||||
if (line.empty() || line[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
std::vector<std::string> tokens = split(line, ',');
|
||||
std::string name = tokens[0];
|
||||
std::string className;
|
||||
if (tokens.size() == 1) {
|
||||
className = "-1";
|
||||
} else {
|
||||
className = tokens[1];
|
||||
}
|
||||
datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType);
|
||||
}
|
||||
catalog.close();
|
||||
} else {
|
||||
throw std::invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]");
|
||||
}
|
||||
}
|
||||
std::vector<std::string> Datasets::getNames()
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
|
||||
return result;
|
||||
}
|
||||
std::vector<std::string> Datasets::getFeatures(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
return datasets.at(name)->getFeatures();
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
// States of the given dataset. Throws std::invalid_argument if it is not
// loaded; at() throws std::out_of_range for an unknown name.
map<std::string, std::vector<int>> Datasets::getStates(const std::string& name) const
{
    const auto& dataset = datasets.at(name);
    if (!dataset->isLoaded()) {
        throw std::invalid_argument("Dataset not loaded.");
    }
    return dataset->getStates();
}
|
||||
void Datasets::loadDataset(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
return;
|
||||
} else {
|
||||
datasets.at(name)->load();
|
||||
}
|
||||
}
|
||||
std::string Datasets::getClassName(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
return datasets.at(name)->getClassName();
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
int Datasets::getNSamples(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
return datasets.at(name)->getNSamples();
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
int Datasets::getNClasses(const std::string& name)
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
auto className = datasets.at(name)->getClassName();
|
||||
if (discretize) {
|
||||
auto states = getStates(name);
|
||||
return states.at(className).size();
|
||||
}
|
||||
auto [Xv, yv] = getVectors(name);
|
||||
return *std::max_element(yv.begin(), yv.end()) + 1;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
std::vector<int> Datasets::getClassesCounts(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
auto [Xv, yv] = datasets.at(name)->getVectors();
|
||||
std::vector<int> counts(*std::max_element(yv.begin(), yv.end()) + 1);
|
||||
for (auto y : yv) {
|
||||
counts[y]++;
|
||||
}
|
||||
return counts;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
// Returns references to the raw feature vectors and labels of the given
// dataset, loading it first if needed.
// Throws std::out_of_range for a name that is not in the catalog.
pair<std::vector<std::vector<float>>&, std::vector<int>&> Datasets::getVectors(const std::string& name)
{
    // at() instead of operator[]: operator[] would insert a null pointer for
    // an unknown name and dereference it.
    auto& dataset = datasets.at(name);
    if (!dataset->isLoaded()) {
        dataset->load();
    }
    return dataset->getVectors();
}
|
||||
// Returns references to the discretized feature vectors and labels of the
// given dataset, loading it first if needed.
// Throws std::out_of_range for a name that is not in the catalog.
pair<std::vector<std::vector<int>>&, std::vector<int>&> Datasets::getVectorsDiscretized(const std::string& name)
{
    // at() instead of operator[]: operator[] would insert a null pointer for
    // an unknown name and dereference it.
    auto& dataset = datasets.at(name);
    if (!dataset->isLoaded()) {
        dataset->load();
    }
    return dataset->getVectorsDiscretized();
}
|
||||
// Returns references to the X and y tensors of the given dataset, loading it
// first if needed.
// Throws std::out_of_range for a name that is not in the catalog.
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const std::string& name)
{
    // at() instead of operator[]: operator[] would insert a null pointer for
    // an unknown name and dereference it.
    auto& dataset = datasets.at(name);
    if (!dataset->isLoaded()) {
        dataset->load();
    }
    return dataset->getTensors();
}
|
||||
bool Datasets::isDataset(const std::string& name) const
|
||||
{
|
||||
return datasets.find(name) != datasets.end();
|
||||
}
|
||||
}
|
@@ -1,30 +0,0 @@
|
||||
#ifndef DATASETS_H
|
||||
#define DATASETS_H
|
||||
#include "Dataset.h"
|
||||
namespace platform {
|
||||
class Datasets {
|
||||
private:
|
||||
std::string path;
|
||||
fileType_t fileType;
|
||||
std::string sfileType;
|
||||
std::map<std::string, std::unique_ptr<Dataset>> datasets;
|
||||
bool discretize;
|
||||
void load(); // Loads the list of datasets
|
||||
public:
|
||||
explicit Datasets(bool discretize, std::string sfileType) : discretize(discretize), sfileType(sfileType) { load(); };
|
||||
std::vector<string> getNames();
|
||||
std::vector<string> getFeatures(const std::string& name) const;
|
||||
int getNSamples(const std::string& name) const;
|
||||
std::string getClassName(const std::string& name) const;
|
||||
int getNClasses(const std::string& name);
|
||||
std::vector<int> getClassesCounts(const std::string& name) const;
|
||||
std::map<std::string, std::vector<int>> getStates(const std::string& name) const;
|
||||
std::pair<std::vector<std::vector<float>>&, std::vector<int>&> getVectors(const std::string& name);
|
||||
std::pair<std::vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized(const std::string& name);
|
||||
std::pair<torch::Tensor&, torch::Tensor&> getTensors(const std::string& name);
|
||||
bool isDataset(const std::string& name) const;
|
||||
void loadDataset(const std::string& name) const;
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
@@ -1,55 +0,0 @@
|
||||
#ifndef DOTENV_H
|
||||
#define DOTENV_H
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include "Utils.h"
|
||||
|
||||
//#include "Dataset.h"
|
||||
namespace platform {
|
||||
class DotEnv {
|
||||
private:
|
||||
std::map<std::string, std::string> env;
|
||||
public:
|
||||
DotEnv()
|
||||
{
|
||||
std::ifstream file(".env");
|
||||
if (!file.is_open()) {
|
||||
std::cerr << "File .env not found" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
std::string line;
|
||||
while (std::getline(file, line)) {
|
||||
line = trim(line);
|
||||
if (line.empty() || line[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
std::istringstream iss(line);
|
||||
std::string key, value;
|
||||
if (std::getline(iss, key, '=') && std::getline(iss, value)) {
|
||||
env[key] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::string get(const std::string& key)
|
||||
{
|
||||
return env.at(key);
|
||||
}
|
||||
std::vector<int> getSeeds()
|
||||
{
|
||||
auto seeds = std::vector<int>();
|
||||
auto seeds_str = env["seeds"];
|
||||
seeds_str = trim(seeds_str);
|
||||
seeds_str = seeds_str.substr(1, seeds_str.size() - 2);
|
||||
auto seeds_str_split = split(seeds_str, ',');
|
||||
transform(seeds_str_split.begin(), seeds_str_split.end(), back_inserter(seeds), [](const std::string& str) {
|
||||
return stoi(str);
|
||||
});
|
||||
return seeds;
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,168 +0,0 @@
|
||||
#include "ExcelFile.h"
|
||||
|
||||
namespace platform {
|
||||
// No workbook/worksheet yet: start from null pointers instead of indeterminate ones.
ExcelFile::ExcelFile() : workbook(nullptr), worksheet(nullptr)
{
    setDefault();
}
|
||||
// Uses an existing workbook; the worksheet starts as null instead of indeterminate.
ExcelFile::ExcelFile(lxw_workbook* workbook) : workbook(workbook), worksheet(nullptr)
{
    setDefault();
}
|
||||
// Uses an existing workbook and worksheet, then applies the default settings.
ExcelFile::ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet)
    : workbook(workbook), worksheet(worksheet)
{
    setDefault();
}
|
||||
void ExcelFile::setDefault()
|
||||
{
|
||||
normalSize = 14; //font size for report body
|
||||
row = 0;
|
||||
colorTitle = 0xB1A0C7;
|
||||
colorOdd = 0xDCE6F1;
|
||||
colorEven = 0xFDE9D9;
|
||||
}
|
||||
|
||||
// Returns the workbook pointer held by this object.
lxw_workbook* ExcelFile::getWorkbook()
{
    return workbook;
}
|
||||
void ExcelFile::setProperties(std::string title)
|
||||
{
|
||||
char line[title.size() + 1];
|
||||
strcpy(line, title.c_str());
|
||||
lxw_doc_properties properties = {
|
||||
.title = line,
|
||||
.subject = (char*)"Machine learning results",
|
||||
.author = (char*)"Ricardo Montañana Gómez",
|
||||
.manager = (char*)"Dr. J. A. Gámez, Dr. J. M. Puerta",
|
||||
.company = (char*)"UCLM",
|
||||
.comments = (char*)"Created with libxlsxwriter and c++",
|
||||
};
|
||||
workbook_set_properties(workbook, &properties);
|
||||
}
|
||||
/**
 * Resolves a style name to a format.
 *
 * Looks first for the row-parity variant ("<style>_odd" when `row` is odd,
 * "<style>_even" otherwise) and falls back to the plain name.
 *
 * @param style style name; an empty name means "no format"
 * @return the format, or nullptr for an empty name
 * @throws std::invalid_argument if neither variant is registered
 */
lxw_format* ExcelFile::efectiveStyle(const std::string& style)
{
    if (style.empty()) {
        return nullptr;
    }
    const std::string suffix = row % 2 ? "_odd" : "_even";
    // Plain lookups: a missing parity variant is the normal case for header
    // styles, so it should not go through exception handling.
    auto found = styles.find(style + suffix);
    if (found == styles.end()) {
        found = styles.find(style);
    }
    if (found == styles.end()) {
        throw std::invalid_argument("Style " + style + " not found");
    }
    return found->second;
}
|
||||
void ExcelFile::writeString(int row, int col, const std::string& text, const std::string& style)
|
||||
{
|
||||
worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style));
|
||||
}
|
||||
void ExcelFile::writeInt(int row, int col, const int number, const std::string& style)
|
||||
{
|
||||
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
|
||||
}
|
||||
void ExcelFile::writeDouble(int row, int col, const double number, const std::string& style)
|
||||
{
|
||||
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
|
||||
}
|
||||
// Sets the background colour of a format according to the `odd` flag.
void ExcelFile::addColor(lxw_format* style, bool odd)
{
    // NOTE(review): odd picks colorEven and even picks colorOdd; looks
    // inverted but is kept as is - confirm it is intentional.
    uint32_t background;
    if (odd) {
        background = colorEven;
    } else {
        background = colorOdd;
    }
    format_set_bg_color(style, lxw_color_t(background));
}
|
||||
void ExcelFile::createStyle(const std::string& name, lxw_format* style, bool odd)
|
||||
{
|
||||
addColor(style, odd);
|
||||
if (name == "textCentered") {
|
||||
format_set_align(style, LXW_ALIGN_CENTER);
|
||||
format_set_font_size(style, normalSize);
|
||||
format_set_border(style, LXW_BORDER_THIN);
|
||||
} else if (name == "text") {
|
||||
format_set_font_size(style, normalSize);
|
||||
format_set_border(style, LXW_BORDER_THIN);
|
||||
} else if (name == "bodyHeader") {
|
||||
format_set_bold(style);
|
||||
format_set_font_size(style, normalSize);
|
||||
format_set_align(style, LXW_ALIGN_CENTER);
|
||||
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
|
||||
format_set_border(style, LXW_BORDER_THIN);
|
||||
format_set_bg_color(style, lxw_color_t(colorTitle));
|
||||
} else if (name == "result") {
|
||||
format_set_font_size(style, normalSize);
|
||||
format_set_border(style, LXW_BORDER_THIN);
|
||||
format_set_num_format(style, "0.0000000");
|
||||
} else if (name == "time") {
|
||||
format_set_font_size(style, normalSize);
|
||||
format_set_border(style, LXW_BORDER_THIN);
|
||||
format_set_num_format(style, "#,##0.000000");
|
||||
} else if (name == "ints") {
|
||||
format_set_font_size(style, normalSize);
|
||||
format_set_num_format(style, "###,##0");
|
||||
format_set_border(style, LXW_BORDER_THIN);
|
||||
} else if (name == "floats") {
|
||||
format_set_border(style, LXW_BORDER_THIN);
|
||||
format_set_font_size(style, normalSize);
|
||||
format_set_num_format(style, "#,##0.00");
|
||||
}
|
||||
}
|
||||
|
||||
void ExcelFile::createFormats()
|
||||
{
|
||||
auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" };
|
||||
lxw_format* style;
|
||||
for (std::string name : styleNames) {
|
||||
lxw_format* style = workbook_add_format(workbook);
|
||||
style = workbook_add_format(workbook);
|
||||
createStyle(name, style, true);
|
||||
styles[name + "_odd"] = style;
|
||||
style = workbook_add_format(workbook);
|
||||
createStyle(name, style, false);
|
||||
styles[name + "_even"] = style;
|
||||
}
|
||||
|
||||
// Header 1st line
|
||||
lxw_format* headerFirst = workbook_add_format(workbook);
|
||||
format_set_bold(headerFirst);
|
||||
format_set_font_size(headerFirst, 18);
|
||||
format_set_align(headerFirst, LXW_ALIGN_CENTER);
|
||||
format_set_align(headerFirst, LXW_ALIGN_VERTICAL_CENTER);
|
||||
format_set_border(headerFirst, LXW_BORDER_THIN);
|
||||
format_set_bg_color(headerFirst, lxw_color_t(colorTitle));
|
||||
|
||||
// Header rest
|
||||
lxw_format* headerRest = workbook_add_format(workbook);
|
||||
format_set_bold(headerRest);
|
||||
format_set_align(headerRest, LXW_ALIGN_CENTER);
|
||||
format_set_font_size(headerRest, 16);
|
||||
format_set_align(headerRest, LXW_ALIGN_VERTICAL_CENTER);
|
||||
format_set_border(headerRest, LXW_BORDER_THIN);
|
||||
format_set_bg_color(headerRest, lxw_color_t(colorOdd));
|
||||
|
||||
// Header small
|
||||
lxw_format* headerSmall = workbook_add_format(workbook);
|
||||
format_set_bold(headerSmall);
|
||||
format_set_align(headerSmall, LXW_ALIGN_LEFT);
|
||||
format_set_font_size(headerSmall, 12);
|
||||
format_set_border(headerSmall, LXW_BORDER_THIN);
|
||||
format_set_align(headerSmall, LXW_ALIGN_VERTICAL_CENTER);
|
||||
format_set_bg_color(headerSmall, lxw_color_t(colorOdd));
|
||||
|
||||
// Summary style
|
||||
lxw_format* summaryStyle = workbook_add_format(workbook);
|
||||
format_set_bold(summaryStyle);
|
||||
format_set_font_size(summaryStyle, 16);
|
||||
format_set_border(summaryStyle, LXW_BORDER_THIN);
|
||||
format_set_align(summaryStyle, LXW_ALIGN_VERTICAL_CENTER);
|
||||
|
||||
styles["headerFirst"] = headerFirst;
|
||||
styles["headerRest"] = headerRest;
|
||||
styles["headerSmall"] = headerSmall;
|
||||
styles["summaryStyle"] = summaryStyle;
|
||||
}
|
||||
}
|
@@ -1,43 +0,0 @@
|
||||
#ifndef EXCELFILE_H
|
||||
#define EXCELFILE_H
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "xlsxwriter.h"
|
||||
|
||||
namespace platform {
|
||||
/// Number punctuation facet: ',' as decimal point, '.' as thousands separator,
/// digits grouped in threes.
struct separated : std::numpunct<char> {
    char do_decimal_point() const override { return ','; }

    char do_thousands_sep() const override { return '.'; }

    // "\03" means every group holds three digits
    std::string do_grouping() const override { return "\03"; }
};
|
||||
class ExcelFile {
|
||||
public:
|
||||
ExcelFile();
|
||||
ExcelFile(lxw_workbook* workbook);
|
||||
ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet);
|
||||
lxw_workbook* getWorkbook();
|
||||
protected:
|
||||
void setProperties(std::string title);
|
||||
void writeString(int row, int col, const std::string& text, const std::string& style = "");
|
||||
void writeInt(int row, int col, const int number, const std::string& style = "");
|
||||
void writeDouble(int row, int col, const double number, const std::string& style = "");
|
||||
void createFormats();
|
||||
void createStyle(const std::string& name, lxw_format* style, bool odd);
|
||||
void addColor(lxw_format* style, bool odd);
|
||||
lxw_format* efectiveStyle(const std::string& name);
|
||||
lxw_workbook* workbook;
|
||||
lxw_worksheet* worksheet;
|
||||
std::map<std::string, lxw_format*> styles;
|
||||
int row;
|
||||
int normalSize; //font size for report body
|
||||
uint32_t colorTitle;
|
||||
uint32_t colorOdd;
|
||||
uint32_t colorEven;
|
||||
private:
|
||||
void setDefault();
|
||||
};
|
||||
}
|
||||
#endif // !EXCELFILE_H
|
@@ -1,226 +0,0 @@
|
||||
#include <fstream>
|
||||
#include "Experiment.h"
|
||||
#include "Datasets.h"
|
||||
#include "Models.h"
|
||||
#include "ReportConsole.h"
|
||||
#include "Paths.h"
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
// Returns today's date in the local time zone, formatted as YYYY-MM-DD.
std::string get_date()
{
    const std::time_t now = std::time(nullptr);
    const std::tm* local = std::localtime(&now);
    std::ostringstream out;
    out << std::put_time(local, "%Y-%m-%d");
    return out.str();
}
|
||||
// Returns the current local time of day, formatted as HH:MM:SS.
std::string get_time()
{
    const std::time_t now = std::time(nullptr);
    const std::tm* local = std::localtime(&now);
    std::ostringstream out;
    out << std::put_time(local, "%H:%M:%S");
    return out.str();
}
|
||||
std::string Experiment::get_file_name()
|
||||
{
|
||||
std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
|
||||
return result;
|
||||
}
|
||||
|
||||
// Serialises the experiment settings and every stored Result into one json
// document; per-dataset entries are appended to the "results" array.
json Experiment::build_json()
{
    json result;
    // Experiment-wide settings.
    result["title"] = title;
    result["date"] = get_date();
    result["time"] = get_time();
    result["model"] = model;
    result["version"] = model_version;
    result["platform"] = platform;
    result["score_name"] = score_name;
    result["language"] = language;
    result["language_version"] = language_version;
    result["discretized"] = discretized;
    result["stratified"] = stratified;
    result["folds"] = nfolds;
    result["seeds"] = randomSeeds;
    result["duration"] = duration;
    result["results"] = json::array();
    for (const auto& r : results) {
        json entry;
        entry["dataset"] = r.getDataset();
        entry["hyperparameters"] = r.getHyperparameters();
        entry["samples"] = r.getSamples();
        entry["features"] = r.getFeatures();
        entry["classes"] = r.getClasses();
        // "score"/"score_std" repeat the test score and its deviation.
        entry["score_train"] = r.getScoreTrain();
        entry["score_test"] = r.getScoreTest();
        entry["score"] = r.getScoreTest();
        entry["score_std"] = r.getScoreTestStd();
        entry["score_train_std"] = r.getScoreTrainStd();
        entry["score_test_std"] = r.getScoreTestStd();
        // "time"/"time_std" are the sums of the train and test figures.
        entry["train_time"] = r.getTrainTime();
        entry["train_time_std"] = r.getTrainTimeStd();
        entry["test_time"] = r.getTestTime();
        entry["test_time_std"] = r.getTestTimeStd();
        entry["time"] = r.getTestTime() + r.getTrainTime();
        entry["time_std"] = r.getTestTimeStd() + r.getTrainTimeStd();
        // Raw per-run series.
        entry["scores_train"] = r.getScoresTrain();
        entry["scores_test"] = r.getScoresTest();
        entry["times_train"] = r.getTimesTrain();
        entry["times_test"] = r.getTimesTest();
        entry["nodes"] = r.getNodes();
        entry["leaves"] = r.getLeaves();
        entry["depth"] = r.getDepth();
        result["results"].push_back(entry);
    }
    return result;
}
|
||||
void Experiment::save(const std::string& path)
|
||||
{
|
||||
json data = build_json();
|
||||
ofstream file(path + "/" + get_file_name());
|
||||
file << data;
|
||||
file.close();
|
||||
}
|
||||
|
||||
void Experiment::report()
|
||||
{
|
||||
json data = build_json();
|
||||
ReportConsole report(data);
|
||||
report.show();
|
||||
}
|
||||
|
||||
void Experiment::show()
|
||||
{
|
||||
json data = build_json();
|
||||
std::cout << data.dump(4) << std::endl;
|
||||
}
|
||||
|
||||
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet)
|
||||
{
|
||||
std::cout << "*** Starting experiment: " << title << " ***" << std::endl;
|
||||
for (auto fileName : filesToProcess) {
|
||||
std::cout << "- " << setw(20) << left << fileName << " " << right << flush;
|
||||
cross_validation(fileName, quiet);
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::string getColor(bayesnet::status_t status)
|
||||
{
|
||||
switch (status) {
|
||||
case bayesnet::NORMAL:
|
||||
return Colors::GREEN();
|
||||
case bayesnet::WARNING:
|
||||
return Colors::YELLOW();
|
||||
case bayesnet::ERROR:
|
||||
return Colors::RED();
|
||||
default:
|
||||
return Colors::RESET();
|
||||
}
|
||||
}
|
||||
|
||||
void showProgress(int fold, const std::string& color, const std::string& phase)
|
||||
{
|
||||
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
|
||||
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
|
||||
|
||||
}
|
||||
void Experiment::cross_validation(const std::string& fileName, bool quiet)
|
||||
{
|
||||
auto datasets = Datasets(discretized, Paths::datasets());
|
||||
// Get dataset
|
||||
auto [X, y] = datasets.getTensors(fileName);
|
||||
auto states = datasets.getStates(fileName);
|
||||
auto features = datasets.getFeatures(fileName);
|
||||
auto samples = datasets.getNSamples(fileName);
|
||||
auto className = datasets.getClassName(fileName);
|
||||
if (!quiet) {
|
||||
std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
|
||||
}
|
||||
// Prepare Result
|
||||
auto result = Result();
|
||||
auto [values, counts] = at::_unique(y);
|
||||
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
|
||||
result.setHyperparameters(hyperparameters.get(fileName));
|
||||
// Initialize results std::vectors
|
||||
int nResults = nfolds * static_cast<int>(randomSeeds.size());
|
||||
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto train_time = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto test_time = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto nodes = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto edges = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto num_states = torch::zeros({ nResults }, torch::kFloat64);
|
||||
Timer train_timer, test_timer;
|
||||
int item = 0;
|
||||
for (auto seed : randomSeeds) {
|
||||
if (!quiet)
|
||||
std::cout << "(" << seed << ") doing Fold: " << flush;
|
||||
Fold* fold;
|
||||
if (stratified)
|
||||
fold = new StratifiedKFold(nfolds, y, seed);
|
||||
else
|
||||
fold = new KFold(nfolds, y.size(0), seed);
|
||||
for (int nfold = 0; nfold < nfolds; nfold++) {
|
||||
auto clf = Models::instance()->create(model);
|
||||
setModelVersion(clf->getVersion());
|
||||
auto valid = clf->getValidHyperparameters();
|
||||
hyperparameters.check(valid, fileName);
|
||||
clf->setHyperparameters(hyperparameters.get(fileName));
|
||||
// Split train - test dataset
|
||||
train_timer.start();
|
||||
auto [train, test] = fold->getFold(nfold);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
auto X_train = X.index({ "...", train_t });
|
||||
auto y_train = y.index({ train_t });
|
||||
auto X_test = X.index({ "...", test_t });
|
||||
auto y_test = y.index({ test_t });
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "a");
|
||||
// Train model
|
||||
clf->fit(X_train, y_train, features, className, states);
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
|
||||
nodes[item] = clf->getNumberOfNodes();
|
||||
edges[item] = clf->getNumberOfEdges();
|
||||
num_states[item] = clf->getNumberOfStates();
|
||||
train_time[item] = train_timer.getDuration();
|
||||
// Score train
|
||||
auto accuracy_train_value = clf->score(X_train, y_train);
|
||||
// Test model
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
|
||||
test_timer.start();
|
||||
auto accuracy_test_value = clf->score(X_test, y_test);
|
||||
test_time[item] = test_timer.getDuration();
|
||||
accuracy_train[item] = accuracy_train_value;
|
||||
accuracy_test[item] = accuracy_test_value;
|
||||
if (!quiet)
|
||||
std::cout << "\b\b\b, " << flush;
|
||||
// Store results and times in std::vector
|
||||
result.addScoreTrain(accuracy_train_value);
|
||||
result.addScoreTest(accuracy_test_value);
|
||||
result.addTimeTrain(train_time[item].item<double>());
|
||||
result.addTimeTest(test_time[item].item<double>());
|
||||
item++;
|
||||
}
|
||||
if (!quiet)
|
||||
std::cout << "end. " << flush;
|
||||
delete fold;
|
||||
}
|
||||
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
|
||||
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
|
||||
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
|
||||
result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
|
||||
result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
|
||||
result.setDataset(fileName);
|
||||
addResult(result);
|
||||
}
|
||||
}
|
@@ -1,103 +0,0 @@
|
||||
#ifndef EXPERIMENT_H
|
||||
#define EXPERIMENT_H
|
||||
#include <torch/torch.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <string>
|
||||
#include "Folding.h"
|
||||
#include "BaseClassifier.h"
|
||||
#include "HyperParameters.h"
|
||||
#include "TAN.h"
|
||||
#include "KDB.h"
|
||||
#include "AODE.h"
|
||||
#include "Timer.h"
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
class Result {
|
||||
private:
|
||||
std::string dataset, model_version;
|
||||
json hyperparameters;
|
||||
int samples{ 0 }, features{ 0 }, classes{ 0 };
|
||||
double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
|
||||
float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
|
||||
std::vector<double> scores_train, scores_test, times_train, times_test;
|
||||
public:
|
||||
Result() = default;
|
||||
Result& setDataset(const std::string& dataset) { this->dataset = dataset; return *this; }
|
||||
Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
|
||||
Result& setSamples(int samples) { this->samples = samples; return *this; }
|
||||
Result& setFeatures(int features) { this->features = features; return *this; }
|
||||
Result& setClasses(int classes) { this->classes = classes; return *this; }
|
||||
Result& setScoreTrain(double score) { this->score_train = score; return *this; }
|
||||
Result& setScoreTest(double score) { this->score_test = score; return *this; }
|
||||
Result& setScoreTrainStd(double score_std) { this->score_train_std = score_std; return *this; }
|
||||
Result& setScoreTestStd(double score_std) { this->score_test_std = score_std; return *this; }
|
||||
Result& setTrainTime(double train_time) { this->train_time = train_time; return *this; }
|
||||
Result& setTrainTimeStd(double train_time_std) { this->train_time_std = train_time_std; return *this; }
|
||||
Result& setTestTime(double test_time) { this->test_time = test_time; return *this; }
|
||||
Result& setTestTimeStd(double test_time_std) { this->test_time_std = test_time_std; return *this; }
|
||||
Result& setNodes(float nodes) { this->nodes = nodes; return *this; }
|
||||
Result& setLeaves(float leaves) { this->leaves = leaves; return *this; }
|
||||
Result& setDepth(float depth) { this->depth = depth; return *this; }
|
||||
Result& addScoreTrain(double score) { scores_train.push_back(score); return *this; }
|
||||
Result& addScoreTest(double score) { scores_test.push_back(score); return *this; }
|
||||
Result& addTimeTrain(double time) { times_train.push_back(time); return *this; }
|
||||
Result& addTimeTest(double time) { times_test.push_back(time); return *this; }
|
||||
const float get_score_train() const { return score_train; }
|
||||
float get_score_test() { return score_test; }
|
||||
const std::string& getDataset() const { return dataset; }
|
||||
const json& getHyperparameters() const { return hyperparameters; }
|
||||
const int getSamples() const { return samples; }
|
||||
const int getFeatures() const { return features; }
|
||||
const int getClasses() const { return classes; }
|
||||
const double getScoreTrain() const { return score_train; }
|
||||
const double getScoreTest() const { return score_test; }
|
||||
const double getScoreTrainStd() const { return score_train_std; }
|
||||
const double getScoreTestStd() const { return score_test_std; }
|
||||
const double getTrainTime() const { return train_time; }
|
||||
const double getTrainTimeStd() const { return train_time_std; }
|
||||
const double getTestTime() const { return test_time; }
|
||||
const double getTestTimeStd() const { return test_time_std; }
|
||||
const float getNodes() const { return nodes; }
|
||||
const float getLeaves() const { return leaves; }
|
||||
const float getDepth() const { return depth; }
|
||||
const std::vector<double>& getScoresTrain() const { return scores_train; }
|
||||
const std::vector<double>& getScoresTest() const { return scores_test; }
|
||||
const std::vector<double>& getTimesTrain() const { return times_train; }
|
||||
const std::vector<double>& getTimesTest() const { return times_test; }
|
||||
};
|
||||
class Experiment {
|
||||
public:
|
||||
Experiment() = default;
|
||||
Experiment& setTitle(const std::string& title) { this->title = title; return *this; }
|
||||
Experiment& setModel(const std::string& model) { this->model = model; return *this; }
|
||||
Experiment& setPlatform(const std::string& platform) { this->platform = platform; return *this; }
|
||||
Experiment& setScoreName(const std::string& score_name) { this->score_name = score_name; return *this; }
|
||||
Experiment& setModelVersion(const std::string& model_version) { this->model_version = model_version; return *this; }
|
||||
Experiment& setLanguage(const std::string& language) { this->language = language; return *this; }
|
||||
Experiment& setLanguageVersion(const std::string& language_version) { this->language_version = language_version; return *this; }
|
||||
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
|
||||
Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }
|
||||
Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; }
|
||||
Experiment& addResult(Result result) { results.push_back(result); return *this; }
|
||||
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
|
||||
Experiment& setDuration(float duration) { this->duration = duration; return *this; }
|
||||
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
|
||||
std::string get_file_name();
|
||||
void save(const std::string& path);
|
||||
void cross_validation(const std::string& fileName, bool quiet);
|
||||
void go(std::vector<std::string> filesToProcess, bool quiet);
|
||||
void show();
|
||||
void report();
|
||||
private:
|
||||
std::string title, model, platform, score_name, model_version, language_version, language;
|
||||
bool discretized{ false }, stratified{ false };
|
||||
std::vector<Result> results;
|
||||
std::vector<int> randomSeeds;
|
||||
HyperParameters hyperparameters;
|
||||
int nfolds{ 0 };
|
||||
float duration{ 0 };
|
||||
json build_json();
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,104 +0,0 @@
|
||||
#include "Folding.h"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
namespace platform {
|
||||
// Stores k, n and seed, then seeds both the member engine and the C rand()
// generator. A seed of -1 means "not reproducible": the engine is seeded
// from std::random_device and srand from the current time.
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
    const bool unseeded = (seed == -1);
    std::random_device rd;
    random_seed = std::default_random_engine(unseeded ? rd() : seed);
    std::srand(unseeded ? time(0) : seed);
}
|
||||
// Builds the permutation of 0..n-1 that getFold() slices into folds.
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(n)
{
    // 0, 1, ..., n - 1, then shuffled with the engine seeded by Fold.
    std::iota(indices.begin(), indices.end(), 0);
    std::shuffle(indices.begin(), indices.end(), random_seed);
}
|
||||
std::pair<std::vector<int>, std::vector<int>> KFold::getFold(int nFold)
|
||||
{
|
||||
if (nFold >= k || nFold < 0) {
|
||||
throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")");
|
||||
}
|
||||
int nTest = n / k;
|
||||
auto train = std::vector<int>();
|
||||
auto test = std::vector<int>();
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (i >= nTest * nFold && i < nTest * (nFold + 1)) {
|
||||
test.push_back(indices[i]);
|
||||
} else {
|
||||
train.push_back(indices[i]);
|
||||
}
|
||||
}
|
||||
return { train, test };
|
||||
}
|
||||
// Builds the stratified folds from a label tensor.
// The labels are read through data_ptr<int>(), i.e. as a flat int buffer of
// numel() elements.
// NOTE(review): this presumably requires a contiguous int32 tensor - confirm.
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
    n = y.numel();
    const int* labels = y.data_ptr<int>();
    this->y.assign(labels, labels + n);
    build();
}
|
||||
// Builds the stratified folds from a plain vector of class labels.
StratifiedKFold::StratifiedKFold(int k, const std::vector<int>& y, int seed)
    : Fold(k, y.size(), seed)
{
    n = y.size();
    this->y = y;
    build();
}
|
||||
void StratifiedKFold::build()
|
||||
{
|
||||
stratified_indices = std::vector<std::vector<int>>(k);
|
||||
int fold_size = n / k;
|
||||
|
||||
// Compute class counts and indices
|
||||
auto class_indices = std::map<int, std::vector<int>>();
|
||||
std::vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
|
||||
for (auto i = 0; i < n; ++i) {
|
||||
class_counts[y[i]]++;
|
||||
class_indices[y[i]].push_back(i);
|
||||
}
|
||||
// Shuffle class indices
|
||||
for (auto& [cls, indices] : class_indices) {
|
||||
shuffle(indices.begin(), indices.end(), random_seed);
|
||||
}
|
||||
// Assign indices to folds
|
||||
for (auto label = 0; label < class_counts.size(); ++label) {
|
||||
auto num_samples_to_take = class_counts.at(label) / k;
|
||||
if (num_samples_to_take == 0) {
|
||||
std::cerr << "Warning! The number of samples in class " << label << " (" << class_counts.at(label)
|
||||
<< ") is less than the number of folds (" << k << ")." << std::endl;
|
||||
faulty = true;
|
||||
continue;
|
||||
}
|
||||
auto remainder_samples_to_take = class_counts[label] % k;
|
||||
for (auto fold = 0; fold < k; ++fold) {
|
||||
auto it = next(class_indices[label].begin(), num_samples_to_take);
|
||||
move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ##
|
||||
class_indices[label].erase(class_indices[label].begin(), it);
|
||||
}
|
||||
auto chosen = std::vector<bool>(k, false);
|
||||
while (remainder_samples_to_take > 0) {
|
||||
int fold = (rand() % static_cast<int>(k));
|
||||
if (chosen.at(fold)) {
|
||||
continue;
|
||||
}
|
||||
chosen[fold] = true;
|
||||
auto it = next(class_indices[label].begin(), 1);
|
||||
stratified_indices[fold].push_back(*class_indices[label].begin());
|
||||
class_indices[label].erase(class_indices[label].begin(), it);
|
||||
remainder_samples_to_take--;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::pair<std::vector<int>, std::vector<int>> StratifiedKFold::getFold(int nFold)
|
||||
{
|
||||
if (nFold >= k || nFold < 0) {
|
||||
throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")");
|
||||
}
|
||||
std::vector<int> test_indices = stratified_indices[nFold];
|
||||
std::vector<int> train_indices;
|
||||
for (int i = 0; i < k; ++i) {
|
||||
if (i == nFold) continue;
|
||||
train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end());
|
||||
}
|
||||
return { train_indices, test_indices };
|
||||
}
|
||||
}
|
@@ -1,39 +0,0 @@
|
||||
#ifndef FOLDING_H
|
||||
#define FOLDING_H
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
namespace platform {
|
||||
// Abstract base for k-fold splitters. Holds the fold count, the sample
// count, the seed and the random engine used by the derived classes.
class Fold {
protected:
    int k;    // number of folds
    int n;    // number of samples
    int seed; // seed as passed to the constructor; the default is -1
    std::default_random_engine random_seed;
public:
    Fold(int k, int n, int seed = -1);
    // Returns the {train, test} sample indices of fold nFold.
    virtual std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) = 0;
    virtual ~Fold() = default;
    // const-qualified so it can be called through a const Fold.
    int getNumberOfFolds() const { return k; }
};
|
||||
class KFold : public Fold {
|
||||
private:
|
||||
std::vector<int> indices;
|
||||
public:
|
||||
KFold(int k, int n, int seed = -1);
|
||||
std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) override;
|
||||
};
|
||||
class StratifiedKFold : public Fold {
|
||||
private:
|
||||
std::vector<int> y;
|
||||
std::vector<std::vector<int>> stratified_indices;
|
||||
void build();
|
||||
bool faulty = false; // Only true if the number of samples of any class is less than the number of folds.
|
||||
public:
|
||||
StratifiedKFold(int k, const std::vector<int>& y, int seed = -1);
|
||||
StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
|
||||
std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) override;
|
||||
bool isFaulty() { return faulty; }
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,75 +0,0 @@
|
||||
#include "GridData.h"
|
||||
#include <fstream>
|
||||
|
||||
namespace platform {
|
||||
// Loads the grid definition file and copies each top-level key/value pair
// into `grid`.
// Throws std::invalid_argument when the file cannot be opened.
GridData::GridData(const std::string& fileName)
{
    std::ifstream resultData(fileName);
    if (!resultData.is_open()) {
        throw std::invalid_argument("Unable to open input file. [" + fileName + "]");
    }
    const json grid_file = json::parse(resultData);
    for (const auto& item : grid_file.items()) {
        grid[item.key()] = item.value();
    }
}
|
||||
int GridData::computeNumCombinations(const json& line)
|
||||
{
|
||||
int numCombinations = 1;
|
||||
for (const auto& item : line.items()) {
|
||||
numCombinations *= item.value().size();
|
||||
}
|
||||
return numCombinations;
|
||||
}
|
||||
int GridData::getNumCombinations(const std::string& dataset)
|
||||
{
|
||||
int numCombinations = 0;
|
||||
auto selected = decide_dataset(dataset);
|
||||
for (const auto& line : grid.at(selected)) {
|
||||
numCombinations += computeNumCombinations(line);
|
||||
}
|
||||
return numCombinations;
|
||||
}
|
||||
// Recursively expands the key -> values entries in [index, last) into every
// possible assignment. Each complete assignment is appended to `output`.
// Returns currentCombination as received.
json GridData::generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination)
{
    // Past the last key: the combination is complete.
    if (index == last) {
        output.push_back(currentCombination);
        return currentCombination;
    }
    const auto& name = index.key();
    const auto& candidates = index.value();
    // Position of the next key; it is the same for every candidate value.
    json::iterator following = index;
    ++following;
    for (const auto& candidate : candidates) {
        auto extended = currentCombination;
        extended[name] = candidate;
        generateCombinations(following, last, output, extended);
    }
    return currentCombination;
}
|
||||
std::vector<json> GridData::getGrid(const std::string& dataset)
|
||||
{
|
||||
auto selected = decide_dataset(dataset);
|
||||
auto result = std::vector<json>();
|
||||
for (json line : grid.at(selected)) {
|
||||
generateCombinations(line.begin(), line.end(), result, json({}));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
// Returns the stored grid entry for `dataset`, or the ALL_DATASETS entry
// when the dataset has none of its own.
json& GridData::getInputGrid(const std::string& dataset)
{
    return grid.at(decide_dataset(dataset));
}
|
||||
std::string GridData::decide_dataset(const std::string& dataset)
|
||||
{
|
||||
if (grid.find(dataset) != grid.end())
|
||||
return dataset;
|
||||
return ALL_DATASETS;
|
||||
}
|
||||
} /* namespace platform */
|
@@ -1,26 +0,0 @@
|
||||
#ifndef GRIDDATA_H
|
||||
#define GRIDDATA_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
const std::string ALL_DATASETS = "all";
|
||||
class GridData {
|
||||
public:
|
||||
explicit GridData(const std::string& fileName);
|
||||
~GridData() = default;
|
||||
std::vector<json> getGrid(const std::string& dataset = ALL_DATASETS);
|
||||
int getNumCombinations(const std::string& dataset = ALL_DATASETS);
|
||||
json& getInputGrid(const std::string& dataset = ALL_DATASETS);
|
||||
std::map<std::string, json>& getGridFile() { return grid; }
|
||||
private:
|
||||
std::string decide_dataset(const std::string& dataset);
|
||||
json generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination);
|
||||
int computeNumCombinations(const json& line);
|
||||
std::map<std::string, json> grid;
|
||||
};
|
||||
} /* namespace platform */
|
||||
#endif /* GRIDDATA_H */
|
@@ -1,599 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <torch/torch.h>
|
||||
#include "GridSearch.h"
|
||||
#include "Models.h"
|
||||
#include "Paths.h"
|
||||
#include "Folding.h"
|
||||
#include "Colors.h"
|
||||
|
||||
namespace platform {
|
||||
// Current local date rendered as YYYY-MM-DD.
std::string get_date()
{
    std::time_t stamp = std::time(nullptr);
    std::ostringstream text;
    text << std::put_time(std::localtime(&stamp), "%Y-%m-%d");
    return text.str();
}
|
||||
// Current local time of day rendered as HH:MM:SS.
std::string get_time()
{
    std::time_t stamp = std::time(nullptr);
    std::ostringstream text;
    text << std::put_time(std::localtime(&stamp), "%H:%M:%S");
    return text.str();
}
|
||||
// Initialises the `config` member from the given grid configuration.
GridSearch::GridSearch(struct ConfigGrid& config) : config(config) {}
|
||||
// Reads the grid output file of the configured model.
// Returns a default-constructed json when the file cannot be opened.
json GridSearch::getResults()
{
    std::ifstream file(Paths::grid_output(config.model));
    if (!file.is_open()) {
        return json();
    }
    return json::parse(file);
}
|
||||
vector<std::string> GridSearch::processDatasets(Datasets& datasets)
|
||||
{
|
||||
// Load datasets
|
||||
auto datasets_names = datasets.getNames();
|
||||
if (config.continue_from != NO_CONTINUE()) {
|
||||
// Continue previous execution:
|
||||
if (std::find(datasets_names.begin(), datasets_names.end(), config.continue_from) == datasets_names.end()) {
|
||||
throw std::invalid_argument("Dataset " + config.continue_from + " not found");
|
||||
}
|
||||
// Remove datasets already processed
|
||||
vector< string >::iterator it = datasets_names.begin();
|
||||
while (it != datasets_names.end()) {
|
||||
if (*it != config.continue_from) {
|
||||
it = datasets_names.erase(it);
|
||||
} else {
|
||||
if (config.only)
|
||||
++it;
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Exclude datasets
|
||||
for (const auto& name : config.excluded) {
|
||||
auto dataset = name.get<std::string>();
|
||||
auto it = std::find(datasets_names.begin(), datasets_names.end(), dataset);
|
||||
if (it == datasets_names.end()) {
|
||||
throw std::invalid_argument("Dataset " + dataset + " already excluded or doesn't exist!");
|
||||
}
|
||||
datasets_names.erase(it);
|
||||
}
|
||||
return datasets_names;
|
||||
}
|
||||
void showProgressComb(const int num, const int n_folds, const int total, const std::string& color)
|
||||
{
|
||||
int spaces = int(log(total) / log(10)) + 1;
|
||||
int magic = n_folds * 3 + 22 + 2 * spaces;
|
||||
std::string prefix = num == 1 ? "" : string(magic, '\b') + string(magic + 1, ' ') + string(magic + 1, '\b');
|
||||
std::cout << prefix << color << "(" << setw(spaces) << num << "/" << setw(spaces) << total << ") " << Colors::RESET() << flush;
|
||||
}
|
||||
void showProgressFold(int fold, const std::string& color, const std::string& phase)
|
||||
{
|
||||
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
|
||||
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
|
||||
}
|
||||
std::string getColor(bayesnet::status_t status)
|
||||
{
|
||||
switch (status) {
|
||||
case bayesnet::NORMAL:
|
||||
return Colors::GREEN();
|
||||
case bayesnet::WARNING:
|
||||
return Colors::YELLOW();
|
||||
case bayesnet::ERROR:
|
||||
return Colors::RED();
|
||||
default:
|
||||
return Colors::RESET();
|
||||
}
|
||||
}
|
||||
// Builds the task list: one {dataset, seed, fold} object per dataset, seed
// and outer fold. The list is shuffled with a fixed seed, then a ruler with
// one digit per task is printed.
//
// Change with respect to the previous implementation: the full combination
// grid is no longer expanded per dataset and seed only to be discarded. The
// grid entry is looked up once per dataset instead, so a grid file without
// an entry for the dataset still throws here.
json GridSearch::build_tasks_mpi()
{
    auto tasks = json::array();
    auto grid = GridData(Paths::grid_input(config.model));
    auto datasets = Datasets(false, Paths::datasets());
    auto datasets_names = processDatasets(datasets);
    for (const auto& dataset : datasets_names) {
        // Validation only: throws when the grid has neither a key for this
        // dataset nor the fallback key.
        grid.getInputGrid(dataset);
        for (const auto& seed : config.seeds) {
            for (int n_fold = 0; n_fold < config.n_folds; n_fold++) {
                json task = {
                    { "dataset", dataset },
                    { "seed", seed },
                    { "fold", n_fold}
                };
                tasks.push_back(task);
            }
        }
    }
    // It's important to shuffle the array so heavy datasets are spread across the Workers
    std::mt19937 g{ 271 }; // Use fixed seed to obtain the same shuffle
    std::shuffle(tasks.begin(), tasks.end(), g);
    std::cout << "Tasks size: " << tasks.size() << std::endl;
    // Ruler: last decimal digit of each task's ordinal between bars; the
    // second line is left open after its leading '|'.
    std::cout << "|";
    for (std::size_t i = 0; i < tasks.size(); ++i) {
        std::cout << (i + 1) % 10;
    }
    std::cout << "|" << std::endl << "|" << std::flush;
    return tasks;
}
|
||||
// Returns the half-open range [start, end) of task indices for `rank` when
// n_tasks are split over nprocs processes. The first n_tasks % nprocs ranks
// get one extra task; the last rank always ends at n_tasks.
std::pair<int, int> GridSearch::part_range_mpi(int n_tasks, int nprocs, int rank)
{
    const int extra = n_tasks % nprocs;
    const int base = n_tasks / nprocs;
    const bool gets_extra = rank < extra;
    const int assigned = gets_extra ? base + 1 : base;
    // Ranks past the "extra" group are shifted by the tasks that group took.
    const int start = (gets_extra ? 0 : extra) + rank * assigned;
    const int end = (rank == nprocs - 1) ? n_tasks : start + assigned;
    return { start, end };
}
|
||||
std::string get_color_rank(int rank)
|
||||
{
|
||||
auto colors = { Colors::RED(), Colors::GREEN(), Colors::BLUE(), Colors::MAGENTA(), Colors::CYAN() };
|
||||
return *(colors.begin() + rank % colors.size());
|
||||
}
|
||||
void GridSearch::process_task_mpi(struct ConfigMPI& config_mpi, json& task, Datasets& datasets, json& results)
|
||||
{
|
||||
// Process the task and store the result in the results json
|
||||
Timer timer;
|
||||
timer.start();
|
||||
auto grid = GridData(Paths::grid_input(config.model));
|
||||
auto dataset = task["dataset"].get<std::string>();
|
||||
auto seed = task["seed"].get<int>();
|
||||
auto n_fold = task["fold"].get<int>();
|
||||
// Generate the hyperparamters combinations
|
||||
auto combinations = grid.getGrid(dataset);
|
||||
auto [X, y] = datasets.getTensors(dataset);
|
||||
auto states = datasets.getStates(dataset);
|
||||
auto features = datasets.getFeatures(dataset);
|
||||
auto className = datasets.getClassName(dataset);
|
||||
//
|
||||
// Start working on task
|
||||
//
|
||||
Fold* fold;
|
||||
if (config.stratified)
|
||||
fold = new StratifiedKFold(config.n_folds, y, seed);
|
||||
else
|
||||
fold = new KFold(config.n_folds, y.size(0), seed);
|
||||
auto [train, test] = fold->getFold(n_fold);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
auto X_train = X.index({ "...", train_t });
|
||||
auto y_train = y.index({ train_t });
|
||||
auto X_test = X.index({ "...", test_t });
|
||||
auto y_test = y.index({ test_t });
|
||||
auto num = 0;
|
||||
double best_fold_score = 0.0;
|
||||
json best_fold_hyper;
|
||||
for (const auto& hyperparam_line : combinations) {
|
||||
auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line);
|
||||
Fold* nested_fold;
|
||||
if (config.stratified)
|
||||
nested_fold = new StratifiedKFold(config.nested, y_train, seed);
|
||||
else
|
||||
nested_fold = new KFold(config.nested, y_train.size(0), seed);
|
||||
double score = 0.0;
|
||||
for (int n_nested_fold = 0; n_nested_fold < config.nested; n_nested_fold++) {
|
||||
// Nested level fold
|
||||
auto [train_nested, test_nested] = nested_fold->getFold(n_nested_fold);
|
||||
auto train_nested_t = torch::tensor(train_nested);
|
||||
auto test_nested_t = torch::tensor(test_nested);
|
||||
auto X_nested_train = X_train.index({ "...", train_nested_t });
|
||||
auto y_nested_train = y_train.index({ train_nested_t });
|
||||
auto X_nested_test = X_train.index({ "...", test_nested_t });
|
||||
auto y_nested_test = y_train.index({ test_nested_t });
|
||||
// Build Classifier with selected hyperparameters
|
||||
auto clf = Models::instance()->create(config.model);
|
||||
auto valid = clf->getValidHyperparameters();
|
||||
hyperparameters.check(valid, dataset);
|
||||
clf->setHyperparameters(hyperparameters.get(dataset));
|
||||
// Train model
|
||||
clf->fit(X_nested_train, y_nested_train, features, className, states);
|
||||
// Test model
|
||||
score += clf->score(X_nested_test, y_nested_test);
|
||||
}
|
||||
delete nested_fold;
|
||||
score /= config.nested;
|
||||
if (score > best_fold_score) {
|
||||
best_fold_score = score;
|
||||
best_fold_hyper = hyperparam_line;
|
||||
}
|
||||
}
|
||||
delete fold;
|
||||
// Build Classifier with the best hyperparameters to obtain the best score
|
||||
auto hyperparameters = platform::HyperParameters(datasets.getNames(), best_fold_hyper);
|
||||
auto clf = Models::instance()->create(config.model);
|
||||
auto valid = clf->getValidHyperparameters();
|
||||
hyperparameters.check(valid, dataset);
|
||||
clf->setHyperparameters(best_fold_hyper);
|
||||
clf->fit(X_train, y_train, features, className, states);
|
||||
best_fold_score = clf->score(X_test, y_test);
|
||||
// Save results
|
||||
results[dataset][std::to_string(n_fold)]["score"] = best_fold_score;
|
||||
results[dataset][std::to_string(n_fold)]["hyperparameters"] = best_fold_hyper;
|
||||
results[dataset][std::to_string(n_fold)]["seed"] = seed;
|
||||
results[dataset][std::to_string(n_fold)]["duration"] = timer.getDuration();
|
||||
std::cout << get_color_rank(config_mpi.rank) << "*" << std::flush;
|
||||
}
|
||||
void GridSearch::go_mpi(struct ConfigMPI& config_mpi)
|
||||
{
|
||||
/*
|
||||
* Each task is a json object with the following structure:
|
||||
* {
|
||||
* "dataset": "dataset_name",
|
||||
* "seed": # of seed to use,
|
||||
* "model": "model_name",
|
||||
* "Fold": # of fold to process
|
||||
* }
|
||||
*
|
||||
* The overall process consists in these steps:
|
||||
* 1. Manager will broadcast the tasks to all the processes
|
||||
* 1.1 Broadcast the number of tasks
|
||||
* 1.2 Broadcast the length of the following string
|
||||
* 1.2 Broadcast the tasks as a char* string
|
||||
* 2. Workers will receive the tasks and start the process
|
||||
* 2.1 A method will tell each worker the range of tasks to process
|
||||
* 2.2 Each worker will process the tasks and generate the best score for each task
|
||||
* 3. Manager gather the scores from all the workers and find out the best hyperparameters for each dataset
|
||||
* 3.1 Obtain the maximum size of the results message of all the workers
|
||||
* 3.2 Gather all the results from the workers into the manager
|
||||
* 3.3 Compile the results from all the workers
|
||||
* 3.4 Filter the best hyperparameters for each dataset
|
||||
*/
|
||||
char* msg;
|
||||
int tasks_size;
|
||||
if (config_mpi.rank == config_mpi.manager) {
|
||||
timer.start();
|
||||
auto tasks = build_tasks_mpi();
|
||||
auto tasks_str = tasks.dump();
|
||||
tasks_size = tasks_str.size();
|
||||
msg = new char[tasks_size + 1];
|
||||
strcpy(msg, tasks_str.c_str());
|
||||
}
|
||||
//
|
||||
// 1. Manager will broadcast the tasks to all the processes
|
||||
//
|
||||
MPI_Bcast(&tasks_size, 1, MPI_INT, config_mpi.manager, MPI_COMM_WORLD);
|
||||
if (config_mpi.rank != config_mpi.manager) {
|
||||
msg = new char[tasks_size + 1];
|
||||
}
|
||||
MPI_Bcast(msg, tasks_size + 1, MPI_CHAR, config_mpi.manager, MPI_COMM_WORLD);
|
||||
json tasks = json::parse(msg);
|
||||
delete[] msg;
|
||||
//
|
||||
// 2. All Workers will receive the tasks and start the process
|
||||
//
|
||||
int num_tasks = tasks.size();
|
||||
// 2.1 A method will tell each worker the range of tasks to process
|
||||
auto [start, end] = part_range_mpi(num_tasks, config_mpi.n_procs, config_mpi.rank);
|
||||
// 2.2 Each worker will process the tasks and return the best scores obtained
|
||||
auto datasets = Datasets(config.discretize, Paths::datasets());
|
||||
json results;
|
||||
for (int i = start; i < end; ++i) {
|
||||
// Process task
|
||||
process_task_mpi(config_mpi, tasks[i], datasets, results);
|
||||
}
|
||||
int size = results.dump().size() + 1;
|
||||
int max_size = 0;
|
||||
//
|
||||
// 3. Manager gather the scores from all the workers and find out the best hyperparameters for each dataset
|
||||
//
|
||||
//3.1 Obtain the maximum size of the results message of all the workers
|
||||
MPI_Allreduce(&size, &max_size, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
|
||||
// Assign the memory to the message and initialize it to 0s
|
||||
char* total = NULL;
|
||||
msg = new char[max_size];
|
||||
strncpy(msg, results.dump().c_str(), size);
|
||||
if (config_mpi.rank == config_mpi.manager) {
|
||||
total = new char[max_size * config_mpi.n_procs];
|
||||
}
|
||||
// 3.2 Gather all the results from the workers into the manager
|
||||
MPI_Gather(msg, max_size, MPI_CHAR, total, max_size, MPI_CHAR, config_mpi.manager, MPI_COMM_WORLD);
|
||||
delete[] msg;
|
||||
if (config_mpi.rank == config_mpi.manager) {
|
||||
std::cout << Colors::RESET() << "|" << std::endl;
|
||||
json total_results;
|
||||
json best_results;
|
||||
// 3.3 Compile the results from all the workers
|
||||
for (int i = 0; i < config_mpi.n_procs; ++i) {
|
||||
json partial_results = json::parse(total + i * max_size);
|
||||
for (auto& [dataset, folds] : partial_results.items()) {
|
||||
for (auto& [fold, result] : folds.items()) {
|
||||
total_results[dataset][fold] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
delete[] total;
|
||||
// 3.4 Filter the best hyperparameters for each dataset
|
||||
auto grid = GridData(Paths::grid_input(config.model));
|
||||
for (auto& [dataset, folds] : total_results.items()) {
|
||||
double best_score = 0.0;
|
||||
double duration = 0.0;
|
||||
json best_hyper;
|
||||
for (auto& [fold, result] : folds.items()) {
|
||||
duration += result["duration"].get<double>();
|
||||
if (result["score"] > best_score) {
|
||||
best_score = result["score"];
|
||||
best_hyper = result["hyperparameters"];
|
||||
}
|
||||
}
|
||||
auto timer = Timer();
|
||||
json result = {
|
||||
{ "score", best_score },
|
||||
{ "hyperparameters", best_hyper },
|
||||
{ "date", get_date() + " " + get_time() },
|
||||
{ "grid", grid.getInputGrid(dataset) },
|
||||
{ "duration", timer.translate2String(duration) }
|
||||
};
|
||||
best_results[dataset] = result;
|
||||
}
|
||||
save(best_results);
|
||||
}
|
||||
}
|
||||
void GridSearch::go()
|
||||
{
|
||||
timer.start();
|
||||
auto grid_type = config.nested == 0 ? "Single" : "Nested";
|
||||
auto datasets = Datasets(config.discretize, Paths::datasets());
|
||||
auto datasets_names = processDatasets(datasets);
|
||||
json results = initializeResults();
|
||||
std::cout << "***************** Starting " << grid_type << " Gridsearch *****************" << std::endl;
|
||||
std::cout << "input file=" << Paths::grid_input(config.model) << std::endl;
|
||||
auto grid = GridData(Paths::grid_input(config.model));
|
||||
Timer timer_dataset;
|
||||
double bestScore = 0;
|
||||
json bestHyperparameters;
|
||||
for (const auto& dataset : datasets_names) {
|
||||
if (!config.quiet)
|
||||
std::cout << "- " << setw(20) << left << dataset << " " << right << flush;
|
||||
auto combinations = grid.getGrid(dataset);
|
||||
timer_dataset.start();
|
||||
if (config.nested == 0)
|
||||
// for dataset // for hyperparameters // for seed // for fold
|
||||
tie(bestScore, bestHyperparameters) = processFileSingle(dataset, datasets, combinations);
|
||||
else
|
||||
// for dataset // for seed // for fold // for hyperparameters // for nested fold
|
||||
tie(bestScore, bestHyperparameters) = processFileNested(dataset, datasets, combinations);
|
||||
if (!config.quiet) {
|
||||
std::cout << "end." << " Score: " << Colors::IBLUE() << setw(9) << setprecision(7) << fixed
|
||||
<< bestScore << Colors::BLUE() << " [" << bestHyperparameters.dump() << "]"
|
||||
<< Colors::RESET() << ::endl;
|
||||
}
|
||||
json result = {
|
||||
{ "score", bestScore },
|
||||
{ "hyperparameters", bestHyperparameters },
|
||||
{ "date", get_date() + " " + get_time() },
|
||||
{ "grid", grid.getInputGrid(dataset) },
|
||||
{ "duration", timer_dataset.getDurationString() }
|
||||
};
|
||||
results[dataset] = result;
|
||||
// Save partial results
|
||||
save(results);
|
||||
}
|
||||
// Save final results
|
||||
save(results);
|
||||
std::cout << "***************** Ending " << grid_type << " Gridsearch *******************" << std::endl;
|
||||
}
|
||||
pair<double, json> GridSearch::processFileSingle(std::string fileName, Datasets& datasets, vector<json>& combinations)
|
||||
{
|
||||
int num = 0;
|
||||
double bestScore = 0.0;
|
||||
json bestHyperparameters;
|
||||
auto totalComb = combinations.size();
|
||||
for (const auto& hyperparam_line : combinations) {
|
||||
if (!config.quiet)
|
||||
showProgressComb(++num, config.n_folds, totalComb, Colors::CYAN());
|
||||
auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line);
|
||||
// Get dataset
|
||||
auto [X, y] = datasets.getTensors(fileName);
|
||||
auto states = datasets.getStates(fileName);
|
||||
auto features = datasets.getFeatures(fileName);
|
||||
auto className = datasets.getClassName(fileName);
|
||||
double totalScore = 0.0;
|
||||
int numItems = 0;
|
||||
for (const auto& seed : config.seeds) {
|
||||
if (!config.quiet)
|
||||
std::cout << "(" << seed << ") doing Fold: " << flush;
|
||||
Fold* fold;
|
||||
if (config.stratified)
|
||||
fold = new StratifiedKFold(config.n_folds, y, seed);
|
||||
else
|
||||
fold = new KFold(config.n_folds, y.size(0), seed);
|
||||
for (int nfold = 0; nfold < config.n_folds; nfold++) {
|
||||
auto clf = Models::instance()->create(config.model);
|
||||
auto valid = clf->getValidHyperparameters();
|
||||
hyperparameters.check(valid, fileName);
|
||||
clf->setHyperparameters(hyperparameters.get(fileName));
|
||||
auto [train, test] = fold->getFold(nfold);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
auto X_train = X.index({ "...", train_t });
|
||||
auto y_train = y.index({ train_t });
|
||||
auto X_test = X.index({ "...", test_t });
|
||||
auto y_test = y.index({ test_t });
|
||||
// Train model
|
||||
if (!config.quiet)
|
||||
showProgressFold(nfold + 1, getColor(clf->getStatus()), "a");
|
||||
clf->fit(X_train, y_train, features, className, states);
|
||||
// Test model
|
||||
if (!config.quiet)
|
||||
showProgressFold(nfold + 1, getColor(clf->getStatus()), "b");
|
||||
totalScore += clf->score(X_test, y_test);
|
||||
numItems++;
|
||||
if (!config.quiet)
|
||||
std::cout << "\b\b\b, \b" << flush;
|
||||
}
|
||||
delete fold;
|
||||
}
|
||||
double score = numItems == 0 ? 0.0 : totalScore / numItems;
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
bestHyperparameters = hyperparam_line;
|
||||
}
|
||||
}
|
||||
return { bestScore, bestHyperparameters };
|
||||
}
|
||||
pair<double, json> GridSearch::processFileNested(std::string fileName, Datasets& datasets, vector<json>& combinations)
|
||||
{
|
||||
// Get dataset
|
||||
auto [X, y] = datasets.getTensors(fileName);
|
||||
auto states = datasets.getStates(fileName);
|
||||
auto features = datasets.getFeatures(fileName);
|
||||
auto className = datasets.getClassName(fileName);
|
||||
int spcs_combinations = int(log(combinations.size()) / log(10)) + 1;
|
||||
double goatScore = 0.0;
|
||||
json goatHyperparameters;
|
||||
// for dataset // for seed // for fold // for hyperparameters // for nested fold
|
||||
for (const auto& seed : config.seeds) {
|
||||
Fold* fold;
|
||||
if (config.stratified)
|
||||
fold = new StratifiedKFold(config.n_folds, y, seed);
|
||||
else
|
||||
fold = new KFold(config.n_folds, y.size(0), seed);
|
||||
double bestScore = 0.0;
|
||||
json bestHyperparameters;
|
||||
std::cout << "(" << seed << ") doing Fold: " << flush;
|
||||
for (int nfold = 0; nfold < config.n_folds; nfold++) {
|
||||
if (!config.quiet)
|
||||
std::cout << Colors::GREEN() << nfold + 1 << " " << flush;
|
||||
// First level fold
|
||||
auto [train, test] = fold->getFold(nfold);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
auto X_train = X.index({ "...", train_t });
|
||||
auto y_train = y.index({ train_t });
|
||||
auto X_test = X.index({ "...", test_t });
|
||||
auto y_test = y.index({ test_t });
|
||||
auto num = 0;
|
||||
json result_fold;
|
||||
double hypScore = 0.0;
|
||||
double bestHypScore = 0.0;
|
||||
json bestHypHyperparameters;
|
||||
for (const auto& hyperparam_line : combinations) {
|
||||
std::cout << "[" << setw(spcs_combinations) << ++num << "/" << setw(spcs_combinations)
|
||||
<< combinations.size() << "] " << std::flush;
|
||||
Fold* nested_fold;
|
||||
if (config.stratified)
|
||||
nested_fold = new StratifiedKFold(config.nested, y_train, seed);
|
||||
else
|
||||
nested_fold = new KFold(config.nested, y_train.size(0), seed);
|
||||
for (int n_nested_fold = 0; n_nested_fold < config.nested; n_nested_fold++) {
|
||||
// Nested level fold
|
||||
auto [train_nested, test_nested] = nested_fold->getFold(n_nested_fold);
|
||||
auto train_nested_t = torch::tensor(train_nested);
|
||||
auto test_nested_t = torch::tensor(test_nested);
|
||||
auto X_nexted_train = X_train.index({ "...", train_nested_t });
|
||||
auto y_nested_train = y_train.index({ train_nested_t });
|
||||
auto X_nested_test = X_train.index({ "...", test_nested_t });
|
||||
auto y_nested_test = y_train.index({ test_nested_t });
|
||||
// Build Classifier with selected hyperparameters
|
||||
auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line);
|
||||
auto clf = Models::instance()->create(config.model);
|
||||
auto valid = clf->getValidHyperparameters();
|
||||
hyperparameters.check(valid, fileName);
|
||||
clf->setHyperparameters(hyperparameters.get(fileName));
|
||||
// Train model
|
||||
if (!config.quiet)
|
||||
showProgressFold(n_nested_fold + 1, getColor(clf->getStatus()), "a");
|
||||
clf->fit(X_nexted_train, y_nested_train, features, className, states);
|
||||
// Test model
|
||||
if (!config.quiet)
|
||||
showProgressFold(n_nested_fold + 1, getColor(clf->getStatus()), "b");
|
||||
hypScore += clf->score(X_nested_test, y_nested_test);
|
||||
if (!config.quiet)
|
||||
std::cout << "\b\b\b, \b" << flush;
|
||||
}
|
||||
int magic = 3 * config.nested + 2 * spcs_combinations + 4;
|
||||
std::cout << string(magic, '\b') << string(magic, ' ') << string(magic, '\b') << flush;
|
||||
delete nested_fold;
|
||||
hypScore /= config.nested;
|
||||
if (hypScore > bestHypScore) {
|
||||
bestHypScore = hypScore;
|
||||
bestHypHyperparameters = hyperparam_line;
|
||||
}
|
||||
}
|
||||
// Build Classifier with selected hyperparameters
|
||||
auto clf = Models::instance()->create(config.model);
|
||||
clf->setHyperparameters(bestHypHyperparameters);
|
||||
// Train model
|
||||
if (!config.quiet)
|
||||
showProgressFold(nfold + 1, getColor(clf->getStatus()), "a");
|
||||
clf->fit(X_train, y_train, features, className, states);
|
||||
// Test model
|
||||
if (!config.quiet)
|
||||
showProgressFold(nfold + 1, getColor(clf->getStatus()), "b");
|
||||
double score = clf->score(X_test, y_test);
|
||||
if (!config.quiet)
|
||||
std::cout << string(2 * config.nested - 1, '\b') << "," << string(2 * config.nested, ' ') << string(2 * config.nested - 1, '\b') << flush;
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
bestHyperparameters = bestHypHyperparameters;
|
||||
}
|
||||
}
|
||||
if (bestScore > goatScore) {
|
||||
goatScore = bestScore;
|
||||
goatHyperparameters = bestHyperparameters;
|
||||
}
|
||||
delete fold;
|
||||
}
|
||||
return { goatScore, goatHyperparameters };
|
||||
}
|
||||
// Returns the results to start from.
//
// When config.continue_from differs from NO_CONTINUE(), the "results" entry of
// the existing grid output file for the model is loaded. If that file cannot
// be parsed, a message is written to std::cerr and an empty json is returned.
// Otherwise (no continuation requested, or the file cannot be opened) the
// returned json is empty.
json GridSearch::initializeResults()
{
    // Load previous results
    json results;
    if (config.continue_from != NO_CONTINUE()) {
        if (!config.quiet)
            std::cout << "* Loading previous results" << std::endl;
        try {
            std::ifstream file(Paths::grid_output(config.model));
            if (file.is_open()) {
                results = json::parse(file);
                results = results["results"];
            }
        }
        catch (const std::exception&) {
            std::cerr << "* There were no previous results" << std::endl;
            std::cerr << "* Initializing new results" << std::endl;
            results = json();
        }
    }
    return results;
}
|
||||
// Writes the run configuration together with the given results to the grid
// output file of the configured model, as json indented with 4 spaces.
void GridSearch::save(json& results)
{
    std::ofstream file(Paths::grid_output(config.model));
    json output;
    output["model"] = config.model;
    output["score"] = config.score;
    output["discretize"] = config.discretize;
    output["stratified"] = config.stratified;
    output["n_folds"] = config.n_folds;
    output["seeds"] = config.seeds;
    output["date"] = get_date() + " " + get_time();
    output["nested"] = config.nested;
    output["platform"] = config.platform;
    output["duration"] = timer.getDurationString(true);
    output["results"] = results;
    const int indent = 4;
    file << output.dump(indent);
}
|
||||
} /* namespace platform */
|
@@ -1,54 +0,0 @@
|
||||
#ifndef GRIDSEARCH_H
|
||||
#define GRIDSEARCH_H
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <mpi.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "Datasets.h"
|
||||
#include "HyperParameters.h"
|
||||
#include "GridData.h"
|
||||
#include "Timer.h"
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
struct ConfigGrid {
|
||||
std::string model;
|
||||
std::string score;
|
||||
std::string continue_from;
|
||||
std::string platform;
|
||||
bool quiet;
|
||||
bool only; // used with continue_from to only compute that dataset
|
||||
bool discretize;
|
||||
bool stratified;
|
||||
int nested;
|
||||
int n_folds;
|
||||
json excluded;
|
||||
std::vector<int> seeds;
|
||||
};
|
||||
// MPI layout of the current process.
// Defaults describe a single-process run so a default-constructed ConfigMPI
// never exposes indeterminate values.
struct ConfigMPI {
    int rank{ 0 }; // rank of this process
    int n_procs{ 1 }; // total number of processes
    int manager{ 0 }; // rank of the process that builds the tasks and gathers the results
};
|
||||
class GridSearch {
|
||||
public:
|
||||
explicit GridSearch(struct ConfigGrid& config);
|
||||
void go();
|
||||
void go_mpi(struct ConfigMPI& config_mpi);
|
||||
~GridSearch() = default;
|
||||
json getResults();
|
||||
static inline std::string NO_CONTINUE() { return "NO_CONTINUE"; }
|
||||
private:
|
||||
void save(json& results);
|
||||
json initializeResults();
|
||||
vector<std::string> processDatasets(Datasets& datasets);
|
||||
pair<double, json> processFileSingle(std::string fileName, Datasets& datasets, std::vector<json>& combinations);
|
||||
pair<double, json> processFileNested(std::string fileName, Datasets& datasets, std::vector<json>& combinations);
|
||||
struct ConfigGrid config;
|
||||
pair<int, int> part_range_mpi(int n_tasks, int nprocs, int rank);
|
||||
json build_tasks_mpi();
|
||||
void process_task_mpi(struct ConfigMPI& config_mpi, json& task, Datasets& datasets, json& results);
|
||||
Timer timer; // used to measure the time of the whole process
|
||||
};
|
||||
} /* namespace platform */
|
||||
#endif /* GRIDSEARCH_H */
|
@@ -1,55 +0,0 @@
|
||||
#include "HyperParameters.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
namespace platform {
|
||||
// Assigns the same set of hyperparameters to every dataset in the list.
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_)
{
    for (auto name = datasets.begin(); name != datasets.end(); ++name) {
        hyperparameters[*name] = hyperparameters_;
    }
}
|
||||
// https://www.techiedelight.com/implode-a-vector-of-strings-into-a-comma-separated-string-in-cpp/
|
||||
// Joins the strings into a single string, placing delim between consecutive
// elements only (no trailing delimiter). An empty vector yields an empty string.
std::string join(std::vector<std::string> const& strings, std::string delim)
{
    std::string joined;
    bool first = true;
    for (const auto& item : strings) {
        if (!first) {
            joined += delim;
        }
        joined += item;
        first = false;
    }
    return joined;
}
|
||||
// Loads per-dataset hyperparameters from a json file.
// Throws std::runtime_error when the file cannot be opened. Datasets missing
// from the file get empty hyperparameters and a warning on std::cerr.
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file)
{
    std::ifstream file(hyperparameters_file);
    const bool opened = file.is_open();
    if (!opened) {
        throw std::runtime_error("File " + hyperparameters_file + " not found");
    }
    // Parsing fails with an exception if the content is not valid json
    json input_hyperparameters = json::parse(file);
    for (const auto& dataset : datasets) {
        if (input_hyperparameters.contains(dataset)) {
            hyperparameters[dataset] = input_hyperparameters[dataset]["hyperparameters"].get<json>();
        } else {
            std::cerr << "*Warning: Dataset " << dataset << " not found in hyperparameters file" << " assuming default hyperparameters" << std::endl;
            hyperparameters[dataset] = json({});
        }
    }
}
|
||||
void HyperParameters::check(const std::vector<std::string>& valid, const std::string& fileName)
|
||||
{
|
||||
json result = hyperparameters.at(fileName);
|
||||
for (const auto& item : result.items()) {
|
||||
if (find(valid.begin(), valid.end(), item.key()) == valid.end()) {
|
||||
throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid. Passed Hyperparameters are: "
|
||||
+ result.dump(4) + "\n Valid hyperparameters are: {" + join(valid, ",") + "}");
|
||||
}
|
||||
}
|
||||
}
|
||||
// Returns a copy of the hyperparameters stored for fileName.
// The map lookup throws std::out_of_range when fileName is unknown.
json HyperParameters::get(const std::string& fileName)
{
    const json& stored = hyperparameters.at(fileName);
    return stored;
}
|
||||
} /* namespace platform */
|
@@ -1,23 +0,0 @@
|
||||
#ifndef HYPERPARAMETERS_H
|
||||
#define HYPERPARAMETERS_H
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
class HyperParameters {
|
||||
public:
|
||||
HyperParameters() = default;
|
||||
explicit HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_);
|
||||
explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file);
|
||||
~HyperParameters() = default;
|
||||
bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); }
|
||||
void check(const std::vector<std::string>& valid, const std::string& fileName);
|
||||
json get(const std::string& fileName);
|
||||
private:
|
||||
std::map<std::string, json> hyperparameters;
|
||||
};
|
||||
} /* namespace platform */
|
||||
#endif /* HYPERPARAMETERS_H */
|
@@ -1,213 +0,0 @@
|
||||
#include "ManageResults.h"
|
||||
#include "CommandParser.h"
|
||||
#include <filesystem>
|
||||
#include <tuple>
|
||||
#include "Colors.h"
|
||||
#include "CLocale.h"
|
||||
#include "Paths.h"
|
||||
#include "ReportConsole.h"
|
||||
#include "ReportExcel.h"
|
||||
|
||||
namespace platform {
|
||||
|
||||
ManageResults::ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare) :
|
||||
numFiles{ numFiles }, complete{ complete }, partial{ partial }, compare{ compare }, results(Results(Paths::results(), model, score, complete, partial))
|
||||
{
|
||||
indexList = true;
|
||||
openExcel = false;
|
||||
workbook = NULL;
|
||||
if (numFiles == 0) {
|
||||
this->numFiles = results.size();
|
||||
}
|
||||
}
|
||||
void ManageResults::doMenu()
|
||||
{
|
||||
if (results.empty()) {
|
||||
std::cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl;
|
||||
return;
|
||||
}
|
||||
results.sortDate();
|
||||
list();
|
||||
menu();
|
||||
if (openExcel) {
|
||||
workbook_close(workbook);
|
||||
}
|
||||
std::cout << Colors::RESET() << "Done!" << std::endl;
|
||||
}
|
||||
void ManageResults::list()
|
||||
{
|
||||
auto temp = ConfigLocale();
|
||||
std::string suffix = numFiles != results.size() ? " of " + std::to_string(results.size()) : "";
|
||||
std::stringstream oss;
|
||||
oss << "Results on screen: " << numFiles << suffix;
|
||||
std::cout << Colors::GREEN() << oss.str() << std::endl;
|
||||
std::cout << std::string(oss.str().size(), '-') << std::endl;
|
||||
if (complete) {
|
||||
std::cout << Colors::MAGENTA() << "Only listing complete results" << std::endl;
|
||||
}
|
||||
if (partial) {
|
||||
std::cout << Colors::MAGENTA() << "Only listing partial results" << std::endl;
|
||||
}
|
||||
auto i = 0;
|
||||
int maxModel = results.maxModelSize();
|
||||
std::cout << Colors::GREEN() << " # Date " << std::setw(maxModel) << std::left << "Model" << " Score Name Score C/P Duration Title" << std::endl;
|
||||
std::cout << "=== ========== " << std::string(maxModel, '=') << " =========== =========== === ========= =============================================================" << std::endl;
|
||||
bool odd = true;
|
||||
for (auto& result : results) {
|
||||
auto color = odd ? Colors::BLUE() : Colors::CYAN();
|
||||
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
|
||||
std::cout << result.to_string(maxModel) << std::endl;
|
||||
if (i == numFiles) {
|
||||
break;
|
||||
}
|
||||
odd = !odd;
|
||||
}
|
||||
}
|
||||
bool ManageResults::confirmAction(const std::string& intent, const std::string& fileName) const
|
||||
{
|
||||
std::string color;
|
||||
if (intent == "delete") {
|
||||
color = Colors::RED();
|
||||
} else {
|
||||
color = Colors::YELLOW();
|
||||
}
|
||||
std::string line;
|
||||
bool finished = false;
|
||||
while (!finished) {
|
||||
std::cout << color << "Really want to " << intent << " " << fileName << "? (y/n): ";
|
||||
getline(std::cin, line);
|
||||
finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0] == 'n'));
|
||||
}
|
||||
if (tolower(line[0]) == 'y') {
|
||||
return true;
|
||||
}
|
||||
std::cout << "Not done!" << std::endl;
|
||||
return false;
|
||||
}
|
||||
void ManageResults::report(const int index, const bool excelReport)
|
||||
{
|
||||
std::cout << Colors::YELLOW() << "Reporting " << results.at(index).getFilename() << std::endl;
|
||||
auto data = results.at(index).load();
|
||||
if (excelReport) {
|
||||
ReportExcel reporter(data, compare, workbook);
|
||||
reporter.show();
|
||||
openExcel = true;
|
||||
workbook = reporter.getWorkbook();
|
||||
std::cout << "Adding sheet to " << Paths::excel() + Paths::excelResults() << std::endl;
|
||||
} else {
|
||||
ReportConsole reporter(data, compare);
|
||||
reporter.show();
|
||||
}
|
||||
}
|
||||
void ManageResults::showIndex(const int index, const int idx)
|
||||
{
|
||||
// Show a dataset result inside a report
|
||||
auto data = results.at(index).load();
|
||||
std::cout << Colors::YELLOW() << "Showing " << results.at(index).getFilename() << std::endl;
|
||||
ReportConsole reporter(data, compare, idx);
|
||||
reporter.show();
|
||||
}
|
||||
void ManageResults::sortList()
|
||||
{
|
||||
std::cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): ";
|
||||
std::string line;
|
||||
char option;
|
||||
getline(std::cin, line);
|
||||
if (line.size() == 0)
|
||||
return;
|
||||
if (line.size() > 1) {
|
||||
std::cout << "Invalid option" << std::endl;
|
||||
return;
|
||||
}
|
||||
option = line[0];
|
||||
switch (option) {
|
||||
case 'd':
|
||||
results.sortDate();
|
||||
break;
|
||||
case 's':
|
||||
results.sortScore();
|
||||
break;
|
||||
case 'u':
|
||||
results.sortDuration();
|
||||
break;
|
||||
case 'm':
|
||||
results.sortModel();
|
||||
break;
|
||||
default:
|
||||
std::cout << "Invalid option" << std::endl;
|
||||
}
|
||||
}
|
||||
void ManageResults::menu()
|
||||
{
|
||||
char option;
|
||||
int index, subIndex;
|
||||
bool finished = false;
|
||||
std::string filename;
|
||||
// tuple<Option, digit, requires value>
|
||||
std::vector<std::tuple<std::string, char, bool>> mainOptions = {
|
||||
{"quit", 'q', false},
|
||||
{"list", 'l', false},
|
||||
{"delete", 'd', true},
|
||||
{"hide", 'h', true},
|
||||
{"sort", 's', false},
|
||||
{"report", 'r', true},
|
||||
{"excel", 'e', true}
|
||||
};
|
||||
std::vector<std::tuple<std::string, char, bool>> listOptions = {
|
||||
{"report", 'r', true},
|
||||
{"list", 'l', false},
|
||||
{"quit", 'q', false}
|
||||
};
|
||||
auto parser = CommandParser();
|
||||
while (!finished) {
|
||||
if (indexList) {
|
||||
std::tie(option, index) = parser.parse(Colors::GREEN(), mainOptions, 'r', numFiles - 1);
|
||||
} else {
|
||||
std::tie(option, subIndex) = parser.parse(Colors::MAGENTA(), listOptions, 'r', results.at(index).load()["results"].size() - 1);
|
||||
}
|
||||
switch (option) {
|
||||
case 'q':
|
||||
finished = true;
|
||||
break;
|
||||
case 'l':
|
||||
list();
|
||||
indexList = true;
|
||||
break;
|
||||
case 'd':
|
||||
filename = results.at(index).getFilename();
|
||||
if (!confirmAction("delete", filename))
|
||||
break;
|
||||
std::cout << "Deleting " << filename << std::endl;
|
||||
results.deleteResult(index);
|
||||
std::cout << "File: " + filename + " deleted!" << std::endl;
|
||||
list();
|
||||
break;
|
||||
case 'h':
|
||||
filename = results.at(index).getFilename();
|
||||
if (!confirmAction("hide", filename))
|
||||
break;
|
||||
filename = results.at(index).getFilename();
|
||||
std::cout << "Hiding " << filename << std::endl;
|
||||
results.hideResult(index, Paths::hiddenResults());
|
||||
std::cout << "File: " + filename + " hidden! (moved to " << Paths::hiddenResults() << ")" << std::endl;
|
||||
list();
|
||||
break;
|
||||
case 's':
|
||||
sortList();
|
||||
list();
|
||||
break;
|
||||
case 'r':
|
||||
if (indexList) {
|
||||
report(index, false);
|
||||
indexList = false;
|
||||
} else {
|
||||
showIndex(index, subIndex);
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
report(index, true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} /* namespace platform */
|
@@ -1,31 +0,0 @@
|
||||
#ifndef MANAGE_RESULTS_H
|
||||
#define MANAGE_RESULTS_H
|
||||
#include "Results.h"
|
||||
#include "xlsxwriter.h"
|
||||
|
||||
namespace platform {
|
||||
class ManageResults {
|
||||
public:
|
||||
ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare);
|
||||
~ManageResults() = default;
|
||||
void doMenu();
|
||||
private:
|
||||
void list();
|
||||
bool confirmAction(const std::string& intent, const std::string& fileName) const;
|
||||
void report(const int index, const bool excelReport);
|
||||
void showIndex(const int index, const int idx);
|
||||
void sortList();
|
||||
void menu();
|
||||
int numFiles;
|
||||
bool indexList;
|
||||
bool openExcel;
|
||||
bool complete;
|
||||
bool partial;
|
||||
bool compare;
|
||||
Results results;
|
||||
lxw_workbook* workbook;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif /* MANAGE_RESULTS_H */
|
@@ -1,52 +0,0 @@
|
||||
#include "Models.h"
|
||||
namespace platform {
|
||||
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
|
||||
// Singleton instance, created on the first call to Models::instance()
Models* Models::factory = nullptr;
|
||||
// Returns the singleton, creating it on first use.
Models* Models::instance()
{
    if (factory != nullptr) {
        return factory;
    }
    factory = new Models();
    return factory;
}
|
||||
void Models::registerFactoryFunction(const std::string& name,
|
||||
function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
|
||||
{
|
||||
// register the class factory function
|
||||
functionRegistry[name] = classFactoryFunction;
|
||||
}
|
||||
// Builds a classifier with the factory function registered under name.
// Returns nullptr when the name is not registered or the factory yields nullptr.
shared_ptr<bayesnet::BaseClassifier> Models::create(const std::string& name)
{
    const auto entry = functionRegistry.find(name);
    if (entry == functionRegistry.end()) {
        return nullptr;
    }
    bayesnet::BaseClassifier* created = entry->second();
    if (created == nullptr) {
        return nullptr;
    }
    // Take ownership of the raw pointer; the unique_ptr converts to the shared_ptr return type
    return unique_ptr<bayesnet::BaseClassifier>(created);
}
|
||||
std::vector<std::string> Models::getNames()
|
||||
{
|
||||
std::vector<std::string> names;
|
||||
transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names),
|
||||
[](const pair<std::string, function<bayesnet::BaseClassifier* (void)>>& pair) { return pair.first; });
|
||||
return names;
|
||||
}
|
||||
std::string Models::tostring()
|
||||
{
|
||||
std::string result = "";
|
||||
for (const auto& pair : functionRegistry) {
|
||||
result += pair.first + ", ";
|
||||
}
|
||||
return "{" + result.substr(0, result.size() - 2) + "}";
|
||||
}
|
||||
/// Constructing a Registrar registers classFactoryFunction under `name` in the
/// Models singleton.
Registrar::Registrar(const std::string& name, function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
    Models* registry = Models::instance();
    registry->registerFactoryFunction(name, classFactoryFunction);
}
|
||||
}
|
@@ -1,41 +0,0 @@
|
||||
#ifndef MODELS_H
|
||||
#define MODELS_H
|
||||
#include <map>
|
||||
#include "BaseClassifier.h"
|
||||
#include "AODE.h"
|
||||
#include "TAN.h"
|
||||
#include "KDB.h"
|
||||
#include "SPODE.h"
|
||||
#include "TANLd.h"
|
||||
#include "KDBLd.h"
|
||||
#include "SPODELd.h"
|
||||
#include "AODELd.h"
|
||||
#include "BoostAODE.h"
|
||||
#include "STree.h"
|
||||
#include "ODTE.h"
|
||||
#include "SVC.h"
|
||||
#include "RandomForest.h"
|
||||
namespace platform {
|
||||
class Models {
|
||||
private:
|
||||
map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
|
||||
static Models* factory; //singleton
|
||||
Models() {};
|
||||
public:
|
||||
Models(Models&) = delete;
|
||||
void operator=(const Models&) = delete;
|
||||
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
|
||||
static Models* instance();
|
||||
shared_ptr<bayesnet::BaseClassifier> create(const std::string& name);
|
||||
void registerFactoryFunction(const std::string& name,
|
||||
function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
|
||||
std::vector<string> getNames();
|
||||
std::string tostring();
|
||||
|
||||
};
|
||||
class Registrar {
|
||||
public:
|
||||
Registrar(const std::string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,39 +0,0 @@
|
||||
#ifndef PATHS_H
|
||||
#define PATHS_H
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include "DotEnv.h"
|
||||
namespace platform {
|
||||
class Paths {
|
||||
public:
|
||||
static std::string results() { return "results/"; }
|
||||
static std::string hiddenResults() { return "hidden_results/"; }
|
||||
static std::string excel() { return "excel/"; }
|
||||
static std::string grid() { return "grid/"; }
|
||||
static std::string datasets()
|
||||
{
|
||||
auto env = platform::DotEnv();
|
||||
return env.get("source_data");
|
||||
}
|
||||
static void createPath(const std::string& path)
|
||||
{
|
||||
// Create directory if it does not exist
|
||||
try {
|
||||
std::filesystem::create_directory(path);
|
||||
}
|
||||
catch (std::exception& e) {
|
||||
throw std::runtime_error("Could not create directory " + path);
|
||||
}
|
||||
}
|
||||
static std::string excelResults() { return "some_results.xlsx"; }
|
||||
static std::string grid_input(const std::string& model)
|
||||
{
|
||||
return grid() + "grid_" + model + "_input.json";
|
||||
}
|
||||
static std::string grid_output(const std::string& model)
|
||||
{
|
||||
return grid() + "grid_" + model + "_output.json";
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,113 +0,0 @@
|
||||
#include <sstream>
|
||||
#include <locale>
|
||||
#include "Datasets.h"
|
||||
#include "ReportBase.h"
|
||||
#include "DotEnv.h"
|
||||
|
||||
namespace platform {
|
||||
/// Stores the report data and the compare flag, fixes the margin at 0.1 and
/// fills the symbol -> meaning table used by the summaries.
/// The initializer list follows the member declaration order
/// (data, margin, compare), which is the order the members are built in.
ReportBase::ReportBase(json data_, bool compare) : data(data_), margin(0.1), compare(compare)
{
    std::stringstream oss;
    oss << "Better than ZeroR + " << std::setprecision(1) << std::fixed << margin * 100 << "%";
    meaning = {
        {Symbols::equal_best, "Equal to best"},
        {Symbols::better_best, "Better than best"},
        {Symbols::cross, "Less than or equal to ZeroR"},
        {Symbols::upward_arrow, oss.str()}
    };
}
|
||||
std::string ReportBase::fromVector(const std::string& key)
|
||||
{
|
||||
std::stringstream oss;
|
||||
std::string sep = "";
|
||||
oss << "[";
|
||||
for (auto& item : data[key]) {
|
||||
oss << sep << item.get<double>();
|
||||
sep = ", ";
|
||||
}
|
||||
oss << "]";
|
||||
return oss.str();
|
||||
}
|
||||
std::string ReportBase::fVector(const std::string& title, const json& data, const int width, const int precision)
|
||||
{
|
||||
std::stringstream oss;
|
||||
std::string sep = "";
|
||||
oss << title << "[";
|
||||
for (const auto& item : data) {
|
||||
oss << sep << fixed << setw(width) << std::setprecision(precision) << item.get<double>();
|
||||
sep = ", ";
|
||||
}
|
||||
oss << "]";
|
||||
return oss.str();
|
||||
}
|
||||
void ReportBase::show()
|
||||
{
|
||||
header();
|
||||
body();
|
||||
}
|
||||
std::string ReportBase::compareResult(const std::string& dataset, double result)
|
||||
{
|
||||
std::string status = " ";
|
||||
if (compare) {
|
||||
double best = bestResult(dataset, data["model"].get<std::string>());
|
||||
if (result == best) {
|
||||
status = Symbols::equal_best;
|
||||
} else if (result > best) {
|
||||
status = Symbols::better_best;
|
||||
}
|
||||
} else {
|
||||
if (data["score_name"].get<std::string>() == "accuracy") {
|
||||
auto dt = Datasets(false, Paths::datasets());
|
||||
dt.loadDataset(dataset);
|
||||
auto numClasses = dt.getNClasses(dataset);
|
||||
if (numClasses == 2) {
|
||||
std::vector<int> distribution = dt.getClassesCounts(dataset);
|
||||
double nSamples = dt.getNSamples(dataset);
|
||||
std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
|
||||
double mark = *maxValue / nSamples * (1 + margin);
|
||||
if (mark > 1) {
|
||||
mark = 0.9995;
|
||||
}
|
||||
status = result < mark ? Symbols::cross : result > mark ? Symbols::upward_arrow : "=";
|
||||
}
|
||||
}
|
||||
}
|
||||
if (status != " ") {
|
||||
auto item = summary.find(status);
|
||||
if (item != summary.end()) {
|
||||
summary[status]++;
|
||||
} else {
|
||||
summary[status] = 1;
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
double ReportBase::bestResult(const std::string& dataset, const std::string& model)
|
||||
{
|
||||
double value = 0.0;
|
||||
if (bestResults.size() == 0) {
|
||||
// try to load the best results
|
||||
std::string score = data["score_name"];
|
||||
replace(score.begin(), score.end(), '_', '-');
|
||||
std::string fileName = "best_results_" + score + "_" + model + ".json";
|
||||
ifstream resultData(Paths::results() + "/" + fileName);
|
||||
if (resultData.is_open()) {
|
||||
bestResults = json::parse(resultData);
|
||||
} else {
|
||||
existBestFile = false;
|
||||
}
|
||||
}
|
||||
try {
|
||||
value = bestResults.at(dataset).at(0);
|
||||
}
|
||||
catch (exception) {
|
||||
value = 1.0;
|
||||
|
||||
}
|
||||
return value;
|
||||
}
|
||||
bool ReportBase::getExistBestFile()
|
||||
{
|
||||
return existBestFile;
|
||||
}
|
||||
}
|
@@ -1,36 +0,0 @@
|
||||
#ifndef REPORTBASE_H
|
||||
#define REPORTBASE_H
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "Paths.h"
|
||||
#include "Symbols.h"
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
using json = nlohmann::json;
|
||||
namespace platform {
|
||||
|
||||
class ReportBase {
|
||||
public:
|
||||
explicit ReportBase(json data_, bool compare);
|
||||
virtual ~ReportBase() = default;
|
||||
void show();
|
||||
protected:
|
||||
json data;
|
||||
std::string fromVector(const std::string& key);
|
||||
std::string fVector(const std::string& title, const json& data, const int width, const int precision);
|
||||
bool getExistBestFile();
|
||||
virtual void header() = 0;
|
||||
virtual void body() = 0;
|
||||
virtual void showSummary() = 0;
|
||||
std::string compareResult(const std::string& dataset, double result);
|
||||
std::map<std::string, int> summary;
|
||||
double margin;
|
||||
std::map<std::string, std::string> meaning;
|
||||
bool compare;
|
||||
private:
|
||||
double bestResult(const std::string& dataset, const std::string& model);
|
||||
json bestResults;
|
||||
bool existBestFile = true;
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -1,114 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <locale>
|
||||
#include "ReportConsole.h"
|
||||
#include "BestScore.h"
|
||||
#include "CLocale.h"
|
||||
|
||||
namespace platform {
|
||||
std::string ReportConsole::headerLine(const std::string& text, int utf = 0)
|
||||
{
|
||||
int n = MAXL - text.length() - 3;
|
||||
n = n < 0 ? 0 : n;
|
||||
return "* " + text + std::string(n + utf, ' ') + "*\n";
|
||||
}
|
||||
|
||||
void ReportConsole::header()
|
||||
{
|
||||
std::stringstream oss;
|
||||
std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
|
||||
std::cout << headerLine(
|
||||
"Report " + data["model"].get<std::string>() + " ver. " + data["version"].get<std::string>()
|
||||
+ " with " + std::to_string(data["folds"].get<int>()) + " Folds cross validation and " + std::to_string(data["seeds"].size())
|
||||
+ " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>()
|
||||
);
|
||||
std::cout << headerLine(data["title"].get<std::string>());
|
||||
std::cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
|
||||
oss << "Execution took " << std::setprecision(2) << std::fixed << data["duration"].get<float>()
|
||||
<< " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<std::string>();
|
||||
std::cout << headerLine(oss.str());
|
||||
std::cout << headerLine("Score is " + data["score_name"].get<std::string>());
|
||||
std::cout << std::string(MAXL, '*') << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
void ReportConsole::body()
|
||||
{
|
||||
auto tmp = ConfigLocale();
|
||||
int maxHyper = 15;
|
||||
int maxDataset = 7;
|
||||
for (const auto& r : data["results"]) {
|
||||
maxHyper = std::max(maxHyper, (int)r["hyperparameters"].dump().size());
|
||||
maxDataset = std::max(maxDataset, (int)r["dataset"].get<std::string>().size());
|
||||
|
||||
}
|
||||
std::cout << Colors::GREEN() << " # " << std::setw(maxDataset) << std::left << "Dataset" << " Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << std::endl;
|
||||
std::cout << "=== " << std::string(maxDataset, '=') << " ====== ===== === ========= ========= ========= =============== =================== " << std::string(maxHyper, '=') << std::endl;
|
||||
json lastResult;
|
||||
double totalScore = 0.0;
|
||||
bool odd = true;
|
||||
int index = 0;
|
||||
for (const auto& r : data["results"]) {
|
||||
if (selectedIndex != -1 && index != selectedIndex) {
|
||||
index++;
|
||||
continue;
|
||||
}
|
||||
auto color = odd ? Colors::CYAN() : Colors::BLUE();
|
||||
std::cout << color;
|
||||
std::cout << std::setw(3) << std::right << index++ << " ";
|
||||
std::cout << std::setw(maxDataset) << std::left << r["dataset"].get<std::string>() << " ";
|
||||
std::cout << std::setw(6) << std::right << r["samples"].get<int>() << " ";
|
||||
std::cout << std::setw(5) << std::right << r["features"].get<int>() << " ";
|
||||
std::cout << std::setw(3) << std::right << r["classes"].get<int>() << " ";
|
||||
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["nodes"].get<float>() << " ";
|
||||
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["leaves"].get<float>() << " ";
|
||||
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["depth"].get<float>() << " ";
|
||||
std::cout << std::setw(8) << std::right << std::setprecision(6) << std::fixed << r["score"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["score_std"].get<double>();
|
||||
const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
|
||||
std::cout << status;
|
||||
std::cout << std::setw(12) << std::right << std::setprecision(6) << std::fixed << r["time"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["time_std"].get<double>() << " ";
|
||||
std::cout << r["hyperparameters"].dump();
|
||||
std::cout << std::endl;
|
||||
std::cout << std::flush;
|
||||
lastResult = r;
|
||||
totalScore += r["score"].get<double>();
|
||||
odd = !odd;
|
||||
}
|
||||
if (data["results"].size() == 1 || selectedIndex != -1) {
|
||||
std::cout << std::string(MAXL, '*') << std::endl;
|
||||
std::cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
|
||||
std::cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
|
||||
std::cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
|
||||
std::cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
|
||||
std::cout << std::string(MAXL, '*') << std::endl;
|
||||
} else {
|
||||
footer(totalScore);
|
||||
}
|
||||
}
|
||||
void ReportConsole::showSummary()
|
||||
{
|
||||
for (const auto& item : summary) {
|
||||
std::stringstream oss;
|
||||
oss << std::setw(3) << std::left << item.first;
|
||||
oss << std::setw(3) << std::right << item.second << " ";
|
||||
oss << std::left << meaning.at(item.first);
|
||||
std::cout << headerLine(oss.str(), 2);
|
||||
}
|
||||
}
|
||||
|
||||
void ReportConsole::footer(double totalScore)
|
||||
{
|
||||
std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
|
||||
showSummary();
|
||||
auto score = data["score_name"].get<std::string>();
|
||||
auto best = BestScore::getScore(score);
|
||||
if (best.first != "") {
|
||||
std::stringstream oss;
|
||||
oss << score << " compared to " << best.first << " .: " << totalScore / best.second;
|
||||
std::cout << headerLine(oss.str());
|
||||
}
|
||||
if (!getExistBestFile() && compare) {
|
||||
std::cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
|
||||
}
|
||||
std::cout << std::string(MAXL, '*') << std::endl << Colors::RESET();
|
||||
}
|
||||
}
|
@@ -1,22 +0,0 @@
|
||||
#ifndef REPORTCONSOLE_H
|
||||
#define REPORTCONSOLE_H
|
||||
#include <string>
|
||||
#include "ReportBase.h"
|
||||
#include "Colors.h"
|
||||
|
||||
namespace platform {
|
||||
const int MAXL = 133;
|
||||
class ReportConsole : public ReportBase {
|
||||
public:
|
||||
explicit ReportConsole(json data_, bool compare = false, int index = -1) : ReportBase(data_, compare), selectedIndex(index) {};
|
||||
virtual ~ReportConsole() = default;
|
||||
private:
|
||||
int selectedIndex;
|
||||
std::string headerLine(const std::string& text, int utf);
|
||||
void header() override;
|
||||
void body() override;
|
||||
void footer(double totalScore);
|
||||
void showSummary() override;
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -1,180 +0,0 @@
|
||||
#include <sstream>
|
||||
#include <locale>
|
||||
#include "ReportExcel.h"
|
||||
#include "BestScore.h"
|
||||
|
||||
|
||||
namespace platform {
|
||||
|
||||
/// Forwards the report data to ReportBase and the workbook/worksheet handles
/// to ExcelFile, then prepares the file via createFile().
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet)
    : ReportBase(data_, compare)
    , ExcelFile(workbook, worksheet)
{
    this->createFile();
}
|
||||
|
||||
void ReportExcel::formatColumns()
|
||||
{
|
||||
worksheet_freeze_panes(worksheet, 6, 1);
|
||||
std::vector<int> columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 };
|
||||
for (int i = 0; i < columns_sizes.size(); ++i) {
|
||||
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
|
||||
}
|
||||
}
|
||||
void ReportExcel::createWorksheet()
|
||||
{
|
||||
const std::string name = data["model"].get<std::string>();
|
||||
std::string suffix = "";
|
||||
std::string efectiveName;
|
||||
int num = 1;
|
||||
// Create a sheet with the name of the model
|
||||
while (true) {
|
||||
efectiveName = name + suffix;
|
||||
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
|
||||
suffix = std::to_string(++num);
|
||||
} else {
|
||||
worksheet = workbook_add_worksheet(workbook, efectiveName.c_str());
|
||||
break;
|
||||
}
|
||||
if (num > 100) {
|
||||
throw std::invalid_argument("Couldn't create sheet " + efectiveName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ReportExcel::createFile()
|
||||
{
|
||||
if (workbook == NULL) {
|
||||
workbook = workbook_new((Paths::excel() + Paths::excelResults()).c_str());
|
||||
}
|
||||
if (worksheet == NULL) {
|
||||
createWorksheet();
|
||||
}
|
||||
setProperties(data["title"].get<std::string>());
|
||||
createFormats();
|
||||
formatColumns();
|
||||
}
|
||||
|
||||
void ReportExcel::closeFile()
|
||||
{
|
||||
workbook_close(workbook);
|
||||
}
|
||||
|
||||
void ReportExcel::header()
|
||||
{
|
||||
std::locale mylocale(std::cout.getloc(), new separated);
|
||||
std::locale::global(mylocale);
|
||||
std::cout.imbue(mylocale);
|
||||
std::stringstream oss;
|
||||
std::string message = data["model"].get<std::string>() + " ver. " + data["version"].get<std::string>() + " " +
|
||||
data["language"].get<std::string>() + " ver. " + data["language_version"].get<std::string>() +
|
||||
" with " + std::to_string(data["folds"].get<int>()) + " Folds cross validation and " + std::to_string(data["seeds"].size()) +
|
||||
" random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>();
|
||||
worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
|
||||
worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<std::string>().c_str(), styles["headerRest"]);
|
||||
worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<std::string>()).c_str(), styles["headerRest"]);
|
||||
worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]);
|
||||
oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() << " s";
|
||||
worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
|
||||
oss.str("");
|
||||
oss.clear();
|
||||
oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() / 3600 << " h";
|
||||
worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]);
|
||||
worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]);
|
||||
worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<std::string>().c_str(), styles["headerRest"]);
|
||||
worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]);
|
||||
oss.str("");
|
||||
oss.clear();
|
||||
oss << "Stratified: " << (data["stratified"].get<bool>() ? "True" : "False");
|
||||
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
|
||||
oss.str("");
|
||||
oss.clear();
|
||||
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False");
|
||||
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
|
||||
}
|
||||
|
||||
void ReportExcel::body()
|
||||
{
|
||||
auto head = std::vector<std::string>(
|
||||
{ "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time",
|
||||
"Time Std.", "Hyperparameters" });
|
||||
int col = 0;
|
||||
for (const auto& item : head) {
|
||||
writeString(5, col++, item, "bodyHeader");
|
||||
}
|
||||
row = 6;
|
||||
col = 0;
|
||||
int hypSize = 22;
|
||||
json lastResult;
|
||||
double totalScore = 0.0;
|
||||
std::string hyperparameters;
|
||||
for (const auto& r : data["results"]) {
|
||||
writeString(row, col, r["dataset"].get<std::string>(), "text");
|
||||
writeInt(row, col + 1, r["samples"].get<int>(), "ints");
|
||||
writeInt(row, col + 2, r["features"].get<int>(), "ints");
|
||||
writeInt(row, col + 3, r["classes"].get<int>(), "ints");
|
||||
writeDouble(row, col + 4, r["nodes"].get<float>(), "floats");
|
||||
writeDouble(row, col + 5, r["leaves"].get<float>(), "floats");
|
||||
writeDouble(row, col + 6, r["depth"].get<double>(), "floats");
|
||||
writeDouble(row, col + 7, r["score"].get<double>(), "result");
|
||||
writeDouble(row, col + 8, r["score_std"].get<double>(), "result");
|
||||
const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
|
||||
writeString(row, col + 9, status, "textCentered");
|
||||
writeDouble(row, col + 10, r["time"].get<double>(), "time");
|
||||
writeDouble(row, col + 11, r["time_std"].get<double>(), "time");
|
||||
hyperparameters = r["hyperparameters"].dump();
|
||||
if (hyperparameters.size() > hypSize) {
|
||||
hypSize = hyperparameters.size();
|
||||
}
|
||||
writeString(row, col + 12, hyperparameters, "text");
|
||||
lastResult = r;
|
||||
totalScore += r["score"].get<double>();
|
||||
row++;
|
||||
}
|
||||
// Set the right column width of hyperparameters with the maximum length
|
||||
worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL);
|
||||
// Show totals if only one dataset is present in the result
|
||||
if (data["results"].size() == 1) {
|
||||
for (const std::string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
|
||||
row++;
|
||||
col = 1;
|
||||
writeString(row, col, group, "text");
|
||||
for (double item : lastResult[group]) {
|
||||
std::string style = group.find("scores") != std::string::npos ? "result" : "time";
|
||||
writeDouble(row, ++col, item, style);
|
||||
}
|
||||
}
|
||||
// Set with of columns to show those totals completely
|
||||
worksheet_set_column(worksheet, 1, 1, 12, NULL);
|
||||
for (int i = 2; i < 7; ++i) {
|
||||
// doesn't work with from col to col, so...
|
||||
worksheet_set_column(worksheet, i, i, 15, NULL);
|
||||
}
|
||||
} else {
|
||||
footer(totalScore, row);
|
||||
}
|
||||
}
|
||||
|
||||
void ReportExcel::showSummary()
|
||||
{
|
||||
for (const auto& item : summary) {
|
||||
worksheet_write_string(worksheet, row + 2, 1, item.first.c_str(), styles["summaryStyle"]);
|
||||
worksheet_write_number(worksheet, row + 2, 2, item.second, styles["summaryStyle"]);
|
||||
worksheet_merge_range(worksheet, row + 2, 3, row + 2, 5, meaning.at(item.first).c_str(), styles["summaryStyle"]);
|
||||
row += 1;
|
||||
}
|
||||
}
|
||||
|
||||
void ReportExcel::footer(double totalScore, int row)
|
||||
{
|
||||
showSummary();
|
||||
row += 4 + summary.size();
|
||||
auto score = data["score_name"].get<std::string>();
|
||||
auto best = BestScore::getScore(score);
|
||||
if (best.first != "") {
|
||||
worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + best.first + " .:").c_str(), efectiveStyle("text"));
|
||||
writeDouble(row, 6, totalScore / best.second, "result");
|
||||
}
|
||||
if (!getExistBestFile() && compare) {
|
||||
worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]);
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,24 +0,0 @@
|
||||
#ifndef REPORTEXCEL_H
|
||||
#define REPORTEXCEL_H
|
||||
#include<map>
|
||||
#include "xlsxwriter.h"
|
||||
#include "ReportBase.h"
|
||||
#include "ExcelFile.h"
|
||||
#include "Colors.h"
|
||||
namespace platform {
|
||||
class ReportExcel : public ReportBase, public ExcelFile {
|
||||
public:
|
||||
explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL);
|
||||
private:
|
||||
void formatColumns();
|
||||
void createFile();
|
||||
void createWorksheet();
|
||||
void closeFile();
|
||||
void header() override;
|
||||
void body() override;
|
||||
void showSummary() override;
|
||||
void footer(double totalScore, int row);
|
||||
|
||||
};
|
||||
};
|
||||
#endif // !REPORTEXCEL_H
|
@@ -1,58 +0,0 @@
|
||||
#include "Result.h"
|
||||
#include "BestScore.h"
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include "Colors.h"
|
||||
#include "DotEnv.h"
|
||||
#include "CLocale.h"
|
||||
|
||||
namespace platform {
|
||||
/// Loads <path>/<filename> and caches its date, title, duration, model and
/// score name.
/// `score` is the sum of the "score" fields of all results, divided by the
/// reference from BestScore::getScore when that reference has a name.
/// `complete` is true when the file holds more than one result.
Result::Result(const std::string& path, const std::string& filename)
    : path(path)
    , filename(filename)
{
    auto data = load();
    date = data["date"].get<std::string>();
    double total = 0;
    for (const auto& entry : data["results"]) {
        total += entry["score"].get<double>();
    }
    score = total;
    scoreName = data["score_name"].get<std::string>();
    const auto best = BestScore::getScore(scoreName);
    if (best.first != "") {
        score /= best.second;
    }
    title = data["title"].get<std::string>();
    duration = data["duration"].get<double>();
    model = data["model"].get<std::string>();
    complete = data["results"].size() > 1;
}
|
||||
|
||||
/// Parses <path>/<filename> as JSON.
/// @throws std::invalid_argument when the file cannot be opened.
json Result::load() const
{
    std::ifstream resultData(path + "/" + filename);
    if (!resultData.is_open()) {
        throw std::invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
    }
    json parsed = json::parse(resultData);
    return parsed;
}
|
||||
|
||||
std::string Result::to_string(int maxModel) const
|
||||
{
|
||||
auto tmp = ConfigLocale();
|
||||
std::stringstream oss;
|
||||
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
|
||||
std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
|
||||
oss << date << " ";
|
||||
oss << std::setw(maxModel) << std::left << model << " ";
|
||||
oss << std::setw(11) << std::left << scoreName << " ";
|
||||
oss << std::right << std::setw(11) << std::setprecision(7) << std::fixed << score << " ";
|
||||
auto completeString = isComplete() ? "C" : "P";
|
||||
oss << std::setw(1) << " " << completeString << " ";
|
||||
oss << std::setw(7) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " ";
|
||||
oss << std::setw(50) << std::left << title << " ";
|
||||
return oss.str();
|
||||
}
|
||||
}
|
@@ -1,35 +0,0 @@
|
||||
#ifndef RESULT_H
|
||||
#define RESULT_H
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
|
||||
class Result {
|
||||
public:
|
||||
Result(const std::string& path, const std::string& filename);
|
||||
json load() const;
|
||||
std::string to_string(int maxModel) const;
|
||||
std::string getFilename() const { return filename; };
|
||||
std::string getDate() const { return date; };
|
||||
double getScore() const { return score; };
|
||||
std::string getTitle() const { return title; };
|
||||
double getDuration() const { return duration; };
|
||||
std::string getModel() const { return model; };
|
||||
std::string getScoreName() const { return scoreName; };
|
||||
bool isComplete() const { return complete; };
|
||||
private:
|
||||
std::string path;
|
||||
std::string filename;
|
||||
std::string date;
|
||||
double score;
|
||||
std::string title;
|
||||
double duration;
|
||||
std::string model;
|
||||
std::string scoreName;
|
||||
bool complete;
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -1,74 +0,0 @@
|
||||
#include "Results.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace platform {
|
||||
/// Stores the filter settings, loads the matching result files and records
/// the longest model name among them (0 when nothing was loaded).
Results::Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial) :
    path(path), model(model), scoreName(score), complete(complete), partial(partial)
{
    load();
    maxModel = 0;
    for (const Result& entry : files) {
        const int length = entry.getModel().size();
        if (length > maxModel) {
            maxModel = length;
        }
    }
}
|
||||
void Results::load()
|
||||
{
|
||||
using std::filesystem::directory_iterator;
|
||||
for (const auto& file : directory_iterator(path)) {
|
||||
auto filename = file.path().filename().string();
|
||||
if (filename.find(".json") != std::string::npos && filename.find("results_") == 0) {
|
||||
auto result = Result(path, filename);
|
||||
bool addResult = true;
|
||||
if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName() || complete && !result.isComplete() || partial && result.isComplete())
|
||||
addResult = false;
|
||||
if (addResult)
|
||||
files.push_back(result);
|
||||
}
|
||||
}
|
||||
}
|
||||
void Results::hideResult(int index, const std::string& pathHidden)
|
||||
{
|
||||
auto filename = files.at(index).getFilename();
|
||||
rename((path + "/" + filename).c_str(), (pathHidden + "/" + filename).c_str());
|
||||
files.erase(files.begin() + index);
|
||||
}
|
||||
void Results::deleteResult(int index)
|
||||
{
|
||||
auto filename = files.at(index).getFilename();
|
||||
remove((path + "/" + filename).c_str());
|
||||
files.erase(files.begin() + index);
|
||||
}
|
||||
int Results::size() const
|
||||
{
|
||||
return files.size();
|
||||
}
|
||||
void Results::sortDate()
|
||||
{
|
||||
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
|
||||
return a.getDate() > b.getDate();
|
||||
});
|
||||
}
|
||||
void Results::sortModel()
|
||||
{
|
||||
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
|
||||
return a.getModel() > b.getModel();
|
||||
});
|
||||
}
|
||||
void Results::sortDuration()
|
||||
{
|
||||
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
|
||||
return a.getDuration() > b.getDuration();
|
||||
});
|
||||
}
|
||||
void Results::sortScore()
|
||||
{
|
||||
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
|
||||
return a.getScore() > b.getScore();
|
||||
});
|
||||
}
|
||||
bool Results::empty() const
|
||||
{
|
||||
return files.empty();
|
||||
}
|
||||
}
|
@@ -1,36 +0,0 @@
|
||||
#ifndef RESULTS_H
|
||||
#define RESULTS_H
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "Result.h"
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
class Results {
|
||||
public:
|
||||
Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial);
|
||||
void sortDate();
|
||||
void sortScore();
|
||||
void sortModel();
|
||||
void sortDuration();
|
||||
int maxModelSize() const { return maxModel; };
|
||||
void hideResult(int index, const std::string& pathHidden);
|
||||
void deleteResult(int index);
|
||||
int size() const;
|
||||
bool empty() const;
|
||||
std::vector<Result>::iterator begin() { return files.begin(); };
|
||||
std::vector<Result>::iterator end() { return files.end(); };
|
||||
Result& at(int index) { return files.at(index); };
|
||||
private:
|
||||
std::string path;
|
||||
std::string model;
|
||||
std::string scoreName;
|
||||
bool complete;
|
||||
bool partial;
|
||||
int maxModel;
|
||||
std::vector<Result> files;
|
||||
void load(); // Loads the list of results
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -1,252 +0,0 @@
|
||||
#include <sstream>
|
||||
#include "Statistics.h"
|
||||
#include "Colors.h"
|
||||
#include "Symbols.h"
|
||||
#include <boost/math/distributions/chi_squared.hpp>
|
||||
#include <boost/math/distributions/normal.hpp>
|
||||
#include "CLocale.h"
|
||||
|
||||
|
||||
namespace platform {
|
||||
|
||||
/// Stores the inputs and caches the number of models and datasets.
/// Inside the body `models` and `datasets` name the constructor parameters,
/// which hide the members of the same name.
Statistics::Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance, bool output)
    : models(models)
    , datasets(datasets)
    , data(data)
    , significance(significance)
    , output(output)
{
    nModels = models.size();
    nDatasets = datasets.size();
    auto temp = ConfigLocale();
}
|
||||
|
||||
void Statistics::fit()
|
||||
{
|
||||
if (nModels < 3 || nDatasets < 3) {
|
||||
std::cerr << "nModels: " << nModels << std::endl;
|
||||
std::cerr << "nDatasets: " << nDatasets << std::endl;
|
||||
throw std::runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
|
||||
}
|
||||
ranksModels.clear();
|
||||
computeRanks();
|
||||
// Set the control model as the one with the lowest average rank
|
||||
controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
|
||||
computeWTL();
|
||||
maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
|
||||
maxDatasetName = (*std::max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
|
||||
fitted = true;
|
||||
}
|
||||
// Ranks the (name, value) pairs from best (highest value, rank 1) to worst.
// The input vector is sorted in place by descending value. Entries whose values
// compare equal share the average of the positions they occupy.
// Returns a map name -> rank.
std::map<std::string, float> assignRanks(std::vector<std::pair<std::string, double>>& ranksOrder)
{
    std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair<std::string, double>& a, const std::pair<std::string, double>& b) {
        return a.second > b.second;
        });
    std::map<std::string, float> ranks;
    const std::size_t total = ranksOrder.size();
    std::size_t first = 0;
    while (first < total) {
        // [first, last) is the run of entries tied with ranksOrder[first]
        std::size_t last = first + 1;
        while (last < total && ranksOrder[last].second == ranksOrder[first].second) {
            ++last;
        }
        // The run occupies the 1-based positions first+1 .. last, whose mean is (first + 1 + last) / 2;
        // for a run of one element this is simply its own position.
        const float sharedRank = static_cast<float>(first + 1 + last) / 2.0F;
        for (std::size_t k = first; k < last; ++k) {
            ranks[ranksOrder[k].first] = sharedRank;
        }
        first = last;
    }
    return ranks;
}
|
||||
void Statistics::computeRanks()
|
||||
{
|
||||
std::map<std::string, float> ranksLine;
|
||||
for (const auto& dataset : datasets) {
|
||||
std::vector<std::pair<std::string, double>> ranksOrder;
|
||||
for (const auto& model : models) {
|
||||
double value = data[model].at(dataset).at(0).get<double>();
|
||||
ranksOrder.push_back({ model, value });
|
||||
}
|
||||
// Assign the ranks
|
||||
ranksLine = assignRanks(ranksOrder);
|
||||
// Store the ranks of the dataset
|
||||
ranksModels[dataset] = ranksLine;
|
||||
if (ranks.size() == 0) {
|
||||
ranks = ranksLine;
|
||||
} else {
|
||||
for (const auto& rank : ranksLine) {
|
||||
ranks[rank.first] += rank.second;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Average the ranks
|
||||
for (const auto& rank : ranks) {
|
||||
ranks[rank.first] /= nDatasets;
|
||||
}
|
||||
}
|
||||
void Statistics::computeWTL()
|
||||
{
|
||||
// Compute the WTL matrix
|
||||
for (int i = 0; i < nModels; ++i) {
|
||||
wtl[i] = { 0, 0, 0 };
|
||||
}
|
||||
json origin = data.begin().value();
|
||||
for (auto const& item : origin.items()) {
|
||||
auto controlModel = models.at(controlIdx);
|
||||
double controlValue = data[controlModel].at(item.key()).at(0).get<double>();
|
||||
for (int i = 0; i < nModels; ++i) {
|
||||
if (i == controlIdx) {
|
||||
continue;
|
||||
}
|
||||
double value = data[models[i]].at(item.key()).at(0).get<double>();
|
||||
if (value < controlValue) {
|
||||
wtl[i].win++;
|
||||
} else if (value == controlValue) {
|
||||
wtl[i].tie++;
|
||||
} else {
|
||||
wtl[i].loss++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Statistics::postHocHolmTest(bool friedmanResult)
|
||||
{
|
||||
if (!fitted) {
|
||||
fit();
|
||||
}
|
||||
std::stringstream oss;
|
||||
// Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
|
||||
// Post-hoc Holm test
|
||||
// Calculate the p-value for the models paired with the control model
|
||||
std::map<int, double> stats; // p-value of each model paired with the control model
|
||||
boost::math::normal dist(0.0, 1.0);
|
||||
double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
|
||||
for (int i = 0; i < nModels; i++) {
|
||||
if (i == controlIdx) {
|
||||
stats[i] = 0.0;
|
||||
continue;
|
||||
}
|
||||
double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
|
||||
double p_value = (long double)2 * (1 - cdf(dist, z));
|
||||
stats[i] = p_value;
|
||||
}
|
||||
// Sort the models by p-value
|
||||
std::vector<std::pair<int, double>> statsOrder;
|
||||
for (const auto& stat : stats) {
|
||||
statsOrder.push_back({ stat.first, stat.second });
|
||||
}
|
||||
std::sort(statsOrder.begin(), statsOrder.end(), [](const std::pair<int, double>& a, const std::pair<int, double>& b) {
|
||||
return a.second < b.second;
|
||||
});
|
||||
|
||||
// Holm adjustment
|
||||
for (int i = 0; i < statsOrder.size(); ++i) {
|
||||
auto item = statsOrder.at(i);
|
||||
double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
|
||||
double p_value = std::min((double)1.0, item.second * (nModels - i));
|
||||
p_value = std::max(before, p_value);
|
||||
statsOrder[i] = { item.first, p_value };
|
||||
}
|
||||
holmResult.model = models.at(controlIdx);
|
||||
auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
|
||||
oss << color;
|
||||
oss << " *************************************************************************************************************" << std::endl;
|
||||
oss << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl;
|
||||
oss << " Control model: " << models.at(controlIdx) << std::endl;
|
||||
oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value rank win tie loss Status" << std::endl;
|
||||
oss << " " << std::string(maxModelName, '=') << " ============ ========= === === ==== =============" << std::endl;
|
||||
// sort ranks from lowest to highest
|
||||
std::vector<std::pair<std::string, float>> ranksOrder;
|
||||
for (const auto& rank : ranks) {
|
||||
ranksOrder.push_back({ rank.first, rank.second });
|
||||
}
|
||||
std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair<std::string, float>& a, const std::pair<std::string, float>& b) {
|
||||
return a.second < b.second;
|
||||
});
|
||||
// Show the control model info.
|
||||
oss << " " << Colors::BLUE() << std::left << std::setw(maxModelName) << ranksOrder.at(0).first << " ";
|
||||
oss << std::setw(12) << " " << std::setprecision(7) << std::fixed << " " << ranksOrder.at(0).second << std::endl;
|
||||
for (const auto& item : ranksOrder) {
|
||||
auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
|
||||
double pvalue = 0.0;
|
||||
for (const auto& stat : statsOrder) {
|
||||
if (stat.first == idx) {
|
||||
pvalue = stat.second;
|
||||
}
|
||||
}
|
||||
holmResult.holmLines.push_back({ item.first, pvalue, item.second, wtl.at(idx), pvalue < significance });
|
||||
if (item.first == models.at(controlIdx)) {
|
||||
continue;
|
||||
}
|
||||
auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
|
||||
auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
|
||||
auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
|
||||
oss << " " << colorStatus << std::left << std::setw(maxModelName) << item.first << " ";
|
||||
oss << std::setprecision(6) << std::scientific << pvalue << std::setprecision(7) << std::fixed << " " << item.second;
|
||||
oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss;
|
||||
oss << " " << status << textStatus << std::endl;
|
||||
}
|
||||
oss << color << " *************************************************************************************************************" << std::endl;
|
||||
oss << Colors::RESET();
|
||||
if (output) {
|
||||
std::cout << oss.str();
|
||||
}
|
||||
}
|
||||
bool Statistics::friedmanTest()
|
||||
{
|
||||
if (!fitted) {
|
||||
fit();
|
||||
}
|
||||
std::stringstream oss;
|
||||
// Friedman test
|
||||
// Calculate the Friedman statistic
|
||||
oss << Colors::BLUE() << std::endl;
|
||||
oss << "***************************************************************************************************************" << std::endl;
|
||||
oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << std::endl;
|
||||
double degreesOfFreedom = nModels - 1.0;
|
||||
double sumSquared = 0;
|
||||
for (const auto& rank : ranks) {
|
||||
sumSquared += pow(rank.second, 2);
|
||||
}
|
||||
// Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
|
||||
double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
|
||||
// Calculate the critical value
|
||||
boost::math::chi_squared chiSquared(degreesOfFreedom);
|
||||
long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
|
||||
double criticalValue = quantile(chiSquared, 1 - significance);
|
||||
oss << "Friedman statistic: " << friedmanQ << std::endl;
|
||||
oss << "Critical χ2 Value for df=" << std::fixed << (int)degreesOfFreedom
|
||||
<< " and alpha=" << std::setprecision(2) << std::fixed << significance << ": " << std::setprecision(7) << std::scientific << criticalValue << std::endl;
|
||||
oss << "p-value: " << std::scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << std::setprecision(2) << std::fixed << significance << std::endl;
|
||||
bool result;
|
||||
if (p_value < significance) {
|
||||
oss << Colors::GREEN() << "The null hypothesis H0 is rejected." << std::endl;
|
||||
result = true;
|
||||
} else {
|
||||
oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << std::endl;
|
||||
result = false;
|
||||
}
|
||||
oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << std::endl;
|
||||
if (output) {
|
||||
std::cout << oss.str();
|
||||
}
|
||||
friedmanResult = { friedmanQ, criticalValue, p_value, result };
|
||||
return result;
|
||||
}
|
||||
// Gives access to the outcome stored by friedmanTest().
FriedmanResult& Statistics::getFriedmanResult()
{
    return this->friedmanResult;
}
|
||||
// Gives access to the lines stored by postHocHolmTest().
HolmResult& Statistics::getHolmResult()
{
    return this->holmResult;
}
|
||||
std::map<std::string, std::map<std::string, float>>& Statistics::getRanks()
|
||||
{
|
||||
return ranksModels;
|
||||
}
|
||||
} // namespace platform
|
@@ -1,63 +0,0 @@
|
||||
#ifndef STATISTICS_H
|
||||
#define STATISTICS_H
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace platform {
|
||||
// Win / tie / loss counters. Members start at zero so a default-constructed
// value is never read uninitialized; brace initialization {w, t, l} still works.
struct WTL {
    int win = 0;
    int tie = 0;
    int loss = 0;
};
|
||||
// Outcome of the Friedman test. Members are zero-initialized so the result
// can be read safely even before the test has been run.
struct FriedmanResult {
    double statistic = 0.0;
    double criticalValue = 0.0;
    long double pvalue = 0.0L;
    bool reject = false; // true when H0 was rejected
};
|
||||
struct HolmLine {
|
||||
std::string model;
|
||||
long double pvalue;
|
||||
double rank;
|
||||
WTL wtl;
|
||||
bool reject;
|
||||
};
|
||||
struct HolmResult {
|
||||
std::string model;
|
||||
std::vector<HolmLine> holmLines;
|
||||
};
|
||||
class Statistics {
|
||||
public:
|
||||
Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance = 0.05, bool output = true);
|
||||
bool friedmanTest();
|
||||
void postHocHolmTest(bool friedmanResult);
|
||||
FriedmanResult& getFriedmanResult();
|
||||
HolmResult& getHolmResult();
|
||||
std::map<std::string, std::map<std::string, float>>& getRanks();
|
||||
private:
|
||||
void fit();
|
||||
void computeRanks();
|
||||
void computeWTL();
|
||||
const std::vector<std::string>& models;
|
||||
const std::vector<std::string>& datasets;
|
||||
const json& data;
|
||||
double significance;
|
||||
bool output;
|
||||
bool fitted = false;
|
||||
int nModels = 0;
|
||||
int nDatasets = 0;
|
||||
int controlIdx = 0;
|
||||
std::map<int, WTL> wtl;
|
||||
std::map<std::string, float> ranks;
|
||||
int maxModelName = 0;
|
||||
int maxDatasetName = 0;
|
||||
FriedmanResult friedmanResult;
|
||||
HolmResult holmResult;
|
||||
std::map<std::string, std::map<std::string, float>> ranksModels;
|
||||
};
|
||||
}
|
||||
#endif // !STATISTICS_H
|
@@ -1,17 +0,0 @@
|
||||
#ifndef SYMBOLS_H
|
||||
#define SYMBOLS_H
|
||||
#include <string>
|
||||
namespace platform {
|
||||
// Unicode glyphs used in the reports. The two aliases at the end are initialized
// from earlier members, so the declaration order must be kept.
class Symbols {
public:
    static inline const std::string check_mark = "\u2714";
    static inline const std::string exclamation = "\u2757";
    static inline const std::string black_star = "\u2605";
    static inline const std::string cross = "\u2717";
    static inline const std::string upward_arrow = "\u27B6";
    static inline const std::string down_arrow = "\u27B4";
    static inline const std::string equal_best = check_mark;
    static inline const std::string better_best = black_star;
};
|
||||
}
|
||||
#endif // !SYMBOLS_H
|
@@ -1,43 +0,0 @@
|
||||
#ifndef TIMER_H
|
||||
#define TIMER_H
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
namespace platform {
|
||||
/// Stopwatch built on std::chrono::high_resolution_clock.
/// All durations are expressed in seconds as double.
class Timer {
private:
    using Clock = std::chrono::high_resolution_clock;
    Clock::time_point begin;
    Clock::time_point end;
public:
    Timer() = default;
    ~Timer() = default;
    void start() { begin = Clock::now(); }
    void stop() { end = Clock::now(); }
    /// Stops the timer (overwriting the end mark) and returns end - begin in seconds.
    double getDuration()
    {
        stop();
        return std::chrono::duration<double>(end - begin).count();
    }
    /// Seconds elapsed since start() without touching the end mark.
    double getLapse() const
    {
        return std::chrono::duration<double>(Clock::now() - begin).count();
    }
    /// Formatted duration; with lapse = true the timer is not stopped.
    std::string getDurationString(bool lapse = false)
    {
        return translate2String(lapse ? getLapse() : getDuration());
    }
    /// Formats seconds as "<value> h|m|s" with two decimals; hours above 3600 s,
    /// minutes above 60 s, seconds otherwise.
    std::string translate2String(double duration) const
    {
        double durationShow = duration;
        std::string durationUnit = "s";
        if (duration > 3600) {
            durationShow = duration / 3600;
            durationUnit = "h";
        } else if (duration > 60) {
            durationShow = duration / 60;
            durationUnit = "m";
        }
        std::ostringstream ss;
        // precision() member instead of std::setprecision: no dependency on <iomanip>
        ss.precision(2);
        ss << std::fixed << durationShow << " " << durationUnit;
        return ss.str();
    }
};
|
||||
} /* namespace platform */
|
||||
#endif /* TIMER_H */
|
@@ -1,30 +0,0 @@
|
||||
#ifndef UTILS_H
|
||||
#define UTILS_H
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
namespace platform {
|
||||
//static std::vector<std::string> split(const std::string& text, char delimiter);
|
||||
// Splits `text` at every `delimiter`. Empty fields between delimiters are kept;
// an empty input yields no tokens and a trailing delimiter adds no empty token.
static std::vector<std::string> split(const std::string& text, char delimiter)
{
    std::vector<std::string> tokens;
    std::string::size_type start = 0;
    while (start < text.size()) {
        const std::string::size_type cut = text.find(delimiter, start);
        if (cut == std::string::npos) {
            tokens.push_back(text.substr(start));
            break;
        }
        tokens.push_back(text.substr(start, cut - start));
        start = cut + 1;
    }
    return tokens;
}
|
||||
// Returns a copy of `str` without leading and trailing whitespace
// (whitespace as classified by std::isspace).
static std::string trim(const std::string& str)
{
    // std::isspace requires a value representable as unsigned char (or EOF);
    // taking the character as unsigned char avoids undefined behaviour on bytes >= 0x80.
    const auto notSpace = [](unsigned char ch) {
        return !std::isspace(ch);
    };
    std::string result = str;
    result.erase(result.begin(), std::find_if(result.begin(), result.end(), notSpace));
    result.erase(std::find_if(result.rbegin(), result.rend(), notSpace).base(), result.end());
    return result;
}
|
||||
}
|
||||
#endif
|
@@ -1,85 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <argparse/argparse.hpp>
|
||||
#include "Paths.h"
|
||||
#include "BestResults.h"
|
||||
#include "Colors.h"
|
||||
|
||||
|
||||
// Declares the command line options of b_best. argc/argv are accepted for
// interface compatibility but are not used here (parsing happens in main).
void manageArguments(argparse::ArgumentParser& program, int /*argc*/, char** /*argv*/)
{
    program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model (any for all models)");
    program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
    program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
    program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
    program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
    program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true);
    program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const std::string& value) {
        double k = 0.0;
        try {
            k = std::stod(value);
        }
        catch (...) {
            // The previous message talked about the "number of folds" (copy-paste from another tool)
            throw std::runtime_error("Significance level must be a decimal number");
        }
        if (k < 0.01 || k > 0.15) {
            throw std::runtime_error("Significance level has to be a number in [0.01, 0.15]");
        }
        return k;
        });
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
argparse::ArgumentParser program("b_sbest");
|
||||
manageArguments(program, argc, argv);
|
||||
std::string model, score;
|
||||
bool build, report, friedman, excel;
|
||||
double level;
|
||||
try {
|
||||
program.parse_args(argc, argv);
|
||||
model = program.get<std::string>("model");
|
||||
score = program.get<std::string>("score");
|
||||
build = program.get<bool>("build");
|
||||
report = program.get<bool>("report");
|
||||
friedman = program.get<bool>("friedman");
|
||||
excel = program.get<bool>("excel");
|
||||
level = program.get<double>("level");
|
||||
if (model == "" || score == "") {
|
||||
throw std::runtime_error("Model and score name must be supplied");
|
||||
}
|
||||
if (friedman && model != "any") {
|
||||
std::cerr << "Friedman test can only be used with all models" << std::endl;
|
||||
std::cerr << program;
|
||||
exit(1);
|
||||
}
|
||||
if (!report && !build) {
|
||||
std::cerr << "Either build, report or both, have to be selected to do anything!" << std::endl;
|
||||
std::cerr << program;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
catch (const std::exception& err) {
|
||||
std::cerr << err.what() << std::endl;
|
||||
std::cerr << program;
|
||||
exit(1);
|
||||
}
|
||||
// Generate report
|
||||
auto results = platform::BestResults(platform::Paths::results(), score, model, friedman, level);
|
||||
if (build) {
|
||||
if (model == "any") {
|
||||
results.buildAll();
|
||||
} else {
|
||||
std::string fileName = results.build();
|
||||
std::cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << std::endl;
|
||||
}
|
||||
}
|
||||
if (report) {
|
||||
if (model == "any") {
|
||||
results.reportAll(excel);
|
||||
} else {
|
||||
results.reportSingle(excel);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
@@ -1,227 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <argparse/argparse.hpp>
|
||||
#include <map>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <mpi.h>
|
||||
#include "DotEnv.h"
|
||||
#include "Models.h"
|
||||
#include "modelRegister.h"
|
||||
#include "GridSearch.h"
|
||||
#include "Paths.h"
|
||||
#include "Timer.h"
|
||||
#include "Colors.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
const int MAXL = 133;
|
||||
|
||||
// Declares the command line options of b_grid. Defaults for discretize,
// stratified, folds and seeds are read from the DotEnv configuration.
// --dump, --report and --compute form a required mutually exclusive group.
void manageArguments(argparse::ArgumentParser& program)
{
    auto env = platform::DotEnv();
    auto& group = program.add_mutually_exclusive_group(true);
    program.add_argument("-m", "--model")
        .help("Model to use " + platform::Models::instance()->tostring())
        .action([](const std::string& value) {
            static const std::vector<std::string> choices = platform::Models::instance()->getNames();
            if (std::find(choices.begin(), choices.end(), value) == choices.end()) {
                throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring());
            }
            return value;
        });
    group.add_argument("--dump").help("Show the grid combinations").default_value(false).implicit_value(true);
    group.add_argument("--report").help("Report the computed hyperparameters").default_value(false).implicit_value(true);
    group.add_argument("--compute").help("Perform computation of the grid output hyperparameters").default_value(false).implicit_value(true);
    program.add_argument("--discretize").help("Discretize input datasets").default_value(static_cast<bool>(std::stoi(env.get("discretize")))).implicit_value(true);
    program.add_argument("--mpi").help("Use MPI computing grid").default_value(false).implicit_value(true);
    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(static_cast<bool>(std::stoi(env.get("stratified")))).implicit_value(true);
    program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
    program.add_argument("--continue").help("Continue computing from that dataset").default_value(platform::GridSearch::NO_CONTINUE());
    program.add_argument("--only").help("Used with continue to compute that dataset only").default_value(false).implicit_value(true);
    program.add_argument("--exclude").default_value("[]").help("Datasets to exclude in json format, e.g. [\"dataset1\", \"dataset2\"]");
    program.add_argument("--nested").help("Do a double/nested cross validation with n folds").default_value(0).scan<'i', int>();
    program.add_argument("--score").help("Score used in gridsearch").default_value("accuracy");
    program.add_argument("-f", "--folds").help("Number of folds").default_value(std::stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
        int k = 0;
        try {
            k = std::stoi(value);
        }
        catch (...) {
            throw std::runtime_error("Number of folds must be an integer");
        }
        if (k < 2) {
            throw std::runtime_error("Number of folds must be greater than 1");
        }
        return k;
        });
    auto seed_values = env.getSeeds();
    program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
}
|
||||
|
||||
void list_dump(std::string& model)
|
||||
{
|
||||
auto data = platform::GridData(platform::Paths::grid_input(model));
|
||||
std::cout << Colors::MAGENTA() << "Listing configuration input file (Grid)" << std::endl << std::endl;
|
||||
int index = 0;
|
||||
int max_hyper = 15;
|
||||
int max_dataset = 7;
|
||||
auto combinations = data.getGridFile();
|
||||
for (auto const& item : combinations) {
|
||||
if (item.first.size() > max_dataset) {
|
||||
max_dataset = item.first.size();
|
||||
}
|
||||
if (item.second.dump().size() > max_hyper) {
|
||||
max_hyper = item.second.dump().size();
|
||||
}
|
||||
}
|
||||
std::cout << Colors::GREEN() << left << " # " << left << setw(max_dataset) << "Dataset" << " #Com. "
|
||||
<< setw(max_hyper) << "Hyperparameters" << std::endl;
|
||||
std::cout << "=== " << string(max_dataset, '=') << " ===== " << string(max_hyper, '=') << std::endl;
|
||||
bool odd = true;
|
||||
for (auto const& item : combinations) {
|
||||
auto color = odd ? Colors::CYAN() : Colors::BLUE();
|
||||
std::cout << color;
|
||||
auto num_combinations = data.getNumCombinations(item.first);
|
||||
std::cout << setw(3) << fixed << right << ++index << left << " " << setw(max_dataset) << item.first
|
||||
<< " " << setw(5) << right << num_combinations << " " << setw(max_hyper) << item.second.dump() << std::endl;
|
||||
odd = !odd;
|
||||
}
|
||||
std::cout << Colors::RESET() << std::endl;
|
||||
}
|
||||
std::string headerLine(const std::string& text, int utf = 0)
|
||||
{
|
||||
int n = MAXL - text.length() - 3;
|
||||
n = n < 0 ? 0 : n;
|
||||
return "* " + text + std::string(n + utf, ' ') + "*\n";
|
||||
}
|
||||
// Prints the stored grid search output for `model`: a framed header with the
// run settings followed by one line per dataset (date, duration, score and the
// selected hyperparameters dumped as JSON text).
// Per-dataset fields are read with at(), so a missing field raises a json exception.
void list_results(json& results, std::string& model)
{
    std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
    std::cout << headerLine("Listing computed hyperparameters for model " + model);
    std::cout << headerLine("Date & time: " + results["date"].get<std::string>() + " Duration: " + results["duration"].get<std::string>());
    std::cout << headerLine("Score: " + results["score"].get<std::string>());
    const int nested = results["nested"].get<int>();
    std::cout << headerLine(
        "Random seeds: " + results["seeds"].dump()
        + " Discretized: " + (results["discretize"].get<bool>() ? "True" : "False")
        + " Stratified: " + (results["stratified"].get<bool>() ? "True" : "False")
        + " #Folds: " + std::to_string(results["n_folds"].get<int>())
        + " Nested: " + (nested == 0 ? "False" : std::to_string(nested))
    );
    std::cout << std::string(MAXL, '*') << std::endl;
    int spaces = 0;
    int hyperparameters_spaces = 0;
    // Column widths; entries are read by reference instead of deep-copying each JSON value
    for (const auto& item : results["results"].items()) {
        const int keyWidth = static_cast<int>(item.key().size());
        if (keyWidth > spaces) {
            spaces = keyWidth;
        }
        const json& value = item.value();
        const int hyperWidth = static_cast<int>(value.at("hyperparameters").dump().size());
        if (hyperWidth > hyperparameters_spaces) {
            hyperparameters_spaces = hyperWidth;
        }
    }
    std::cout << Colors::GREEN() << " # " << std::left << std::setw(spaces) << "Dataset" << " " << std::setw(19) << "Date" << " "
        << "Duration " << std::setw(8) << "Score" << " " << "Hyperparameters" << std::endl;
    std::cout << "=== " << std::string(spaces, '=') << " " << std::string(19, '=') << " " << std::string(8, '=') << " "
        << std::string(8, '=') << " " << std::string(hyperparameters_spaces, '=') << std::endl;
    bool odd = true;
    int index = 0;
    for (const auto& item : results["results"].items()) {
        auto color = odd ? Colors::CYAN() : Colors::BLUE();
        const json& value = item.value();
        std::cout << color;
        std::cout << std::setw(3) << std::right << index++ << " ";
        std::cout << std::left << std::setw(spaces) << item.key() << " " << value.at("date").get<std::string>()
            << " " << std::setw(8) << std::right << value.at("duration").get<std::string>() << " " << std::setw(8) << std::setprecision(6)
            << std::fixed << std::right << value.at("score").get<double>() << " " << value.at("hyperparameters").dump() << std::endl;
        odd = !odd;
    }
    std::cout << Colors::RESET() << std::endl;
}
|
||||
|
||||
/*
|
||||
* Main
|
||||
*/
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
argparse::ArgumentParser program("b_grid");
|
||||
manageArguments(program);
|
||||
struct platform::ConfigGrid config;
|
||||
bool dump, compute;
|
||||
try {
|
||||
program.parse_args(argc, argv);
|
||||
config.model = program.get<std::string>("model");
|
||||
config.score = program.get<std::string>("score");
|
||||
config.discretize = program.get<bool>("discretize");
|
||||
config.stratified = program.get<bool>("stratified");
|
||||
config.n_folds = program.get<int>("folds");
|
||||
config.quiet = program.get<bool>("quiet");
|
||||
config.only = program.get<bool>("only");
|
||||
config.seeds = program.get<std::vector<int>>("seeds");
|
||||
config.nested = program.get<int>("nested");
|
||||
config.continue_from = program.get<std::string>("continue");
|
||||
if (config.continue_from == platform::GridSearch::NO_CONTINUE() && config.only) {
|
||||
throw std::runtime_error("Cannot use --only without --continue");
|
||||
}
|
||||
dump = program.get<bool>("dump");
|
||||
compute = program.get<bool>("compute");
|
||||
if (dump && (config.continue_from != platform::GridSearch::NO_CONTINUE() || config.only)) {
|
||||
throw std::runtime_error("Cannot use --dump with --continue or --only");
|
||||
}
|
||||
auto excluded = program.get<std::string>("exclude");
|
||||
config.excluded = json::parse(excluded);
|
||||
if (program.get<bool>("mpi")) {
|
||||
if (!compute || config.nested == 0) {
|
||||
throw std::runtime_error("Cannot use --mpi without --compute or without --nested");
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (const exception& err) {
|
||||
cerr << err.what() << std::endl;
|
||||
cerr << program;
|
||||
exit(1);
|
||||
}
|
||||
/*
|
||||
* Begin Processing
|
||||
*/
|
||||
auto env = platform::DotEnv();
|
||||
config.platform = env.get("platform");
|
||||
platform::Paths::createPath(platform::Paths::grid());
|
||||
auto grid_search = platform::GridSearch(config);
|
||||
platform::Timer timer;
|
||||
timer.start();
|
||||
if (dump) {
|
||||
list_dump(config.model);
|
||||
} else {
|
||||
if (compute) {
|
||||
if (program.get<bool>("mpi")) {
|
||||
struct platform::ConfigMPI mpi_config;
|
||||
mpi_config.manager = 0; // which process is the manager
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_config.rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &mpi_config.n_procs);
|
||||
grid_search.go_mpi(mpi_config);
|
||||
if (mpi_config.rank == mpi_config.manager) {
|
||||
auto results = grid_search.getResults();
|
||||
list_results(results, config.model);
|
||||
std::cout << "Process took " << timer.getDurationString() << std::endl;
|
||||
}
|
||||
MPI_Finalize();
|
||||
} else {
|
||||
grid_search.go();
|
||||
std::cout << "Process took " << timer.getDurationString() << std::endl;
|
||||
}
|
||||
} else {
|
||||
// List results
|
||||
auto results = grid_search.getResults();
|
||||
if (results.empty()) {
|
||||
std::cout << "** No results found" << std::endl;
|
||||
} else {
|
||||
list_results(results, config.model);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << "Done!" << std::endl;
|
||||
return 0;
|
||||
}
|
@@ -1,56 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <locale>
|
||||
#include "Paths.h"
|
||||
#include "Colors.h"
|
||||
#include "Datasets.h"
|
||||
|
||||
const int BALANCE_LENGTH = 75;
|
||||
|
||||
// Number punctuation facet: ',' as decimal point, '.' as thousands separator,
// digits grouped in threes.
struct separated : std::numpunct<char> {
    char do_decimal_point() const override { return ','; }
    char do_thousands_sep() const override { return '.'; }
    std::string do_grouping() const override { return "\03"; }
};
|
||||
|
||||
void outputBalance(const std::string& balance)
|
||||
{
|
||||
auto temp = std::string(balance);
|
||||
while (temp.size() > BALANCE_LENGTH - 1) {
|
||||
auto part = temp.substr(0, BALANCE_LENGTH);
|
||||
std::cout << part << std::endl;
|
||||
std::cout << setw(48) << " ";
|
||||
temp = temp.substr(BALANCE_LENGTH);
|
||||
}
|
||||
std::cout << temp << std::endl;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
auto data = platform::Datasets(false, platform::Paths::datasets());
|
||||
locale mylocale(std::cout.getloc(), new separated);
|
||||
locale::global(mylocale);
|
||||
std::cout.imbue(mylocale);
|
||||
std::cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << std::endl;
|
||||
std::string balanceBars = std::string(BALANCE_LENGTH, '=');
|
||||
std::cout << "============================== ====== ===== === " << balanceBars << std::endl;
|
||||
bool odd = true;
|
||||
for (const auto& dataset : data.getNames()) {
|
||||
auto color = odd ? Colors::CYAN() : Colors::BLUE();
|
||||
std::cout << color << setw(30) << left << dataset << " ";
|
||||
data.loadDataset(dataset);
|
||||
auto nSamples = data.getNSamples(dataset);
|
||||
std::cout << setw(6) << right << nSamples << " ";
|
||||
std::cout << setw(5) << right << data.getFeatures(dataset).size() << " ";
|
||||
std::cout << setw(3) << right << data.getNClasses(dataset) << " ";
|
||||
std::stringstream oss;
|
||||
std::string sep = "";
|
||||
for (auto number : data.getClassesCounts(dataset)) {
|
||||
oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")";
|
||||
sep = " / ";
|
||||
}
|
||||
outputBalance(oss.str());
|
||||
odd = !odd;
|
||||
}
|
||||
std::cout << Colors::RESET() << std::endl;
|
||||
return 0;
|
||||
}
|
@@ -1,134 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <argparse/argparse.hpp>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "Experiment.h"
|
||||
#include "Datasets.h"
|
||||
#include "DotEnv.h"
|
||||
#include "Models.h"
|
||||
#include "modelRegister.h"
|
||||
#include "Paths.h"
|
||||
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
/// Declare the command line arguments of b_main on `program`.
/// Defaults for discretize, stratified, n_folds and the seeds are read from
/// the platform::DotEnv configuration.
///
/// @param program parser that receives the argument definitions.
void manageArguments(argparse::ArgumentParser& program)
{
    auto env = platform::DotEnv();
    program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
    program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
    // The two sentences used to be glued together ("name.Mutually"); the
    // separating blank is now part of the first literal.
    program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name. "
        "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
    program.add_argument("-m", "--model")
        .help("Model to use " + platform::Models::instance()->tostring())
        .action([](const std::string& value) {
            // Only names known to the model registry are accepted
            static const std::vector<std::string> choices = platform::Models::instance()->getNames();
            if (std::find(choices.begin(), choices.end(), value) != choices.end()) {
                return value;
            }
            throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring());
        });
    program.add_argument("--title").default_value("").help("Experiment title");
    program.add_argument("--discretize").help("Discretize input dataset").default_value(std::stoi(env.get("discretize")) != 0).implicit_value(true);
    program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
    program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(std::stoi(env.get("stratified")) != 0).implicit_value(true);
    program.add_argument("-f", "--folds").help("Number of folds").default_value(std::stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
        int k = 0;
        try {
            k = std::stoi(value);
        }
        catch (...) {
            // std::stoi reports both malformed and out-of-range input by throwing
            throw std::runtime_error("Number of folds must be an integer");
        }
        if (k < 2) {
            throw std::runtime_error("Number of folds must be greater than 1");
        }
        return k;
    });
    auto seed_values = env.getSeeds();
    program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
argparse::ArgumentParser program("b_main");
|
||||
manageArguments(program);
|
||||
std::string file_name, model_name, title, hyperparameters_file;
|
||||
json hyperparameters_json;
|
||||
bool discretize_dataset, stratified, saveResults, quiet;
|
||||
std::vector<int> seeds;
|
||||
std::vector<std::string> filesToTest;
|
||||
int n_folds;
|
||||
try {
|
||||
program.parse_args(argc, argv);
|
||||
file_name = program.get<std::string>("dataset");
|
||||
model_name = program.get<std::string>("model");
|
||||
discretize_dataset = program.get<bool>("discretize");
|
||||
stratified = program.get<bool>("stratified");
|
||||
quiet = program.get<bool>("quiet");
|
||||
n_folds = program.get<int>("folds");
|
||||
seeds = program.get<std::vector<int>>("seeds");
|
||||
auto hyperparameters = program.get<std::string>("hyperparameters");
|
||||
hyperparameters_json = json::parse(hyperparameters);
|
||||
hyperparameters_file = program.get<std::string>("hyper-file");
|
||||
if (hyperparameters_file != "" && hyperparameters != "{}") {
|
||||
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
|
||||
}
|
||||
title = program.get<std::string>("title");
|
||||
if (title == "" && file_name == "") {
|
||||
throw runtime_error("title is mandatory if dataset is not provided");
|
||||
}
|
||||
saveResults = program.get<bool>("save");
|
||||
}
|
||||
catch (const exception& err) {
|
||||
cerr << err.what() << std::endl;
|
||||
cerr << program;
|
||||
exit(1);
|
||||
}
|
||||
auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets());
|
||||
if (file_name != "") {
|
||||
if (!datasets.isDataset(file_name)) {
|
||||
cerr << "Dataset " << file_name << " not found" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
if (title == "") {
|
||||
title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds";
|
||||
}
|
||||
filesToTest.push_back(file_name);
|
||||
} else {
|
||||
filesToTest = datasets.getNames();
|
||||
saveResults = true;
|
||||
}
|
||||
platform::HyperParameters test_hyperparams;
|
||||
if (hyperparameters_file != "") {
|
||||
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file);
|
||||
} else {
|
||||
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
|
||||
}
|
||||
|
||||
/*
|
||||
* Begin Processing
|
||||
*/
|
||||
auto env = platform::DotEnv();
|
||||
auto experiment = platform::Experiment();
|
||||
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
|
||||
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
|
||||
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
|
||||
experiment.setHyperparameters(test_hyperparams);
|
||||
for (auto seed : seeds) {
|
||||
experiment.addRandomSeed(seed);
|
||||
}
|
||||
platform::Timer timer;
|
||||
timer.start();
|
||||
experiment.go(filesToTest, quiet);
|
||||
experiment.setDuration(timer.getDuration());
|
||||
if (saveResults) {
|
||||
experiment.save(platform::Paths::results());
|
||||
}
|
||||
if (!quiet)
|
||||
experiment.report();
|
||||
std::cout << "Done!" << std::endl;
|
||||
return 0;
|
||||
}
|
@@ -1,48 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <argparse/argparse.hpp>
|
||||
#include "ManageResults.h"
|
||||
|
||||
|
||||
/// Declare the command line arguments of b_manage, parse the command line and
/// validate it. On any error the message and the usage text are written to
/// std::cerr and the program exits with status 1.
///
/// @param program parser that receives the definitions and the parsed values.
/// @param argc    argument count as received by main.
/// @param argv    argument vector as received by main.
void manageArguments(argparse::ArgumentParser& program, int argc, char** argv)
{
    program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>();
    // The help text used to end with a stray ')'
    program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model");
    program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
    program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true);
    program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true);
    program.add_argument("--compare").help("Compare with best results").default_value(false).implicit_value(true);
    try {
        program.parse_args(argc, argv);
        if (program.get<int>("number") < 0) {
            throw std::runtime_error("Number of results must be greater than or equal to 0");
        }
        // Retrieve the remaining values only to surface any retrieval error
        // here, where it is reported together with the usage text; the values
        // themselves are read again by the caller.
        static_cast<void>(program.get<std::string>("model"));
        static_cast<void>(program.get<std::string>("score"));
        static_cast<void>(program.get<bool>("complete"));
        static_cast<void>(program.get<bool>("partial"));
        static_cast<void>(program.get<bool>("compare"));
    }
    catch (const std::exception& err) {
        std::cerr << err.what() << std::endl;
        std::cerr << program;
        exit(1);
    }
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
auto program = argparse::ArgumentParser("b_manage");
|
||||
manageArguments(program, argc, argv);
|
||||
int number = program.get<int>("number");
|
||||
std::string model = program.get<std::string>("model");
|
||||
std::string score = program.get<std::string>("score");
|
||||
auto complete = program.get<bool>("complete");
|
||||
auto partial = program.get<bool>("partial");
|
||||
auto compare = program.get<bool>("compare");
|
||||
if (complete)
|
||||
partial = false;
|
||||
auto manager = platform::ManageResults(number, model, score, complete, partial, compare);
|
||||
manager.doMenu();
|
||||
return 0;
|
||||
}
|
@@ -1,29 +0,0 @@
|
||||
#ifndef MODEL_REGISTER_H
|
||||
#define MODEL_REGISTER_H
|
||||
// One Registrar per model: each binds the model name to a factory that
// creates a new classifier of that kind.

// Classifiers implemented in the bayesnet namespace
static platform::Registrar registrarT("TAN",
    []() -> bayesnet::BaseClassifier* { return new bayesnet::TAN(); });
static platform::Registrar registrarTLD("TANLd",
    []() -> bayesnet::BaseClassifier* { return new bayesnet::TANLd(); });
static platform::Registrar registrarS("SPODE",
    []() -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2); });
static platform::Registrar registrarSLD("SPODELd",
    []() -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2); });
static platform::Registrar registrarK("KDB",
    []() -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2); });
static platform::Registrar registrarKLD("KDBLd",
    []() -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2); });
static platform::Registrar registrarA("AODE",
    []() -> bayesnet::BaseClassifier* { return new bayesnet::AODE(); });
static platform::Registrar registrarALD("AODELd",
    []() -> bayesnet::BaseClassifier* { return new bayesnet::AODELd(); });
static platform::Registrar registrarBA("BoostAODE",
    []() -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE(); });

// Classifiers implemented in the pywrap namespace
static platform::Registrar registrarSt("STree",
    []() -> bayesnet::BaseClassifier* { return new pywrap::STree(); });
static platform::Registrar registrarOdte("Odte",
    []() -> bayesnet::BaseClassifier* { return new pywrap::ODTE(); });
static platform::Registrar registrarSvc("SVC",
    []() -> bayesnet::BaseClassifier* { return new pywrap::SVC(); });
static platform::Registrar registrarRaF("RandomForest",
    []() -> bayesnet::BaseClassifier* { return new pywrap::RandomForest(); });
|
||||
#endif
|
@@ -1,9 +0,0 @@
|
||||
# Header search paths needed by the PyWrap sources
include_directories(
    ${BayesNet_SOURCE_DIR}/lib/Files
    ${BayesNet_SOURCE_DIR}/lib/json/include
    ${BayesNet_SOURCE_DIR}/src/BayesNet
    ${Python3_INCLUDE_DIRS}
    ${TORCH_INCLUDE_DIRS}
)

# Shared library with the Python classifier wrappers
add_library(PyWrap SHARED
    PyWrap.cc
    STree.cc
    ODTE.cc
    SVC.cc
    RandomForest.cc
    PyClassifier.cc
)
# Previous link line, which also linked xgboost::xgboost:
#target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy xgboost::xgboost ArffFiles)
target_link_libraries(PyWrap
    ${Python3_LIBRARIES}
    "${TORCH_LIBRARIES}"
    ${LIBTORCH_PYTHON}
    Boost::boost
    Boost::python
    Boost::numpy
    ArffFiles
)
|
@@ -1,24 +0,0 @@
|
||||
#include "ODTE.h"
|
||||
|
||||
namespace pywrap {
|
||||
ODTE::ODTE() : PyClassifier("odte", "Odte")
|
||||
{
|
||||
validHyperparameters = { "n_jobs", "n_estimators", "random_state" };
|
||||
}
|
||||
int ODTE::getNumberOfNodes() const
|
||||
{
|
||||
return callMethodInt("get_nodes");
|
||||
}
|
||||
int ODTE::getNumberOfEdges() const
|
||||
{
|
||||
return callMethodInt("get_leaves");
|
||||
}
|
||||
int ODTE::getNumberOfStates() const
|
||||
{
|
||||
return callMethodInt("get_depth");
|
||||
}
|
||||
std::string ODTE::graph()
|
||||
{
|
||||
return callMethodString("graph");
|
||||
}
|
||||
} /* namespace pywrap */
|
@@ -1,17 +0,0 @@
|
||||
#ifndef ODTE_H
|
||||
#define ODTE_H
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "PyClassifier.h"
|
||||
|
||||
namespace pywrap {
|
||||
class ODTE : public PyClassifier {
|
||||
public:
|
||||
ODTE();
|
||||
~ODTE() = default;
|
||||
int getNumberOfNodes() const override;
|
||||
int getNumberOfEdges() const override;
|
||||
int getNumberOfStates() const override;
|
||||
std::string graph();
|
||||
};
|
||||
} /* namespace pywrap */
|
||||
#endif /* ODTE_H */
|
@@ -1,96 +0,0 @@
|
||||
#include "PyClassifier.h"
|
||||
namespace pywrap {
|
||||
namespace bp = boost::python;
|
||||
namespace np = boost::python::numpy;
|
||||
/// Register this classifier with the PyWrap singleton and import the Python
/// class it wraps.
///
/// @param module    name of the Python module.
/// @param className name of the class inside the module.
/// @param sklearn   selects how version() obtains the version string.
PyClassifier::PyClassifier(const std::string& module, const std::string& className, bool sklearn)
    : pyWrap(PyWrap::GetInstance()),
    module(module),
    className(className),
    sklearn(sklearn),
    // The object address is the id: this allows to have more than one
    // instance of the same module/class
    id(reinterpret_cast<clfId_t>(this)),
    fitted(false)
{
    pyWrap->importClass(id, module, className);
}

/// Release the Python objects registered under this classifier's id.
PyClassifier::~PyClassifier()
{
    pyWrap->clean(id);
}
|
||||
/// Wrap the memory of a 2-D tensor in a numpy array and return its transpose.
///
/// The strides are now derived from sizeof(float), the element type that is
/// declared to numpy, instead of from the size of the dtype descriptor object
/// returned by X.dtype(), which is unrelated to the element width.
///
/// NOTE(review): assumes X is a contiguous float32 tensor; the array is built
/// over X.data_ptr() so X presumably must outlive the returned array -- confirm.
///
/// @param X tensor whose first two dimensions give the shape of the array.
/// @return transposed numpy array built over the data of X.
np::ndarray tensor2numpy(torch::Tensor& X)
{
    const int m = X.size(0);
    const int n = X.size(1);
    const auto itemSize = sizeof(float);
    // Row-major layout: n elements between rows, one element between columns
    auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin<float>(), bp::make_tuple(m, n), bp::make_tuple(itemSize * n, itemSize), bp::object());
    Xn = Xn.transpose();
    return Xn;
}
|
||||
/// Convert a features tensor and a labels tensor to numpy arrays.
///
/// The stride of the labels array is now sizeof(int32_t), the element type
/// declared to numpy, instead of a value derived from the size of the dtype
/// descriptor object returned by y.dtype().
///
/// NOTE(review): assumes y is a contiguous int32 tensor with X.size(1)
/// elements -- confirm against the callers.
///
/// @param X features tensor, converted with tensor2numpy.
/// @param y labels tensor.
/// @return pair {features array, labels array}.
std::pair<np::ndarray, np::ndarray> tensors2numpy(torch::Tensor& X, torch::Tensor& y)
{
    const int n = X.size(1);
    auto yn = np::from_data(y.data_ptr(), np::dtype::get_builtin<int32_t>(), bp::make_tuple(n), bp::make_tuple(sizeof(int32_t)), bp::object());
    return { tensor2numpy(X), yn };
}
|
||||
std::string PyClassifier::version()
|
||||
{
|
||||
if (sklearn) {
|
||||
return pyWrap->sklearnVersion();
|
||||
}
|
||||
return pyWrap->version(id);
|
||||
}
|
||||
std::string PyClassifier::callMethodString(const std::string& method)
|
||||
{
|
||||
return pyWrap->callMethodString(id, method);
|
||||
}
|
||||
int PyClassifier::callMethodSumOfItems(const std::string& method) const
|
||||
{
|
||||
return pyWrap->callMethodSumOfItems(id, method);
|
||||
}
|
||||
int PyClassifier::callMethodInt(const std::string& method) const
|
||||
{
|
||||
return pyWrap->callMethodInt(id, method);
|
||||
}
|
||||
/// Fit the wrapped Python classifier.
/// Stored hyperparameters are sent to the Python object before the first fit
/// only; later calls skip that step.
///
/// @param X features tensor.
/// @param y labels tensor.
/// @return this classifier.
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y)
{
    const bool pendingHyperparameters = !fitted && hyperparameters.size() > 0;
    if (pendingHyperparameters) {
        pyWrap->setHyperparameters(id, hyperparameters);
    }
    auto [Xn, yn] = tensors2numpy(X, y);
    // Each CPyObject receives its own reference and drops it when it goes out of scope
    CPyObject Xp = bp::incref(bp::object(Xn).ptr());
    CPyObject yp = bp::incref(bp::object(yn).ptr());
    pyWrap->fit(id, Xp, yp);
    fitted = true;
    return *this;
}

/// Overload required by the base interface: features, className and states
/// are ignored and the call is forwarded to fit(X, y).
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
    PyClassifier& self = fit(X, y);
    return self;
}
|
||||
/// Predict the labels of X with the wrapped Python classifier.
///
/// Changes with respect to the previous version: the unused local holding
/// X.size(1) is gone, and the error path now releases the extra reference
/// that PyWrap::predict adds to its result, which used to leak when the
/// exception was thrown.
///
/// NOTE(review): the result buffer is read as int values; this presumably
/// relies on the Python side returning 32-bit integers -- confirm.
///
/// @param X features tensor.
/// @return int32 tensor with one prediction per element of the result.
/// @throws std::runtime_error if a Python error is pending after the
///         conversion of the result.
torch::Tensor PyClassifier::predict(torch::Tensor& X)
{
    auto Xn = tensor2numpy(X);
    CPyObject Xp = bp::incref(bp::object(Xn).ptr());
    // PyWrap::predict increments the reference count of the result before
    // returning it: one reference is handed to `handle` below, the other one
    // is released explicitly with Py_XDECREF.
    PyObject* incoming = pyWrap->predict(id, Xp);
    bp::handle<> handle(incoming);
    bp::object object(handle);
    np::ndarray prediction = np::from_object(object);
    if (PyErr_Occurred()) {
        PyErr_Print();
        Py_XDECREF(incoming);
        throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
    }
    int* data = reinterpret_cast<int*>(prediction.get_data());
    std::vector<int> vPrediction(data, data + prediction.shape(0));
    auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
    Py_XDECREF(incoming);
    return resultTensor;
}
|
||||
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
|
||||
{
|
||||
auto [Xn, yn] = tensors2numpy(X, y);
|
||||
CPyObject Xp = bp::incref(bp::object(Xn).ptr());
|
||||
CPyObject yp = bp::incref(bp::object(yn).ptr());
|
||||
float result = pyWrap->score(id, Xp, yp);
|
||||
return result;
|
||||
}
|
||||
void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters)
|
||||
{
|
||||
this->hyperparameters = hyperparameters;
|
||||
}
|
||||
} /* namespace pywrap */
|
@@ -1,56 +0,0 @@
|
||||
#ifndef PYCLASSIFIER_H
|
||||
#define PYCLASSIFIER_H
|
||||
#include "boost/python/detail/wrap_python.hpp"
|
||||
#include <boost/python/numpy.hpp>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <torch/torch.h>
|
||||
#include "PyWrap.h"
|
||||
#include "Classifier.h"
|
||||
#include "TypeId.h"
|
||||
|
||||
namespace pywrap {
|
||||
class PyClassifier : public bayesnet::BaseClassifier {
|
||||
public:
|
||||
PyClassifier(const std::string& module, const std::string& className, const bool sklearn = false);
|
||||
virtual ~PyClassifier();
|
||||
PyClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override { return *this; };
|
||||
// X is nxm tensor, y is nx1 tensor
|
||||
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
|
||||
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y);
|
||||
PyClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override { return *this; };
|
||||
PyClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override { return *this; };
|
||||
torch::Tensor predict(torch::Tensor& X) override;
|
||||
std::vector<int> predict(std::vector<std::vector<int >>& X) override { return std::vector<int>(); };
|
||||
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override { return 0.0; };
|
||||
float score(torch::Tensor& X, torch::Tensor& y) override;
|
||||
std::string version();
|
||||
std::string callMethodString(const std::string& method);
|
||||
int callMethodSumOfItems(const std::string& method) const;
|
||||
int callMethodInt(const std::string& method) const;
|
||||
std::string getVersion() override { return this->version(); };
|
||||
int getNumberOfNodes() const override { return 0; };
|
||||
int getNumberOfEdges() const override { return 0; };
|
||||
int getNumberOfStates() const override { return 0; };
|
||||
std::vector<std::string> show() const override { return std::vector<std::string>(); }
|
||||
std::vector<std::string> graph(const std::string& title = "") const override { return std::vector<std::string>(); }
|
||||
bayesnet::status_t getStatus() const override { return bayesnet::NORMAL; };
|
||||
std::vector<std::string> topological_order() override { return std::vector<std::string>(); }
|
||||
void dump_cpt() const override {};
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
protected:
|
||||
nlohmann::json hyperparameters;
|
||||
void trainModel(const torch::Tensor& weights) override {};
|
||||
private:
|
||||
PyWrap* pyWrap;
|
||||
std::string module;
|
||||
std::string className;
|
||||
bool sklearn;
|
||||
clfId_t id;
|
||||
bool fitted;
|
||||
};
|
||||
} /* namespace pywrap */
|
||||
#endif /* PYCLASSIFIER_H */
|
@@ -1,15 +0,0 @@
|
||||
#ifndef PYCLF_H
|
||||
#define PYCLF_H
|
||||
#include <string>
|
||||
#include "DotEnv.h"
|
||||
namespace PyClassifiers {

    /// Named classifier with a virtual destructor; the only state it declares
    /// is its name.
    class PyClf {
    public:
        PyClf(const std::string& name);
        virtual ~PyClf();

    private:
        // Name received in the constructor
        std::string name;
    };

} /* namespace PyClassifiers */
|
||||
#endif /* PYCLF_H */
|
@@ -1,87 +0,0 @@
|
||||
#ifndef PYHELPER_HPP
|
||||
#define PYHELPER_HPP
|
||||
#pragma once
|
||||
// Code taken and adapted from
|
||||
// https ://www.codeproject.com/Articles/820116/Embedding-Python-program-in-a-C-Cplusplus-code
|
||||
#include "boost/python/detail/wrap_python.hpp"
|
||||
#include <boost/python/numpy.hpp>
|
||||
#include <iostream>
|
||||
|
||||
namespace pywrap {
|
||||
namespace p = boost::python;
|
||||
namespace np = boost::python::numpy;
|
||||
class CPyInstance {
|
||||
public:
|
||||
CPyInstance()
|
||||
{
|
||||
Py_Initialize();
|
||||
np::initialize();
|
||||
}
|
||||
|
||||
~CPyInstance()
|
||||
{
|
||||
Py_Finalize();
|
||||
}
|
||||
};
|
||||
class CPyObject {
|
||||
private:
|
||||
PyObject* p;
|
||||
public:
|
||||
CPyObject() : p(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
CPyObject(PyObject* _p) : p(_p)
|
||||
{
|
||||
}
|
||||
~CPyObject()
|
||||
{
|
||||
Release();
|
||||
}
|
||||
PyObject* getObject()
|
||||
{
|
||||
return p;
|
||||
}
|
||||
PyObject* setObject(PyObject* _p)
|
||||
{
|
||||
return (p = _p);
|
||||
}
|
||||
PyObject* AddRef()
|
||||
{
|
||||
if (p) {
|
||||
Py_INCREF(p);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
void Release()
|
||||
{
|
||||
if (p) {
|
||||
Py_XDECREF(p);
|
||||
}
|
||||
|
||||
p = NULL;
|
||||
}
|
||||
PyObject* operator ->()
|
||||
{
|
||||
return p;
|
||||
}
|
||||
bool is()
|
||||
{
|
||||
return p ? true : false;
|
||||
}
|
||||
operator PyObject* ()
|
||||
{
|
||||
return p;
|
||||
}
|
||||
PyObject* operator = (PyObject* pp)
|
||||
{
|
||||
p = pp;
|
||||
return p;
|
||||
}
|
||||
operator bool()
|
||||
{
|
||||
return p ? true : false;
|
||||
}
|
||||
};
|
||||
} /* namespace pywrap */
|
||||
#endif
|
@@ -1,255 +0,0 @@
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <stdexcept>
|
||||
#include "PyWrap.h"
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <boost/python/numpy.hpp>
|
||||
#include <iostream>
|
||||
|
||||
namespace pywrap {
|
||||
namespace np = boost::python::numpy;
|
||||
PyWrap* PyWrap::wrapper = nullptr;
|
||||
std::mutex PyWrap::mutex;
|
||||
CPyInstance* PyWrap::pyInstance = nullptr;
|
||||
auto moduleClassMap = std::map<std::pair<std::string, std::string>, std::tuple<PyObject*, PyObject*, PyObject*>>();
|
||||
|
||||
/// Return the singleton, creating it on first use. Creation also starts the
/// Python interpreter (through CPyInstance) and silences Python warnings.
/// The whole call runs under the class mutex.
PyWrap* PyWrap::GetInstance()
{
    std::lock_guard<std::mutex> lock(mutex);
    if (wrapper != nullptr) {
        return wrapper;
    }
    wrapper = new PyWrap();
    pyInstance = new CPyInstance();
    PyRun_SimpleString("import warnings;warnings.filterwarnings('ignore')");
    return wrapper;
}
|
||||
void PyWrap::RemoveInstance()
|
||||
{
|
||||
if (wrapper != nullptr) {
|
||||
if (pyInstance != nullptr) {
|
||||
delete pyInstance;
|
||||
}
|
||||
pyInstance = nullptr;
|
||||
if (wrapper != nullptr) {
|
||||
delete wrapper;
|
||||
}
|
||||
wrapper = nullptr;
|
||||
}
|
||||
}
|
||||
void PyWrap::importClass(const clfId_t id, const std::string& moduleName, const std::string& className)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
auto result = moduleClassMap.find(id);
|
||||
if (result != moduleClassMap.end()) {
|
||||
return;
|
||||
}
|
||||
PyObject* module = PyImport_ImportModule(moduleName.c_str());
|
||||
if (PyErr_Occurred()) {
|
||||
errorAbort("Couldn't import module " + moduleName);
|
||||
}
|
||||
PyObject* classObject = PyObject_GetAttrString(module, className.c_str());
|
||||
if (PyErr_Occurred()) {
|
||||
errorAbort("Couldn't find class " + className);
|
||||
}
|
||||
PyObject* instance = PyObject_CallObject(classObject, NULL);
|
||||
if (PyErr_Occurred()) {
|
||||
errorAbort("Couldn't create instance of class " + className);
|
||||
}
|
||||
moduleClassMap.insert({ id, { module, classObject, instance } });
|
||||
}
|
||||
void PyWrap::clean(const clfId_t id)
|
||||
{
|
||||
// Remove Python interpreter if no more modules imported left
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
auto result = moduleClassMap.find(id);
|
||||
if (result == moduleClassMap.end()) {
|
||||
return;
|
||||
}
|
||||
Py_DECREF(std::get<0>(result->second));
|
||||
Py_DECREF(std::get<1>(result->second));
|
||||
Py_DECREF(std::get<2>(result->second));
|
||||
moduleClassMap.erase(result);
|
||||
if (PyErr_Occurred()) {
|
||||
PyErr_Print();
|
||||
errorAbort("Error cleaning module ");
|
||||
}
|
||||
// With boost you can't remove the interpreter
|
||||
// https://www.boost.org/doc/libs/1_83_0/libs/python/doc/html/tutorial/tutorial/embedding.html#tutorial.embedding.getting_started
|
||||
// if (moduleClassMap.empty()) {
|
||||
// RemoveInstance();
|
||||
// }
|
||||
}
|
||||
void PyWrap::errorAbort(const std::string& message)
|
||||
{
|
||||
std::cerr << message << std::endl;
|
||||
PyErr_Print();
|
||||
RemoveInstance();
|
||||
exit(1);
|
||||
}
|
||||
PyObject* PyWrap::getClass(const clfId_t id)
|
||||
{
|
||||
auto item = moduleClassMap.find(id);
|
||||
if (item == moduleClassMap.end()) {
|
||||
errorAbort("Module not found");
|
||||
}
|
||||
return std::get<2>(item->second);
|
||||
}
|
||||
std::string PyWrap::callMethodString(const clfId_t id, const std::string& method)
|
||||
{
|
||||
PyObject* instance = getClass(id);
|
||||
PyObject* result;
|
||||
try {
|
||||
if (!(result = PyObject_CallMethod(instance, method.c_str(), NULL)))
|
||||
errorAbort("Couldn't call method " + method);
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
errorAbort(e.what());
|
||||
}
|
||||
std::string value = PyUnicode_AsUTF8(result);
|
||||
Py_XDECREF(result);
|
||||
return value;
|
||||
}
|
||||
int PyWrap::callMethodInt(const clfId_t id, const std::string& method)
|
||||
{
|
||||
PyObject* instance = getClass(id);
|
||||
PyObject* result;
|
||||
try {
|
||||
if (!(result = PyObject_CallMethod(instance, method.c_str(), NULL)))
|
||||
errorAbort("Couldn't call method " + method);
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
errorAbort(e.what());
|
||||
}
|
||||
int value = PyLong_AsLong(result);
|
||||
Py_XDECREF(result);
|
||||
return value;
|
||||
}
|
||||
std::string PyWrap::sklearnVersion()
|
||||
{
|
||||
PyObject* sklearnModule = PyImport_ImportModule("sklearn");
|
||||
if (sklearnModule == nullptr) {
|
||||
errorAbort("Couldn't import sklearn");
|
||||
}
|
||||
PyObject* versionAttr = PyObject_GetAttrString(sklearnModule, "__version__");
|
||||
if (versionAttr == nullptr || !PyUnicode_Check(versionAttr)) {
|
||||
Py_XDECREF(sklearnModule);
|
||||
errorAbort("Couldn't get sklearn version");
|
||||
}
|
||||
std::string result = PyUnicode_AsUTF8(versionAttr);
|
||||
Py_XDECREF(versionAttr);
|
||||
Py_XDECREF(sklearnModule);
|
||||
return result;
|
||||
}
|
||||
std::string PyWrap::version(const clfId_t id)
|
||||
{
|
||||
return callMethodString(id, "version");
|
||||
}
|
||||
int PyWrap::callMethodSumOfItems(const clfId_t id, const std::string& method)
|
||||
{
|
||||
// Call method on each estimator and sum the results (made for RandomForest)
|
||||
PyObject* instance = getClass(id);
|
||||
PyObject* estimators = PyObject_GetAttrString(instance, "estimators_");
|
||||
if (estimators == nullptr) {
|
||||
errorAbort("Failed to get attribute: " + method);
|
||||
}
|
||||
int sumOfItems = 0;
|
||||
Py_ssize_t len = PyList_Size(estimators);
|
||||
for (Py_ssize_t i = 0; i < len; i++) {
|
||||
PyObject* estimator = PyList_GetItem(estimators, i);
|
||||
PyObject* result;
|
||||
if (method == "node_count") {
|
||||
PyObject* owner = PyObject_GetAttrString(estimator, "tree_");
|
||||
if (owner == nullptr) {
|
||||
Py_XDECREF(estimators);
|
||||
errorAbort("Failed to get attribute tree_ for: " + method);
|
||||
}
|
||||
result = PyObject_GetAttrString(owner, method.c_str());
|
||||
if (result == nullptr) {
|
||||
Py_XDECREF(estimators);
|
||||
Py_XDECREF(owner);
|
||||
errorAbort("Failed to get attribute node_count: " + method);
|
||||
}
|
||||
Py_DECREF(owner);
|
||||
} else {
|
||||
result = PyObject_CallMethod(estimator, method.c_str(), nullptr);
|
||||
if (result == nullptr) {
|
||||
Py_XDECREF(estimators);
|
||||
errorAbort("Failed to call method: " + method);
|
||||
}
|
||||
}
|
||||
sumOfItems += PyLong_AsLong(result);
|
||||
Py_DECREF(result);
|
||||
}
|
||||
Py_DECREF(estimators);
|
||||
return sumOfItems;
|
||||
}
|
||||
void PyWrap::setHyperparameters(const clfId_t id, const json& hyperparameters)
|
||||
{
|
||||
// Set hyperparameters as attributes of the class
|
||||
PyObject* pValue;
|
||||
PyObject* instance = getClass(id);
|
||||
for (const auto& [key, value] : hyperparameters.items()) {
|
||||
std::stringstream oss;
|
||||
oss << value.type_name();
|
||||
if (oss.str() == "string") {
|
||||
pValue = Py_BuildValue("s", value.get<std::string>().c_str());
|
||||
} else {
|
||||
if (value.is_number_integer()) {
|
||||
pValue = Py_BuildValue("i", value.get<int>());
|
||||
} else {
|
||||
pValue = Py_BuildValue("f", value.get<double>());
|
||||
}
|
||||
}
|
||||
int res = PyObject_SetAttrString(instance, key.c_str(), pValue);
|
||||
if (res == -1 && PyErr_Occurred()) {
|
||||
Py_XDECREF(pValue);
|
||||
errorAbort("Couldn't set attribute " + key + "=" + value.dump());
|
||||
}
|
||||
Py_XDECREF(pValue);
|
||||
}
|
||||
}
|
||||
void PyWrap::fit(const clfId_t id, CPyObject& X, CPyObject& y)
|
||||
{
|
||||
PyObject* instance = getClass(id);
|
||||
CPyObject result;
|
||||
CPyObject method = PyUnicode_FromString("fit");
|
||||
try {
|
||||
if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL)))
|
||||
errorAbort("Couldn't call method fit");
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
errorAbort(e.what());
|
||||
}
|
||||
}
|
||||
/// Call predict(X) on the instance registered under `id`; the program is
/// aborted if the call fails.
///
/// @return the object returned by Python, with its reference count
///         incremented once more before returning. Caller must free this object.
PyObject* PyWrap::predict(const clfId_t id, CPyObject& X)
{
    PyObject* instance = getClass(id);
    CPyObject methodName = PyUnicode_FromString("predict");
    PyObject* prediction;
    try {
        prediction = PyObject_CallMethodObjArgs(instance, methodName.getObject(), X.getObject(), NULL);
        if (prediction == nullptr) {
            errorAbort("Couldn't call method predict");
        }
    }
    catch (const std::exception& e) {
        errorAbort(e.what());
    }
    Py_INCREF(prediction);
    return prediction;
}
|
||||
double PyWrap::score(const clfId_t id, CPyObject& X, CPyObject& y)
|
||||
{
|
||||
PyObject* instance = getClass(id);
|
||||
CPyObject result;
|
||||
CPyObject method = PyUnicode_FromString("score");
|
||||
try {
|
||||
if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL)))
|
||||
errorAbort("Couldn't call method score");
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
errorAbort(e.what());
|
||||
}
|
||||
double resultValue = PyFloat_AsDouble(result);
|
||||
return resultValue;
|
||||
}
|
||||
}
|
@@ -1,49 +0,0 @@
|
||||
#ifndef PYWRAP_H
|
||||
#define PYWRAP_H
|
||||
#include "boost/python/detail/wrap_python.hpp"
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <tuple>
|
||||
#include <mutex>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "PyHelper.hpp"
|
||||
#include "TypeId.h"
|
||||
#pragma once
|
||||
|
||||
|
||||
namespace pywrap {
|
||||
/*
|
||||
Singleton class to handle Python/numpy interpreter.
|
||||
*/
|
||||
using json = nlohmann::json;
|
||||
class PyWrap {
|
||||
public:
|
||||
PyWrap() = default;
|
||||
PyWrap(PyWrap& other) = delete;
|
||||
static PyWrap* GetInstance();
|
||||
void operator=(const PyWrap&) = delete;
|
||||
~PyWrap() = default;
|
||||
std::string callMethodString(const clfId_t id, const std::string& method);
|
||||
int callMethodInt(const clfId_t id, const std::string& method);
|
||||
std::string sklearnVersion();
|
||||
std::string version(const clfId_t id);
|
||||
int callMethodSumOfItems(const clfId_t id, const std::string& method);
|
||||
void setHyperparameters(const clfId_t id, const json& hyperparameters);
|
||||
void fit(const clfId_t id, CPyObject& X, CPyObject& y);
|
||||
PyObject* predict(const clfId_t id, CPyObject& X);
|
||||
double score(const clfId_t id, CPyObject& X, CPyObject& y);
|
||||
void clean(const clfId_t id);
|
||||
void importClass(const clfId_t id, const std::string& moduleName, const std::string& className);
|
||||
PyObject* getClass(const clfId_t id);
|
||||
private:
|
||||
// Only call RemoveInstance from clean method
|
||||
static void RemoveInstance();
|
||||
void errorAbort(const std::string& message);
|
||||
// No need to use static map here, since this class is a singleton
|
||||
std::map<clfId_t, std::tuple<PyObject*, PyObject*, PyObject*>> moduleClassMap;
|
||||
static CPyInstance* pyInstance;
|
||||
static PyWrap* wrapper;
|
||||
static std::mutex mutex;
|
||||
};
|
||||
} /* namespace pywrap */
|
||||
#endif /* PYWRAP_H */
|
@@ -1,18 +0,0 @@
|
||||
#include "PyClf.h"
|
||||
|
||||
namespace PyClassifiers {
|
||||
|
||||
PyClf::PyClf(const std::std::string& name) : name(name)
|
||||
{
|
||||
env = platform::DotEnv();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
PyClf::~PyClf()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
} /* namespace PyClassifiers */
|
@@ -1,20 +0,0 @@
|
||||
#include "RandomForest.h"
|
||||
|
||||
namespace pywrap {
|
||||
RandomForest::RandomForest() : PyClassifier("sklearn.ensemble", "RandomForestClassifier", true)
|
||||
{
|
||||
validHyperparameters = { "n_estimators", "n_jobs", "random_state" };
|
||||
}
|
||||
int RandomForest::getNumberOfEdges() const
|
||||
{
|
||||
return callMethodSumOfItems("get_n_leaves");
|
||||
}
|
||||
int RandomForest::getNumberOfStates() const
|
||||
{
|
||||
return callMethodSumOfItems("get_depth");
|
||||
}
|
||||
int RandomForest::getNumberOfNodes() const
|
||||
{
|
||||
return callMethodSumOfItems("node_count");
|
||||
}
|
||||
} /* namespace pywrap */
|
@@ -1,15 +0,0 @@
|
||||
#ifndef RANDOMFOREST_H
|
||||
#define RANDOMFOREST_H
|
||||
#include "PyClassifier.h"
|
||||
|
||||
namespace pywrap {
|
||||
class RandomForest : public PyClassifier {
|
||||
public:
|
||||
RandomForest();
|
||||
~RandomForest() = default;
|
||||
int getNumberOfEdges() const override;
|
||||
int getNumberOfStates() const override;
|
||||
int getNumberOfNodes() const override;
|
||||
};
|
||||
} /* namespace pywrap */
|
||||
#endif /* RANDOMFOREST_H */
|
@@ -1,24 +0,0 @@
|
||||
#include "STree.h"
|
||||
|
||||
namespace pywrap {
|
||||
STree::STree() : PyClassifier("stree", "Stree")
|
||||
{
|
||||
validHyperparameters = { "C", "kernel", "max_iter", "max_depth", "random_state", "multiclass_strategy", "gamma", "max_features", "degree" };
|
||||
};
|
||||
int STree::getNumberOfNodes() const
|
||||
{
|
||||
return callMethodInt("get_nodes");
|
||||
}
|
||||
int STree::getNumberOfEdges() const
|
||||
{
|
||||
return callMethodInt("get_leaves");
|
||||
}
|
||||
int STree::getNumberOfStates() const
|
||||
{
|
||||
return callMethodInt("get_depth");
|
||||
}
|
||||
std::string STree::graph()
|
||||
{
|
||||
return callMethodString("graph");
|
||||
}
|
||||
} /* namespace pywrap */
|
@@ -1,17 +0,0 @@
|
||||
#ifndef STREE_H
|
||||
#define STREE_H
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "PyClassifier.h"
|
||||
|
||||
namespace pywrap {
|
||||
class STree : public PyClassifier {
|
||||
public:
|
||||
STree();
|
||||
~STree() = default;
|
||||
int getNumberOfNodes() const override;
|
||||
int getNumberOfEdges() const override;
|
||||
int getNumberOfStates() const override;
|
||||
std::string graph();
|
||||
};
|
||||
} /* namespace pywrap */
|
||||
#endif /* STREE_H */
|
@@ -1,8 +0,0 @@
|
||||
#include "SVC.h"
|
||||
|
||||
namespace pywrap {
|
||||
SVC::SVC() : PyClassifier("sklearn.svm", "SVC", true)
|
||||
{
|
||||
validHyperparameters = { "C", "gamma", "kernel", "random_state" };
|
||||
}
|
||||
} /* namespace pywrap */
|
@@ -1,13 +0,0 @@
|
||||
#ifndef SVC_H
|
||||
#define SVC_H
|
||||
#include "PyClassifier.h"
|
||||
|
||||
namespace pywrap {
|
||||
class SVC : public PyClassifier {
|
||||
public:
|
||||
SVC();
|
||||
~SVC() = default;
|
||||
};
|
||||
|
||||
} /* namespace pywrap */
|
||||
#endif /* SVC_H */
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user