Compare commits

..

73 Commits

Author SHA1 Message Date
65a96851ef Check min number of nested folds 2024-01-04 11:01:59 +01:00
722da7f781 Keep only mpi b_grid compute 2024-01-04 01:21:56 +01:00
b1833a5feb Add reset color to final progress bar 2024-01-03 22:45:16 +01:00
41a0bd4ddd fix dataset name mistakes 2024-01-03 17:15:57 +01:00
9ab4fc7d76 Fix some mistakes in methods 2024-01-03 11:53:46 +01:00
beadb7465f Complete first approach 2023-12-31 12:02:13 +01:00
652e5f623f Add todo comments 2023-12-28 23:32:24 +01:00
b7fef9a99d Remove kk file 2023-12-28 23:24:59 +01:00
343269d48c Fix syntax errors 2023-12-28 23:21:50 +01:00
21c4c6df51 Fix first mistakes in structure 2023-12-25 19:33:52 +01:00
702f086706 Update miniconda instructions 2023-12-23 19:54:00 +01:00
981bc8f98b Fix install message in readme 2023-12-23 01:00:55 +01:00
e0b7b2d316 Set structure & protocol of producer-consumer 2023-12-22 12:47:13 +01:00
9b9e91e856 Merge pull request 'mpi_grid' (#14) from mpi_grid into main
Reviewed-on: #14
2023-12-18 09:05:55 +00:00
18e8e84284 Add openmpi instructions for Oracle Linux 2023-12-17 12:19:50 +01:00
7de11b0e6d Fix format of duration 2023-12-17 01:45:04 +01:00
9b8db37a4b Fix duration of task not set 2023-12-16 19:31:45 +01:00
49b26bd04b fix duration output 2023-12-16 12:53:25 +01:00
b5b5b48864 Update grid progress bar output 2023-12-15 18:09:17 +01:00
19586a3a5a Fix pesky error allocating memory in workers 2023-12-15 01:54:13 +01:00
ffe6d37436 Add messages to control trace 2023-12-14 21:06:43 +01:00
b73f4be146 First try with complete algorithm 2023-12-14 15:55:08 +01:00
dbf2f35502 First compiling version 2023-12-12 18:57:57 +01:00
db9e80a70e Create build tasks 2023-12-12 12:15:22 +01:00
40ae4ad7f9 Include mpi in CMakeLists 2023-12-11 09:06:05 +01:00
234342f2de Add mpi parameter to b_grid 2023-12-10 22:33:17 +01:00
aa0936abd1 Add --exclude parameter to b_grid to exclude datasets 2023-12-08 12:09:08 +01:00
f0d6f0cc38 Fix sample building 2023-12-04 19:12:44 +01:00
cc316bb8d3 Add colors to results of gridsearch 2023-12-04 17:34:00 +01:00
0723564e66 Fix some output in gridsearch 2023-12-03 17:55:44 +01:00
2e95e8999d Complete nested gridsearch 2023-12-03 12:37:25 +01:00
fb9b395748 Begin output nested grid 2023-12-02 13:19:12 +01:00
03e4437fea refactor gridsearch to have only one go method 2023-12-02 10:59:05 +01:00
33cd32c639 Add header to grid output and report 2023-12-01 10:30:53 +01:00
c460ef46ed Refactor gridsearch method 2023-11-30 11:01:37 +01:00
dee9c674da Refactor grid input hyperparameter file 2023-11-29 18:24:34 +01:00
e3f6dc1e0b Fix tolerance hyperp error & gridsearch 2023-11-29 12:33:50 +01:00
460d20a402 Add reports to gridsearch 2023-11-29 00:26:48 +01:00
8dbbb65a2f Add only parameter to gridsearch 2023-11-28 10:08:40 +01:00
d06bf187b2 Implement Random Forest nodes/leaves/depth 2023-11-28 00:35:38 +01:00
4addaefb47 Implement sklearn version in PyWrap 2023-11-27 22:34:34 +01:00
82964190f6 Add nodes/leaves/depth to STree & ODTE 2023-11-27 10:57:57 +01:00
4fefe9a1d2 Add grid input info to grid output 2023-11-26 16:07:32 +01:00
7c12dd25e5 Fix upper case typo 2023-11-26 10:55:32 +01:00
c713c0b1df Add continue from parameter to gridsearch 2023-11-26 10:36:09 +01:00
64069a6cb7 Adapt b_main to the new hyperparam file format 2023-11-25 16:52:25 +01:00
ba2a3f9523 Merge pull request 'gridsearch' (#13) from gridsearch into main
Reviewed-on: #13
2023-11-25 11:16:13 +00:00
f94e2d6a27 Add quiet parameter 2023-11-24 21:16:20 +01:00
2121ba9b98 Refactor input grid parameters to json file 2023-11-24 09:57:29 +01:00
8b7b59d42b Complete first step 2023-11-23 12:59:21 +01:00
bbe5302ab1 Add info to output 2023-11-22 16:38:50 +01:00
c2eb727fc7 Complete output interface of gridsearch 2023-11-22 16:30:04 +01:00
fb347ed5b9 Begin gridsearch implementation 2023-11-22 12:22:30 +01:00
b657762c0c Generate combinations sample 2023-11-22 00:18:24 +01:00
495d8a8528 Begin implementing grid combinations 2023-11-21 13:11:14 +01:00
4628e48d3c Build gridsearch structure 2023-11-20 23:32:34 +01:00
5876be4b24 Add more install instructions of Boost to README 2023-11-20 20:39:22 +01:00
dc3400197f Add coment todo impelemt number of nodes 2023-11-20 01:14:13 +01:00
26d3a57782 Add info to invalid hyperparameter exception 2023-11-19 23:02:28 +01:00
4f3a04058f Refactor Hyperparameters management 2023-11-19 22:36:27 +01:00
89c4613591 Implement hyperparameters with json file 2023-11-18 11:56:10 +01:00
28f3d87e32 Add Python Classifiers
Add STree, Odte, SVC & RandomForest Classifiers
Remove using namespace ... in project
2023-11-17 11:11:05 +01:00
e8d2c9fc0b Set intolerant convergence 2023-11-17 10:26:25 +01:00
d3cb580387 Remove n_jobs from STree 2023-11-17 10:10:31 +01:00
f088df14fd Restore the Creation model position in experiment 2023-11-17 01:10:46 +01:00
e2249eace7 Disable Warning messages in python clfs
Disable removing Python env
2023-11-16 22:38:46 +01:00
64f5a7f14a Fix header in example 2023-11-16 17:03:40 +01:00
408db2aad5 Mark override fit funtcion 2023-11-14 18:59:41 +01:00
e03efb5f63 set tolerance=0 if feature selection in BoostAODE 2023-11-14 10:12:02 +01:00
f617886133 Add new models to example 2023-11-14 09:12:25 +01:00
69ad660040 Refactor version method in PyClassifier 2023-11-13 13:59:06 +01:00
431b3a3aa5 Fit PyWrap into BayesNet 2023-11-13 11:13:32 +01:00
6a23e2cc26 Add CMakelist integration 2023-11-12 22:14:29 +01:00
56 changed files with 2400 additions and 230 deletions

3
.gitignore vendored
View File

@@ -32,8 +32,7 @@
*.out *.out
*.app *.app
build/** build/**
build_debug/** build_*/**
build_release/**
*.dSYM/** *.dSYM/**
cmake-build*/** cmake-build*/**
.idea .idea

10
.gitmodules vendored
View File

@@ -1,15 +1,25 @@
[submodule "lib/mdlp"] [submodule "lib/mdlp"]
path = lib/mdlp path = lib/mdlp
url = https://github.com/rmontanana/mdlp url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"] [submodule "lib/catch2"]
path = lib/catch2 path = lib/catch2
main = v2.x
update = merge
url = https://github.com/catchorg/Catch2.git url = https://github.com/catchorg/Catch2.git
[submodule "lib/argparse"] [submodule "lib/argparse"]
path = lib/argparse path = lib/argparse
url = https://github.com/p-ranav/argparse url = https://github.com/p-ranav/argparse
master = master
update = merge
[submodule "lib/json"] [submodule "lib/json"]
path = lib/json path = lib/json
url = https://github.com/nlohmann/json.git url = https://github.com/nlohmann/json.git
master = master
update = merge
[submodule "lib/libxlsxwriter"] [submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git url = https://github.com/jmcnamara/libxlsxwriter.git
main = main
update = merge

65
.vscode/launch.json vendored
View File

@@ -5,7 +5,7 @@
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "sample", "name": "sample",
"program": "${workspaceFolder}/build/sample/BayesNetSample", "program": "${workspaceFolder}/build_debug/sample/BayesNetSample",
"args": [ "args": [
"-d", "-d",
"iris", "iris",
@@ -21,25 +21,58 @@
{ {
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "experiment", "name": "experimentPy",
"program": "${workspaceFolder}/build/src/Platform/b_main", "program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"STree",
"--stratified",
"-d",
"iris",
//"--discretize"
// "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "gridsearch",
"program": "${workspaceFolder}/build_debug/src/Platform/b_grid",
"args": [
"-m",
"KDB",
"--discretize",
"--continue",
"glass",
"--only",
"--compute"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "experimentBayes",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [ "args": [
"-m", "-m",
"TAN", "TAN",
"--stratified", "--stratified",
"--discretize",
"-d", "-d",
"zoo", "iris",
"--discretize" "--hyperparameters",
// "--hyperparameters", "{\"repeatSparent\": true, \"maxModels\": 12}"
// "{\"repeatSparent\": true, \"maxModels\": 12}"
], ],
"cwd": "/Users/rmontanana/Code/odtebench", "cwd": "/home/rmontanana/Code/discretizbench",
}, },
{ {
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "best", "name": "best",
"program": "${workspaceFolder}/build/src/Platform/b_best", "program": "${workspaceFolder}/build_debug/src/Platform/b_best",
"args": [ "args": [
"-m", "-m",
"BoostAODE", "BoostAODE",
@@ -47,33 +80,33 @@
"accuracy", "accuracy",
"--build", "--build",
], ],
"cwd": "/Users/rmontanana/Code/discretizbench", "cwd": "${workspaceFolder}/../discretizbench",
}, },
{ {
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "manage", "name": "manage",
"program": "${workspaceFolder}/build/src/Platform/b_manage", "program": "${workspaceFolder}/build_debug/src/Platform/b_manage",
"args": [ "args": [
"-n", "-n",
"20" "20"
], ],
"cwd": "/Users/rmontanana/Code/discretizbench", "cwd": "${workspaceFolder}/../discretizbench",
}, },
{ {
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "list", "name": "list",
"program": "${workspaceFolder}/build/src/Platform/b_list", "program": "${workspaceFolder}/build_debug/src/Platform/b_list",
"args": [], "args": [],
//"cwd": "/Users/rmontanana/Code/discretizbench", //"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "/home/rmontanana/Code/covbench", "cwd": "${workspaceFolder}/../discretizbench",
}, },
{ {
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",
"name": "test", "name": "test",
"program": "${workspaceFolder}/build/tests/unit_tests", "program": "${workspaceFolder}/build_debug/tests/unit_tests",
"args": [ "args": [
"-c=\"Metrics Test\"", "-c=\"Metrics Test\"",
// "-s", // "-s",
@@ -84,7 +117,7 @@
"name": "Build & debug active file", "name": "Build & debug active file",
"type": "cppdbg", "type": "cppdbg",
"request": "launch", "request": "launch",
"program": "${workspaceFolder}/build/bayesnet", "program": "${workspaceFolder}/build_debug/bayesnet",
"args": [], "args": [],
"stopAtEntry": false, "stopAtEntry": false,
"cwd": "${workspaceFolder}", "cwd": "${workspaceFolder}",

View File

@@ -25,23 +25,33 @@ set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# Options # Options
# ------- # -------
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF) option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" OFF) option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF) option(CODE_COVERAGE "Collect coverage from test library" OFF)
option(MPI_ENABLED "Enable MPI options" ON)
if (MPI_ENABLED)
find_package(MPI REQUIRED)
message("MPI_CXX_LIBRARIES=${MPI_CXX_LIBRARIES}")
message("MPI_CXX_INCLUDE_DIRS=${MPI_CXX_INCLUDE_DIRS}")
endif (MPI_ENABLED)
# Boost Library # Boost Library
set(Boost_USE_STATIC_LIBS OFF) set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON) set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF) set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost 1.66.0 REQUIRED) find_package(Boost 1.66.0 REQUIRED COMPONENTS python3 numpy3)
if(Boost_FOUND) if(Boost_FOUND)
message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}") message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
include_directories(${Boost_INCLUDE_DIRS}) include_directories(${Boost_INCLUDE_DIRS})
endif() endif()
# Python
find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED)
message("Python3_LIBRARIES=${Python3_LIBRARIES}")
# CMakes modules # CMakes modules
# -------------- # --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
@@ -77,6 +87,7 @@ add_subdirectory(config)
add_subdirectory(lib/Files) add_subdirectory(lib/Files)
add_subdirectory(src/BayesNet) add_subdirectory(src/BayesNet)
add_subdirectory(src/Platform) add_subdirectory(src/Platform)
add_subdirectory(src/PyClassifiers)
add_subdirectory(sample) add_subdirectory(sample)
file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h) file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h)

View File

@@ -4,7 +4,7 @@ SHELL := /bin/bash
f_release = build_release f_release = build_release
f_debug = build_debug f_debug = build_debug
app_targets = b_best b_list b_main b_manage app_targets = b_best b_list b_main b_manage b_grid
test_targets = unit_tests_bayesnet unit_tests_platform test_targets = unit_tests_bayesnet unit_tests_platform
n_procs = -j 16 n_procs = -j 16
@@ -35,11 +35,13 @@ dest ?= ${HOME}/bin
install: ## Copy binary files to bin folder install: ## Copy binary files to bin folder
@echo "Destination folder: $(dest)" @echo "Destination folder: $(dest)"
make buildr make buildr
@echo "*******************************************"
@echo ">>> Copying files to $(dest)" @echo ">>> Copying files to $(dest)"
@cp $(f_release)/src/Platform/b_main $(dest) @echo "*******************************************"
@cp $(f_release)/src/Platform/b_list $(dest) @for item in $(app_targets); do \
@cp $(f_release)/src/Platform/b_manage $(dest) echo ">>> Copying $$item" ; \
@cp $(f_release)/src/Platform/b_best $(dest) cp $(f_release)/src/Platform/$$item $(dest) ; \
done
dependency: ## Create a dependency graph diagram of the project (build/dependency.png) dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
@echo ">>> Creating dependency graph diagram of the project..."; @echo ">>> Creating dependency graph diagram of the project...";
@@ -47,10 +49,10 @@ dependency: ## Create a dependency graph diagram of the project (build/dependenc
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
buildd: ## Build the debug targets buildd: ## Build the debug targets
cmake --build $(f_debug) -t $(app_targets) $(n_procs) cmake --build $(f_debug) -t $(app_targets) BayesNetSample $(n_procs)
buildr: ## Build the release targets buildr: ## Build the release targets
cmake --build $(f_release) -t $(app_targets) $(n_procs) cmake --build $(f_release) -t $(app_targets) BayesNetSample $(n_procs)
clean: ## Clean the tests info clean: ## Clean the tests info
@echo ">>> Cleaning Debug BayesNet tests..."; @echo ">>> Cleaning Debug BayesNet tests...";

View File

@@ -8,6 +8,32 @@ Bayesian Network Classifier with libtorch from scratch
Before compiling BayesNet. Before compiling BayesNet.
### Miniconda
To be able to run Python Classifiers such as STree, ODTE, SVC, etc. it is needed to install Miniconda. To do so, download the installer from [Miniconda](https://docs.conda.io/en/latest/miniconda.html) and run it. It is recommended to install it in the home folder.
In Linux sometimes the library libstdc++ is mistaken from the miniconda installation and produces the next message when running the b_xxxx executables:
```bash
libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by b_xxxx)
```
The solution is to erase the libstdc++ library from the miniconda installation:
### MPI
In Linux just install openmpi & openmpi-devel packages. Only if cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
```bash
export MPI_HOME="/usr/lib64/openmpi"
```
In Mac OS X, install mpich with brew and if cmake doesn't find it, edit mpicxx wrapper to remove the ",-commons,use_dylibs" from final_ldflags
```bash
vi /opt/homebrew/bin/mpicx
```
### boost library ### boost library
[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>) [Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
@@ -18,12 +44,24 @@ The best option is install the packages that the Linux distribution have in its
sudo dnf install boost-devel sudo dnf install boost-devel
``` ```
If this is not possible and the compressed packaged is installed, the following environment variable has to be set: If this is not possible and the compressed packaged is installed, the following environment variable has to be set pointing to the folder where it was unzipped to:
```bash ```bash
export BOOST_ROOT=/path/to/library/ export BOOST_ROOT=/path/to/library/
``` ```
In some cases, it is needed to build the library, to do so:
```bash
cd /path/to/library
mkdir own
./bootstrap.sh --prefix=/path/to/library/own
./b2 install
export BOOST_ROOT=/path/to/library/own/
```
Don't forget to add the export BOOST_ROOT statement to .bashrc or wherever it is meant to be.
### libxlswriter ### libxlswriter
```bash ```bash

162
grid_stree.json Normal file
View File

@@ -0,0 +1,162 @@
{
"balance-scale": {
"C": 10000.0,
"gamma": 0.1,
"kernel": "rbf",
"max_iter": 10000
},
"balloons": {
"C": 7,
"gamma": 0.1,
"kernel": "rbf",
"max_iter": 10000
},
"breast-cancer-wisc-diag": {
"C": 0.2,
"max_iter": 10000
},
"breast-cancer-wisc-prog": {
"C": 0.2,
"max_iter": 10000
},
"breast-cancer-wisc": {},
"breast-cancer": {},
"cardiotocography-10clases": {},
"cardiotocography-3clases": {},
"conn-bench-sonar-mines-rocks": {},
"cylinder-bands": {},
"dermatology": {
"C": 55,
"max_iter": 10000
},
"echocardiogram": {
"C": 7,
"gamma": 0.1,
"kernel": "poly",
"max_features": "auto",
"max_iter": 10000
},
"fertility": {
"C": 0.05,
"max_features": "auto",
"max_iter": 10000
},
"haberman-survival": {},
"heart-hungarian": {
"C": 0.05,
"max_iter": 10000
},
"hepatitis": {
"C": 7,
"gamma": 0.1,
"kernel": "rbf",
"max_iter": 10000
},
"ilpd-indian-liver": {},
"ionosphere": {
"C": 7,
"gamma": 0.1,
"kernel": "rbf",
"max_iter": 10000
},
"iris": {},
"led-display": {},
"libras": {
"C": 0.08,
"max_iter": 10000
},
"low-res-spect": {
"C": 0.05,
"max_iter": 10000
},
"lymphography": {
"C": 0.05,
"max_iter": 10000
},
"mammographic": {},
"molec-biol-promoter": {
"C": 0.05,
"gamma": 0.1,
"kernel": "poly",
"max_iter": 10000
},
"musk-1": {
"C": 0.05,
"gamma": 0.1,
"kernel": "poly",
"max_iter": 10000
},
"oocytes_merluccius_nucleus_4d": {
"C": 8.25,
"gamma": 0.1,
"kernel": "poly"
},
"oocytes_merluccius_states_2f": {},
"oocytes_trisopterus_nucleus_2f": {},
"oocytes_trisopterus_states_5b": {
"C": 0.11,
"max_iter": 10000
},
"parkinsons": {},
"pima": {},
"pittsburg-bridges-MATERIAL": {
"C": 7,
"gamma": 0.1,
"kernel": "rbf",
"max_iter": 10000
},
"pittsburg-bridges-REL-L": {},
"pittsburg-bridges-SPAN": {
"C": 0.05,
"max_iter": 10000
},
"pittsburg-bridges-T-OR-D": {},
"planning": {
"C": 7,
"gamma": 10.0,
"kernel": "rbf",
"max_iter": 10000
},
"post-operative": {
"C": 55,
"degree": 5,
"gamma": 0.1,
"kernel": "poly",
"max_iter": 10000
},
"seeds": {
"C": 10000.0,
"max_iter": 10000
},
"statlog-australian-credit": {
"C": 0.05,
"max_features": "auto",
"max_iter": 10000
},
"statlog-german-credit": {},
"statlog-heart": {},
"statlog-image": {
"C": 7,
"max_iter": 10000
},
"statlog-vehicle": {},
"synthetic-control": {
"C": 0.55,
"max_iter": 10000
},
"tic-tac-toe": {
"C": 0.2,
"gamma": 0.1,
"kernel": "poly",
"max_iter": 10000
},
"vertebral-column-2clases": {},
"wine": {
"C": 0.55,
"max_iter": 10000
},
"zoo": {
"C": 0.1,
"max_iter": 10000
}
}

View File

@@ -1,8 +1,10 @@
include_directories(${BayesNet_SOURCE_DIR}/src/Platform) include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
include_directories(${Python3_INCLUDE_DIRS})
include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap)

View File

@@ -1,10 +1,10 @@
#include <iostream> #include <iostream>
#include <torch/torch.h> #include <torch/torch.h>
#include <std::string> #include <string>
#include <map> #include <map>
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "ArffFiles.h"v #include "ArffFiles.h"
#include "BayesMetrics.h" #include "BayesMetrics.h"
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
#include "Folding.h" #include "Folding.h"
@@ -29,7 +29,7 @@ pair<std::vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<
return { Xd, maxes }; return { Xd, maxes };
} }
bool file_exists(const std::std::std::string& name) bool file_exists(const std::string& name)
{ {
if (FILE* file = fopen(name.c_str(), "r")) { if (FILE* file = fopen(name.c_str(), "r")) {
fclose(file); fclose(file);
@@ -72,7 +72,7 @@ int main(int argc, char** argv)
argparse::ArgumentParser program("BayesNetSample"); argparse::ArgumentParser program("BayesNetSample");
program.add_argument("-d", "--dataset") program.add_argument("-d", "--dataset")
.help("Dataset file name") .help("Dataset file name")
.action([valid_datasets](const std::std::std::string& value) { .action([valid_datasets](const std::string& value) {
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
return value; return value;
} }
@@ -84,20 +84,20 @@ int main(int argc, char** argv)
.default_value(std::string{ PATH } .default_value(std::string{ PATH }
); );
program.add_argument("-m", "--model") program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->tostd::string()) .help("Model to use " + platform::Models::instance()->tostring())
.action([](const std::std::std::string& value) { .action([](const std::string& value) {
static const std::vector<std::string> choices = platform::Models::instance()->getNames(); static const std::vector<std::string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) { if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value; return value;
} }
throw runtime_error("Model must be one of " + platform::Models::instance()->tostd::string()); throw runtime_error("Model must be one of " + platform::Models::instance()->tostring());
} }
); );
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true); program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true); program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const std::std::string& value) { program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const std::string& value) {
try { try {
auto k = stoi(value); auto k = stoi(value);
if (k < 2) { if (k < 2) {
@@ -184,8 +184,8 @@ int main(int argc, char** argv)
file.close(); file.close();
std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl; std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl; std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
std::string stratified_std::string = stratified ? " Stratified" : ""; std::string stratified_string = stratified ? " Stratified" : "";
std::cout << nFolds << " Folds" << stratified_std::string << " Cross validation" << std::endl; std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
std::cout << "==========================================" << std::endl; std::cout << "==========================================" << std::endl;
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32); torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
torch::Tensor yt = torch::tensor(y, torch::kInt32); torch::Tensor yt = torch::tensor(y, torch::kInt32);

View File

@@ -1,5 +1,4 @@
#include "AODELd.h" #include "AODELd.h"
#include "Models.h"
namespace bayesnet { namespace bayesnet {
AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {} AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {}

View File

@@ -6,8 +6,6 @@
namespace bayesnet { namespace bayesnet {
enum status_t { NORMAL, WARNING, ERROR }; enum status_t { NORMAL, WARNING, ERROR };
class BaseClassifier { class BaseClassifier {
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
public: public:
// X is nxm std::vector, y is nx1 std::vector // X is nxm std::vector, y is nx1 std::vector
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0; virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
@@ -26,10 +24,14 @@ namespace bayesnet {
int virtual getNumberOfStates() const = 0; int virtual getNumberOfStates() const = 0;
std::vector<std::string> virtual show() const = 0; std::vector<std::string> virtual show() const = 0;
std::vector<std::string> virtual graph(const std::string& title = "") const = 0; std::vector<std::string> virtual graph(const std::string& title = "") const = 0;
const std::string inline getVersion() const { return "0.2.0"; }; virtual std::string getVersion() = 0;
std::vector<std::string> virtual topological_order() = 0; std::vector<std::string> virtual topological_order() = 0;
void virtual dump_cpt()const = 0; void virtual dump_cpt()const = 0;
virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0; virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
std::vector<std::string> validHyperparameters;
}; };
} }
#endif #endif

View File

@@ -10,7 +10,11 @@
#include "IWSS.h" #include "IWSS.h"
namespace bayesnet { namespace bayesnet {
BoostAODE::BoostAODE() : Ensemble() {} BoostAODE::BoostAODE() : Ensemble()
{
validHyperparameters = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features", "tolerance" };
}
void BoostAODE::buildModel(const torch::Tensor& weights) void BoostAODE::buildModel(const torch::Tensor& weights)
{ {
// Models shall be built in trainModel // Models shall be built in trainModel
@@ -43,25 +47,32 @@ namespace bayesnet {
y_train = y_; y_train = y_;
} }
} }
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters) void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{ {
// Check if hyperparameters are valid auto hyperparameters = hyperparameters_;
const std::vector<std::string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" };
checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("repeatSparent")) { if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"]; repeatSparent = hyperparameters["repeatSparent"];
hyperparameters.erase("repeatSparent");
} }
if (hyperparameters.contains("maxModels")) { if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"]; maxModels = hyperparameters["maxModels"];
hyperparameters.erase("maxModels");
} }
if (hyperparameters.contains("ascending")) { if (hyperparameters.contains("ascending")) {
ascending = hyperparameters["ascending"]; ascending = hyperparameters["ascending"];
hyperparameters.erase("ascending");
} }
if (hyperparameters.contains("convergence")) { if (hyperparameters.contains("convergence")) {
convergence = hyperparameters["convergence"]; convergence = hyperparameters["convergence"];
hyperparameters.erase("convergence");
} }
if (hyperparameters.contains("threshold")) { if (hyperparameters.contains("threshold")) {
threshold = hyperparameters["threshold"]; threshold = hyperparameters["threshold"];
hyperparameters.erase("threshold");
}
if (hyperparameters.contains("tolerance")) {
tolerance = hyperparameters["tolerance"];
hyperparameters.erase("tolerance");
} }
if (hyperparameters.contains("select_features")) { if (hyperparameters.contains("select_features")) {
auto selectedAlgorithm = hyperparameters["select_features"]; auto selectedAlgorithm = hyperparameters["select_features"];
@@ -71,6 +82,10 @@ namespace bayesnet {
if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) { if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
throw std::invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]"); throw std::invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]");
} }
hyperparameters.erase("select_features");
}
if (!hyperparameters.empty()) {
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
} }
} }
std::unordered_set<int> BoostAODE::initializeModels() std::unordered_set<int> BoostAODE::initializeModels()
@@ -119,7 +134,6 @@ namespace bayesnet {
double priorAccuracy = 0.0; double priorAccuracy = 0.0;
double delta = 1.0; double delta = 1.0;
double threshold = 1e-4; double threshold = 1e-4;
int tolerance = 5; // number of times the accuracy can be lower than the threshold
int count = 0; // number of times the accuracy is lower than the threshold int count = 0; // number of times the accuracy is lower than the threshold
fitted = true; // to enable predict fitted = true; // to enable predict
// Step 0: Set the finish condition // Step 0: Set the finish condition

View File

@@ -8,9 +8,9 @@ namespace bayesnet {
class BoostAODE : public Ensemble { class BoostAODE : public Ensemble {
public: public:
BoostAODE(); BoostAODE();
virtual ~BoostAODE() {}; virtual ~BoostAODE() = default;
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override; std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override; void setHyperparameters(const nlohmann::json& hyperparameters) override;
protected: protected:
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override;
@@ -21,6 +21,7 @@ namespace bayesnet {
// Hyperparameters // Hyperparameters
bool repeatSparent = false; // if true, a feature can be selected more than once bool repeatSparent = false; // if true, a feature can be selected more than once
int maxModels = 0; int maxModels = 0;
int tolerance = 0;
bool ascending = false; //Process KBest features ascending or descending order bool ascending = false; //Process KBest features ascending or descending order
bool convergence = false; //if true, stop when the model does not improve bool convergence = false; //if true, stop when the model does not improve
bool selectFeatures = false; // if true, use feature selection bool selectFeatures = false; // if true, use feature selection

View File

@@ -3,6 +3,9 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform) include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
include_directories(${Python3_INCLUDE_DIRS})
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)

View File

@@ -153,18 +153,8 @@ namespace bayesnet {
{ {
model.dump_cpt(); model.dump_cpt();
} }
void Classifier::checkHyperparameters(const std::vector<std::string>& validKeys, nlohmann::json& hyperparameters) void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
{ {
for (const auto& item : hyperparameters.items()) { //For classifiers that don't have hyperparameters
if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid");
}
}
}
void Classifier::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid, default is no hyperparameters
const std::vector<std::string> validKeys = { };
checkHyperparameters(validKeys, hyperparameters);
} }
} }

View File

@@ -22,7 +22,6 @@ namespace bayesnet {
void checkFitParameters(); void checkFitParameters();
virtual void buildModel(const torch::Tensor& weights) = 0; virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override; void trainModel(const torch::Tensor& weights) override;
void checkHyperparameters(const std::vector<std::string>& validKeys, nlohmann::json& hyperparameters);
void buildDataset(torch::Tensor& y); void buildDataset(torch::Tensor& y);
public: public:
Classifier(Network model); Classifier(Network model);
@@ -37,13 +36,14 @@ namespace bayesnet {
int getNumberOfStates() const override; int getNumberOfStates() const override;
torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
status_t getStatus() const override { return status; } status_t getStatus() const override { return status; }
std::string getVersion() override { return "0.2.0"; };
std::vector<int> predict(std::vector<std::vector<int>>& X) override; std::vector<int> predict(std::vector<std::vector<int>>& X) override;
float score(torch::Tensor& X, torch::Tensor& y) override; float score(torch::Tensor& X, torch::Tensor& y) override;
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override; float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
std::vector<std::string> show() const override; std::vector<std::string> show() const override;
std::vector<std::string> topological_order() override; std::vector<std::string> topological_order() override;
void dump_cpt() const override; void dump_cpt() const override;
void setHyperparameters(nlohmann::json& hyperparameters) override; void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
}; };
} }
#endif #endif

View File

@@ -1,12 +1,13 @@
#include "KDB.h" #include "KDB.h"
namespace bayesnet { namespace bayesnet {
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {} KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta)
void KDB::setHyperparameters(nlohmann::json& hyperparameters) {
validHyperparameters = { "k", "theta" };
}
void KDB::setHyperparameters(const nlohmann::json& hyperparameters)
{ {
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "k", "theta" };
checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("k")) { if (hyperparameters.contains("k")) {
k = hyperparameters["k"]; k = hyperparameters["k"];
} }

View File

@@ -13,8 +13,8 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
public: public:
explicit KDB(int k, float theta = 0.03); explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {}; virtual ~KDB() = default;
void setHyperparameters(nlohmann::json& hyperparameters) override; void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& name = "KDB") const override; std::vector<std::string> graph(const std::string& name = "KDB") const override;
}; };
} }

View File

@@ -10,7 +10,7 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
public: public:
explicit SPODE(int root); explicit SPODE(int root);
virtual ~SPODE() {}; virtual ~SPODE() = default;
std::vector<std::string> graph(const std::string& name = "SPODE") const override; std::vector<std::string> graph(const std::string& name = "SPODE") const override;
}; };
} }

View File

@@ -8,7 +8,7 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override; void buildModel(const torch::Tensor& weights) override;
public: public:
TAN(); TAN();
virtual ~TAN() {}; virtual ~TAN() = default;
std::vector<std::string> graph(const std::string& name = "TAN") const override; std::vector<std::string> graph(const std::string& name = "TAN") const override;
}; };
} }

View File

@@ -1,17 +1,22 @@
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform) include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
include_directories(${Python3_INCLUDE_DIRS})
include_directories(${MPI_CXX_INCLUDE_DIRS})
add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
add_executable(b_list b_list.cc Datasets.cc Dataset.cc)
add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc HyperParameters.cc Folding.cc Datasets.cc Dataset.cc)
add_executable(b_list b_list.cc Datasets.cc Dataset.cc)
add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc)
add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp) target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp)
target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(b_grid BayesNet PyWrap ${MPI_CXX_LIBRARIES})
target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap)
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)

View File

@@ -9,6 +9,7 @@ public:
static std::string YELLOW() { return "\033[1;33m"; } static std::string YELLOW() { return "\033[1;33m"; }
static std::string RED() { return "\033[1;31m"; } static std::string RED() { return "\033[1;31m"; }
static std::string WHITE() { return "\033[1;37m"; } static std::string WHITE() { return "\033[1;37m"; }
static std::string IBLUE() { return "\033[0;94m"; }
static std::string RESET() { return "\033[0m"; } static std::string RESET() { return "\033[0m"; }
}; };
#endif // COLORS_H #endif // COLORS_H

View File

@@ -26,7 +26,6 @@ namespace platform {
oss << std::put_time(timeinfo, "%H:%M:%S"); oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str(); return oss.str();
} }
Experiment::Experiment() : hyperparameters(json::parse("{}")) {}
std::string Experiment::get_file_name() std::string Experiment::get_file_name()
{ {
std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json"; std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
@@ -134,7 +133,7 @@ namespace platform {
} }
void Experiment::cross_validation(const std::string& fileName, bool quiet) void Experiment::cross_validation(const std::string& fileName, bool quiet)
{ {
auto datasets = platform::Datasets(discretized, Paths::datasets()); auto datasets = Datasets(discretized, Paths::datasets());
// Get dataset // Get dataset
auto [X, y] = datasets.getTensors(fileName); auto [X, y] = datasets.getTensors(fileName);
auto states = datasets.getStates(fileName); auto states = datasets.getStates(fileName);
@@ -148,7 +147,7 @@ namespace platform {
auto result = Result(); auto result = Result();
auto [values, counts] = at::_unique(y); auto [values, counts] = at::_unique(y);
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0)); result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
result.setHyperparameters(hyperparameters); result.setHyperparameters(hyperparameters.get(fileName));
// Initialize results std::vectors // Initialize results std::vectors
int nResults = nfolds * static_cast<int>(randomSeeds.size()); int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64); auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
@@ -171,9 +170,9 @@ namespace platform {
for (int nfold = 0; nfold < nfolds; nfold++) { for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(model); auto clf = Models::instance()->create(model);
setModelVersion(clf->getVersion()); setModelVersion(clf->getVersion());
if (hyperparameters.size() != 0) { auto valid = clf->getValidHyperparameters();
clf->setHyperparameters(hyperparameters); hyperparameters.check(valid, fileName);
} clf->setHyperparameters(hyperparameters.get(fileName));
// Split train - test dataset // Split train - test dataset
train_timer.start(); train_timer.start();
auto [train, test] = fold->getFold(nfold); auto [train, test] = fold->getFold(nfold);
@@ -211,7 +210,6 @@ namespace platform {
result.addTimeTrain(train_time[item].item<double>()); result.addTimeTrain(train_time[item].item<double>());
result.addTimeTest(test_time[item].item<double>()); result.addTimeTest(test_time[item].item<double>());
item++; item++;
clf.reset();
} }
if (!quiet) if (!quiet)
std::cout << "end. " << flush; std::cout << "end. " << flush;

View File

@@ -3,29 +3,16 @@
#include <torch/torch.h> #include <torch/torch.h>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include <string> #include <string>
#include <chrono>
#include "Folding.h" #include "Folding.h"
#include "BaseClassifier.h" #include "BaseClassifier.h"
#include "HyperParameters.h"
#include "TAN.h" #include "TAN.h"
#include "KDB.h" #include "KDB.h"
#include "AODE.h" #include "AODE.h"
#include "Timer.h"
namespace platform { namespace platform {
using json = nlohmann::json; using json = nlohmann::json;
class Timer {
private:
std::chrono::high_resolution_clock::time_point begin;
public:
Timer() = default;
~Timer() = default;
void start() { begin = std::chrono::high_resolution_clock::now(); }
double getDuration()
{
std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double >> (end - begin);
return time_span.count();
}
};
class Result { class Result {
private: private:
std::string dataset, model_version; std::string dataset, model_version;
@@ -80,17 +67,8 @@ namespace platform {
const std::vector<double>& getTimesTest() const { return times_test; } const std::vector<double>& getTimesTest() const { return times_test; }
}; };
class Experiment { class Experiment {
private:
std::string title, model, platform, score_name, model_version, language_version, language;
bool discretized{ false }, stratified{ false };
std::vector<Result> results;
std::vector<int> randomSeeds;
json hyperparameters = "{}";
int nfolds{ 0 };
float duration{ 0 };
json build_json();
public: public:
Experiment(); Experiment() = default;
Experiment& setTitle(const std::string& title) { this->title = title; return *this; } Experiment& setTitle(const std::string& title) { this->title = title; return *this; }
Experiment& setModel(const std::string& model) { this->model = model; return *this; } Experiment& setModel(const std::string& model) { this->model = model; return *this; }
Experiment& setPlatform(const std::string& platform) { this->platform = platform; return *this; } Experiment& setPlatform(const std::string& platform) { this->platform = platform; return *this; }
@@ -104,13 +82,22 @@ namespace platform {
Experiment& addResult(Result result) { results.push_back(result); return *this; } Experiment& addResult(Result result) { results.push_back(result); return *this; }
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; } Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->duration = duration; return *this; } Experiment& setDuration(float duration) { this->duration = duration; return *this; }
Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; } Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
std::string get_file_name(); std::string get_file_name();
void save(const std::string& path); void save(const std::string& path);
void cross_validation(const std::string& fileName, bool quiet); void cross_validation(const std::string& fileName, bool quiet);
void go(std::vector<std::string> filesToProcess, bool quiet); void go(std::vector<std::string> filesToProcess, bool quiet);
void show(); void show();
void report(); void report();
private:
std::string title, model, platform, score_name, model_version, language_version, language;
bool discretized{ false }, stratified{ false };
std::vector<Result> results;
std::vector<int> randomSeeds;
HyperParameters hyperparameters;
int nfolds{ 0 };
float duration{ 0 };
json build_json();
}; };
} }
#endif #endif

75
src/Platform/GridData.cc Normal file
View File

@@ -0,0 +1,75 @@
#include "GridData.h"
#include <fstream>
namespace platform {
GridData::GridData(const std::string& fileName)
{
    // Parse the JSON grid definition file; fail loudly if it cannot be read.
    std::ifstream resultData(fileName);
    if (!resultData.is_open()) {
        throw std::invalid_argument("Unable to open input file. [" + fileName + "]");
    }
    json grid_file = json::parse(resultData);
    // Copy every top-level entry (dataset key -> grid specification) into the map.
    for (auto it = grid_file.begin(); it != grid_file.end(); ++it) {
        grid[it.key()] = it.value();
    }
}
int GridData::computeNumCombinations(const json& line)
{
int numCombinations = 1;
for (const auto& item : line.items()) {
numCombinations *= item.value().size();
}
return numCombinations;
}
int GridData::getNumCombinations(const std::string& dataset)
{
    // Total number of combinations across every grid line defined for this
    // dataset (falls back to the "all" entry when no specific one exists).
    int total = 0;
    for (const auto& line : grid.at(decide_dataset(dataset))) {
        total += computeNumCombinations(line);
    }
    return total;
}
// Recursively expands the cartesian product of the values held by the json
// object range [index, last), appending every full combination to output.
// currentCombination accumulates the key/value choices made so far on the
// current recursion path; it is passed by value on purpose so each branch
// owns an independent copy.
// The returned json is the combination as it was when this frame was entered;
// callers use the function for its side effect on output, not its return value.
json GridData::generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination)
{
    if (index == last) {
        // If we reached the end of input, store the current combination
        output.push_back(currentCombination);
        return currentCombination;
    }
    const auto& key = index.key();
    const auto& values = index.value();
    // Branch once per candidate value of the current key, then recurse on the
    // remaining keys.
    for (const auto& value : values) {
        auto combination = currentCombination;
        combination[key] = value;
        // Copy the iterator before advancing so the loop keeps its own position.
        json::iterator nextIndex = index;
        generateCombinations(++nextIndex, last, output, combination);
    }
    return currentCombination;
}
std::vector<json> GridData::getGrid(const std::string& dataset)
{
    // Expand every grid line of the selected dataset into the full list of
    // concrete hyperparameter combinations (cartesian product per line).
    std::vector<json> combinations;
    for (json line : grid.at(decide_dataset(dataset))) {
        generateCombinations(line.begin(), line.end(), combinations, json({}));
    }
    return combinations;
}
json& GridData::getInputGrid(const std::string& dataset)
{
    // Reference to the raw (unexpanded) grid specification for the dataset.
    return grid.at(decide_dataset(dataset));
}
std::string GridData::decide_dataset(const std::string& dataset)
{
    // Use the dataset-specific grid when one is present, otherwise the
    // default "all" grid.
    return grid.find(dataset) != grid.end() ? dataset : ALL_DATASETS;
}
} /* namespace platform */

26
src/Platform/GridData.h Normal file
View File

@@ -0,0 +1,26 @@
#ifndef GRIDDATA_H
#define GRIDDATA_H
#include <string>
#include <vector>
#include <map>
#include <nlohmann/json.hpp>
namespace platform {
    using json = nlohmann::json;
    // Key used in the grid file for a grid that applies to every dataset
    // that has no specific entry of its own.
    const std::string ALL_DATASETS = "all";
    // Loads a hyperparameter grid definition from a JSON file and expands it
    // into the concrete hyperparameter combinations of each dataset.
    class GridData {
    public:
        // Parses fileName; throws std::invalid_argument if it cannot be opened.
        explicit GridData(const std::string& fileName);
        ~GridData() = default;
        // All combinations (cartesian product of every grid line) for the dataset.
        std::vector<json> getGrid(const std::string& dataset = ALL_DATASETS);
        // Number of combinations for the dataset, without materializing them.
        int getNumCombinations(const std::string& dataset = ALL_DATASETS);
        // Raw, unexpanded grid specification for the dataset.
        json& getInputGrid(const std::string& dataset = ALL_DATASETS);
        // Whole parsed grid file, keyed by dataset name (or ALL_DATASETS).
        std::map<std::string, json>& getGridFile() { return grid; }
    private:
        // Returns dataset if a specific grid exists for it, ALL_DATASETS otherwise.
        std::string decide_dataset(const std::string& dataset);
        json generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination);
        int computeNumCombinations(const json& line);
        std::map<std::string, json> grid;
    };
} /* namespace platform */
#endif /* GRIDDATA_H */

441
src/Platform/GridSearch.cc Normal file
View File

@@ -0,0 +1,441 @@
#include <iostream>
#include <cstddef>
#include <torch/torch.h>
#include "GridSearch.h"
#include "Models.h"
#include "Paths.h"
#include "Folding.h"
#include "Colors.h"
namespace platform {
std::string get_date()
{
    // Current local date formatted as YYYY-MM-DD.
    time_t now;
    time(&now);
    std::ostringstream oss;
    oss << std::put_time(std::localtime(&now), "%Y-%m-%d");
    return oss.str();
}
std::string get_time()
{
    // Current local time formatted as HH:MM:SS.
    time_t now;
    time(&now);
    std::ostringstream oss;
    oss << std::put_time(std::localtime(&now), "%H:%M:%S");
    return oss.str();
}
std::string get_color_rank(int rank)
{
auto colors = { Colors::WHITE(), Colors::RED(), Colors::GREEN(), Colors::BLUE(), Colors::MAGENTA(), Colors::CYAN() };
return *(colors.begin() + rank % colors.size());
}
// Stores the supplied grid configuration for later use by go();
// no other initialization is performed here.
GridSearch::GridSearch(struct ConfigGrid& config) : config(config)
{
}
json GridSearch::loadResults()
{
    // Read previously saved results for this model; an empty json object is
    // returned when no results file exists yet.
    json previous;
    std::ifstream file(Paths::grid_output(config.model));
    if (file.is_open()) {
        previous = json::parse(file);
    }
    return previous;
}
// Builds the list of dataset names to process, honoring the continue-from
// and exclusion options of the configuration.
// Throws std::invalid_argument when the continuation dataset or an excluded
// dataset is not found in the loaded datasets.
std::vector<std::string> GridSearch::filterDatasets(Datasets& datasets) const
{
    // Load datasets
    auto datasets_names = datasets.getNames();
    if (config.continue_from != NO_CONTINUE()) {
        // Continue previous execution:
        if (std::find(datasets_names.begin(), datasets_names.end(), config.continue_from) == datasets_names.end()) {
            throw std::invalid_argument("Dataset " + config.continue_from + " not found");
        }
        // Remove datasets already processed (everything before the
        // continuation point).
        std::vector<std::string>::iterator it = datasets_names.begin();
        while (it != datasets_names.end()) {
            if (*it != config.continue_from) {
                it = datasets_names.erase(it);
            } else {
                if (config.only)
                    // "only" mode: keep just the continuation dataset, so the
                    // loop keeps erasing everything that follows it as well.
                    ++it;
                else
                    // Keep the continuation dataset and everything after it.
                    break;
            }
        }
    }
    // Exclude datasets; naming one that is not present is treated as an error.
    for (const auto& name : config.excluded) {
        auto dataset = name.get<std::string>();
        auto it = std::find(datasets_names.begin(), datasets_names.end(), dataset);
        if (it == datasets_names.end()) {
            throw std::invalid_argument("Dataset " + dataset + " already excluded or doesn't exist!");
        }
        datasets_names.erase(it);
    }
    return datasets_names;
}
// Creates one task per (dataset, seed, outer fold). Each task is later
// dispatched by the producer to a consumer process. Also prints a counting
// ruler so the progress "*" marks emitted by the workers can be tallied.
json GridSearch::build_tasks_mpi(int rank)
{
    auto tasks = json::array();
    // Constructing GridData validates that the grid input file exists and
    // parses; the expanded combinations themselves are generated later by
    // each consumer, so they are not materialized here.
    auto grid = GridData(Paths::grid_input(config.model));
    auto datasets = Datasets(false, Paths::datasets());
    auto datasets_names = filterDatasets(datasets);
    for (int idx_dataset = 0; idx_dataset < static_cast<int>(datasets_names.size()); ++idx_dataset) {
        auto dataset = datasets_names[idx_dataset];
        for (const auto& seed : config.seeds) {
            for (int n_fold = 0; n_fold < config.n_folds; n_fold++) {
                json task = {
                    { "dataset", dataset },
                    { "idx_dataset", idx_dataset},
                    { "seed", seed },
                    { "fold", n_fold},
                };
                tasks.push_back(task);
            }
        }
    }
    // Shuffle the array so heavy datasets are spread across the workers.
    // A fixed seed keeps the order reproducible between runs.
    std::mt19937 g{ 271 }; // Use fixed seed to obtain the same shuffle
    std::shuffle(tasks.begin(), tasks.end(), g);
    std::cout << get_color_rank(rank) << "* Number of tasks: " << tasks.size() << std::endl;
    std::cout << "|";
    for (size_t i = 0; i < tasks.size(); ++i) {
        std::cout << (i + 1) % 10;
    }
    std::cout << "|" << std::endl << "|" << std::flush;
    return tasks;
}
// Executes one grid-search task (one dataset / seed / outer fold) inside a
// consumer process: runs an inner (nested) cross-validation for every
// hyperparameter combination, selects the best-scoring combination, retrains
// on the whole outer train split and scores it on the outer test split.
// The outcome is written into *result for MPI transmission to the producer.
void process_task_mpi_consumer(struct ConfigGrid& config, struct ConfigMPI& config_mpi, json& tasks, int n_task, Datasets& datasets, Task_Result* result)
{
    // initialize
    Timer timer;
    timer.start();
    json task = tasks[n_task];
    auto model = config.model;
    auto grid = GridData(Paths::grid_input(model));
    auto dataset = task["dataset"].get<std::string>();
    // NOTE(review): idx_dataset is re-read from the task again when filling
    // *result below; this local copy is effectively unused.
    auto idx_dataset = task["idx_dataset"].get<int>();
    auto seed = task["seed"].get<int>();
    auto n_fold = task["fold"].get<int>();
    bool stratified = config.stratified;
    // Generate the hyperparameters combinations
    auto combinations = grid.getGrid(dataset);
    auto [X, y] = datasets.getTensors(dataset);
    auto states = datasets.getStates(dataset);
    auto features = datasets.getFeatures(dataset);
    auto className = datasets.getClassName(dataset);
    //
    // Start working on task
    //
    // Outer fold: split the dataset into train/test for this task's fold.
    Fold* fold;
    if (stratified)
        fold = new StratifiedKFold(config.n_folds, y, seed);
    else
        fold = new KFold(config.n_folds, y.size(0), seed);
    auto [train, test] = fold->getFold(n_fold);
    auto train_t = torch::tensor(train);
    auto test_t = torch::tensor(test);
    auto X_train = X.index({ "...", train_t });
    auto y_train = y.index({ train_t });
    auto X_test = X.index({ "...", test_t });
    auto y_test = y.index({ test_t });
    double best_fold_score = 0.0;
    // NOTE(review): if combinations is empty, best_idx_combination stays -1
    // and best_fold_hyper stays null; confirm grids can never be empty.
    int best_idx_combination = -1;
    json best_fold_hyper;
    for (int idx_combination = 0; idx_combination < combinations.size(); ++idx_combination) {
        auto hyperparam_line = combinations[idx_combination];
        auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line);
        // Inner (nested) cross-validation on the outer train split only.
        Fold* nested_fold;
        if (config.stratified)
            nested_fold = new StratifiedKFold(config.nested, y_train, seed);
        else
            nested_fold = new KFold(config.nested, y_train.size(0), seed);
        double score = 0.0;
        for (int n_nested_fold = 0; n_nested_fold < config.nested; n_nested_fold++) {
            // Nested level fold
            auto [train_nested, test_nested] = nested_fold->getFold(n_nested_fold);
            auto train_nested_t = torch::tensor(train_nested);
            auto test_nested_t = torch::tensor(test_nested);
            auto X_nested_train = X_train.index({ "...", train_nested_t });
            auto y_nested_train = y_train.index({ train_nested_t });
            auto X_nested_test = X_train.index({ "...", test_nested_t });
            auto y_nested_test = y_train.index({ test_nested_t });
            // Build Classifier with selected hyperparameters
            auto clf = Models::instance()->create(config.model);
            auto valid = clf->getValidHyperparameters();
            hyperparameters.check(valid, dataset);
            clf->setHyperparameters(hyperparameters.get(dataset));
            // Train model
            clf->fit(X_nested_train, y_nested_train, features, className, states);
            // Test model
            score += clf->score(X_nested_test, y_nested_test);
        }
        delete nested_fold;
        // Average score over the nested folds is the combination's score.
        score /= config.nested;
        if (score > best_fold_score) {
            best_fold_score = score;
            best_idx_combination = idx_combination;
            best_fold_hyper = hyperparam_line;
        }
    }
    delete fold;
    // Build Classifier with the best hyperparameters to obtain the best score
    auto hyperparameters = platform::HyperParameters(datasets.getNames(), best_fold_hyper);
    auto clf = Models::instance()->create(config.model);
    auto valid = clf->getValidHyperparameters();
    hyperparameters.check(valid, dataset);
    // NOTE(review): passes best_fold_hyper directly; hyperparameters.get(dataset)
    // should be equivalent here since the object was built from best_fold_hyper.
    clf->setHyperparameters(best_fold_hyper);
    clf->fit(X_train, y_train, features, className, states);
    best_fold_score = clf->score(X_test, y_test);
    // Return the result
    result->idx_dataset = task["idx_dataset"].get<int>();
    result->idx_combination = best_idx_combination;
    result->score = best_fold_score;
    result->n_fold = n_fold;
    result->time = timer.getDuration();
    // Update progress bar
    std::cout << get_color_rank(config_mpi.rank) << "*" << std::flush;
}
json store_result(std::vector<std::string>& names, Task_Result& result, json& results)
{
    // Append one task result under its dataset's name, creating the per-dataset
    // array on first use, and return the updated results object.
    auto name = names[result.idx_dataset];
    json entry = {
        { "score", result.score },
        { "combination", result.idx_combination },
        { "fold", result.n_fold },
        { "time", result.time },
        { "dataset", result.idx_dataset }
    };
    if (!results.contains(name)) {
        results[name] = json::array();
    }
    results[name].push_back(entry);
    return results;
}
// Manager-side loop of the producer/consumer protocol: hands every task index
// to whichever consumer asks for work, stores results as they arrive, then
// drains the final in-flight results while sending TAG_END to each consumer.
// Returns the accumulated results grouped by dataset name.
json producer(std::vector<std::string>& names, json& tasks, struct ConfigMPI& config_mpi, MPI_Datatype& MPI_Result)
{
    Task_Result result;
    json results;
    int num_tasks = tasks.size();
    //
    // 2a.1 Producer will loop to send all the tasks to the consumers and receive the results
    //
    for (int i = 0; i < num_tasks; ++i) {
        MPI_Status status;
        // An incoming message is either an initial TAG_QUERY (nothing to
        // store) or a TAG_RESULT carrying a finished task.
        MPI_Recv(&result, 1, MPI_Result, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        if (status.MPI_TAG == TAG_RESULT) {
            //Store result
            store_result(names, result, results);
        }
        // Reply to that same worker with the next task index.
        MPI_Send(&i, 1, MPI_INT, status.MPI_SOURCE, TAG_TASK, MPI_COMM_WORLD);
    }
    //
    // 2a.2 Producer will send the end message to all the consumers
    //
    // One final receive per consumer collects the last in-flight result
    // before telling that consumer to stop.
    for (int i = 0; i < config_mpi.n_procs - 1; ++i) {
        MPI_Status status;
        MPI_Recv(&result, 1, MPI_Result, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        if (status.MPI_TAG == TAG_RESULT) {
            //Store result
            store_result(names, result, results);
        }
        MPI_Send(&i, 1, MPI_INT, status.MPI_SOURCE, TAG_END, MPI_COMM_WORLD);
    }
    return results;
}
void select_best_results_folds(json& results, json& all_results, std::string& model)
{
    // For every dataset keep the outer fold with the highest score and enrich
    // it with the winning hyperparameters, the input grid and a timestamp.
    Timer timer;
    auto grid = GridData(Paths::grid_input(model));
    for (const auto& item : all_results.items()) {
        // Each entry holds the results of all the outer folds of one dataset,
        // each computed as an independent task.
        double top_score = 0.0;
        json top_fold;
        for (const auto& fold_result : item.value()) {
            auto current = fold_result["score"].get<double>();
            if (current > top_score) {
                top_score = current;
                top_fold = fold_result;
            }
        }
        auto dataset = item.key();
        auto combinations = grid.getGrid(dataset);
        results[dataset] = json{
            { "score", top_score },
            { "hyperparameters", combinations[top_fold["combination"].get<int>()] },
            { "date", get_date() + " " + get_time() },
            { "grid", grid.getInputGrid(dataset) },
            { "duration", timer.translate2String(top_fold["time"].get<double>()) }
        };
    }
}
// Worker-side loop of the producer/consumer protocol: announce readiness,
// then repeatedly receive a task index, process it, and send the result back
// until the producer answers with TAG_END.
void consumer(Datasets& datasets, json& tasks, struct ConfigGrid& config, struct ConfigMPI& config_mpi, MPI_Datatype& MPI_Result)
{
    Task_Result result;
    //
    // 2b.1 Consumers announce to the producer that they are ready to receive a task
    //
    // The payload of this first message is ignored by the producer; only the
    // TAG_QUERY tag matters.
    MPI_Send(&result, 1, MPI_Result, config_mpi.manager, TAG_QUERY, MPI_COMM_WORLD);
    int task;
    while (true) {
        MPI_Status status;
        //
        // 2b.2 Consumers receive the task from the producer and process it
        //
        MPI_Recv(&task, 1, MPI_INT, config_mpi.manager, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        if (status.MPI_TAG == TAG_END) {
            break;
        }
        process_task_mpi_consumer(config, config_mpi, tasks, task, datasets, &result);
        //
        // 2b.3 Consumers send the result to the producer
        //
        MPI_Send(&result, 1, MPI_Result, config_mpi.manager, TAG_RESULT, MPI_COMM_WORLD);
    }
}
// Entry point of the MPI grid search. The manager rank builds and broadcasts
// the task list, then acts as producer; every other rank acts as consumer.
void GridSearch::go(struct ConfigMPI& config_mpi)
{
    /*
    * Each task is a json object with the following structure:
    * {
    * "dataset": "dataset_name",
    * "idx_dataset": idx_dataset, // used to identify the dataset in the results
    * // this index is relative to the used datasets in the actual run not to the whole datasets
    * "seed": # of seed to use,
    * "fold": # of fold to process
    * }
    *
    * The overall process consists in these steps:
    * 0. Create the MPI result type & tasks
    * 0.1 Create the MPI result type
    * 0.2 Manager creates the tasks
    * 1. Manager will broadcast the tasks to all the processes
    * 1.1 Broadcast the number of tasks
    * 1.2 Broadcast the length of the following string
    * 1.3 Broadcast the tasks as a char* string
    * 2a. Producer delivers the tasks to the consumers
    * 2a.1 Producer will loop to send all the tasks to the consumers and receive the results
    * 2a.2 Producer will send the end message to all the consumers
    * 2b. Consumers process the tasks and send the results to the producer
    * 2b.1 Consumers announce to the producer that they are ready to receive a task
    * 2b.2 Consumers receive the task from the producer and process it
    * 2b.3 Consumers send the result to the producer
    * 3. Manager select the best scores for each dataset
    * 3.1 Loop thru all the results obtained from each outer fold (task) and select the best
    * 3.2 Save the results
    */
    //
    // 0.1 Create the MPI result type
    //
    // Describe Task_Result field-by-field so MPI can transmit the struct.
    // NOTE(review): MPI_Result is committed but never freed with
    // MPI_Type_free; harmless for a single call to go(), but worth confirming.
    Task_Result result;
    int tasks_size;
    MPI_Datatype MPI_Result;
    MPI_Datatype type[5] = { MPI_UNSIGNED, MPI_UNSIGNED, MPI_INT, MPI_DOUBLE, MPI_DOUBLE };
    int blocklen[5] = { 1, 1, 1, 1, 1 };
    MPI_Aint disp[5];
    disp[0] = offsetof(Task_Result, idx_dataset);
    disp[1] = offsetof(Task_Result, idx_combination);
    disp[2] = offsetof(Task_Result, n_fold);
    disp[3] = offsetof(Task_Result, score);
    disp[4] = offsetof(Task_Result, time);
    MPI_Type_create_struct(5, blocklen, disp, type, &MPI_Result);
    MPI_Type_commit(&MPI_Result);
    //
    // 0.2 Manager creates the tasks
    //
    char* msg;
    json tasks;
    if (config_mpi.rank == config_mpi.manager) {
        timer.start();
        tasks = build_tasks_mpi(config_mpi.rank);
        // Serialize the task list so it can be broadcast as a char buffer.
        auto tasks_str = tasks.dump();
        tasks_size = tasks_str.size();
        msg = new char[tasks_size + 1];
        strcpy(msg, tasks_str.c_str());
    }
    //
    // 1. Manager will broadcast the tasks to all the processes
    //
    // tasks_size is set on the workers by this broadcast before the buffer
    // allocation below uses it.
    MPI_Bcast(&tasks_size, 1, MPI_INT, config_mpi.manager, MPI_COMM_WORLD);
    if (config_mpi.rank != config_mpi.manager) {
        msg = new char[tasks_size + 1];
    }
    MPI_Bcast(msg, tasks_size + 1, MPI_CHAR, config_mpi.manager, MPI_COMM_WORLD);
    // Every rank (including the manager, which re-parses its own dump) ends
    // up with an identical task list.
    tasks = json::parse(msg);
    delete[] msg;
    auto datasets = Datasets(config.discretize, Paths::datasets());
    if (config_mpi.rank == config_mpi.manager) {
        //
        // 2a. Producer delivers the tasks to the consumers
        //
        auto datasets_names = filterDatasets(datasets);
        json all_results = producer(datasets_names, tasks, config_mpi, MPI_Result);
        std::cout << get_color_rank(config_mpi.rank) << "|" << std::endl;
        //
        // 3. Manager select the best scores for each dataset
        //
        auto results = initializeResults();
        select_best_results_folds(results, all_results, config.model);
        //
        // 3.2 Save the results
        //
        save(results);
    } else {
        //
        // 2b. Consumers process the tasks and send the results to the producer
        //
        consumer(datasets, tasks, config, config_mpi, MPI_Result);
    }
}
json GridSearch::initializeResults()
{
    // Returns the json results to start from: previously saved results when
    // --continue was requested and the output file can be parsed, an empty
    // json object otherwise.
    json results;
    if (config.continue_from != NO_CONTINUE()) {
        if (!config.quiet)
            std::cout << "* Loading previous results" << std::endl;
        try {
            std::ifstream file(Paths::grid_output(config.model));
            if (file.is_open()) {
                results = json::parse(file);
                results = results["results"];
            }
        }
        catch (const std::exception& e) {
            // Any parse/IO failure just means we start from scratch
            std::cerr << "* There were no previous results" << std::endl;
            std::cerr << "* Initializing new results" << std::endl; // typo fixed: was "Initizalizing"
            results = json();
        }
    }
    return results;
}
void GridSearch::save(json& results)
{
    // Persist the consolidated grid-search outcome, together with the
    // configuration that produced it, pretty-printed into the model's
    // grid output file. nlohmann::json keeps keys sorted, so assigning
    // field by field produces the same document as an initializer list.
    json output;
    output["model"] = config.model;
    output["score"] = config.score;
    output["discretize"] = config.discretize;
    output["stratified"] = config.stratified;
    output["n_folds"] = config.n_folds;
    output["seeds"] = config.seeds;
    output["date"] = get_date() + " " + get_time();
    output["nested"] = config.nested;
    output["platform"] = config.platform;
    output["duration"] = timer.getDurationString(true);
    output["results"] = results;
    std::ofstream file(Paths::grid_output(config.model));
    file << output.dump(4);
}
} /* namespace platform */

60
src/Platform/GridSearch.h Normal file
View File

@@ -0,0 +1,60 @@
#ifndef GRIDSEARCH_H
#define GRIDSEARCH_H
#include <string>
#include <map>
#include <mpi.h>
#include <nlohmann/json.hpp>
#include "Datasets.h"
#include "HyperParameters.h"
#include "GridData.h"
#include "Timer.h"
namespace platform {
    using json = nlohmann::json;
    // Options that drive one grid-search run (filled from the b_grid command line)
    struct ConfigGrid {
        std::string model;         // classifier name to tune
        std::string score;         // score used to rank hyperparameter combinations
        std::string continue_from; // dataset to resume from, or NO_CONTINUE()
        std::string platform;      // platform label stored with the results
        bool quiet;                // suppress detailed progress output
        bool only; // used with continue_from to only compute that dataset
        bool discretize;           // discretize input datasets
        bool stratified;           // use stratified k-fold
        int nested;                // number of nested (inner) folds
        int n_folds;               // number of outer folds
        json excluded;             // json array of dataset names to skip
        std::vector<int> seeds;    // random seeds, one cross-validation per seed
    };
    // MPI process layout shared by producer and consumers
    struct ConfigMPI {
        int rank;    // rank of this process
        int n_procs; // total number of MPI processes
        int manager; // rank acting as manager/producer
    };
    // Outcome of one task (one outer fold of one dataset); transferred over
    // MPI with a custom datatype built field-by-field with offsetof in go()
    typedef struct {
        uint idx_dataset;     // index of the dataset within this run
        uint idx_combination; // index of the winning hyperparameter combination
        int n_fold;           // outer fold number
        double score;         // score obtained in that fold
        double time;          // seconds spent on the task
    } Task_Result;
    // Message tags of the producer/consumer MPI protocol
    const int TAG_QUERY = 1;
    const int TAG_RESULT = 2;
    const int TAG_TASK = 3;
    const int TAG_END = 4;
    // Orchestrates a cross-validated hyperparameter grid search distributed over MPI
    class GridSearch {
    public:
        explicit GridSearch(struct ConfigGrid& config);
        void go(struct ConfigMPI& config_mpi);
        ~GridSearch() = default;
        json loadResults();
        // Sentinel meaning "do not resume from a previous run"
        static inline std::string NO_CONTINUE() { return "NO_CONTINUE"; }
    private:
        void save(json& results);
        json initializeResults();
        std::vector<std::string> filterDatasets(Datasets& datasets) const;
        struct ConfigGrid config;
        json build_tasks_mpi(int rank);
        Timer timer; // used to measure the time of the whole process
    };
} /* namespace platform */
#endif /* GRIDSEARCH_H */

View File

@@ -0,0 +1,55 @@
#include "HyperParameters.h"
#include <fstream>
#include <sstream>
#include <iostream>
namespace platform {
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_)
{
    // Register the same caller-supplied hyperparameter set for every dataset
    for (auto it = datasets.begin(); it != datasets.end(); ++it) {
        hyperparameters[*it] = hyperparameters_;
    }
}
// Joins a vector of strings with the given delimiter between elements.
// Fixed: the previous ostream_iterator-based implode appended a trailing
// delimiter after the last element ("a,b," instead of "a,b").
std::string join(std::vector<std::string> const& strings, std::string delim)
{
    std::stringstream ss;
    for (std::size_t i = 0; i < strings.size(); ++i) {
        if (i > 0) {
            ss << delim;
        }
        ss << strings[i];
    }
    return ss.str();
}
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file)
{
    // Load per-dataset hyperparameters from a json file; datasets missing
    // from the file fall back to an empty (default) hyperparameter set.
    std::ifstream file(hyperparameters_file);
    if (!file.is_open()) {
        throw std::runtime_error("File " + hyperparameters_file + " not found");
    }
    const json input_hyperparameters = json::parse(file);
    for (const auto& dataset : datasets) {
        auto entry = input_hyperparameters.find(dataset);
        if (entry == input_hyperparameters.end()) {
            std::cerr << "*Warning: Dataset " << dataset << " not found in hyperparameters file" << " assuming default hyperparameters" << std::endl;
            hyperparameters[dataset] = json({});
        } else {
            hyperparameters[dataset] = (*entry)["hyperparameters"].get<json>();
        }
    }
}
void HyperParameters::check(const std::vector<std::string>& valid, const std::string& fileName)
{
json result = hyperparameters.at(fileName);
for (const auto& item : result.items()) {
if (find(valid.begin(), valid.end(), item.key()) == valid.end()) {
throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid. Passed Hyperparameters are: "
+ result.dump(4) + "\n Valid hyperparameters are: {" + join(valid, ",") + "}");
}
}
}
json HyperParameters::get(const std::string& fileName)
{
    // at() throws std::out_of_range when the dataset was never registered
    const auto& entry = hyperparameters.at(fileName);
    return entry;
}
} /* namespace platform */

View File

@@ -0,0 +1,23 @@
#ifndef HYPERPARAMETERS_H
#define HYPERPARAMETERS_H
#include <string>
#include <map>
#include <vector>
#include <nlohmann/json.hpp>
namespace platform {
    using json = nlohmann::json;
    // Holds one json hyperparameter set per dataset name, either shared by
    // all datasets or loaded per-dataset from a json file
    class HyperParameters {
    public:
        HyperParameters() = default;
        // Assigns the same hyperparameters to every dataset in the list
        explicit HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_);
        // Loads per-dataset hyperparameters from a json file; missing datasets get defaults
        explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file);
        ~HyperParameters() = default;
        // True when the given dataset has at least one hyperparameter set
        bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); }
        // Throws std::invalid_argument when a stored key is not in the valid list
        void check(const std::vector<std::string>& valid, const std::string& fileName);
        // Hyperparameters registered for the given dataset (throws if unknown)
        json get(const std::string& fileName);
    private:
        std::map<std::string, json> hyperparameters;
    };
} /* namespace platform */
#endif /* HYPERPARAMETERS_H */

View File

@@ -11,6 +11,10 @@
#include "SPODELd.h" #include "SPODELd.h"
#include "AODELd.h" #include "AODELd.h"
#include "BoostAODE.h" #include "BoostAODE.h"
#include "STree.h"
#include "ODTE.h"
#include "SVC.h"
#include "RandomForest.h"
namespace platform { namespace platform {
class Models { class Models {
private: private:

View File

@@ -1,6 +1,7 @@
#ifndef PATHS_H #ifndef PATHS_H
#define PATHS_H #define PATHS_H
#include <string> #include <string>
#include <filesystem>
#include "DotEnv.h" #include "DotEnv.h"
namespace platform { namespace platform {
class Paths { class Paths {
@@ -8,13 +9,31 @@ namespace platform {
static std::string results() { return "results/"; } static std::string results() { return "results/"; }
static std::string hiddenResults() { return "hidden_results/"; } static std::string hiddenResults() { return "hidden_results/"; }
static std::string excel() { return "excel/"; } static std::string excel() { return "excel/"; }
static std::string cfs() { return "cfs/"; } static std::string grid() { return "grid/"; }
static std::string datasets() static std::string datasets()
{ {
auto env = platform::DotEnv(); auto env = platform::DotEnv();
return env.get("source_data"); return env.get("source_data");
} }
static void createPath(const std::string& path)
{
// Create directory if it does not exist
try {
std::filesystem::create_directory(path);
}
catch (std::exception& e) {
throw std::runtime_error("Could not create directory " + path);
}
}
static std::string excelResults() { return "some_results.xlsx"; } static std::string excelResults() { return "some_results.xlsx"; }
static std::string grid_input(const std::string& model)
{
return grid() + "grid_" + model + "_input.json";
}
static std::string grid_output(const std::string& model)
{
return grid() + "grid_" + model + "_output.json";
}
}; };
} }
#endif #endif

View File

@@ -32,5 +32,4 @@ namespace platform {
bool complete; bool complete;
}; };
}; };
#endif #endif

View File

@@ -7,7 +7,6 @@
#include "Result.h" #include "Result.h"
namespace platform { namespace platform {
using json = nlohmann::json; using json = nlohmann::json;
class Results { class Results {
public: public:
Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial); Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial);
@@ -34,5 +33,4 @@ namespace platform {
void load(); // Loads the list of results void load(); // Loads the list of results
}; };
}; };
#endif #endif

43
src/Platform/Timer.h Normal file
View File

@@ -0,0 +1,43 @@
#ifndef TIMER_H
#define TIMER_H
#include <chrono>
#include <string>
#include <sstream>
#include <iomanip> // fixed: std::setprecision / std::fixed were used without this include
namespace platform {
    // Small wall-clock stopwatch used to time whole runs and single tasks
    class Timer {
    private:
        std::chrono::high_resolution_clock::time_point begin;
        std::chrono::high_resolution_clock::time_point end;
    public:
        Timer() = default;
        ~Timer() = default;
        void start() { begin = std::chrono::high_resolution_clock::now(); }
        void stop() { end = std::chrono::high_resolution_clock::now(); }
        // Stops the timer and returns the elapsed seconds since start()
        double getDuration()
        {
            stop();
            std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double >> (end - begin);
            return time_span.count();
        }
        // Elapsed seconds since start() without stopping the timer
        double getLapse()
        {
            std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double >> (std::chrono::high_resolution_clock::now() - begin);
            return time_span.count();
        }
        // Human readable duration; lapse == true keeps the timer running
        std::string getDurationString(bool lapse = false)
        {
            double duration = lapse ? getLapse() : getDuration();
            return translate2String(duration);
        }
        // Formats seconds as "x.xx h|m|s", choosing the largest unit that fits
        std::string translate2String(double duration)
        {
            double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
            std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
            std::stringstream ss;
            ss << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit;
            return ss.str();
        }
    };
} /* namespace platform */
#endif /* TIMER_H */

View File

@@ -5,9 +5,8 @@
#include "Colors.h" #include "Colors.h"
argparse::ArgumentParser manageArguments(int argc, char** argv) void manageArguments(argparse::ArgumentParser& program, int argc, char** argv)
{ {
argparse::ArgumentParser program("best");
program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model) (any for all models)"); program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model) (any for all models)");
program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied"); program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true); program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
@@ -28,12 +27,12 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
catch (...) { catch (...) {
throw std::runtime_error("Number of folds must be an decimal number"); throw std::runtime_error("Number of folds must be an decimal number");
}}); }});
return program;
} }
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
auto program = manageArguments(argc, argv); argparse::ArgumentParser program("b_sbest");
manageArguments(program, argc, argv);
std::string model, score; std::string model, score;
bool build, report, friedman, excel; bool build, report, friedman, excel;
double level; double level;

231
src/Platform/b_grid.cc Normal file
View File

@@ -0,0 +1,231 @@
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include <mpi.h>
#include "DotEnv.h"
#include "Models.h"
#include "modelRegister.h"
#include "GridSearch.h"
#include "Paths.h"
#include "Timer.h"
#include "Colors.h"
using json = nlohmann::json;
const int MAXL = 133;
// Declares all of b_grid's command line arguments on the given parser.
// Defaults for discretize/stratified/folds/seeds come from the .env file.
// Fixed: unqualified runtime_error/stoi/find relied on a using-directive
// pulled in transitively; now consistently qualified with std::.
void manageArguments(argparse::ArgumentParser& program)
{
    auto env = platform::DotEnv();
    // --dump / --report / --compute are mutually exclusive actions
    auto& group = program.add_mutually_exclusive_group(true);
    program.add_argument("-m", "--model")
        .help("Model to use " + platform::Models::instance()->tostring())
        .action([](const std::string& value) {
        static const std::vector<std::string> choices = platform::Models::instance()->getNames();
        if (std::find(choices.begin(), choices.end(), value) != choices.end()) {
            return value;
        }
        throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring());
            }
    );
    group.add_argument("--dump").help("Show the grid combinations").default_value(false).implicit_value(true);
    group.add_argument("--report").help("Report the computed hyperparameters").default_value(false).implicit_value(true);
    group.add_argument("--compute").help("Perform computation of the grid output hyperparameters").default_value(false).implicit_value(true);
    program.add_argument("--discretize").help("Discretize input datasets").default_value((bool)std::stoi(env.get("discretize"))).implicit_value(true);
    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)std::stoi(env.get("stratified"))).implicit_value(true);
    program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
    program.add_argument("--continue").help("Continue computing from that dataset").default_value(platform::GridSearch::NO_CONTINUE());
    program.add_argument("--only").help("Used with continue to compute that dataset only").default_value(false).implicit_value(true);
    program.add_argument("--exclude").default_value("[]").help("Datasets to exclude in json format, e.g. [\"dataset1\", \"dataset2\"]");
    program.add_argument("--nested").help("Set the double/nested cross validation number of folds").default_value(5).scan<'i', int>().action([](const std::string& value) {
        try {
            auto k = std::stoi(value);
            if (k < 2) {
                throw std::runtime_error("Number of nested folds must be greater than 1");
            }
            return k;
        }
        catch (const std::runtime_error& err) {
            throw std::runtime_error(err.what());
        }
        catch (...) {
            throw std::runtime_error("Number of nested folds must be an integer");
        }});
    program.add_argument("--score").help("Score used in gridsearch").default_value("accuracy");
    program.add_argument("-f", "--folds").help("Number of folds").default_value(std::stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
        try {
            auto k = std::stoi(value);
            if (k < 2) {
                throw std::runtime_error("Number of folds must be greater than 1");
            }
            return k;
        }
        catch (const std::runtime_error& err) {
            throw std::runtime_error(err.what());
        }
        catch (...) {
            throw std::runtime_error("Number of folds must be an integer");
        }});
    auto seed_values = env.getSeeds();
    program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
}
// Prints a table with every dataset in the model's grid input file, the
// number of hyperparameter combinations and the grid json itself.
void list_dump(std::string& model)
{
    auto data = platform::GridData(platform::Paths::grid_input(model));
    std::cout << Colors::MAGENTA() << "Listing configuration input file (Grid)" << std::endl << std::endl;
    int index = 0;
    int max_hyper = 15;
    int max_dataset = 7;
    auto combinations = data.getGridFile();
    // First pass: compute column widths (cast avoids signed/unsigned compare)
    for (auto const& item : combinations) {
        if (static_cast<int>(item.first.size()) > max_dataset) {
            max_dataset = item.first.size();
        }
        if (static_cast<int>(item.second.dump().size()) > max_hyper) {
            max_hyper = item.second.dump().size();
        }
    }
    std::cout << Colors::GREEN() << std::left << " # " << std::left << std::setw(max_dataset) << "Dataset" << " #Com. "
        << std::setw(max_hyper) << "Hyperparameters" << std::endl;
    std::cout << "=== " << std::string(max_dataset, '=') << " ===== " << std::string(max_hyper, '=') << std::endl;
    bool odd = true; // alternate row colors for readability
    for (auto const& item : combinations) {
        auto color = odd ? Colors::CYAN() : Colors::BLUE();
        std::cout << color;
        auto num_combinations = data.getNumCombinations(item.first);
        std::cout << std::setw(3) << std::fixed << std::right << ++index << std::left << " " << std::setw(max_dataset) << item.first
            << " " << std::setw(5) << std::right << num_combinations << " " << std::setw(max_hyper) << item.second.dump() << std::endl;
        odd = !odd;
    }
    std::cout << Colors::RESET() << std::endl;
}
// Builds one banner line "* text ... *\n" padded with spaces to MAXL columns.
// utf compensates for multi-byte UTF-8 characters that occupy one column.
// Fixed: MAXL - text.length() promoted the expression to size_t, wrapping
// around for long texts before the n < 0 guard could catch it.
std::string headerLine(const std::string& text, int utf = 0)
{
    int n = MAXL - static_cast<int>(text.length()) - 3;
    n = n < 0 ? 0 : n;
    return "* " + text + std::string(n + utf, ' ') + "*\n";
}
// Pretty-prints the computed hyperparameters stored in a grid output file:
// a banner with the run configuration followed by one row per dataset.
void list_results(json& results, std::string& model)
{
    std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
    std::cout << headerLine("Listing computed hyperparameters for model " + model);
    std::cout << headerLine("Date & time: " + results["date"].get<std::string>() + " Duration: " + results["duration"].get<std::string>());
    std::cout << headerLine("Score: " + results["score"].get<std::string>());
    std::cout << headerLine(
        "Random seeds: " + results["seeds"].dump()
        + " Discretized: " + (results["discretize"].get<bool>() ? "True" : "False")
        + " Stratified: " + (results["stratified"].get<bool>() ? "True" : "False")
        + " #Folds: " + std::to_string(results["n_folds"].get<int>())
        + " Nested: " + (results["nested"].get<int>() == 0 ? "False" : std::to_string(results["nested"].get<int>()))
    );
    std::cout << std::string(MAXL, '*') << std::endl;
    int spaces = 7;
    int hyperparameters_spaces = 15;
    // First pass: compute column widths (cast avoids signed/unsigned compare)
    for (const auto& item : results["results"].items()) {
        auto key = item.key();
        auto value = item.value();
        if (static_cast<int>(key.size()) > spaces) {
            spaces = key.size();
        }
        if (static_cast<int>(value["hyperparameters"].dump().size()) > hyperparameters_spaces) {
            hyperparameters_spaces = value["hyperparameters"].dump().size();
        }
    }
    std::cout << Colors::GREEN() << " #  " << std::left << std::setw(spaces) << "Dataset" << " " << std::setw(19) << "Date" << " "
        << "Duration " << std::setw(8) << "Score" << " " << "Hyperparameters" << std::endl;
    std::cout << "=== " << std::string(spaces, '=') << " " << std::string(19, '=') << " " << std::string(8, '=') << " "
        << std::string(8, '=') << " " << std::string(hyperparameters_spaces, '=') << std::endl;
    bool odd = true; // alternate row colors for readability
    int index = 0;
    for (const auto& item : results["results"].items()) {
        auto color = odd ? Colors::CYAN() : Colors::BLUE();
        auto value = item.value();
        std::cout << color;
        std::cout << std::setw(3) << std::right << index++ << " ";
        std::cout << std::left << std::setw(spaces) << item.key() << " " << value["date"].get<std::string>()
            << " " << std::setw(8) << std::right << value["duration"].get<std::string>() << " " << std::setw(8) << std::setprecision(6)
            << std::fixed << std::right << value["score"].get<double>() << " " << value["hyperparameters"].dump() << std::endl;
        odd = !odd;
    }
    std::cout << Colors::RESET() << std::endl;
}
/*
 * Main: parses arguments, then either dumps the grid input file, runs the
 * MPI grid-search computation (--compute), or lists previously computed
 * results.
 */
int main(int argc, char** argv)
{
    argparse::ArgumentParser program("b_grid");
    manageArguments(program);
    struct platform::ConfigGrid config;
    bool dump, compute;
    try {
        program.parse_args(argc, argv);
        config.model = program.get<std::string>("model");
        config.score = program.get<std::string>("score");
        config.discretize = program.get<bool>("discretize");
        config.stratified = program.get<bool>("stratified");
        config.n_folds = program.get<int>("folds");
        config.quiet = program.get<bool>("quiet");
        config.only = program.get<bool>("only");
        config.seeds = program.get<std::vector<int>>("seeds");
        config.nested = program.get<int>("nested");
        config.continue_from = program.get<std::string>("continue");
        if (config.continue_from == platform::GridSearch::NO_CONTINUE() && config.only) {
            throw std::runtime_error("Cannot use --only without --continue");
        }
        dump = program.get<bool>("dump");
        compute = program.get<bool>("compute");
        if (dump && (config.continue_from != platform::GridSearch::NO_CONTINUE() || config.only)) {
            throw std::runtime_error("Cannot use --dump with --continue or --only");
        }
        auto excluded = program.get<std::string>("exclude");
        config.excluded = json::parse(excluded);
    }
    catch (const std::exception& err) {
        std::cerr << err.what() << std::endl;
        std::cerr << program;
        exit(1);
    }
    /*
     * Begin Processing
     */
    auto env = platform::DotEnv();
    config.platform = env.get("platform");
    platform::Paths::createPath(platform::Paths::grid());
    auto grid_search = platform::GridSearch(config);
    platform::Timer timer;
    timer.start();
    if (dump) {
        list_dump(config.model);
    } else {
        if (compute) {
            struct platform::ConfigMPI mpi_config;
            mpi_config.manager = 0; // which process is the manager
            MPI_Init(&argc, &argv);
            MPI_Comm_rank(MPI_COMM_WORLD, &mpi_config.rank);
            MPI_Comm_size(MPI_COMM_WORLD, &mpi_config.n_procs);
            if (mpi_config.n_procs < 2) {
                // NOTE(review): throwing after MPI_Init bypasses MPI_Finalize;
                // consider MPI_Abort here instead — kept as-is to preserve behavior
                throw std::runtime_error("Cannot use --compute with less than 2 mpi processes, try mpirun -np 2 ...");
            }
            grid_search.go(mpi_config);
            if (mpi_config.rank == mpi_config.manager) {
                // Only the manager prints the consolidated results
                auto results = grid_search.loadResults();
                list_results(results, config.model);
                std::cout << "Process took " << timer.getDurationString() << std::endl;
            }
            MPI_Finalize();
        } else {
            // List results
            auto results = grid_search.loadResults();
            if (results.empty()) {
                std::cout << "** No results found" << std::endl;
            } else {
                list_results(results, config.model);
            }
        }
    }
    std::cout << "Done!" << std::endl;
    return 0;
}

View File

@@ -11,12 +11,13 @@
using json = nlohmann::json; using json = nlohmann::json;
argparse::ArgumentParser manageArguments() void manageArguments(argparse::ArgumentParser& program)
{ {
auto env = platform::DotEnv(); auto env = platform::DotEnv();
argparse::ArgumentParser program("main");
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name"); program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment"); program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \
"Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
program.add_argument("-m", "--model") program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->tostring()) .help("Model to use " + platform::Models::instance()->tostring())
.action([](const std::string& value) { .action([](const std::string& value) {
@@ -48,18 +49,18 @@ argparse::ArgumentParser manageArguments()
}}); }});
auto seed_values = env.getSeeds(); auto seed_values = env.getSeeds();
program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
return program;
} }
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
std::string file_name, model_name, title; argparse::ArgumentParser program("b_main");
manageArguments(program);
std::string file_name, model_name, title, hyperparameters_file;
json hyperparameters_json; json hyperparameters_json;
bool discretize_dataset, stratified, saveResults, quiet; bool discretize_dataset, stratified, saveResults, quiet;
std::vector<int> seeds; std::vector<int> seeds;
std::vector<std::string> filesToTest; std::vector<std::string> filesToTest;
int n_folds; int n_folds;
auto program = manageArguments();
try { try {
program.parse_args(argc, argv); program.parse_args(argc, argv);
file_name = program.get<std::string>("dataset"); file_name = program.get<std::string>("dataset");
@@ -71,6 +72,10 @@ int main(int argc, char** argv)
seeds = program.get<std::vector<int>>("seeds"); seeds = program.get<std::vector<int>>("seeds");
auto hyperparameters = program.get<std::string>("hyperparameters"); auto hyperparameters = program.get<std::string>("hyperparameters");
hyperparameters_json = json::parse(hyperparameters); hyperparameters_json = json::parse(hyperparameters);
hyperparameters_file = program.get<std::string>("hyper-file");
if (hyperparameters_file != "" && hyperparameters != "{}") {
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
}
title = program.get<std::string>("title"); title = program.get<std::string>("title");
if (title == "" && file_name == "") { if (title == "" && file_name == "") {
throw runtime_error("title is mandatory if dataset is not provided"); throw runtime_error("title is mandatory if dataset is not provided");
@@ -96,15 +101,22 @@ int main(int argc, char** argv)
filesToTest = datasets.getNames(); filesToTest = datasets.getNames();
saveResults = true; saveResults = true;
} }
platform::HyperParameters test_hyperparams;
if (hyperparameters_file != "") {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file);
} else {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
}
/* /*
* Begin Processing * Begin Processing
*/ */
auto env = platform::DotEnv(); auto env = platform::DotEnv();
auto experiment = platform::Experiment(); auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3"); experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy"); experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
experiment.setHyperparameters(hyperparameters_json); experiment.setHyperparameters(test_hyperparams);
for (auto seed : seeds) { for (auto seed : seeds) {
experiment.addRandomSeed(seed); experiment.addRandomSeed(seed);
} }

View File

@@ -3,9 +3,8 @@
#include "ManageResults.h" #include "ManageResults.h"
argparse::ArgumentParser manageArguments(int argc, char** argv) void manageArguments(argparse::ArgumentParser& program, int argc, char** argv)
{ {
argparse::ArgumentParser program("manage");
program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>(); program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>();
program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)"); program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)");
program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied"); program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
@@ -29,12 +28,12 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
std::cerr << program; std::cerr << program;
exit(1); exit(1);
} }
return program;
} }
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
auto program = manageArguments(argc, argv); auto program = argparse::ArgumentParser("b_manage");
manageArguments(program, argc, argv);
int number = program.get<int>("number"); int number = program.get<int>("number");
std::string model = program.get<std::string>("model"); std::string model = program.get<std::string>("model");
std::string score = program.get<std::string>("score"); std::string score = program.get<std::string>("score");

View File

@@ -18,4 +18,12 @@ static platform::Registrar registrarALD("AODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
static platform::Registrar registrarBA("BoostAODE", static platform::Registrar registrarBA("BoostAODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();});
static platform::Registrar registrarSt("STree",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::STree();});
static platform::Registrar registrarOdte("Odte",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::ODTE();});
static platform::Registrar registrarSvc("SVC",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::SVC();});
static platform::Registrar registrarRaF("RandomForest",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::RandomForest();});
#endif #endif

View File

@@ -1,5 +1,6 @@
include_directories(${PyWrap_SOURCE_DIR}/lib/Files) include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${PyWrap_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${Python3_INCLUDE_DIRS}) include_directories(${Python3_INCLUDE_DIRS})
include_directories(${TORCH_INCLUDE_DIRS}) include_directories(${TORCH_INCLUDE_DIRS})

View File

@@ -1,25 +0,0 @@
#ifndef CLASSIFIER_H
#define CLASSIFIER_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <string>
#include <map>
#include <vector>
namespace pywrap {
class Classifier {
public:
Classifier() = default;
virtual ~Classifier() = default;
virtual Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
virtual Classifier& fit(torch::Tensor& X, torch::Tensor& y) = 0;
virtual torch::Tensor predict(torch::Tensor& X) = 0;
virtual double score(torch::Tensor& X, torch::Tensor& y) = 0;
virtual std::string version() = 0;
virtual std::string sklearnVersion() = 0;
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
protected:
virtual void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters) = 0;
};
} /* namespace pywrap */
#endif /* CLASSIFIER_H */

View File

@@ -1,15 +1,24 @@
#include "ODTE.h" #include "ODTE.h"
namespace pywrap { namespace pywrap {
ODTE::ODTE() : PyClassifier("odte", "Odte")
{
validHyperparameters = { "n_jobs", "n_estimators", "random_state" };
}
int ODTE::getNumberOfNodes() const
{
return callMethodInt("get_nodes");
}
int ODTE::getNumberOfEdges() const
{
return callMethodInt("get_leaves");
}
int ODTE::getNumberOfStates() const
{
return callMethodInt("get_depth");
}
std::string ODTE::graph() std::string ODTE::graph()
{ {
return callMethodString("graph"); return callMethodString("graph");
} }
void ODTE::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "n_jobs", "n_estimators", "random_state" };
checkHyperparameters(validKeys, hyperparameters);
this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */ } /* namespace pywrap */

View File

@@ -6,10 +6,12 @@
namespace pywrap { namespace pywrap {
class ODTE : public PyClassifier { class ODTE : public PyClassifier {
public: public:
ODTE() : PyClassifier("odte", "Odte") {}; ODTE();
~ODTE() = default; ~ODTE() = default;
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
std::string graph(); std::string graph();
void setHyperparameters(const nlohmann::json& hyperparameters) override;
}; };
} /* namespace pywrap */ } /* namespace pywrap */
#endif /* ODTE_H */ #endif /* ODTE_H */

View File

@@ -2,7 +2,7 @@
namespace pywrap { namespace pywrap {
namespace bp = boost::python; namespace bp = boost::python;
namespace np = boost::python::numpy; namespace np = boost::python::numpy;
PyClassifier::PyClassifier(const std::string& module, const std::string& className) : module(module), className(className), fitted(false) PyClassifier::PyClassifier(const std::string& module, const std::string& className, bool sklearn) : module(module), className(className), sklearn(sklearn), fitted(false)
{ {
// This id allows to have more than one instance of the same module/class // This id allows to have more than one instance of the same module/class
id = reinterpret_cast<clfId_t>(this); id = reinterpret_cast<clfId_t>(this);
@@ -29,16 +29,23 @@ namespace pywrap {
} }
std::string PyClassifier::version() std::string PyClassifier::version()
{ {
if (sklearn) {
return pyWrap->sklearnVersion();
}
return pyWrap->version(id); return pyWrap->version(id);
} }
std::string PyClassifier::sklearnVersion()
{
return pyWrap->sklearnVersion();
}
std::string PyClassifier::callMethodString(const std::string& method) std::string PyClassifier::callMethodString(const std::string& method)
{ {
return pyWrap->callMethodString(id, method); return pyWrap->callMethodString(id, method);
} }
int PyClassifier::callMethodSumOfItems(const std::string& method) const
{
return pyWrap->callMethodSumOfItems(id, method);
}
int PyClassifier::callMethodInt(const std::string& method) const
{
return pyWrap->callMethodInt(id, method);
}
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y) PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y)
{ {
if (!fitted && hyperparameters.size() > 0) { if (!fitted && hyperparameters.size() > 0) {
@@ -74,27 +81,16 @@ namespace pywrap {
Py_XDECREF(incoming); Py_XDECREF(incoming);
return resultTensor; return resultTensor;
} }
double PyClassifier::score(torch::Tensor& X, torch::Tensor& y) float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{ {
auto [Xn, yn] = tensors2numpy(X, y); auto [Xn, yn] = tensors2numpy(X, y);
CPyObject Xp = bp::incref(bp::object(Xn).ptr()); CPyObject Xp = bp::incref(bp::object(Xn).ptr());
CPyObject yp = bp::incref(bp::object(yn).ptr()); CPyObject yp = bp::incref(bp::object(yn).ptr());
auto result = pyWrap->score(id, Xp, yp); float result = pyWrap->score(id, Xp, yp);
return result; return result;
} }
void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters) void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters)
{ {
// Check if hyperparameters are valid, default is no hyperparameters
const std::vector<std::string> validKeys = { };
checkHyperparameters(validKeys, hyperparameters);
this->hyperparameters = hyperparameters; this->hyperparameters = hyperparameters;
} }
void PyClassifier::checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters)
{
for (const auto& item : hyperparameters.items()) {
if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid");
}
}
}
} /* namespace pywrap */ } /* namespace pywrap */

View File

@@ -13,25 +13,42 @@
#include "TypeId.h" #include "TypeId.h"
namespace pywrap { namespace pywrap {
class PyClassifier : public Classifier { class PyClassifier : public bayesnet::BaseClassifier {
public: public:
PyClassifier(const std::string& module, const std::string& className); PyClassifier(const std::string& module, const std::string& className, const bool sklearn = false);
virtual ~PyClassifier(); virtual ~PyClassifier();
PyClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override { return *this; };
// X is nxm tensor, y is nx1 tensor
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override; PyClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y) override; PyClassifier& fit(torch::Tensor& X, torch::Tensor& y);
PyClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override { return *this; };
PyClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override { return *this; };
torch::Tensor predict(torch::Tensor& X) override; torch::Tensor predict(torch::Tensor& X) override;
double score(torch::Tensor& X, torch::Tensor& y) override; std::vector<int> predict(std::vector<std::vector<int >>& X) override { return std::vector<int>(); };
std::string version() override; float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override { return 0.0; };
std::string sklearnVersion() override; float score(torch::Tensor& X, torch::Tensor& y) override;
std::string version();
std::string callMethodString(const std::string& method); std::string callMethodString(const std::string& method);
int callMethodSumOfItems(const std::string& method) const;
int callMethodInt(const std::string& method) const;
std::string getVersion() override { return this->version(); };
int getNumberOfNodes() const override { return 0; };
int getNumberOfEdges() const override { return 0; };
int getNumberOfStates() const override { return 0; };
std::vector<std::string> show() const override { return std::vector<std::string>(); }
std::vector<std::string> graph(const std::string& title = "") const override { return std::vector<std::string>(); }
bayesnet::status_t getStatus() const override { return bayesnet::NORMAL; };
std::vector<std::string> topological_order() override { return std::vector<std::string>(); }
void dump_cpt() const override {};
void setHyperparameters(const nlohmann::json& hyperparameters) override; void setHyperparameters(const nlohmann::json& hyperparameters) override;
protected: protected:
void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters) override;
nlohmann::json hyperparameters; nlohmann::json hyperparameters;
void trainModel(const torch::Tensor& weights) override {};
private: private:
PyWrap* pyWrap; PyWrap* pyWrap;
std::string module; std::string module;
std::string className; std::string className;
bool sklearn;
clfId_t id; clfId_t id;
bool fitted; bool fitted;
}; };

View File

@@ -5,6 +5,7 @@
#include <map> #include <map>
#include <sstream> #include <sstream>
#include <boost/python/numpy.hpp> #include <boost/python/numpy.hpp>
#include <iostream>
namespace pywrap { namespace pywrap {
namespace np = boost::python::numpy; namespace np = boost::python::numpy;
@@ -19,6 +20,7 @@ namespace pywrap {
if (wrapper == nullptr) { if (wrapper == nullptr) {
wrapper = new PyWrap(); wrapper = new PyWrap();
pyInstance = new CPyInstance(); pyInstance = new CPyInstance();
PyRun_SimpleString("import warnings;warnings.filterwarnings('ignore')");
} }
return wrapper; return wrapper;
} }
@@ -72,9 +74,11 @@ namespace pywrap {
PyErr_Print(); PyErr_Print();
errorAbort("Error cleaning module "); errorAbort("Error cleaning module ");
} }
if (moduleClassMap.empty()) { // With boost you can't remove the interpreter
RemoveInstance(); // https://www.boost.org/doc/libs/1_83_0/libs/python/doc/html/tutorial/tutorial/embedding.html#tutorial.embedding.getting_started
} // if (moduleClassMap.empty()) {
// RemoveInstance();
// }
} }
void PyWrap::errorAbort(const std::string& message) void PyWrap::errorAbort(const std::string& message)
{ {
@@ -106,17 +110,80 @@ namespace pywrap {
Py_XDECREF(result); Py_XDECREF(result);
return value; return value;
} }
int PyWrap::callMethodInt(const clfId_t id, const std::string& method)
{
PyObject* instance = getClass(id);
PyObject* result;
try {
if (!(result = PyObject_CallMethod(instance, method.c_str(), NULL)))
errorAbort("Couldn't call method " + method);
}
catch (const std::exception& e) {
errorAbort(e.what());
}
int value = PyLong_AsLong(result);
Py_XDECREF(result);
return value;
}
std::string PyWrap::sklearnVersion() std::string PyWrap::sklearnVersion()
{ {
return "1.0"; PyObject* sklearnModule = PyImport_ImportModule("sklearn");
// CPyObject data = PyRun_SimpleString("import sklearn;return sklearn.__version__"); if (sklearnModule == nullptr) {
// std::string result = PyUnicode_AsUTF8(data); errorAbort("Couldn't import sklearn");
// return result; }
PyObject* versionAttr = PyObject_GetAttrString(sklearnModule, "__version__");
if (versionAttr == nullptr || !PyUnicode_Check(versionAttr)) {
Py_XDECREF(sklearnModule);
errorAbort("Couldn't get sklearn version");
}
std::string result = PyUnicode_AsUTF8(versionAttr);
Py_XDECREF(versionAttr);
Py_XDECREF(sklearnModule);
return result;
} }
std::string PyWrap::version(const clfId_t id) std::string PyWrap::version(const clfId_t id)
{ {
return callMethodString(id, "version"); return callMethodString(id, "version");
} }
int PyWrap::callMethodSumOfItems(const clfId_t id, const std::string& method)
{
// Call method on each estimator and sum the results (made for RandomForest)
PyObject* instance = getClass(id);
PyObject* estimators = PyObject_GetAttrString(instance, "estimators_");
if (estimators == nullptr) {
errorAbort("Failed to get attribute: " + method);
}
int sumOfItems = 0;
Py_ssize_t len = PyList_Size(estimators);
for (Py_ssize_t i = 0; i < len; i++) {
PyObject* estimator = PyList_GetItem(estimators, i);
PyObject* result;
if (method == "node_count") {
PyObject* owner = PyObject_GetAttrString(estimator, "tree_");
if (owner == nullptr) {
Py_XDECREF(estimators);
errorAbort("Failed to get attribute tree_ for: " + method);
}
result = PyObject_GetAttrString(owner, method.c_str());
if (result == nullptr) {
Py_XDECREF(estimators);
Py_XDECREF(owner);
errorAbort("Failed to get attribute node_count: " + method);
}
Py_DECREF(owner);
} else {
result = PyObject_CallMethod(estimator, method.c_str(), nullptr);
if (result == nullptr) {
Py_XDECREF(estimators);
errorAbort("Failed to call method: " + method);
}
}
sumOfItems += PyLong_AsLong(result);
Py_DECREF(result);
}
Py_DECREF(estimators);
return sumOfItems;
}
void PyWrap::setHyperparameters(const clfId_t id, const json& hyperparameters) void PyWrap::setHyperparameters(const clfId_t id, const json& hyperparameters)
{ {
// Set hyperparameters as attributes of the class // Set hyperparameters as attributes of the class

View File

@@ -24,8 +24,10 @@ namespace pywrap {
void operator=(const PyWrap&) = delete; void operator=(const PyWrap&) = delete;
~PyWrap() = default; ~PyWrap() = default;
std::string callMethodString(const clfId_t id, const std::string& method); std::string callMethodString(const clfId_t id, const std::string& method);
int callMethodInt(const clfId_t id, const std::string& method);
std::string sklearnVersion(); std::string sklearnVersion();
std::string version(const clfId_t id); std::string version(const clfId_t id);
int callMethodSumOfItems(const clfId_t id, const std::string& method);
void setHyperparameters(const clfId_t id, const json& hyperparameters); void setHyperparameters(const clfId_t id, const json& hyperparameters);
void fit(const clfId_t id, CPyObject& X, CPyObject& y); void fit(const clfId_t id, CPyObject& X, CPyObject& y);
PyObject* predict(const clfId_t id, CPyObject& X); PyObject* predict(const clfId_t id, CPyObject& X);

View File

@@ -1,8 +1,20 @@
#include "RandomForest.h" #include "RandomForest.h"
namespace pywrap { namespace pywrap {
std::string RandomForest::version() RandomForest::RandomForest() : PyClassifier("sklearn.ensemble", "RandomForestClassifier", true)
{ {
return sklearnVersion(); validHyperparameters = { "n_estimators", "n_jobs", "random_state" };
}
int RandomForest::getNumberOfEdges() const
{
return callMethodSumOfItems("get_n_leaves");
}
int RandomForest::getNumberOfStates() const
{
return callMethodSumOfItems("get_depth");
}
int RandomForest::getNumberOfNodes() const
{
return callMethodSumOfItems("node_count");
} }
} /* namespace pywrap */ } /* namespace pywrap */

View File

@@ -5,9 +5,11 @@
namespace pywrap { namespace pywrap {
class RandomForest : public PyClassifier { class RandomForest : public PyClassifier {
public: public:
RandomForest() : PyClassifier("sklearn.ensemble", "RandomForestClassifier") {}; RandomForest();
~RandomForest() = default; ~RandomForest() = default;
std::string version(); int getNumberOfEdges() const override;
int getNumberOfStates() const override;
int getNumberOfNodes() const override;
}; };
} /* namespace pywrap */ } /* namespace pywrap */
#endif /* RANDOMFOREST_H */ #endif /* RANDOMFOREST_H */

View File

@@ -1,15 +1,24 @@
#include "STree.h" #include "STree.h"
namespace pywrap { namespace pywrap {
STree::STree() : PyClassifier("stree", "Stree")
{
validHyperparameters = { "C", "kernel", "max_iter", "max_depth", "random_state", "multiclass_strategy", "gamma", "max_features", "degree" };
};
int STree::getNumberOfNodes() const
{
return callMethodInt("get_nodes");
}
int STree::getNumberOfEdges() const
{
return callMethodInt("get_leaves");
}
int STree::getNumberOfStates() const
{
return callMethodInt("get_depth");
}
std::string STree::graph() std::string STree::graph()
{ {
return callMethodString("graph"); return callMethodString("graph");
} }
void STree::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "C", "n_jobs", "kernel", "max_iter", "max_depth", "random_state", "multiclass_strategy" };
checkHyperparameters(validKeys, hyperparameters);
this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */ } /* namespace pywrap */

View File

@@ -6,10 +6,12 @@
namespace pywrap { namespace pywrap {
class STree : public PyClassifier { class STree : public PyClassifier {
public: public:
STree() : PyClassifier("stree", "Stree") {}; STree();
~STree() = default; ~STree() = default;
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
std::string graph(); std::string graph();
void setHyperparameters(const nlohmann::json& hyperparameters) override;
}; };
} /* namespace pywrap */ } /* namespace pywrap */
#endif /* STREE_H */ #endif /* STREE_H */

View File

@@ -1,15 +1,8 @@
#include "SVC.h" #include "SVC.h"
namespace pywrap { namespace pywrap {
std::string SVC::version() SVC::SVC() : PyClassifier("sklearn.svm", "SVC", true)
{ {
return sklearnVersion(); validHyperparameters = { "C", "gamma", "kernel", "random_state" };
}
void SVC::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "C", "gamma", "kernel", "random_state" };
checkHyperparameters(validKeys, hyperparameters);
this->hyperparameters = hyperparameters;
} }
} /* namespace pywrap */ } /* namespace pywrap */

View File

@@ -5,11 +5,9 @@
namespace pywrap { namespace pywrap {
class SVC : public PyClassifier { class SVC : public PyClassifier {
public: public:
SVC() : PyClassifier("sklearn.svm", "SVC") {}; SVC();
~SVC() = default; ~SVC() = default;
std::string version();
void setHyperparameters(const nlohmann::json& hyperparameters) override;
}; };
} /* namespace pywrap */ } /* namespace pywrap */
#endif /* STREE_H */ #endif /* SVC_H */

835
stree_results.json Normal file
View File

@@ -0,0 +1,835 @@
[
{
"date": "2021-04-11",
"time": "18:46:29",
"type": "crossval",
"classifier": "stree",
"dataset": "balance-scale",
"accuracy": "0.97056",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 10000.0, \"gamma\": 0.1, \"kernel\": \"rbf\", \"max_iter\": 10000.0}",
"time_spent": "0.0135214",
"time_spent_std": "0.00111213",
"accuracy_std": "0.0150468",
"nodes": "7.0",
"leaves": "4.0",
"depth": "3.0"
},
{
"date": "2021-04-11",
"time": "18:46:29",
"type": "crossval",
"classifier": "stree",
"dataset": "balloons",
"accuracy": "0.86",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 7, \"gamma\": 0.1, \"kernel\": \"rbf\", \"max_iter\": 10000.0}",
"time_spent": "0.000804768",
"time_spent_std": "7.74797e-05",
"accuracy_std": "0.285015",
"nodes": "3.0",
"leaves": "2.0",
"depth": "2.0"
},
{
"date": "2021-04-11",
"time": "18:46:29",
"type": "crossval",
"classifier": "stree",
"dataset": "breast-cancer-wisc-diag",
"accuracy": "0.972764",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.2, \"max_iter\": 10000.0}",
"time_spent": "0.00380772",
"time_spent_std": "0.000638676",
"accuracy_std": "0.0173132",
"nodes": "3.24",
"leaves": "2.12",
"depth": "2.12"
},
{
"date": "2021-04-11",
"time": "18:46:30",
"type": "crossval",
"classifier": "stree",
"dataset": "breast-cancer-wisc-prog",
"accuracy": "0.811128",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.2, \"max_iter\": 10000.0}",
"time_spent": "0.00767535",
"time_spent_std": "0.00148114",
"accuracy_std": "0.0584601",
"nodes": "5.84",
"leaves": "3.42",
"depth": "3.24"
},
{
"date": "2021-04-11",
"time": "18:46:31",
"type": "crossval",
"classifier": "stree",
"dataset": "breast-cancer-wisc",
"accuracy": "0.966661",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.00652217",
"time_spent_std": "0.000726579",
"accuracy_std": "0.0139421",
"nodes": "8.88",
"leaves": "4.94",
"depth": "4.08"
},
{
"date": "2021-04-11",
"time": "18:46:32",
"type": "crossval",
"classifier": "stree",
"dataset": "breast-cancer",
"accuracy": "0.734211",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.023475",
"time_spent_std": "0.00584447",
"accuracy_std": "0.0479774",
"nodes": "21.72",
"leaves": "11.36",
"depth": "5.86"
},
{
"date": "2021-04-11",
"time": "18:49:08",
"type": "crossval",
"classifier": "stree",
"dataset": "cardiotocography-10clases",
"accuracy": "0.791487",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "3.10582",
"time_spent_std": "0.339218",
"accuracy_std": "0.0192082",
"nodes": "160.76",
"leaves": "80.88",
"depth": "22.86"
},
{
"date": "2021-04-11",
"time": "18:50:01",
"type": "crossval",
"classifier": "stree",
"dataset": "cardiotocography-3clases",
"accuracy": "0.900613",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "1.05228",
"time_spent_std": "0.138768",
"accuracy_std": "0.0154004",
"nodes": "47.68",
"leaves": "24.34",
"depth": "8.84"
},
{
"date": "2021-04-11",
"time": "18:50:01",
"type": "crossval",
"classifier": "stree",
"dataset": "conn-bench-sonar-mines-rocks",
"accuracy": "0.755528",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.011577",
"time_spent_std": "0.00341148",
"accuracy_std": "0.0678424",
"nodes": "6.08",
"leaves": "3.54",
"depth": "2.86"
},
{
"date": "2021-04-11",
"time": "18:50:17",
"type": "crossval",
"classifier": "stree",
"dataset": "cylinder-bands",
"accuracy": "0.715049",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.301143",
"time_spent_std": "0.109773",
"accuracy_std": "0.0367646",
"nodes": "26.2",
"leaves": "13.6",
"depth": "6.82"
},
{
"date": "2021-04-11",
"time": "18:50:19",
"type": "crossval",
"classifier": "stree",
"dataset": "dermatology",
"accuracy": "0.971833",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 55, \"max_iter\": 10000.0}",
"time_spent": "0.0377538",
"time_spent_std": "0.010726",
"accuracy_std": "0.0206883",
"nodes": "11.0",
"leaves": "6.0",
"depth": "6.0"
},
{
"date": "2021-04-11",
"time": "18:50:19",
"type": "crossval",
"classifier": "stree",
"dataset": "echocardiogram",
"accuracy": "0.814758",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 7, \"gamma\": 0.1, \"kernel\": \"poly\", \"max_features\": \"auto\", \"max_iter\": 10000.0}",
"time_spent": "0.00333449",
"time_spent_std": "0.000964686",
"accuracy_std": "0.0998078",
"nodes": "7.0",
"leaves": "4.0",
"depth": "3.54"
},
{
"date": "2021-04-11",
"time": "18:50:20",
"type": "crossval",
"classifier": "stree",
"dataset": "fertility",
"accuracy": "0.88",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.05, \"max_features\": \"auto\", \"max_iter\": 10000.0}",
"time_spent": "0.00090271",
"time_spent_std": "8.96446e-05",
"accuracy_std": "0.0547723",
"nodes": "1.0",
"leaves": "1.0",
"depth": "1.0"
},
{
"date": "2021-04-11",
"time": "18:50:21",
"type": "crossval",
"classifier": "stree",
"dataset": "haberman-survival",
"accuracy": "0.735637",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.0171611",
"time_spent_std": "0.00334945",
"accuracy_std": "0.0434614",
"nodes": "23.4",
"leaves": "12.2",
"depth": "5.98"
},
{
"date": "2021-04-11",
"time": "18:50:21",
"type": "crossval",
"classifier": "stree",
"dataset": "heart-hungarian",
"accuracy": "0.827522",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.05, \"max_iter\": 10000.0}",
"time_spent": "0.00493946",
"time_spent_std": "0.000738198",
"accuracy_std": "0.0505283",
"nodes": "10.16",
"leaves": "5.58",
"depth": "4.0"
},
{
"date": "2021-04-11",
"time": "18:50:21",
"type": "crossval",
"classifier": "stree",
"dataset": "hepatitis",
"accuracy": "0.824516",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 7, \"gamma\": 0.1, \"kernel\": \"rbf\", \"max_iter\": 10000.0}",
"time_spent": "0.0021534",
"time_spent_std": "0.000133715",
"accuracy_std": "0.0738872",
"nodes": "3.0",
"leaves": "2.0",
"depth": "2.0"
},
{
"date": "2021-04-11",
"time": "18:50:23",
"type": "crossval",
"classifier": "stree",
"dataset": "ilpd-indian-liver",
"accuracy": "0.723498",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.0345243",
"time_spent_std": "0.015789",
"accuracy_std": "0.0384886",
"nodes": "16.04",
"leaves": "8.52",
"depth": "5.28"
},
{
"date": "2021-04-11",
"time": "18:50:24",
"type": "crossval",
"classifier": "stree",
"dataset": "ionosphere",
"accuracy": "0.953276",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 7, \"gamma\": 0.1, \"kernel\": \"rbf\", \"max_iter\": 10000.0}",
"time_spent": "0.00881722",
"time_spent_std": "0.000843108",
"accuracy_std": "0.0238537",
"nodes": "3.16",
"leaves": "2.08",
"depth": "2.08"
},
{
"date": "2021-04-11",
"time": "18:50:24",
"type": "crossval",
"classifier": "stree",
"dataset": "iris",
"accuracy": "0.965333",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.00357342",
"time_spent_std": "0.000400509",
"accuracy_std": "0.0319444",
"nodes": "5.0",
"leaves": "3.0",
"depth": "3.0"
},
{
"date": "2021-04-11",
"time": "18:50:36",
"type": "crossval",
"classifier": "stree",
"dataset": "led-display",
"accuracy": "0.703",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.222106",
"time_spent_std": "0.0116922",
"accuracy_std": "0.0291204",
"nodes": "47.16",
"leaves": "24.08",
"depth": "17.76"
},
{
"date": "2021-04-11",
"time": "18:51:18",
"type": "crossval",
"classifier": "stree",
"dataset": "libras",
"accuracy": "0.788611",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.08, \"max_iter\": 10000.0}",
"time_spent": "0.841714",
"time_spent_std": "0.0830966",
"accuracy_std": "0.0516913",
"nodes": "82.28",
"leaves": "41.64",
"depth": "28.84"
},
{
"date": "2021-04-11",
"time": "18:51:41",
"type": "crossval",
"classifier": "stree",
"dataset": "low-res-spect",
"accuracy": "0.883782",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.05, \"max_iter\": 10000.0}",
"time_spent": "0.446301",
"time_spent_std": "0.0411822",
"accuracy_std": "0.0324593",
"nodes": "27.4",
"leaves": "14.2",
"depth": "10.74"
},
{
"date": "2021-04-11",
"time": "18:51:41",
"type": "crossval",
"classifier": "stree",
"dataset": "lymphography",
"accuracy": "0.835034",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.05, \"max_iter\": 10000.0}",
"time_spent": "0.00539465",
"time_spent_std": "0.000754365",
"accuracy_std": "0.0590649",
"nodes": "9.04",
"leaves": "5.02",
"depth": "4.48"
},
{
"date": "2021-04-11",
"time": "18:51:43",
"type": "crossval",
"classifier": "stree",
"dataset": "mammographic",
"accuracy": "0.81915",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.0227931",
"time_spent_std": "0.00328533",
"accuracy_std": "0.0222517",
"nodes": "7.4",
"leaves": "4.2",
"depth": "4.0"
},
{
"date": "2021-04-11",
"time": "18:51:43",
"type": "crossval",
"classifier": "stree",
"dataset": "molec-biol-promoter",
"accuracy": "0.767056",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.05, \"gamma\": 0.1, \"kernel\": \"poly\", \"max_iter\": 10000.0}",
"time_spent": "0.00130273",
"time_spent_std": "0.000105772",
"accuracy_std": "0.0910923",
"nodes": "3.0",
"leaves": "2.0",
"depth": "2.0"
},
{
"date": "2021-04-11",
"time": "18:51:44",
"type": "crossval",
"classifier": "stree",
"dataset": "musk-1",
"accuracy": "0.916388",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.05, \"gamma\": 0.1, \"kernel\": \"poly\", \"max_iter\": 10000.0}",
"time_spent": "0.0116367",
"time_spent_std": "0.000331845",
"accuracy_std": "0.0275208",
"nodes": "3.0",
"leaves": "2.0",
"depth": "2.0"
},
{
"date": "2021-04-11",
"time": "18:51:55",
"type": "crossval",
"classifier": "stree",
"dataset": "oocytes_merluccius_nucleus_4d",
"accuracy": "0.835125",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 8.25, \"gamma\": 0.1, \"kernel\": \"poly\"}",
"time_spent": "0.208895",
"time_spent_std": "0.0270573",
"accuracy_std": "0.0220961",
"nodes": "10.52",
"leaves": "5.76",
"depth": "4.42"
},
{
"date": "2021-04-11",
"time": "18:52:04",
"type": "crossval",
"classifier": "stree",
"dataset": "oocytes_merluccius_states_2f",
"accuracy": "0.915365",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.182198",
"time_spent_std": "0.0294267",
"accuracy_std": "0.020396",
"nodes": "18.04",
"leaves": "9.52",
"depth": "5.3"
},
{
"date": "2021-04-11",
"time": "18:52:41",
"type": "crossval",
"classifier": "stree",
"dataset": "oocytes_trisopterus_nucleus_2f",
"accuracy": "0.800986",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.717113",
"time_spent_std": "0.209608",
"accuracy_std": "0.0218449",
"nodes": "29.88",
"leaves": "15.44",
"depth": "7.38"
},
{
"date": "2021-04-11",
"time": "18:52:44",
"type": "crossval",
"classifier": "stree",
"dataset": "oocytes_trisopterus_states_5b",
"accuracy": "0.922249",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.11, \"max_iter\": 10000.0}",
"time_spent": "0.0545047",
"time_spent_std": "0.00853014",
"accuracy_std": "0.0179203",
"nodes": "7.44",
"leaves": "4.22",
"depth": "3.6"
},
{
"date": "2021-04-11",
"time": "18:52:44",
"type": "crossval",
"classifier": "stree",
"dataset": "parkinsons",
"accuracy": "0.882051",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.00795048",
"time_spent_std": "0.00176761",
"accuracy_std": "0.0478327",
"nodes": "8.48",
"leaves": "4.74",
"depth": "3.76"
},
{
"date": "2021-04-11",
"time": "18:52:48",
"type": "crossval",
"classifier": "stree",
"dataset": "pima",
"accuracy": "0.766651",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.0750048",
"time_spent_std": "0.0213995",
"accuracy_std": "0.0297203",
"nodes": "17.4",
"leaves": "9.2",
"depth": "5.66"
},
{
"date": "2021-04-11",
"time": "18:52:48",
"type": "crossval",
"classifier": "stree",
"dataset": "pittsburg-bridges-MATERIAL",
"accuracy": "0.867749",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 7, \"gamma\": 0.1, \"kernel\": \"rbf\", \"max_iter\": 10000.0}",
"time_spent": "0.00293318",
"time_spent_std": "0.000331469",
"accuracy_std": "0.0712226",
"nodes": "5.16",
"leaves": "3.08",
"depth": "3.02"
},
{
"date": "2021-04-11",
"time": "18:52:49",
"type": "crossval",
"classifier": "stree",
"dataset": "pittsburg-bridges-REL-L",
"accuracy": "0.632238",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.0136311",
"time_spent_std": "0.00322964",
"accuracy_std": "0.101211",
"nodes": "16.32",
"leaves": "8.66",
"depth": "5.96"
},
{
"date": "2021-04-11",
"time": "18:52:50",
"type": "crossval",
"classifier": "stree",
"dataset": "pittsburg-bridges-SPAN",
"accuracy": "0.659766",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.05, \"max_iter\": 10000.0}",
"time_spent": "0.00524256",
"time_spent_std": "0.00158822",
"accuracy_std": "0.1165",
"nodes": "9.84",
"leaves": "5.42",
"depth": "4.58"
},
{
"date": "2021-04-11",
"time": "18:52:50",
"type": "crossval",
"classifier": "stree",
"dataset": "pittsburg-bridges-T-OR-D",
"accuracy": "0.861619",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.00295627",
"time_spent_std": "0.000578594",
"accuracy_std": "0.0693747",
"nodes": "4.56",
"leaves": "2.78",
"depth": "2.68"
},
{
"date": "2021-04-11",
"time": "18:52:50",
"type": "crossval",
"classifier": "stree",
"dataset": "planning",
"accuracy": "0.73527",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 7, \"gamma\": 10.0, \"kernel\": \"rbf\", \"max_iter\": 10000.0}",
"time_spent": "0.0030475",
"time_spent_std": "0.000172266",
"accuracy_std": "0.0669776",
"nodes": "3.0",
"leaves": "2.0",
"depth": "2.0"
},
{
"date": "2021-04-11",
"time": "18:52:51",
"type": "crossval",
"classifier": "stree",
"dataset": "post-operative",
"accuracy": "0.711111",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 55, \"degree\": 5, \"gamma\": 0.1, \"kernel\": \"poly\", \"max_iter\": 10000.0}",
"time_spent": "0.0018727",
"time_spent_std": "0.000481977",
"accuracy_std": "0.0753592",
"nodes": "2.64",
"leaves": "1.82",
"depth": "1.82"
},
{
"date": "2021-04-11",
"time": "18:52:52",
"type": "crossval",
"classifier": "stree",
"dataset": "seeds",
"accuracy": "0.952857",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 10000.0, \"max_iter\": 10000.0}",
"time_spent": "0.0203492",
"time_spent_std": "0.00518065",
"accuracy_std": "0.0279658",
"nodes": "9.88",
"leaves": "5.44",
"depth": "4.44"
},
{
"date": "2021-04-11",
"time": "18:52:52",
"type": "crossval",
"classifier": "stree",
"dataset": "statlog-australian-credit",
"accuracy": "0.678261",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.05, \"max_features\": \"auto\", \"max_iter\": 10000.0}",
"time_spent": "0.00205337",
"time_spent_std": "0.00083162",
"accuracy_std": "0.0390498",
"nodes": "1.32",
"leaves": "1.16",
"depth": "1.16"
},
{
"date": "2021-04-11",
"time": "18:53:07",
"type": "crossval",
"classifier": "stree",
"dataset": "statlog-german-credit",
"accuracy": "0.7625",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.290754",
"time_spent_std": "0.0653152",
"accuracy_std": "0.0271892",
"nodes": "21.24",
"leaves": "11.12",
"depth": "6.18"
},
{
"date": "2021-04-11",
"time": "18:53:09",
"type": "crossval",
"classifier": "stree",
"dataset": "statlog-heart",
"accuracy": "0.822963",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.0138923",
"time_spent_std": "0.00323664",
"accuracy_std": "0.044004",
"nodes": "14.56",
"leaves": "7.78",
"depth": "5.0"
},
{
"date": "2021-04-11",
"time": "18:56:43",
"type": "crossval",
"classifier": "stree",
"dataset": "statlog-image",
"accuracy": "0.955931",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 7, \"max_iter\": 10000.0}",
"time_spent": "4.27584",
"time_spent_std": "0.200362",
"accuracy_std": "0.00956073",
"nodes": "36.92",
"leaves": "18.96",
"depth": "10.8"
},
{
"date": "2021-04-11",
"time": "18:56:57",
"type": "crossval",
"classifier": "stree",
"dataset": "statlog-vehicle",
"accuracy": "0.793028",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.278833",
"time_spent_std": "0.0392173",
"accuracy_std": "0.030104",
"nodes": "23.88",
"leaves": "12.44",
"depth": "7.06"
},
{
"date": "2021-04-11",
"time": "18:57:07",
"type": "crossval",
"classifier": "stree",
"dataset": "synthetic-control",
"accuracy": "0.95",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.55, \"max_iter\": 10000.0}",
"time_spent": "0.205184",
"time_spent_std": "0.040793",
"accuracy_std": "0.0253859",
"nodes": "12.48",
"leaves": "6.74",
"depth": "6.5"
},
{
"date": "2021-04-11",
"time": "18:57:08",
"type": "crossval",
"classifier": "stree",
"dataset": "tic-tac-toe",
"accuracy": "0.984444",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.2, \"gamma\": 0.1, \"kernel\": \"poly\", \"max_iter\": 10000.0}",
"time_spent": "0.0123015",
"time_spent_std": "0.000423728",
"accuracy_std": "0.00838747",
"nodes": "3.0",
"leaves": "2.0",
"depth": "2.0"
},
{
"date": "2021-04-11",
"time": "18:57:09",
"type": "crossval",
"classifier": "stree",
"dataset": "vertebral-column-2clases",
"accuracy": "0.852903",
"norm": 1,
"stand": 0,
"parameters": "{}",
"time_spent": "0.00576833",
"time_spent_std": "0.000910332",
"accuracy_std": "0.0408851",
"nodes": "6.04",
"leaves": "3.52",
"depth": "3.34"
},
{
"date": "2021-04-11",
"time": "18:57:09",
"type": "crossval",
"classifier": "stree",
"dataset": "wine",
"accuracy": "0.979159",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.55, \"max_iter\": 10000.0}",
"time_spent": "0.0019741",
"time_spent_std": "0.000137745",
"accuracy_std": "0.022427",
"nodes": "5.0",
"leaves": "3.0",
"depth": "3.0"
},
{
"date": "2021-04-11",
"time": "18:57:10",
"type": "crossval",
"classifier": "stree",
"dataset": "zoo",
"accuracy": "0.957524",
"norm": 1,
"stand": 0,
"parameters": "{\"C\": 0.1, \"max_iter\": 10000.0}",
"time_spent": "0.00556221",
"time_spent_std": "0.000230106",
"accuracy_std": "0.0454615",
"nodes": "13.04",
"leaves": "7.02",
"depth": "7.02"
}
]