Compare commits

...

75 Commits

Author SHA1 Message Date
f3b8150e2c Add notes to Classifier & Changelog 2024-02-12 10:58:20 +01:00
03f8b8653b Add getNotes test 2024-02-09 12:06:19 +01:00
2163e95c4a add getNotes method 2024-02-09 10:57:19 +01:00
b33da34655 Add notes to Classifier & use them in BoostAODE 2024-02-08 18:01:09 +01:00
e17aee7bdb Remove argparse module 2024-01-09 18:02:17 +01:00
37c31ee4c2 Update libraries 2024-01-08 17:45:11 +01:00
80afdc06f7 Remove unneeded argparse module 2024-01-08 00:55:16 +01:00
Ricardo Montañana Gómez
666782217e Merge pull request #1 from rmontanana/library
Remove other projects' sources
2024-01-07 20:01:37 +01:00
55af0714cd Remove other projects' sources 2024-01-07 19:58:22 +01:00
6ef5ca541a Add app version to command line utils 2024-01-06 22:38:34 +01:00
4364317411 Merge pull request 'Refactor mpi grid search process using the producer consumer pattern' (#15) from producer_consumer into main
Reviewed-on: #15
2024-01-04 15:24:48 +00:00
65a96851ef Check min number of nested folds 2024-01-04 11:01:59 +01:00
722da7f781 Keep only mpi b_grid compute 2024-01-04 01:21:56 +01:00
b1833a5feb Add reset color to final progress bar 2024-01-03 22:45:16 +01:00
41a0bd4ddd fix dataset name mistakes 2024-01-03 17:15:57 +01:00
9ab4fc7d76 Fix some mistakes in methods 2024-01-03 11:53:46 +01:00
beadb7465f Complete first approach 2023-12-31 12:02:13 +01:00
652e5f623f Add todo comments 2023-12-28 23:32:24 +01:00
b7fef9a99d Remove kk file 2023-12-28 23:24:59 +01:00
343269d48c Fix syntax errors 2023-12-28 23:21:50 +01:00
21c4c6df51 Fix first mistakes in structure 2023-12-25 19:33:52 +01:00
702f086706 Update miniconda instructions 2023-12-23 19:54:00 +01:00
981bc8f98b Fix install message in readme 2023-12-23 01:00:55 +01:00
e0b7b2d316 Set structure & protocol of producer-consumer 2023-12-22 12:47:13 +01:00
9b9e91e856 Merge pull request 'mpi_grid' (#14) from mpi_grid into main
Reviewed-on: #14
2023-12-18 09:05:55 +00:00
18e8e84284 Add openmpi instructions for Oracle Linux 2023-12-17 12:19:50 +01:00
7de11b0e6d Fix format of duration 2023-12-17 01:45:04 +01:00
9b8db37a4b Fix duration of task not set 2023-12-16 19:31:45 +01:00
49b26bd04b fix duration output 2023-12-16 12:53:25 +01:00
b5b5b48864 Update grid progress bar output 2023-12-15 18:09:17 +01:00
19586a3a5a Fix pesky error allocating memory in workers 2023-12-15 01:54:13 +01:00
ffe6d37436 Add messages to control trace 2023-12-14 21:06:43 +01:00
b73f4be146 First try with complete algorithm 2023-12-14 15:55:08 +01:00
dbf2f35502 First compiling version 2023-12-12 18:57:57 +01:00
db9e80a70e Create build tasks 2023-12-12 12:15:22 +01:00
40ae4ad7f9 Include mpi in CMakeLists 2023-12-11 09:06:05 +01:00
234342f2de Add mpi parameter to b_grid 2023-12-10 22:33:17 +01:00
aa0936abd1 Add --exclude parameter to b_grid to exclude datasets 2023-12-08 12:09:08 +01:00
f0d6f0cc38 Fix sample building 2023-12-04 19:12:44 +01:00
cc316bb8d3 Add colors to results of gridsearch 2023-12-04 17:34:00 +01:00
0723564e66 Fix some output in gridsearch 2023-12-03 17:55:44 +01:00
2e95e8999d Complete nested gridsearch 2023-12-03 12:37:25 +01:00
fb9b395748 Begin output nested grid 2023-12-02 13:19:12 +01:00
03e4437fea refactor gridsearch to have only one go method 2023-12-02 10:59:05 +01:00
33cd32c639 Add header to grid output and report 2023-12-01 10:30:53 +01:00
c460ef46ed Refactor gridsearch method 2023-11-30 11:01:37 +01:00
dee9c674da Refactor grid input hyperparameter file 2023-11-29 18:24:34 +01:00
e3f6dc1e0b Fix tolerance hyperp error & gridsearch 2023-11-29 12:33:50 +01:00
460d20a402 Add reports to gridsearch 2023-11-29 00:26:48 +01:00
8dbbb65a2f Add only parameter to gridsearch 2023-11-28 10:08:40 +01:00
d06bf187b2 Implement Random Forest nodes/leaves/depth 2023-11-28 00:35:38 +01:00
4addaefb47 Implement sklearn version in PyWrap 2023-11-27 22:34:34 +01:00
82964190f6 Add nodes/leaves/depth to STree & ODTE 2023-11-27 10:57:57 +01:00
4fefe9a1d2 Add grid input info to grid output 2023-11-26 16:07:32 +01:00
7c12dd25e5 Fix upper case typo 2023-11-26 10:55:32 +01:00
c713c0b1df Add continue from parameter to gridsearch 2023-11-26 10:36:09 +01:00
64069a6cb7 Adapt b_main to the new hyperparam file format 2023-11-25 16:52:25 +01:00
ba2a3f9523 Merge pull request 'gridsearch' (#13) from gridsearch into main
Reviewed-on: #13
2023-11-25 11:16:13 +00:00
f94e2d6a27 Add quiet parameter 2023-11-24 21:16:20 +01:00
2121ba9b98 Refactor input grid parameters to json file 2023-11-24 09:57:29 +01:00
8b7b59d42b Complete first step 2023-11-23 12:59:21 +01:00
bbe5302ab1 Add info to output 2023-11-22 16:38:50 +01:00
c2eb727fc7 Complete output interface of gridsearch 2023-11-22 16:30:04 +01:00
fb347ed5b9 Begin gridsearch implementation 2023-11-22 12:22:30 +01:00
b657762c0c Generate combinations sample 2023-11-22 00:18:24 +01:00
495d8a8528 Begin implementing grid combinations 2023-11-21 13:11:14 +01:00
4628e48d3c Build gridsearch structure 2023-11-20 23:32:34 +01:00
5876be4b24 Add more install instructions of Boost to README 2023-11-20 20:39:22 +01:00
dc3400197f Add coment todo impelemt number of nodes 2023-11-20 01:14:13 +01:00
26d3a57782 Add info to invalid hyperparameter exception 2023-11-19 23:02:28 +01:00
4f3a04058f Refactor Hyperparameters management 2023-11-19 22:36:27 +01:00
89c4613591 Implement hyperparameters with json file 2023-11-18 11:56:10 +01:00
28f3d87e32 Add Python Classifiers
Add STree, Odte, SVC & RandomForest Classifiers
Remove using namespace ... in project
2023-11-17 11:11:05 +01:00
64f5a7f14a Fix header in example 2023-11-16 17:03:40 +01:00
e03efb5f63 set tolerance=0 if feature selection in BoostAODE 2023-11-14 10:12:02 +01:00
111 changed files with 249 additions and 7576 deletions

View File

@@ -1,31 +0,0 @@
compilation_database_dir: build
output_directory: puml
diagrams:
BayesNet:
type: class
glob:
- src/BayesNet/*.cc
- src/Platform/*.cc
using_namespace: bayesnet
include:
namespaces:
- bayesnet
- platform
plantuml:
after:
- "note left of {{ alias(\"MyProjectMain\") }}: Main class of myproject library."
sequence:
type: sequence
glob:
- src/Platform/main.cc
combine_free_functions_into_file_participants: true
using_namespace:
- std
- bayesnet
- platform
include:
paths:
- src/BayesNet
- src/Platform
start_from:
- function: main(int,const char **)

3
.gitignore vendored
View File

@@ -32,8 +32,7 @@
*.out
*.app
build/**
build_debug/**
build_release/**
build_*/**
*.dSYM/**
cmake-build*/**
.idea

15
.gitmodules vendored
View File

@@ -1,15 +1,18 @@
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"]
path = lib/catch2
main = v2.x
update = merge
url = https://github.com/catchorg/Catch2.git
[submodule "lib/argparse"]
path = lib/argparse
url = https://github.com/p-ranav/argparse
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
[submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git
master = master
update = merge
[submodule "lib/folding"]
path = lib/folding
url = https://github.com/rmontanana/folding

43
.vscode/launch.json vendored
View File

@@ -14,14 +14,14 @@
"-s",
"271",
"-p",
"/home/rmontanana/Code/discretizbench/datasets/",
"/Users/rmontanana/Code/discretizbench/datasets/",
],
//"cwd": "${workspaceFolder}/build/sample/",
},
{
"type": "lldb",
"request": "launch",
"name": "experiment",
"name": "experimentPy",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
@@ -33,6 +33,39 @@
// "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "gridsearch",
"program": "${workspaceFolder}/build_debug/src/Platform/b_grid",
"args": [
"-m",
"KDB",
"--discretize",
"--continue",
"glass",
"--only",
"--compute"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "experimentBayes",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"TAN",
"--stratified",
"--discretize",
"-d",
"iris",
"--hyperparameters",
"{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "/home/rmontanana/Code/discretizbench",
},
{
@@ -47,7 +80,7 @@
"accuracy",
"--build",
],
"cwd": "/home/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
@@ -58,7 +91,7 @@
"-n",
"20"
],
"cwd": "/home/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
@@ -67,7 +100,7 @@
"program": "${workspaceFolder}/build_debug/src/Platform/b_list",
"args": [],
//"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "/home/rmontanana/Code/covbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",

23
CHANGELOG.md Normal file
View File

@@ -0,0 +1,23 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.0.1] - 2024-02-12
### Added
- Notes in Classifier class
- BoostAODE: Add note with used features in initialization with feature selection
- BoostAODE: Add note with the number of models
- BoostAODE: Add note with the number of features used to create models if not all features are used
- Test version number in TestBayesModels
- Add tests with feature_select and notes on BoostAODE
### Fixed
- Network predict test
- Network predict_proba test
- Network score test

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20)
project(BayesNet
VERSION 0.2.0
VERSION 1.0.1
DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX
@@ -25,27 +25,12 @@ set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# Options
# -------
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)
# Boost Library
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost 1.66.0 REQUIRED COMPONENTS python3 numpy3)
if(Boost_FOUND)
message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
include_directories(${Boost_INCLUDE_DIRS})
endif()
# Python
find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED)
message("Python3_LIBRARIES=${Python3_LIBRARIES}")
# CMakes modules
# --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
@@ -67,26 +52,16 @@ endif (ENABLE_CLANG_TIDY)
# ---------------------------------------------
# include(FetchContent)
add_git_submodule("lib/mdlp")
add_git_submodule("lib/argparse")
add_git_submodule("lib/json")
find_library(XLSXWRITER_LIB NAMES libxlsxwriter.dylib libxlsxwriter.so PATHS ${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/lib)
message("XLSXWRITER_LIB=${XLSXWRITER_LIB}")
# Subdirectories
# --------------
add_subdirectory(config)
add_subdirectory(lib/Files)
add_subdirectory(src/BayesNet)
add_subdirectory(src/Platform)
add_subdirectory(src/PyClassifiers)
add_subdirectory(sample)
file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h)
file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cc ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cpp)
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform/*.cc ${BayesNet_SOURCE_DIR}/src/Platform/*.cpp)
# Testing
# -------

View File

@@ -1,11 +1,11 @@
SHELL := /bin/bash
.DEFAULT_GOAL := help
.PHONY: coverage setup help build test clean debug release
.PHONY: coverage setup help buildr buildd test clean debug release
f_release = build_release
f_debug = build_debug
app_targets = b_best b_list b_main b_manage
test_targets = unit_tests_bayesnet unit_tests_platform
app_targets = BayesNet
test_targets = unit_tests_bayesnet
n_procs = -j 16
define ClearTests
@@ -31,16 +31,6 @@ setup: ## Install dependencies for tests and coverage
pip install gcovr; \
fi
dest ?= ${HOME}/bin
install: ## Copy binary files to bin folder
@echo "Destination folder: $(dest)"
make buildr
@echo ">>> Copying files to $(dest)"
@cp $(f_release)/src/Platform/b_main $(dest)
@cp $(f_release)/src/Platform/b_list $(dest)
@cp $(f_release)/src/Platform/b_manage $(dest)
@cp $(f_release)/src/Platform/b_best $(dest)
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
@echo ">>> Creating dependency graph diagram of the project...";
$(MAKE) debug
@@ -57,9 +47,6 @@ clean: ## Clean the tests info
$(call ClearTests)
@echo ">>> Done";
clang-uml: ## Create uml class and sequence diagrams
clang-uml -p --add-compile-flag -I /usr/lib/gcc/x86_64-redhat-linux/8/include/
debug: ## Build a debug version of the project
@echo ">>> Building Debug BayesNet...";
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
@@ -87,27 +74,10 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu
done
@echo ">>> Done";
opt = ""
testp: ## Run platform tests (opt="-s") to verbose output the tests, (opt="-c='Stratified Fold Test'") to run only that section
@echo ">>> Running Platform tests...";
@$(MAKE) clean
@cmake --build $(f_debug) --target unit_tests_platform $(n_procs)
@if [ -f $(f_debug)/tests/unit_tests_platform ]; then cd $(f_debug)/tests ; ./unit_tests_platform $(opt) ; fi ;
@echo ">>> Done";
opt = ""
testb: ## Run BayesNet tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet tests...";
@$(MAKE) clean
@cmake --build $(f_debug) --target unit_tests_bayesnet $(n_procs)
@if [ -f $(f_debug)/tests/unit_tests_bayesnet ]; then cd $(f_debug)/tests ; ./unit_tests_bayesnet $(opt) ; fi ;
@echo ">>> Done";
coverage: ## Run tests and generate coverage report (build/index.html)
@echo ">>> Building tests with coverage...";
@echo ">>> Building tests with coverage..."
@$(MAKE) test
@cd $(f_debug) ; \
gcovr --config ../gcovr.cfg tests ;
@gcovr $(f_debug)/tests
@echo ">>> Done";

View File

@@ -2,52 +2,21 @@
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
Bayesian Network Classifier with libtorch from scratch
## 0. Setup
Before compiling BayesNet.
### boost library
[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
The best option is install the packages that the Linux distribution have in its repository. If this is the case:
```bash
sudo dnf install boost-devel
```
If this is not possible and the compressed packaged is installed, the following environment variable has to be set:
```bash
export BOOST_ROOT=/path/to/library/
```
### libxlswriter
```bash
cd lib/libxlsxwriter
make
make install DESTDIR=/home/rmontanana/Code PREFIX=
```
Environment variable has to be set:
```bash
export LD_LIBRARY_PATH=/usr/local/lib
```
Bayesian Network Classifiers using libtorch from scratch
### Release
```bash
make release
make buildr
```
### Debug & Tests
```bash
make debug
make test
make coverage
```
## 1. Introduction

View File

@@ -7,7 +7,8 @@
#define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR @
#define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH @
static constexpr std::string_view project_name = " @PROJECT_NAME@ ";
static constexpr std::string_view project_name = "@PROJECT_NAME@";
static constexpr std::string_view project_version = "@PROJECT_VERSION@";
static constexpr std::string_view project_description = "@PROJECT_DESCRIPTION@";
static constexpr std::string_view git_sha = "@GIT_SHA@";
static constexpr std::string_view data_path = "@BayesNet_SOURCE_DIR@/tests/data/";

View File

@@ -1,25 +0,0 @@
Type Si
Type Fe
Type RI
Type Na
Type Ba
Type Ca
Type Al
Type K
Type Mg
Fe RI
Fe Ba
Fe Ca
RI Na
RI Ba
RI Ca
RI Al
RI K
RI Mg
Ba Ca
Ba Al
Ca Al
Ca K
Ca Mg
Al K
K Mg

View File

@@ -1,645 +0,0 @@
class att215
class att25
class att131
class att95
class att122
class att17
class att28
class att5
class att121
class att214
class att197
class att116
class att182
class att60
class att168
class att178
class att206
class att89
class att77
class att209
class att73
class att126
class att16
class att74
class att27
class att61
class att20
class att101
class att85
class att76
class att137
class att211
class att143
class att14
class att40
class att210
class att155
class att170
class att160
class att23
class att162
class att203
class att164
class att107
class att62
class att42
class att71
class att128
class att138
class att83
class att171
class att92
class att163
class att49
class att161
class att158
class att176
class att11
class att145
class att4
class att172
class att196
class att58
class att68
class att169
class att80
class att32
class att175
class att87
class att88
class att159
class att18
class att52
class att98
class att136
class att150
class att156
class att110
class att100
class att63
class att148
class att90
class att167
class att35
class att205
class att51
class att21
class att142
class att46
class att134
class att39
class att102
class att208
class att130
class att149
class att96
class att75
class att118
class att78
class att213
class att112
class att38
class att174
class att189
class att70
class att179
class att59
class att79
class att15
class att47
class att124
class att34
class att54
class att191
class att86
class att56
class att151
class att66
class att173
class att44
class att198
class att139
class att216
class att129
class att152
class att69
class att81
class att50
class att153
class att41
class att204
class att188
class att26
class att13
class att117
class att114
class att10
class att64
class att200
class att9
class att3
class att119
class att45
class att104
class att140
class att30
class att183
class att146
class att141
class att202
class att194
class att24
class att147
class att8
class att212
class att123
class att166
class att187
class att127
class att190
class att105
class att106
class att184
class att82
class att2
class att135
class att154
class att111
class att115
class att99
class att22
class att84
class att207
class att94
class att177
class att103
class att93
class att201
class att43
class att36
class att12
class att125
class att165
class att180
class att195
class att157
class att48
class att6
class att113
class att193
class att91
class att72
class att31
class att132
class att33
class att57
class att144
class att192
class att185
class att37
class att53
class att120
class att186
class att199
class att65
class att108
class att133
class att29
class att19
class att7
class att97
class att67
class att55
class att1
class att109
class att181
att215 att25
att215 att131
att215 att95
att25 att131
att25 att121
att25 att73
att25 att61
att25 att85
att25 att169
att25 att13
att131 att95
att131 att122
att131 att17
att131 att28
att131 att121
att131 att214
att131 att116
att131 att126
att131 att143
att95 att122
att95 att17
att95 att28
att95 att5
att95 att214
att95 att116
att95 att60
att95 att143
att95 att155
att95 att71
att122 att182
att122 att170
att17 att5
att17 att197
att17 att89
att17 att77
att17 att161
att28 att206
att28 att16
att28 att76
att28 att172
att28 att124
att28 att64
att5 att197
att5 att89
att5 att209
att121 att73
att214 att178
att214 att58
att214 att142
att197 att209
att197 att101
att116 att182
att116 att60
att116 att168
att116 att178
att116 att206
att116 att126
att116 att16
att116 att27
att116 att20
att116 att211
att116 att164
att116 att128
att182 att27
att182 att14
att60 att168
att60 att156
att168 att156
att168 att96
att178 att20
att178 att58
att178 att142
att178 att130
att206 att74
att206 att170
att206 att158
att89 att77
att89 att137
att89 att149
att89 att173
att77 att137
att77 att161
att209 att101
att209 att41
att73 att61
att73 att157
att126 att162
att126 att138
att126 att150
att16 att74
att16 att76
att16 att40
att16 att4
att74 att14
att74 att62
att27 att171
att61 att85
att61 att169
att20 att211
att20 att210
att20 att164
att20 att176
att101 att41
att85 att13
att76 att40
att76 att160
att137 att149
att211 att210
att211 att162
att211 att171
att211 att163
att211 att175
att211 att79
att143 att155
att143 att23
att143 att71
att143 att83
att143 att11
att14 att98
att40 att160
att40 att4
att40 att196
att40 att52
att210 att42
att210 att114
att155 att23
att155 att203
att155 att107
att155 att11
att170 att158
att160 att52
att23 att203
att162 att138
att162 att18
att162 att150
att162 att90
att162 att174
att203 att107
att203 att49
att203 att59
att203 att191
att203 att119
att164 att62
att164 att42
att164 att128
att164 att92
att164 att163
att164 att176
att164 att145
att164 att68
att164 att80
att164 att98
att164 att110
att164 att205
att164 att21
att164 att213
att164 att112
att164 att38
att164 att56
att164 att44
att107 att59
att107 att47
att107 att191
att71 att83
att71 att167
att71 att35
att128 att92
att138 att18
att83 att167
att171 att87
att171 att159
att171 att63
att171 att51
att171 att39
att171 att75
att163 att49
att163 att175
att163 att87
att163 att79
att163 att151
att163 att139
att163 att187
att163 att91
att161 att173
att176 att145
att176 att172
att176 att68
att176 att80
att176 att32
att176 att110
att176 att205
att176 att21
att176 att134
att176 att56
att4 att196
att4 att88
att4 att136
att4 att100
att4 att148
att4 att208
att172 att112
att172 att184
att196 att88
att196 att136
att196 att100
att196 att208
att58 att46
att68 att32
att32 att200
att87 att159
att87 att63
att87 att75
att87 att15
att87 att99
att159 att195
att18 att90
att18 att102
att18 att78
att18 att198
att52 att124
att98 att86
att150 att174
att150 att66
att156 att96
att156 att216
att156 att204
att156 att24
att156 att84
att100 att148
att63 att51
att63 att3
att63 att183
att90 att102
att90 att78
att167 att35
att167 att179
att35 att179
att51 att39
att51 att3
att21 att134
att21 att213
att21 att38
att21 att189
att21 att129
att21 att81
att21 att117
att21 att9
att142 att46
att142 att130
att142 att118
att142 att10
att142 att202
att142 att190
att142 att106
att46 att70
att46 att34
att46 att166
att134 att2
att102 att54
att130 att118
att130 att10
att130 att202
att149 att125
att96 att216
att96 att24
att75 att15
att75 att99
att118 att70
att78 att198
att213 att189
att38 att50
att38 att26
att174 att54
att174 att66
att174 att30
att189 att86
att189 att129
att189 att69
att189 att81
att189 att153
att189 att117
att189 att9
att189 att45
att189 att105
att70 att34
att59 att47
att79 att151
att79 att139
att79 att187
att79 att127
att79 att103
att79 att43
att79 att91
att79 att19
att124 att64
att54 att114
att54 att30
att191 att119
att86 att194
att56 att44
att56 att152
att56 att50
att56 att188
att56 att26
att56 att104
att56 att140
att56 att146
att56 att194
att56 att8
att56 att2
att56 att133
att56 att1
att173 att125
att173 att113
att44 att152
att44 att188
att44 att200
att44 att212
att44 att1
att139 att103
att139 att43
att139 att31
att139 att199
att139 att7
att216 att204
att216 att36
att216 att12
att216 att180
att216 att108
att129 att69
att152 att140
att69 att153
att81 att45
att153 att141
att41 att53
att204 att12
att13 att157
att114 att6
att114 att186
att10 att190
att64 att184
att200 att104
att9 att146
att9 att141
att9 att177
att9 att37
att9 att133
att9 att109
att9 att181
att3 att183
att3 att147
att3 att123
att3 att135
att3 att111
att45 att105
att45 att177
att45 att93
att45 att201
att45 att193
att45 att37
att45 att97
att140 att8
att30 att6
att183 att147
att183 att123
att202 att166
att202 att106
att202 att82
att24 att84
att24 att36
att147 att135
att8 att212
att166 att82
att187 att127
att187 att115
att127 att115
att105 att93
att106 att154
att82 att154
att82 att22
att135 att111
att135 att207
att154 att22
att154 att94
att111 att207
att22 att94
att84 att48
att177 att165
att103 att195
att103 att109
att93 att201
att93 att165
att93 att193
att93 att33
att201 att33
att201 att57
att36 att180
att36 att72
att36 att132
att36 att144
att125 att113
att125 att185
att125 att65
att125 att29
att180 att48
att180 att72
att180 att192
att180 att108
att6 att186
att113 att185
att113 att53
att193 att97
att91 att31
att91 att19
att72 att132
att72 att192
att31 att199
att31 att67
att132 att144
att132 att120
att33 att57
att144 att120
att185 att65
att199 att7
att199 att67
att199 att55
att65 att29
att67 att55
att109 att181

View File

@@ -1,859 +0,0 @@
class att215
class att25
class att131
class att95
class att122
class att17
class att28
class att5
class att121
class att214
class att197
class att116
class att182
class att60
class att168
class att178
class att206
class att89
class att77
class att209
class att73
class att126
class att16
class att74
class att27
class att61
class att20
class att101
class att85
class att76
class att137
class att211
class att143
class att14
class att40
class att210
class att155
class att170
class att160
class att23
class att162
class att203
class att164
class att107
class att62
class att42
class att71
class att128
class att138
class att83
class att171
class att92
class att163
class att49
class att161
class att158
class att176
class att11
class att145
class att4
class att172
class att196
class att58
class att68
class att169
class att80
class att32
class att175
class att87
class att88
class att159
class att18
class att52
class att98
class att136
class att150
class att156
class att110
class att100
class att63
class att148
class att90
class att167
class att35
class att205
class att51
class att21
class att142
class att46
class att134
class att39
class att102
class att208
class att130
class att149
class att96
class att75
class att118
class att78
class att213
class att112
class att38
class att174
class att189
class att70
class att179
class att59
class att79
class att15
class att47
class att124
class att34
class att54
class att191
class att86
class att56
class att151
class att66
class att173
class att44
class att198
class att139
class att216
class att129
class att152
class att69
class att81
class att50
class att153
class att41
class att204
class att188
class att26
class att13
class att117
class att114
class att10
class att64
class att200
class att9
class att3
class att119
class att45
class att104
class att140
class att30
class att183
class att146
class att141
class att202
class att194
class att24
class att147
class att8
class att212
class att123
class att166
class att187
class att127
class att190
class att105
class att106
class att184
class att82
class att2
class att135
class att154
class att111
class att115
class att99
class att22
class att84
class att207
class att94
class att177
class att103
class att93
class att201
class att43
class att36
class att12
class att125
class att165
class att180
class att195
class att157
class att48
class att6
class att113
class att193
class att91
class att72
class att31
class att132
class att33
class att57
class att144
class att192
class att185
class att37
class att53
class att120
class att186
class att199
class att65
class att108
class att133
class att29
class att19
class att7
class att97
class att67
class att55
class att1
class att109
class att181
att215 att25
att215 att131
att215 att95
att215 att17
att215 att214
att215 att143
att25 att131
att25 att95
att25 att122
att25 att121
att25 att73
att25 att61
att25 att85
att25 att169
att25 att13
att25 att157
att131 att95
att131 att122
att131 att17
att131 att28
att131 att5
att131 att121
att131 att214
att131 att116
att131 att182
att131 att60
att131 att126
att131 att16
att131 att27
att131 att20
att131 att143
att131 att155
att95 att122
att95 att17
att95 att28
att95 att5
att95 att121
att95 att214
att95 att197
att95 att116
att95 att60
att95 att168
att95 att178
att95 att143
att95 att155
att95 att23
att95 att71
att95 att167
att122 att28
att122 att182
att122 att170
att17 att5
att17 att197
att17 att89
att17 att77
att17 att209
att17 att137
att17 att161
att17 att41
att28 att206
att28 att16
att28 att76
att28 att40
att28 att210
att28 att160
att28 att172
att28 att124
att28 att64
att5 att197
att5 att89
att5 att77
att5 att209
att5 att101
att121 att73
att121 att61
att214 att116
att214 att178
att214 att206
att214 att58
att214 att142
att214 att46
att197 att89
att197 att209
att197 att101
att116 att182
att116 att60
att116 att168
att116 att178
att116 att206
att116 att73
att116 att126
att116 att16
att116 att74
att116 att27
att116 att20
att116 att211
att116 att164
att116 att128
att116 att92
att116 att176
att116 att68
att182 att27
att182 att14
att60 att168
att60 att156
att60 att96
att168 att126
att168 att156
att168 att96
att168 att216
att178 att20
att178 att211
att178 att58
att178 att142
att178 att130
att178 att166
att206 att74
att206 att170
att206 att158
att89 att77
att89 att137
att89 att149
att89 att173
att77 att137
att77 att161
att77 att149
att209 att101
att209 att41
att73 att61
att73 att85
att73 att13
att73 att157
att126 att162
att126 att138
att126 att18
att126 att150
att16 att74
att16 att76
att16 att40
att16 att4
att16 att196
att16 att136
att74 att14
att74 att62
att27 att171
att27 att63
att61 att85
att61 att169
att20 att76
att20 att211
att20 att210
att20 att170
att20 att164
att20 att128
att20 att176
att20 att80
att101 att41
att85 att169
att85 att13
att76 att14
att76 att40
att76 att160
att76 att4
att76 att52
att137 att161
att137 att149
att137 att173
att137 att125
att211 att210
att211 att162
att211 att164
att211 att62
att211 att42
att211 att171
att211 att163
att211 att175
att211 att79
att211 att151
att211 att43
att143 att155
att143 att23
att143 att203
att143 att71
att143 att83
att143 att11
att14 att98
att40 att160
att40 att4
att40 att196
att40 att88
att40 att52
att210 att162
att210 att42
att210 att114
att155 att23
att155 att203
att155 att107
att155 att11
att170 att158
att160 att52
att160 att124
att23 att203
att23 att107
att23 att71
att23 att11
att162 att138
att162 att18
att162 att150
att162 att90
att162 att102
att162 att174
att162 att66
att203 att107
att203 att49
att203 att59
att203 att47
att203 att191
att203 att119
att164 att62
att164 att42
att164 att128
att164 att171
att164 att92
att164 att163
att164 att158
att164 att176
att164 att145
att164 att172
att164 att58
att164 att68
att164 att80
att164 att32
att164 att98
att164 att156
att164 att110
att164 att205
att164 att21
att164 att134
att164 att213
att164 att112
att164 att38
att164 att189
att164 att56
att164 att44
att164 att152
att164 att8
att107 att83
att107 att49
att107 att59
att107 att47
att107 att191
att42 att138
att42 att54
att42 att114
att71 att83
att71 att167
att71 att35
att71 att179
att128 att92
att128 att112
att138 att18
att138 att150
att83 att167
att83 att35
att171 att87
att171 att159
att171 att63
att171 att51
att171 att39
att171 att75
att92 att163
att92 att145
att92 att56
att163 att49
att163 att175
att163 att87
att163 att79
att163 att151
att163 att139
att163 att187
att163 att127
att163 att103
att163 att91
att49 att37
att161 att173
att161 att113
att176 att145
att176 att172
att176 att68
att176 att80
att176 att32
att176 att175
att176 att98
att176 att110
att176 att205
att176 att21
att176 att134
att176 att213
att176 att56
att4 att196
att4 att88
att4 att136
att4 att100
att4 att148
att4 att208
att172 att112
att172 att184
att196 att88
att196 att136
att196 att100
att196 att148
att196 att208
att58 att142
att58 att46
att58 att34
att68 att32
att80 att38
att32 att110
att32 att21
att32 att44
att32 att200
att175 att87
att175 att159
att175 att79
att175 att187
att175 att115
att87 att159
att87 att63
att87 att51
att87 att75
att87 att15
att87 att99
att159 att75
att159 att15
att159 att195
att18 att90
att18 att102
att18 att78
att18 att198
att52 att124
att52 att64
att98 att86
att136 att100
att136 att208
att150 att90
att150 att174
att150 att66
att156 att205
att156 att96
att156 att216
att156 att204
att156 att24
att156 att84
att156 att36
att156 att12
att156 att108
att100 att148
att63 att51
att63 att39
att63 att3
att63 att183
att63 att147
att90 att102
att90 att78
att167 att35
att167 att179
att35 att179
att51 att39
att51 att3
att51 att183
att21 att134
att21 att213
att21 att38
att21 att189
att21 att129
att21 att81
att21 att153
att21 att117
att21 att9
att142 att46
att142 att130
att142 att118
att142 att70
att142 att10
att142 att202
att142 att190
att142 att106
att46 att130
att46 att118
att46 att70
att46 att34
att46 att166
att46 att82
att134 att2
att39 att3
att102 att78
att102 att174
att102 att54
att102 att198
att130 att118
att130 att10
att130 att202
att130 att190
att130 att106
att149 att125
att96 att216
att96 att204
att96 att24
att75 att15
att75 att99
att118 att70
att118 att10
att118 att202
att78 att198
att213 att189
att213 att129
att213 att69
att213 att81
att38 att50
att38 att26
att174 att54
att174 att66
att174 att30
att189 att86
att189 att129
att189 att69
att189 att81
att189 att153
att189 att117
att189 att9
att189 att45
att189 att141
att189 att105
att70 att34
att70 att154
att179 att59
att59 att47
att59 att191
att59 att119
att79 att86
att79 att151
att79 att139
att79 att187
att79 att127
att79 att103
att79 att43
att79 att193
att79 att91
att79 att19
att124 att64
att54 att114
att54 att30
att54 att6
att191 att119
att86 att194
att56 att44
att56 att152
att56 att50
att56 att188
att56 att26
att56 att200
att56 att104
att56 att140
att56 att146
att56 att194
att56 att8
att56 att2
att56 att133
att56 att1
att151 att139
att66 att30
att173 att125
att173 att113
att173 att185
att44 att152
att44 att50
att44 att188
att44 att200
att44 att104
att44 att140
att44 att194
att44 att212
att44 att1
att139 att26
att139 att99
att139 att103
att139 att43
att139 att91
att139 att31
att139 att199
att139 att7
att216 att204
att216 att24
att216 att84
att216 att36
att216 att12
att216 att180
att216 att108
att129 att69
att152 att188
att152 att140
att69 att153
att69 att9
att69 att177
att81 att45
att81 att105
att153 att117
att153 att141
att41 att53
att204 att12
att204 att180
att188 att146
att188 att212
att13 att157
att114 att6
att114 att186
att10 att190
att64 att184
att200 att104
att9 att45
att9 att146
att9 att141
att9 att177
att9 att37
att9 att133
att9 att109
att9 att181
att3 att183
att3 att147
att3 att123
att3 att135
att3 att111
att45 att105
att45 att177
att45 att93
att45 att201
att45 att165
att45 att193
att45 att33
att45 att37
att45 att133
att45 att97
att140 att8
att30 att6
att30 att186
att183 att147
att183 att123
att183 att135
att146 att2
att202 att166
att202 att106
att202 att82
att24 att84
att24 att36
att24 att132
att147 att123
att147 att135
att147 att111
att147 att207
att8 att212
att166 att82
att166 att22
att166 att94
att187 att127
att187 att115
att127 att115
att105 att184
att105 att93
att105 att201
att106 att154
att82 att154
att82 att22
att135 att111
att135 att207
att154 att22
att154 att94
att111 att207
att99 att195
att22 att94
att84 att48
att177 att93
att177 att165
att177 att181
att103 att195
att103 att97
att103 att109
att93 att201
att93 att165
att93 att193
att93 att33
att93 att57
att201 att33
att201 att57
att43 att31
att36 att180
att36 att48
att36 att72
att36 att132
att36 att144
att125 att113
att125 att185
att125 att65
att125 att29
att180 att48
att180 att72
att180 att192
att180 att108
att48 att72
att6 att186
att113 att185
att113 att53
att113 att65
att193 att97
att91 att31
att91 att199
att91 att19
att72 att132
att72 att144
att72 att192
att72 att120
att31 att199
att31 att7
att31 att67
att31 att55
att31 att1
att132 att144
att132 att120
att33 att57
att144 att192
att144 att120
att185 att53
att185 att65
att185 att29
att199 att19
att199 att7
att199 att67
att199 att55
att199 att109
att65 att29
att7 att67
att67 att55
att109 att181

View File

@@ -1,859 +0,0 @@
class att215
class att25
class att131
class att95
class att122
class att17
class att28
class att5
class att121
class att214
class att197
class att116
class att182
class att60
class att168
class att178
class att206
class att89
class att77
class att209
class att73
class att126
class att16
class att74
class att27
class att61
class att20
class att101
class att85
class att76
class att137
class att211
class att143
class att14
class att40
class att210
class att155
class att170
class att160
class att23
class att162
class att203
class att164
class att107
class att62
class att42
class att71
class att128
class att138
class att83
class att171
class att92
class att163
class att49
class att161
class att158
class att176
class att11
class att145
class att4
class att172
class att196
class att58
class att68
class att169
class att80
class att32
class att175
class att87
class att88
class att159
class att18
class att52
class att98
class att136
class att150
class att156
class att110
class att100
class att63
class att148
class att90
class att167
class att35
class att205
class att51
class att21
class att142
class att46
class att134
class att39
class att102
class att208
class att130
class att149
class att96
class att75
class att118
class att78
class att213
class att112
class att38
class att174
class att189
class att70
class att179
class att59
class att79
class att15
class att47
class att124
class att34
class att54
class att191
class att86
class att56
class att151
class att66
class att173
class att44
class att198
class att139
class att216
class att129
class att152
class att69
class att81
class att50
class att153
class att41
class att204
class att188
class att26
class att13
class att117
class att114
class att10
class att64
class att200
class att9
class att3
class att119
class att45
class att104
class att140
class att30
class att183
class att146
class att141
class att202
class att194
class att24
class att147
class att8
class att212
class att123
class att166
class att187
class att127
class att190
class att105
class att106
class att184
class att82
class att2
class att135
class att154
class att111
class att115
class att99
class att22
class att84
class att207
class att94
class att177
class att103
class att93
class att201
class att43
class att36
class att12
class att125
class att165
class att180
class att195
class att157
class att48
class att6
class att113
class att193
class att91
class att72
class att31
class att132
class att33
class att57
class att144
class att192
class att185
class att37
class att53
class att120
class att186
class att199
class att65
class att108
class att133
class att29
class att19
class att7
class att97
class att67
class att55
class att1
class att109
class att181
att215 att25
att215 att131
att215 att95
att215 att17
att215 att214
att215 att143
att25 att131
att25 att95
att25 att122
att25 att121
att25 att73
att25 att61
att25 att85
att25 att169
att25 att13
att25 att157
att131 att95
att131 att122
att131 att17
att131 att28
att131 att5
att131 att121
att131 att214
att131 att116
att131 att182
att131 att60
att131 att126
att131 att16
att131 att27
att131 att20
att131 att143
att131 att155
att95 att122
att95 att17
att95 att28
att95 att5
att95 att121
att95 att214
att95 att197
att95 att116
att95 att60
att95 att168
att95 att178
att95 att143
att95 att155
att95 att23
att95 att71
att95 att167
att122 att28
att122 att182
att122 att170
att17 att5
att17 att197
att17 att89
att17 att77
att17 att209
att17 att137
att17 att161
att17 att41
att28 att206
att28 att16
att28 att76
att28 att40
att28 att210
att28 att160
att28 att172
att28 att124
att28 att64
att5 att197
att5 att89
att5 att77
att5 att209
att5 att101
att121 att73
att121 att61
att214 att116
att214 att178
att214 att206
att214 att58
att214 att142
att214 att46
att197 att89
att197 att209
att197 att101
att116 att182
att116 att60
att116 att168
att116 att178
att116 att206
att116 att73
att116 att126
att116 att16
att116 att74
att116 att27
att116 att20
att116 att211
att116 att164
att116 att128
att116 att92
att116 att176
att116 att68
att182 att27
att182 att14
att60 att168
att60 att156
att60 att96
att168 att126
att168 att156
att168 att96
att168 att216
att178 att20
att178 att211
att178 att58
att178 att142
att178 att130
att178 att166
att206 att74
att206 att170
att206 att158
att89 att77
att89 att137
att89 att149
att89 att173
att77 att137
att77 att161
att77 att149
att209 att101
att209 att41
att73 att61
att73 att85
att73 att13
att73 att157
att126 att162
att126 att138
att126 att18
att126 att150
att16 att74
att16 att76
att16 att40
att16 att4
att16 att196
att16 att136
att74 att14
att74 att62
att27 att171
att27 att63
att61 att85
att61 att169
att20 att76
att20 att211
att20 att210
att20 att170
att20 att164
att20 att128
att20 att176
att20 att80
att101 att41
att85 att169
att85 att13
att76 att14
att76 att40
att76 att160
att76 att4
att76 att52
att137 att161
att137 att149
att137 att173
att137 att125
att211 att210
att211 att162
att211 att164
att211 att62
att211 att42
att211 att171
att211 att163
att211 att175
att211 att79
att211 att151
att211 att43
att143 att155
att143 att23
att143 att203
att143 att71
att143 att83
att143 att11
att14 att98
att40 att160
att40 att4
att40 att196
att40 att88
att40 att52
att210 att162
att210 att42
att210 att114
att155 att23
att155 att203
att155 att107
att155 att11
att170 att158
att160 att52
att160 att124
att23 att203
att23 att107
att23 att71
att23 att11
att162 att138
att162 att18
att162 att150
att162 att90
att162 att102
att162 att174
att162 att66
att203 att107
att203 att49
att203 att59
att203 att47
att203 att191
att203 att119
att164 att62
att164 att42
att164 att128
att164 att171
att164 att92
att164 att163
att164 att158
att164 att176
att164 att145
att164 att172
att164 att58
att164 att68
att164 att80
att164 att32
att164 att98
att164 att156
att164 att110
att164 att205
att164 att21
att164 att134
att164 att213
att164 att112
att164 att38
att164 att189
att164 att56
att164 att44
att164 att152
att164 att8
att107 att83
att107 att49
att107 att59
att107 att47
att107 att191
att42 att138
att42 att54
att42 att114
att71 att83
att71 att167
att71 att35
att71 att179
att128 att92
att128 att112
att138 att18
att138 att150
att83 att167
att83 att35
att171 att87
att171 att159
att171 att63
att171 att51
att171 att39
att171 att75
att92 att163
att92 att145
att92 att56
att163 att49
att163 att175
att163 att87
att163 att79
att163 att151
att163 att139
att163 att187
att163 att127
att163 att103
att163 att91
att49 att37
att161 att173
att161 att113
att176 att145
att176 att172
att176 att68
att176 att80
att176 att32
att176 att175
att176 att98
att176 att110
att176 att205
att176 att21
att176 att134
att176 att213
att176 att56
att4 att196
att4 att88
att4 att136
att4 att100
att4 att148
att4 att208
att172 att112
att172 att184
att196 att88
att196 att136
att196 att100
att196 att148
att196 att208
att58 att142
att58 att46
att58 att34
att68 att32
att80 att38
att32 att110
att32 att21
att32 att44
att32 att200
att175 att87
att175 att159
att175 att79
att175 att187
att175 att115
att87 att159
att87 att63
att87 att51
att87 att75
att87 att15
att87 att99
att159 att75
att159 att15
att159 att195
att18 att90
att18 att102
att18 att78
att18 att198
att52 att124
att52 att64
att98 att86
att136 att100
att136 att208
att150 att90
att150 att174
att150 att66
att156 att205
att156 att96
att156 att216
att156 att204
att156 att24
att156 att84
att156 att36
att156 att12
att156 att108
att100 att148
att63 att51
att63 att39
att63 att3
att63 att183
att63 att147
att90 att102
att90 att78
att167 att35
att167 att179
att35 att179
att51 att39
att51 att3
att51 att183
att21 att134
att21 att213
att21 att38
att21 att189
att21 att129
att21 att81
att21 att153
att21 att117
att21 att9
att142 att46
att142 att130
att142 att118
att142 att70
att142 att10
att142 att202
att142 att190
att142 att106
att46 att130
att46 att118
att46 att70
att46 att34
att46 att166
att46 att82
att134 att2
att39 att3
att102 att78
att102 att174
att102 att54
att102 att198
att130 att118
att130 att10
att130 att202
att130 att190
att130 att106
att149 att125
att96 att216
att96 att204
att96 att24
att75 att15
att75 att99
att118 att70
att118 att10
att118 att202
att78 att198
att213 att189
att213 att129
att213 att69
att213 att81
att38 att50
att38 att26
att174 att54
att174 att66
att174 att30
att189 att86
att189 att129
att189 att69
att189 att81
att189 att153
att189 att117
att189 att9
att189 att45
att189 att141
att189 att105
att70 att34
att70 att154
att179 att59
att59 att47
att59 att191
att59 att119
att79 att86
att79 att151
att79 att139
att79 att187
att79 att127
att79 att103
att79 att43
att79 att193
att79 att91
att79 att19
att124 att64
att54 att114
att54 att30
att54 att6
att191 att119
att86 att194
att56 att44
att56 att152
att56 att50
att56 att188
att56 att26
att56 att200
att56 att104
att56 att140
att56 att146
att56 att194
att56 att8
att56 att2
att56 att133
att56 att1
att151 att139
att66 att30
att173 att125
att173 att113
att173 att185
att44 att152
att44 att50
att44 att188
att44 att200
att44 att104
att44 att140
att44 att194
att44 att212
att44 att1
att139 att26
att139 att99
att139 att103
att139 att43
att139 att91
att139 att31
att139 att199
att139 att7
att216 att204
att216 att24
att216 att84
att216 att36
att216 att12
att216 att180
att216 att108
att129 att69
att152 att188
att152 att140
att69 att153
att69 att9
att69 att177
att81 att45
att81 att105
att153 att117
att153 att141
att41 att53
att204 att12
att204 att180
att188 att146
att188 att212
att13 att157
att114 att6
att114 att186
att10 att190
att64 att184
att200 att104
att9 att45
att9 att146
att9 att141
att9 att177
att9 att37
att9 att133
att9 att109
att9 att181
att3 att183
att3 att147
att3 att123
att3 att135
att3 att111
att45 att105
att45 att177
att45 att93
att45 att201
att45 att165
att45 att193
att45 att33
att45 att37
att45 att133
att45 att97
att140 att8
att30 att6
att30 att186
att183 att147
att183 att123
att183 att135
att146 att2
att202 att166
att202 att106
att202 att82
att24 att84
att24 att36
att24 att132
att147 att123
att147 att135
att147 att111
att147 att207
att8 att212
att166 att82
att166 att22
att166 att94
att187 att127
att187 att115
att127 att115
att105 att184
att105 att93
att105 att201
att106 att154
att82 att154
att82 att22
att135 att111
att135 att207
att154 att22
att154 att94
att111 att207
att99 att195
att22 att94
att84 att48
att177 att93
att177 att165
att177 att181
att103 att195
att103 att97
att103 att109
att93 att201
att93 att165
att93 att193
att93 att33
att93 att57
att201 att33
att201 att57
att43 att31
att36 att180
att36 att48
att36 att72
att36 att132
att36 att144
att125 att113
att125 att185
att125 att65
att125 att29
att180 att48
att180 att72
att180 att192
att180 att108
att48 att72
att6 att186
att113 att185
att113 att53
att113 att65
att193 att97
att91 att31
att91 att199
att91 att19
att72 att132
att72 att144
att72 att192
att72 att120
att31 att199
att31 att7
att31 att67
att31 att55
att31 att1
att132 att144
att132 att120
att33 att57
att144 att192
att144 att120
att185 att53
att185 att65
att185 att29
att199 att19
att199 att7
att199 att67
att199 att55
att199 att109
att65 att29
att7 att67
att67 att55
att109 att181

View File

@@ -1,4 +1,4 @@
filter = src/
exclude-directories = build/lib/
exclude-directories = build_debug/lib/
print-summary = yes
sort-percentage = yes

Submodule lib/argparse deleted from b0930ab028

1
lib/folding Submodule

Submodule lib/folding added at 37316a54e0

View File

@@ -1,8 +0,0 @@
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,235 +0,0 @@
#include <iostream>
#include <torch/torch.h>
#include <std::string>
#include <map>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "ArffFiles.h"v
#include "BayesMetrics.h"
#include "CPPFImdlp.h"
#include "Folding.h"
#include "Models.h"
#include "modelRegister.h"
#include <fstream>
const std::string PATH = "../../data/";
pair<std::vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<std::string> features)
{
std::vector<mdlp::labels_t>Xd;
map<std::string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
Xd.push_back(xd);
}
return { Xd, maxes };
}
bool file_exists(const std::std::std::string& name)
{
if (FILE* file = fopen(name.c_str(), "r")) {
fclose(file);
return true;
} else {
return false;
}
}
pair<std::vector<std::vector<int>>, std::vector<int>> extract_indices(std::vector<int> indices, std::vector<std::vector<int>> X, std::vector<int> y)
{
std::vector<std::vector<int>> Xr; // nxm
std::vector<int> yr;
for (int col = 0; col < X.size(); ++col) {
Xr.push_back(std::vector<int>());
}
for (auto index : indices) {
for (int col = 0; col < X.size(); ++col) {
Xr[col].push_back(X[col][index]);
}
yr.push_back(y[index]);
}
return { Xr, yr };
}
int main(int argc, char** argv)
{
map<std::string, bool> datasets = {
{"diabetes", true},
{"ecoli", true},
{"glass", true},
{"iris", true},
{"kdd_JapaneseVowels", false},
{"letter", true},
{"liver-disorders", true},
{"mfeat-factors", true},
};
auto valid_datasets = std::vector<std::string>();
transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
[](const pair<std::string, bool>& pair) { return pair.first; });
argparse::ArgumentParser program("BayesNetSample");
program.add_argument("-d", "--dataset")
.help("Dataset file name")
.action([valid_datasets](const std::std::std::string& value) {
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
return value;
}
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
}
);
program.add_argument("-p", "--path")
.help(" folder where the data files are located, default")
.default_value(std::string{ PATH }
);
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->tostd::string())
.action([](const std::std::std::string& value) {
static const std::vector<std::string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
throw runtime_error("Model must be one of " + platform::Models::instance()->tostd::string());
}
);
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const std::std::string& value) {
try {
auto k = stoi(value);
if (k < 2) {
throw runtime_error("Number of folds must be greater than 1");
}
return k;
}
catch (const runtime_error& err) {
throw runtime_error(err.what());
}
catch (...) {
throw runtime_error("Number of folds must be an integer");
}});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors, dump_cpt;
std::string model_name, file_name, path, complete_file_name;
int nFolds, seed;
try {
program.parse_args(argc, argv);
file_name = program.get<std::string>("dataset");
path = program.get<std::string>("path");
model_name = program.get<std::string>("model");
complete_file_name = path + file_name + ".arff";
stratified = program.get<bool>("stratified");
tensors = program.get<bool>("tensors");
nFolds = program.get<int>("folds");
seed = program.get<int>("seed");
dump_cpt = program.get<bool>("dumpcpt");
class_last = datasets[file_name];
if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
}
}
catch (const exception& err) {
cerr << err.what() << std::endl;
cerr << program;
exit(1);
}
/*
* Begin Processing
*/
auto handler = ArffFiles();
handler.load(complete_file_name, class_last);
// Get Dataset X, y
std::vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
std::vector<std::string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features),
[](const pair<std::string, std::string>& item) { return item.first; });
// Discretize Dataset
auto [Xd, maxes] = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1;
map<std::string, std::vector<int>> states;
for (auto feature : features) {
states[feature] = std::vector<int>(maxes[feature]);
}
states[className] = std::vector<int>(maxes[className]);
auto clf = platform::Models::instance()->create(model_name);
clf->fit(Xd, y, features, className, states);
if (dump_cpt) {
std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt();
}
auto lines = clf->show();
for (auto line : lines) {
std::cout << line << std::endl;
}
std::cout << "--- Topological Order ---" << std::endl;
auto order = clf->topological_order();
for (auto name : order) {
std::cout << name << ", ";
}
std::cout << "end." << std::endl;
auto score = clf->score(Xd, y);
std::cout << "Score: " << score << std::endl;
auto graph = clf->graph();
auto dot_file = model_name + "_" + file_name;
ofstream file(dot_file + ".dot");
file << graph;
file.close();
std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
std::string stratified_std::string = stratified ? " Stratified" : "";
std::cout << nFolds << " Folds" << stratified_std::string << " Cross validation" << std::endl;
std::cout << "==========================================" << std::endl;
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
torch::Tensor yt = torch::tensor(y, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
}
float total_score = 0, total_score_train = 0, score_train, score_test;
platform::Fold* fold;
if (stratified)
fold = new platform::StratifiedKFold(nFolds, y, seed);
else
fold = new platform::KFold(nFolds, y.size(), seed);
for (auto i = 0; i < nFolds; ++i) {
auto [train, test] = fold->getFold(i);
std::cout << "Fold: " << i + 1 << std::endl;
if (tensors) {
auto ttrain = torch::tensor(train, torch::kInt64);
auto ttest = torch::tensor(test, torch::kInt64);
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
torch::Tensor ytraint = yt.index({ ttrain });
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
torch::Tensor ytestt = yt.index({ ttest });
clf->fit(Xtraint, ytraint, features, className, states);
auto temp = clf->predict(Xtraint);
score_train = clf->score(Xtraint, ytraint);
score_test = clf->score(Xtestt, ytestt);
} else {
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
auto [Xtest, ytest] = extract_indices(test, Xd, y);
clf->fit(Xtrain, ytrain, features, className, states);
score_train = clf->score(Xtrain, ytrain);
score_test = clf->score(Xtest, ytest);
}
if (dump_cpt) {
std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt();
}
total_score_train += score_train;
total_score += score_test;
std::cout << "Score Train: " << score_train << std::endl;
std::cout << "Score Test : " << score_test << std::endl;
std::cout << "-------------------------------------------------------------------------------" << std::endl;
}
std::cout << "**********************************************************************************" << std::endl;
std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl;
std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0;
}

View File

@@ -6,8 +6,6 @@
namespace bayesnet {
enum status_t { NORMAL, WARNING, ERROR };
class BaseClassifier {
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
public:
// X is nxm std::vector, y is nx1 std::vector
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
@@ -28,8 +26,13 @@ namespace bayesnet {
std::vector<std::string> virtual graph(const std::string& title = "") const = 0;
virtual std::string getVersion() = 0;
std::vector<std::string> virtual topological_order() = 0;
std::vector<std::string> virtual getNotes() const = 0;
void virtual dump_cpt()const = 0;
virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0;
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
std::vector<std::string> validHyperparameters;
};
}
#endif

View File

@@ -2,15 +2,17 @@
#include <functional>
#include <limits.h>
#include "BoostAODE.h"
#include "Colors.h"
#include "Folding.h"
#include "Paths.h"
#include "CFS.h"
#include "FCBF.h"
#include "IWSS.h"
#include "folding.hpp"
namespace bayesnet {
BoostAODE::BoostAODE() : Ensemble() {}
BoostAODE::BoostAODE() : Ensemble()
{
validHyperparameters = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features", "tolerance" };
}
void BoostAODE::buildModel(const torch::Tensor& weights)
{
// Models shall be built in trainModel
@@ -20,7 +22,7 @@ namespace bayesnet {
auto y_ = dataset.index({ -1, "..." });
if (convergence) {
// Prepare train & validation sets from train data
auto fold = platform::StratifiedKFold(5, y_, 271);
auto fold = folding::StratifiedKFold(5, y_, 271);
dataset_ = torch::clone(dataset);
// save input dataset
auto [train, test] = fold.getFold(0);
@@ -43,25 +45,32 @@ namespace bayesnet {
y_train = y_;
}
}
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features" };
checkHyperparameters(validKeys, hyperparameters);
auto hyperparameters = hyperparameters_;
if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"];
hyperparameters.erase("repeatSparent");
}
if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"];
hyperparameters.erase("maxModels");
}
if (hyperparameters.contains("ascending")) {
ascending = hyperparameters["ascending"];
hyperparameters.erase("ascending");
}
if (hyperparameters.contains("convergence")) {
convergence = hyperparameters["convergence"];
hyperparameters.erase("convergence");
}
if (hyperparameters.contains("threshold")) {
threshold = hyperparameters["threshold"];
hyperparameters.erase("threshold");
}
if (hyperparameters.contains("tolerance")) {
tolerance = hyperparameters["tolerance"];
hyperparameters.erase("tolerance");
}
if (hyperparameters.contains("select_features")) {
auto selectedAlgorithm = hyperparameters["select_features"];
@@ -71,6 +80,10 @@ namespace bayesnet {
if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
throw std::invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]");
}
hyperparameters.erase("select_features");
}
if (!hyperparameters.empty()) {
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
}
}
std::unordered_set<int> BoostAODE::initializeModels()
@@ -102,16 +115,15 @@ namespace bayesnet {
significanceModels.push_back(1.0);
n_models++;
}
notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + algorithm);
delete featureSelector;
return featuresUsed;
}
void BoostAODE::trainModel(const torch::Tensor& weights)
{
std::unordered_set<int> featuresUsed;
int tolerance = 5; // number of times the accuracy can be lower than the threshold
if (selectFeatures) {
featuresUsed = initializeModels();
tolerance = 0; // Remove tolerance if features are selected
}
if (maxModels == 0)
maxModels = .1 * n > 10 ? .1 * n : n;
@@ -186,8 +198,10 @@ namespace bayesnet {
exitCondition = n_models >= maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance;
}
if (featuresUsed.size() != features.size()) {
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
status = WARNING;
}
notes.push_back("Number of models: " + std::to_string(n_models));
}
std::vector<std::string> BoostAODE::graph(const std::string& title) const
{

View File

@@ -8,9 +8,9 @@ namespace bayesnet {
class BoostAODE : public Ensemble {
public:
BoostAODE();
virtual ~BoostAODE() {};
virtual ~BoostAODE() = default;
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
protected:
void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override;
@@ -21,6 +21,7 @@ namespace bayesnet {
// Hyperparameters
bool repeatSparent = false; // if true, a feature can be selected more than once
int maxModels = 0;
int tolerance = 0;
bool ascending = false; //Process KBest features ascending or descending order
bool convergence = false; //if true, stop when the model does not improve
bool selectFeatures = false; // if true, use feature selection

View File

@@ -1,12 +1,13 @@
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
include_directories(${Python3_INCLUDE_DIRS})
include_directories(
${BayesNet_SOURCE_DIR}/lib/mdlp
${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR}/src/BayesNet
${CMAKE_BINARY_DIR}/configured_files/include
)
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc )
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")

View File

@@ -153,18 +153,8 @@ namespace bayesnet {
{
model.dump_cpt();
}
void Classifier::checkHyperparameters(const std::vector<std::string>& validKeys, nlohmann::json& hyperparameters)
void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
{
for (const auto& item : hyperparameters.items()) {
if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid");
}
}
}
void Classifier::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid, default is no hyperparameters
const std::vector<std::string> validKeys = { };
checkHyperparameters(validKeys, hyperparameters);
//For classifiers that don't have hyperparameters
}
}

View File

@@ -19,10 +19,10 @@ namespace bayesnet {
std::map<std::string, std::vector<int>> states;
torch::Tensor dataset; // (n+1)xm tensor
status_t status = NORMAL;
std::vector<std::string> notes; // Used to store messages occurred during the fit process
void checkFitParameters();
virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override;
void checkHyperparameters(const std::vector<std::string>& validKeys, nlohmann::json& hyperparameters);
void buildDataset(torch::Tensor& y);
public:
Classifier(Network model);
@@ -37,14 +37,15 @@ namespace bayesnet {
int getNumberOfStates() const override;
torch::Tensor predict(torch::Tensor& X) override;
status_t getStatus() const override { return status; }
std::string getVersion() override { return "0.2.0"; };
std::string getVersion() override { return { project_version.begin(), project_version.end() }; };
std::vector<int> predict(std::vector<std::vector<int>>& X) override;
float score(torch::Tensor& X, torch::Tensor& y) override;
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
std::vector<std::string> show() const override;
std::vector<std::string> topological_order() override;
std::vector<std::string> getNotes() const override { return notes; }
void dump_cpt() const override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
};
}
#endif

View File

@@ -1,12 +1,13 @@
#include "KDB.h"
namespace bayesnet {
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {}
void KDB::setHyperparameters(nlohmann::json& hyperparameters)
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta)
{
validHyperparameters = { "k", "theta" };
}
void KDB::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "k", "theta" };
checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("k")) {
k = hyperparameters["k"];
}

View File

@@ -13,8 +13,8 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override;
public:
explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {};
void setHyperparameters(nlohmann::json& hyperparameters) override;
virtual ~KDB() = default;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& name = "KDB") const override;
};
}

View File

@@ -3,6 +3,7 @@
#include "Node.h"
#include <map>
#include <vector>
#include "config.h"
namespace bayesnet {
class Network {
@@ -56,7 +57,7 @@ namespace bayesnet {
std::vector<std::string> graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format
void initialize();
void dump_cpt() const;
inline std::string version() { return "0.2.0"; }
inline std::string version() { return { project_version.begin(), project_version.end() }; }
};
}
#endif

View File

@@ -10,7 +10,7 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override;
public:
explicit SPODE(int root);
virtual ~SPODE() {};
virtual ~SPODE() = default;
std::vector<std::string> graph(const std::string& name = "SPODE") const override;
};
}

View File

@@ -8,7 +8,7 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override;
public:
TAN();
virtual ~TAN() {};
virtual ~TAN() = default;
std::vector<std::string> graph(const std::string& name = "TAN") const override;
};
}

View File

@@ -1,343 +0,0 @@
#include <filesystem>
#include <set>
#include <fstream>
#include <iostream>
#include <sstream>
#include <algorithm>
#include "BestResults.h"
#include "Result.h"
#include "Colors.h"
#include "Statistics.h"
#include "BestResultsExcel.h"
#include "CLocale.h"
namespace fs = std::filesystem;
// function ftime_to_std::string, Code taken from
// https://stackoverflow.com/a/58237530/1389271
template <typename TP>
std::string ftime_to_string(TP tp)
{
auto sctp = std::chrono::time_point_cast<std::chrono::system_clock::duration>(tp - TP::clock::now()
+ std::chrono::system_clock::now());
auto tt = std::chrono::system_clock::to_time_t(sctp);
std::tm* gmt = std::gmtime(&tt);
std::stringstream buffer;
buffer << std::put_time(gmt, "%Y-%m-%d %H:%M");
return buffer.str();
}
namespace platform {
std::string BestResults::build()
{
auto files = loadResultFiles();
if (files.size() == 0) {
std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl;
exit(1);
}
json bests;
for (const auto& file : files) {
auto result = Result(path, file);
auto data = result.load();
for (auto const& item : data.at("results")) {
bool update = false;
// Check if results file contains only one dataset
auto datasetName = item.at("dataset").get<std::string>();
if (bests.contains(datasetName)) {
if (item.at("score").get<double>() > bests[datasetName].at(0).get<double>()) {
update = true;
}
} else {
update = true;
}
if (update) {
bests[datasetName] = { item.at("score").get<double>(), item.at("hyperparameters"), file };
}
}
}
std::string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
std::cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << std::endl;
}
std::ofstream file(bestFileName);
file << bests;
file.close();
return bestFileName;
}
std::string BestResults::bestResultFile()
{
return "best_results_" + score + "_" + model + ".json";
}
std::pair<std::string, std::string> getModelScore(std::string name)
{
// results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
int i = 0;
auto pos = name.find("_");
auto pos2 = name.find("_", pos + 1);
std::string score = name.substr(pos + 1, pos2 - pos - 1);
pos = name.find("_", pos2 + 1);
std::string model = name.substr(pos2 + 1, pos - pos2 - 1);
return { model, score };
}
std::vector<std::string> BestResults::loadResultFiles()
{
std::vector<std::string> files;
using std::filesystem::directory_iterator;
std::string fileModel, fileScore;
for (const auto& file : directory_iterator(path)) {
auto fileName = file.path().filename().string();
if (fileName.find(".json") != std::string::npos && fileName.find("results_") == 0) {
tie(fileModel, fileScore) = getModelScore(fileName);
if (score == fileScore && (model == fileModel || model == "any")) {
files.push_back(fileName);
}
}
}
return files;
}
json BestResults::loadFile(const std::string& fileName)
{
std::ifstream resultData(fileName);
if (resultData.is_open()) {
json data = json::parse(resultData);
return data;
}
throw std::invalid_argument("Unable to open result file. [" + fileName + "]");
}
std::vector<std::string> BestResults::getModels()
{
std::set<std::string> models;
std::vector<std::string> result;
auto files = loadResultFiles();
if (files.size() == 0) {
std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl;
exit(1);
}
std::string fileModel, fileScore;
for (const auto& file : files) {
// extract the model from the file name
tie(fileModel, fileScore) = getModelScore(file);
// add the model to the std::vector of models
models.insert(fileModel);
}
result = std::vector<std::string>(models.begin(), models.end());
return result;
}
std::vector<std::string> BestResults::getDatasets(json table)
{
std::vector<std::string> datasets;
for (const auto& dataset : table.items()) {
datasets.push_back(dataset.key());
}
return datasets;
}
void BestResults::buildAll()
{
auto models = getModels();
for (const auto& model : models) {
std::cout << "Building best results for model: " << model << std::endl;
this->model = model;
build();
}
model = "any";
}
void BestResults::listFile()
{
std::string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
} else {
std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl;
exit(1);
}
auto temp = ConfigLocale();
auto date = ftime_to_string(std::filesystem::last_write_time(bestFileName));
auto data = loadFile(bestFileName);
auto datasets = getDatasets(data);
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
int maxFileName = 0;
int maxHyper = 15;
for (auto const& item : data.items()) {
maxHyper = std::max(maxHyper, (int)item.value().at(1).dump().size());
maxFileName = std::max(maxFileName, (int)item.value().at(2).get<std::string>().size());
}
std::stringstream oss;
oss << Colors::GREEN() << "Best results for " << model << " as of " << date << std::endl;
std::cout << oss.str();
std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << "Dataset" << "Score " << std::setw(maxFileName) << "File" << " Hyperparameters" << std::endl;
std::cout << "=== " << std::string(maxDatasetName, '=') << " =========== " << std::string(maxFileName, '=') << " " << std::string(maxHyper, '=') << std::endl;
auto i = 0;
bool odd = true;
double total = 0;
for (auto const& item : data.items()) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
double value = item.value().at(0).get<double>();
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
std::cout << std::setw(maxDatasetName) << std::left << item.key() << " ";
std::cout << std::setw(11) << std::setprecision(9) << std::fixed << value << " ";
std::cout << std::setw(maxFileName) << item.value().at(2).get<std::string>() << " ";
std::cout << item.value().at(1) << " ";
std::cout << std::endl;
total += value;
odd = !odd;
}
std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ===========" << std::endl;
std::cout << std::setw(5 + maxDatasetName) << "Total.................. " << std::setw(11) << std::setprecision(8) << std::fixed << total << std::endl;
}
json BestResults::buildTableResults(std::vector<std::string> models)
{
json table;
auto maxDate = std::filesystem::file_time_type::max();
for (const auto& model : models) {
this->model = model;
std::string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
} else {
std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl;
exit(1);
}
auto dateWrite = std::filesystem::last_write_time(bestFileName);
if (dateWrite < maxDate) {
maxDate = dateWrite;
}
auto data = loadFile(bestFileName);
table[model] = data;
}
table["dateTable"] = ftime_to_string(maxDate);
return table;
}
void BestResults::printTableResults(std::vector<std::string> models, json table)
{
std::stringstream oss;
oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<std::string>() << std::endl;
std::cout << oss.str();
std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset");
for (const auto& model : models) {
std::cout << std::setw(maxModelName) << std::left << model << " ";
}
std::cout << std::endl;
std::cout << "=== " << std::string(maxDatasetName, '=') << " ";
for (const auto& model : models) {
std::cout << std::string(maxModelName, '=') << " ";
}
std::cout << std::endl;
auto i = 0;
bool odd = true;
std::map<std::string, double> totals;
int nDatasets = table.begin().value().size();
for (const auto& model : models) {
totals[model] = 0.0;
}
auto datasets = getDatasets(table.begin().value());
for (auto const& dataset : datasets) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
std::cout << std::setw(maxDatasetName) << std::left << dataset << " ";
double maxValue = 0;
// Find out the max value for this dataset
for (const auto& model : models) {
double value = table[model].at(dataset).at(0).get<double>();
if (value > maxValue) {
maxValue = value;
}
}
// Print the row with red colors on max values
for (const auto& model : models) {
std::string efectiveColor = color;
double value = table[model].at(dataset).at(0).get<double>();
if (value == maxValue) {
efectiveColor = Colors::RED();
}
totals[model] += value;
std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " ";
}
std::cout << std::endl;
odd = !odd;
}
std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ";
for (const auto& model : models) {
std::cout << std::string(maxModelName, '=') << " ";
}
std::cout << std::endl;
std::cout << Colors::GREEN() << std::setw(5 + maxDatasetName) << " Totals...................";
double max = 0.0;
for (const auto& total : totals) {
if (total.second > max) {
max = total.second;
}
}
for (const auto& model : models) {
std::string efectiveColor = Colors::GREEN();
if (totals[model] == max) {
efectiveColor = Colors::RED();
}
std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << totals[model] << " ";
}
std::cout << std::endl;
}
void BestResults::reportSingle(bool excel)
{
listFile();
if (excel) {
auto models = getModels();
// Build the table of results
json table = buildTableResults(models);
std::vector<std::string> datasets = getDatasets(table.begin().value());
BestResultsExcel excel(score, datasets);
excel.reportSingle(model, path + bestResultFile());
messageExcelFile(excel.getFileName());
}
}
void BestResults::reportAll(bool excel)
{
auto models = getModels();
// Build the table of results
json table = buildTableResults(models);
std::vector<std::string> datasets = getDatasets(table.begin().value());
maxModelName = (*max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
maxModelName = std::max(12, maxModelName);
maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
maxDatasetName = std::max(25, maxDatasetName);
// Print the table of results
printTableResults(models, table);
// Compute the Friedman test
std::map<std::string, std::map<std::string, float>> ranksModels;
if (friedman) {
Statistics stats(models, datasets, table, significance);
auto result = stats.friedmanTest();
stats.postHocHolmTest(result);
ranksModels = stats.getRanks();
}
if (excel) {
BestResultsExcel excel(score, datasets);
excel.reportAll(models, table, ranksModels, friedman, significance);
if (friedman) {
int idx = -1;
double min = 2000;
// Find out the control model
auto totals = std::vector<double>(models.size(), 0.0);
for (const auto& dataset : datasets) {
for (int i = 0; i < models.size(); ++i) {
totals[i] += ranksModels[dataset][models[i]];
}
}
for (int i = 0; i < models.size(); ++i) {
if (totals[i] < min) {
min = totals[i];
idx = i;
}
}
model = models.at(idx);
excel.reportSingle(model, path + bestResultFile());
}
messageExcelFile(excel.getFileName());
}
}
void BestResults::messageExcelFile(const std::string& fileName)
{
std::cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << std::endl;
}
}

View File

@@ -1,36 +0,0 @@
#ifndef BESTRESULTS_H
#define BESTRESULTS_H
#include <string>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
class BestResults {
public:
explicit BestResults(const std::string& path, const std::string& score, const std::string& model, bool friedman, double significance = 0.05)
: path(path), score(score), model(model), friedman(friedman), significance(significance)
{
}
std::string build();
void reportSingle(bool excel);
void reportAll(bool excel);
void buildAll();
private:
std::vector<std::string> getModels();
std::vector<std::string> getDatasets(json table);
std::vector<std::string> loadResultFiles();
void messageExcelFile(const std::string& fileName);
json buildTableResults(std::vector<std::string> models);
void printTableResults(std::vector<std::string> models, json table);
std::string bestResultFile();
json loadFile(const std::string& fileName);
void listFile();
std::string path;
std::string score;
std::string model;
bool friedman;
double significance;
int maxModelName = 0;
int maxDatasetName = 0;
};
}
#endif //BESTRESULTS_H

View File

@@ -1,300 +0,0 @@
#include <sstream>
#include "BestResultsExcel.h"
#include "Paths.h"
#include <map>
#include <nlohmann/json.hpp>
#include "Statistics.h"
#include "ReportExcel.h"
namespace platform {
json loadResultData(const std::string& fileName)
{
json data;
std::ifstream resultData(fileName);
if (resultData.is_open()) {
data = json::parse(resultData);
} else {
throw std::invalid_argument("Unable to open result file. [" + fileName + "]");
}
return data;
}
std::string getColumnName(int colNum)
{
std::string columnName = "";
if (colNum == 0)
return "A";
while (colNum > 0) {
int modulo = colNum % 26;
columnName = char(65 + modulo) + columnName;
colNum = (int)((colNum - modulo) / 26);
}
return columnName;
}
BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets) : score(score), datasets(datasets)
{
workbook = workbook_new((Paths::excel() + fileName).c_str());
setProperties("Best Results");
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
datasetNameSize = std::max(datasetNameSize, maxDatasetName);
createFormats();
}
void BestResultsExcel::reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance)
{
this->table = table;
this->models = models;
ranksModels = ranks;
this->friedman = friedman;
this->significance = significance;
worksheet = workbook_add_worksheet(workbook, "Best Results");
int maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
modelNameSize = std::max(modelNameSize, maxModelName);
formatColumns();
build();
}
void BestResultsExcel::reportSingle(const std::string& model, const std::string& fileName)
{
worksheet = workbook_add_worksheet(workbook, "Report");
if (FILE* fileTest = fopen(fileName.c_str(), "r")) {
fclose(fileTest);
} else {
std::cerr << "File " << fileName << " doesn't exist." << std::endl;
exit(1);
}
json data = loadResultData(fileName);
std::string title = "Best results for " + model;
worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]);
// Body header
row = 3;
int col = 1;
writeString(row, 0, "", "bodyHeader");
writeString(row, 1, "Dataset", "bodyHeader");
writeString(row, 2, "Score", "bodyHeader");
writeString(row, 3, "File", "bodyHeader");
writeString(row, 4, "Hyperparameters", "bodyHeader");
auto i = 0;
std::string hyperparameters;
int hypSize = 22;
std::map<std::string, std::string> files; // map of files imported and their tabs
for (auto const& item : data.items()) {
row++;
writeInt(row, 0, i++, "ints");
writeString(row, 1, item.key().c_str(), "text");
writeDouble(row, 2, item.value().at(0).get<double>(), "result");
auto fileName = item.value().at(2).get<std::string>();
std::string hyperlink = "";
try {
hyperlink = files.at(fileName);
}
catch (const std::out_of_range& oor) {
auto tabName = "table_" + std::to_string(i);
auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str());
json data = loadResultData(Paths::results() + fileName);
auto report = ReportExcel(data, false, workbook, worksheetNew);
report.show();
hyperlink = "#table_" + std::to_string(i);
files[fileName] = hyperlink;
}
hyperlink += "!H" + std::to_string(i + 6);
std::string fileNameText = "=HYPERLINK(\"" + hyperlink + "\",\"" + fileName + "\")";
worksheet_write_formula(worksheet, row, 3, fileNameText.c_str(), efectiveStyle("text"));
hyperparameters = item.value().at(1).dump();
if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
writeString(row, 4, hyperparameters, "text");
}
row++;
// Set Totals
writeString(row, 1, "Total", "bodyHeader");
std::stringstream oss;
auto colName = getColumnName(2);
oss << "=sum(" << colName << "5:" << colName << row << ")";
worksheet_write_formula(worksheet, row, 2, oss.str().c_str(), styles["bodyHeader_odd"]);
// Set format
worksheet_freeze_panes(worksheet, 4, 2);
std::vector<int> columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 };
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
BestResultsExcel::~BestResultsExcel()
{
workbook_close(workbook);
}
void BestResultsExcel::formatColumns()
{
worksheet_freeze_panes(worksheet, 4, 2);
std::vector<int> columns_sizes = { 5, datasetNameSize };
for (int i = 0; i < models.size(); ++i) {
columns_sizes.push_back(modelNameSize);
}
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
void BestResultsExcel::addConditionalFormat(std::string formula)
{
// Add conditional format for max/min values in scores/ranks sheets
lxw_format* custom_format = workbook_add_format(workbook);
format_set_bg_color(custom_format, 0xFFC7CE);
format_set_font_color(custom_format, 0x9C0006);
// Create a conditional format object. A static object would also work.
lxw_conditional_format* conditional_format = (lxw_conditional_format*)calloc(1, sizeof(lxw_conditional_format));
conditional_format->type = LXW_CONDITIONAL_TYPE_FORMULA;
std::string col = getColumnName(models.size() + 1);
std::stringstream oss;
oss << "=C5=" << formula << "($C5:$" << col << "5)";
auto formulaValue = oss.str();
conditional_format->value_string = formulaValue.c_str();
conditional_format->format = custom_format;
worksheet_conditional_format_range(worksheet, 4, 2, datasets.size() + 3, models.size() + 1, conditional_format);
}
void BestResultsExcel::build()
{
// Create Sheet with scores
header(false);
body(false);
// Add conditional format for max values
addConditionalFormat("max");
footer(false);
if (friedman) {
// Create Sheet with ranks
worksheet = workbook_add_worksheet(workbook, "Ranks");
formatColumns();
header(true);
body(true);
addConditionalFormat("min");
footer(true);
// Create Sheet with Friedman Test
doFriedman();
}
}
std::string BestResultsExcel::getFileName()
{
return Paths::excel() + fileName;
}
void BestResultsExcel::header(bool ranks)
{
row = 0;
std::string message = ranks ? "Ranks for score " + score : "Best results for " + score;
worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), message.c_str(), styles["headerFirst"]);
// Body header
row = 3;
int col = 1;
writeString(row, 0, "", "bodyHeader");
writeString(row, 1, "Dataset", "bodyHeader");
for (const auto& model : models) {
writeString(row, ++col, model.c_str(), "bodyHeader");
}
}
void BestResultsExcel::body(bool ranks)
{
row = 4;
int i = 0;
json origin = table.begin().value();
for (auto const& item : origin.items()) {
writeInt(row, 0, i++, "ints");
writeString(row, 1, item.key().c_str(), "text");
int col = 1;
for (const auto& model : models) {
double value = ranks ? ranksModels[item.key()][model] : table[model].at(item.key()).at(0).get<double>();
writeDouble(row, ++col, value, "result");
}
++row;
}
}
void BestResultsExcel::footer(bool ranks)
{
// Set Totals
writeString(row, 1, "Total", "bodyHeader");
int col = 1;
for (const auto& model : models) {
std::stringstream oss;
auto colName = getColumnName(col + 1);
oss << "=SUM(" << colName << "5:" << colName << row << ")";
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
}
if (ranks) {
row++;
writeString(row, 1, "Average ranks", "bodyHeader");
int col = 1;
for (const auto& model : models) {
auto colName = getColumnName(col + 1);
std::stringstream oss;
oss << "=SUM(" << colName << "5:" << colName << row - 1 << ")/" << datasets.size();
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
}
}
}
void BestResultsExcel::doFriedman()
{
worksheet = workbook_add_worksheet(workbook, "Friedman");
std::vector<int> columns_sizes = { 5, datasetNameSize };
for (int i = 0; i < models.size(); ++i) {
columns_sizes.push_back(modelNameSize);
}
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), "Friedman Test", styles["headerFirst"]);
row = 2;
Statistics stats(models, datasets, table, significance, false);
auto result = stats.friedmanTest();
stats.postHocHolmTest(result);
auto friedmanResult = stats.getFriedmanResult();
auto holmResult = stats.getHolmResult();
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between all the classifiers.'", styles["headerSmall"]);
row += 2;
writeString(row, 1, "Friedman Q", "bodyHeader");
writeDouble(row, 2, friedmanResult.statistic, "bodyHeader");
row++;
writeString(row, 1, "Critical χ2 value", "bodyHeader");
writeDouble(row, 2, friedmanResult.criticalValue, "bodyHeader");
row++;
writeString(row, 1, "p-value", "bodyHeader");
writeDouble(row, 2, friedmanResult.pvalue, "bodyHeader");
writeString(row, 3, friedmanResult.reject ? "<" : ">", "bodyHeader");
writeDouble(row, 4, significance, "bodyHeader");
writeString(row, 5, friedmanResult.reject ? "Reject H0" : "Accept H0", "bodyHeader");
row += 3;
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Holm Test", styles["headerFirst"]);
row += 2;
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between the control model and the other models.'", styles["headerSmall"]);
row += 2;
std::string controlModel = "Control Model: " + holmResult.model;
worksheet_merge_range(worksheet, row, 1, row, 7, controlModel.c_str(), styles["bodyHeader_odd"]);
row++;
writeString(row, 1, "Model", "bodyHeader");
writeString(row, 2, "p-value", "bodyHeader");
writeString(row, 3, "Rank", "bodyHeader");
writeString(row, 4, "Win", "bodyHeader");
writeString(row, 5, "Tie", "bodyHeader");
writeString(row, 6, "Loss", "bodyHeader");
writeString(row, 7, "Reject H0", "bodyHeader");
row++;
bool first = true;
for (const auto& item : holmResult.holmLines) {
writeString(row, 1, item.model, "text");
if (first) {
// Control model info
first = false;
writeString(row, 2, "", "text");
writeDouble(row, 3, item.rank, "result");
writeString(row, 4, "", "text");
writeString(row, 5, "", "text");
writeString(row, 6, "", "text");
writeString(row, 7, "", "textCentered");
} else {
// Rest of the models info
writeDouble(row, 2, item.pvalue, "result");
writeDouble(row, 3, item.rank, "result");
writeInt(row, 4, item.wtl.win, "ints");
writeInt(row, 5, item.wtl.tie, "ints");
writeInt(row, 6, item.wtl.loss, "ints");
writeString(row, 7, item.reject ? "Yes" : "No", "textCentered");
}
row++;
}
}
}

View File

@@ -1,39 +0,0 @@
#ifndef BESTRESULTS_EXCEL_H
#define BESTRESULTS_EXCEL_H
#include "ExcelFile.h"
#include <vector>
#include <map>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
class BestResultsExcel : ExcelFile {
public:
BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets);
~BestResultsExcel();
void reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance);
void reportSingle(const std::string& model, const std::string& fileName);
std::string getFileName();
private:
void build();
void header(bool ranks);
void body(bool ranks);
void footer(bool ranks);
void formatColumns();
void doFriedman();
void addConditionalFormat(std::string formula);
const std::string fileName = "BestResults.xlsx";
std::string score;
std::vector<std::string> models;
std::vector<std::string> datasets;
json table;
std::map<std::string, std::map<std::string, float>> ranksModels;
bool friedman;
double significance;
int modelNameSize = 12; // Min size of the column
int datasetNameSize = 25; // Min size of the column
};
}
#endif //BESTRESULTS_EXCEL_H

View File

@@ -1,28 +0,0 @@
#ifndef BESTSCORE_H
#define BESTSCORE_H
#include <string>
#include <map>
#include <utility>
#include "DotEnv.h"
namespace platform {
class BestScore {
public:
static std::pair<std::string, double> getScore(const std::string& metric)
{
static std::map<std::pair<std::string, std::string>, std::pair<std::string, double>> data = {
{{"discretiz", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}},
{{"odte", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}},
};
auto env = platform::DotEnv();
std::string experiment = env.get("experiment");
try {
return data[{experiment, metric}];
}
catch (...) {
return { "", 0.0 };
}
}
};
}
#endif

View File

@@ -1,22 +0,0 @@
#ifndef LOCALE_H
#define LOCALE_H
#include <locale>
#include <iostream>
#include <string>
namespace platform {
struct separation : std::numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
std::string do_grouping() const { return "\03"; }
};
class ConfigLocale {
public:
explicit ConfigLocale()
{
std::locale mylocale(std::cout.getloc(), new separation);
std::locale::global(mylocale);
std::cout.imbue(mylocale);
}
};
}
#endif

View File

@@ -1,19 +0,0 @@
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
include_directories(${Python3_INCLUDE_DIRS})
add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
add_executable(b_list b_list.cc Datasets.cc Dataset.cc)
add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap)
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp)
target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,14 +0,0 @@
#ifndef COLORS_H
#define COLORS_H
class Colors {
public:
static std::string MAGENTA() { return "\033[1;35m"; }
static std::string BLUE() { return "\033[1;34m"; }
static std::string CYAN() { return "\033[1;36m"; }
static std::string GREEN() { return "\033[1;32m"; }
static std::string YELLOW() { return "\033[1;33m"; }
static std::string RED() { return "\033[1;31m"; }
static std::string WHITE() { return "\033[1;37m"; }
static std::string RESET() { return "\033[0m"; }
};
#endif // COLORS_H

View File

@@ -1,87 +0,0 @@
#include "CommandParser.h"
#include <iostream>
#include <sstream>
#include <algorithm>
#include "Colors.h"
#include "Utils.h"
namespace platform {
void CommandParser::messageError(const std::string& message)
{
std::cout << Colors::RED() << message << Colors::RESET() << std::endl;
}
std::pair<char, int> CommandParser::parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int maxIndex)
{
bool finished = false;
while (!finished) {
std::stringstream oss;
std::string line;
oss << color << "Choose option (";
bool first = true;
for (auto& option : options) {
if (first) {
first = false;
} else {
oss << ", ";
}
oss << std::get<char>(option) << "=" << std::get<std::string>(option);
}
oss << "): ";
std::cout << oss.str();
getline(std::cin, line);
std::cout << Colors::RESET();
line = trim(line);
if (line.size() == 0)
continue;
if (all_of(line.begin(), line.end(), ::isdigit)) {
command = defaultCommand;
index = stoi(line);
if (index > maxIndex || index < 0) {
messageError("Index out of range");
continue;
}
finished = true;
break;
}
bool found = false;
for (auto& option : options) {
if (line[0] == std::get<char>(option)) {
found = true;
// it's a match
line.erase(line.begin());
line = trim(line);
if (std::get<bool>(option)) {
// The option requires a value
if (line.size() == 0) {
messageError("Option " + std::get<std::string>(option) + " requires a value");
break;
}
try {
index = stoi(line);
if (index > maxIndex || index < 0) {
messageError("Index out of range");
break;
}
}
catch (const std::invalid_argument& ia) {
messageError("Invalid value: " + line);
break;
}
} else {
if (line.size() > 0) {
messageError("option " + std::get<std::string>(option) + " doesn't accept values");
break;
}
}
command = std::get<char>(option);
finished = true;
break;
}
}
if (!found) {
messageError("I don't know " + line);
}
}
return { command, index };
}
} /* namespace platform */

View File

@@ -1,20 +0,0 @@
#ifndef COMMAND_PARSER_H
#define COMMAND_PARSER_H
#include <string>
#include <vector>
#include <tuple>
namespace platform {
class CommandParser {
public:
CommandParser() = default;
std::pair<char, int> parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int maxIndex);
char getCommand() const { return command; };
int getIndex() const { return index; };
private:
void messageError(const std::string& message);
char command;
int index;
};
} /* namespace platform */
#endif /* COMMAND_PARSER_H */

View File

@@ -1,215 +0,0 @@
#include "Dataset.h"
#include "ArffFiles.h"
#include <fstream>
namespace platform {
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
{
}
std::string Dataset::getName() const
{
return name;
}
std::string Dataset::getClassName() const
{
return className;
}
std::vector<std::string> Dataset::getFeatures() const
{
if (loaded) {
return features;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNFeatures() const
{
if (loaded) {
return n_features;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNSamples() const
{
if (loaded) {
return n_samples;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
std::map<std::string, std::vector<int>> Dataset::getStates() const
{
if (loaded) {
return states;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<std::vector<std::vector<float>>&, std::vector<int>&> Dataset::getVectors()
{
if (loaded) {
return { Xv, yv };
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<std::vector<std::vector<int>>&, std::vector<int>&> Dataset::getVectorsDiscretized()
{
if (loaded) {
return { Xd, yv };
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
{
if (loaded) {
buildTensors();
return { X, y };
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
void Dataset::load_csv()
{
ifstream file(path + "/" + name + ".csv");
if (file.is_open()) {
std::string line;
getline(file, line);
std::vector<std::string> tokens = split(line, ',');
features = std::vector<std::string>(tokens.begin(), tokens.end() - 1);
if (className == "-1") {
className = tokens.back();
}
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(std::vector<float>());
}
while (getline(file, line)) {
tokens = split(line, ',');
for (auto i = 0; i < features.size(); ++i) {
Xv[i].push_back(stof(tokens[i]));
}
yv.push_back(stoi(tokens.back()));
}
file.close();
} else {
throw std::invalid_argument("Unable to open dataset file.");
}
}
void Dataset::computeStates()
{
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = std::vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
}
states[className] = std::vector<int>(*max_element(yv.begin(), yv.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0);
}
void Dataset::load_arff()
{
auto arff = ArffFiles();
arff.load(path + "/" + name + ".arff", className);
// Get Dataset X, y
Xv = arff.getX();
yv = arff.getY();
// Get className & Features
className = arff.getClassName();
auto attributes = arff.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
}
std::vector<std::string> tokenize(std::string line)
{
std::vector<std::string> tokens;
for (auto i = 0; i < line.size(); ++i) {
if (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') {
std::string token = line.substr(0, i);
tokens.push_back(token);
line.erase(line.begin(), line.begin() + i + 1);
i = 0;
while (line[i] == ' ' || line[i] == '\t' || line[i] == '\n')
line.erase(line.begin(), line.begin() + i + 1);
}
}
if (line.size() > 0) {
tokens.push_back(line);
}
return tokens;
}
void Dataset::load_rdata()
{
ifstream file(path + "/" + name + "_R.dat");
if (file.is_open()) {
std::string line;
getline(file, line);
line = ArffFiles::trim(line);
std::vector<std::string> tokens = tokenize(line);
transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
if (className == "-1") {
className = ArffFiles::trim(tokens.back());
}
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(std::vector<float>());
}
while (getline(file, line)) {
tokens = tokenize(line);
// We have to skip the first token, which is the instance number.
for (auto i = 1; i < features.size() + 1; ++i) {
const float value = stof(tokens[i]);
Xv[i - 1].push_back(value);
}
yv.push_back(stoi(tokens.back()));
}
file.close();
} else {
throw std::invalid_argument("Unable to open dataset file.");
}
}
void Dataset::load()
{
if (loaded) {
return;
}
if (fileType == CSV) {
load_csv();
} else if (fileType == ARFF) {
load_arff();
} else if (fileType == RDATA) {
load_rdata();
}
if (discretize) {
Xd = discretizeDataset(Xv, yv);
computeStates();
}
n_samples = Xv[0].size();
n_features = Xv.size();
loaded = true;
}
void Dataset::buildTensors()
{
if (discretize) {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
} else {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32);
}
for (int i = 0; i < features.size(); ++i) {
if (discretize) {
X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
} else {
X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
}
}
y = torch::tensor(yv, torch::kInt32);
}
std::vector<mdlp::labels_t> Dataset::discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
std::vector<mdlp::labels_t> Xd;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
Xd.push_back(xd);
}
return Xd;
}
}

View File

@@ -1,78 +0,0 @@
#ifndef DATASET_H
#define DATASET_H
#include <torch/torch.h>
#include <map>
#include <vector>
#include <string>
#include "CPPFImdlp.h"
#include "Utils.h"
namespace platform {
enum fileType_t { CSV, ARFF, RDATA };
class SourceData {
public:
SourceData(std::string source)
{
if (source == "Surcov") {
path = "datasets/";
fileType = CSV;
} else if (source == "Arff") {
path = "datasets/";
fileType = ARFF;
} else if (source == "Tanveer") {
path = "data/";
fileType = RDATA;
} else {
throw std::invalid_argument("Unknown source.");
}
}
std::string getPath()
{
return path;
}
fileType_t getFileType()
{
return fileType;
}
private:
std::string path;
fileType_t fileType;
};
class Dataset {
private:
std::string path;
std::string name;
fileType_t fileType;
std::string className;
int n_samples{ 0 }, n_features{ 0 };
std::vector<std::string> features;
std::map<std::string, std::vector<int>> states;
bool loaded;
bool discretize;
torch::Tensor X, y;
std::vector<std::vector<float>> Xv;
std::vector<std::vector<int>> Xd;
std::vector<int> yv;
void buildTensors();
void load_csv();
void load_arff();
void load_rdata();
void computeStates();
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y);
public:
Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
explicit Dataset(const Dataset&);
std::string getName() const;
std::string getClassName() const;
std::vector<string> getFeatures() const;
std::map<std::string, std::vector<int>> getStates() const;
std::pair<vector<std::vector<float>>&, std::vector<int>&> getVectors();
std::pair<vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized();
std::pair<torch::Tensor&, torch::Tensor&> getTensors();
int getNFeatures() const;
int getNSamples() const;
void load();
const bool inline isLoaded() const { return loaded; };
};
};
#endif

View File

@@ -1,129 +0,0 @@
#include "Datasets.h"
#include <fstream>
namespace platform {
void Datasets::load()
{
auto sd = SourceData(sfileType);
fileType = sd.getFileType();
path = sd.getPath();
ifstream catalog(path + "all.txt");
if (catalog.is_open()) {
std::string line;
while (getline(catalog, line)) {
if (line.empty() || line[0] == '#') {
continue;
}
std::vector<std::string> tokens = split(line, ',');
std::string name = tokens[0];
std::string className;
if (tokens.size() == 1) {
className = "-1";
} else {
className = tokens[1];
}
datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType);
}
catalog.close();
} else {
throw std::invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]");
}
}
std::vector<std::string> Datasets::getNames()
{
std::vector<std::string> result;
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
return result;
}
std::vector<std::string> Datasets::getFeatures(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getFeatures();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
map<std::string, std::vector<int>> Datasets::getStates(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getStates();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
void Datasets::loadDataset(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return;
} else {
datasets.at(name)->load();
}
}
std::string Datasets::getClassName(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getClassName();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNSamples(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getNSamples();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNClasses(const std::string& name)
{
if (datasets.at(name)->isLoaded()) {
auto className = datasets.at(name)->getClassName();
if (discretize) {
auto states = getStates(name);
return states.at(className).size();
}
auto [Xv, yv] = getVectors(name);
return *std::max_element(yv.begin(), yv.end()) + 1;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
std::vector<int> Datasets::getClassesCounts(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
auto [Xv, yv] = datasets.at(name)->getVectors();
std::vector<int> counts(*std::max_element(yv.begin(), yv.end()) + 1);
for (auto y : yv) {
counts[y]++;
}
return counts;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<std::vector<std::vector<float>>&, std::vector<int>&> Datasets::getVectors(const std::string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectors();
}
pair<std::vector<std::vector<int>>&, std::vector<int>&> Datasets::getVectorsDiscretized(const std::string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectorsDiscretized();
}
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const std::string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getTensors();
}
bool Datasets::isDataset(const std::string& name) const
{
return datasets.find(name) != datasets.end();
}
}

View File

@@ -1,30 +0,0 @@
#ifndef DATASETS_H
#define DATASETS_H
#include "Dataset.h"
namespace platform {
class Datasets {
private:
std::string path;
fileType_t fileType;
std::string sfileType;
std::map<std::string, std::unique_ptr<Dataset>> datasets;
bool discretize;
void load(); // Loads the list of datasets
public:
explicit Datasets(bool discretize, std::string sfileType) : discretize(discretize), sfileType(sfileType) { load(); };
std::vector<string> getNames();
std::vector<string> getFeatures(const std::string& name) const;
int getNSamples(const std::string& name) const;
std::string getClassName(const std::string& name) const;
int getNClasses(const std::string& name);
std::vector<int> getClassesCounts(const std::string& name) const;
std::map<std::string, std::vector<int>> getStates(const std::string& name) const;
std::pair<std::vector<std::vector<float>>&, std::vector<int>&> getVectors(const std::string& name);
std::pair<std::vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized(const std::string& name);
std::pair<torch::Tensor&, torch::Tensor&> getTensors(const std::string& name);
bool isDataset(const std::string& name) const;
void loadDataset(const std::string& name) const;
};
};
#endif

View File

@@ -1,55 +0,0 @@
#ifndef DOTENV_H
#define DOTENV_H
#include <string>
#include <map>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <iostream>
#include "Utils.h"
//#include "Dataset.h"
namespace platform {
class DotEnv {
private:
std::map<std::string, std::string> env;
public:
DotEnv()
{
std::ifstream file(".env");
if (!file.is_open()) {
std::cerr << "File .env not found" << std::endl;
exit(1);
}
std::string line;
while (std::getline(file, line)) {
line = trim(line);
if (line.empty() || line[0] == '#') {
continue;
}
std::istringstream iss(line);
std::string key, value;
if (std::getline(iss, key, '=') && std::getline(iss, value)) {
env[key] = value;
}
}
}
std::string get(const std::string& key)
{
return env.at(key);
}
std::vector<int> getSeeds()
{
auto seeds = std::vector<int>();
auto seeds_str = env["seeds"];
seeds_str = trim(seeds_str);
seeds_str = seeds_str.substr(1, seeds_str.size() - 2);
auto seeds_str_split = split(seeds_str, ',');
transform(seeds_str_split.begin(), seeds_str_split.end(), back_inserter(seeds), [](const std::string& str) {
return stoi(str);
});
return seeds;
}
};
}
#endif

View File

@@ -1,168 +0,0 @@
#include "ExcelFile.h"
namespace platform {
ExcelFile::ExcelFile()
{
setDefault();
}
ExcelFile::ExcelFile(lxw_workbook* workbook) : workbook(workbook)
{
setDefault();
}
ExcelFile::ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet) : workbook(workbook), worksheet(worksheet)
{
setDefault();
}
void ExcelFile::setDefault()
{
normalSize = 14; //font size for report body
row = 0;
colorTitle = 0xB1A0C7;
colorOdd = 0xDCE6F1;
colorEven = 0xFDE9D9;
}
lxw_workbook* ExcelFile::getWorkbook()
{
return workbook;
}
void ExcelFile::setProperties(std::string title)
{
char line[title.size() + 1];
strcpy(line, title.c_str());
lxw_doc_properties properties = {
.title = line,
.subject = (char*)"Machine learning results",
.author = (char*)"Ricardo Montañana Gómez",
.manager = (char*)"Dr. J. A. Gámez, Dr. J. M. Puerta",
.company = (char*)"UCLM",
.comments = (char*)"Created with libxlsxwriter and c++",
};
workbook_set_properties(workbook, &properties);
}
lxw_format* ExcelFile::efectiveStyle(const std::string& style)
{
lxw_format* efectiveStyle = NULL;
if (style != "") {
std::string suffix = row % 2 ? "_odd" : "_even";
try {
efectiveStyle = styles.at(style + suffix);
}
catch (const std::out_of_range& oor) {
try {
efectiveStyle = styles.at(style);
}
catch (const std::out_of_range& oor) {
throw std::invalid_argument("Style " + style + " not found");
}
}
}
return efectiveStyle;
}
void ExcelFile::writeString(int row, int col, const std::string& text, const std::string& style)
{
worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style));
}
void ExcelFile::writeInt(int row, int col, const int number, const std::string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ExcelFile::writeDouble(int row, int col, const double number, const std::string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ExcelFile::addColor(lxw_format* style, bool odd)
{
uint32_t efectiveColor = odd ? colorEven : colorOdd;
format_set_bg_color(style, lxw_color_t(efectiveColor));
}
void ExcelFile::createStyle(const std::string& name, lxw_format* style, bool odd)
{
addColor(style, odd);
if (name == "textCentered") {
format_set_align(style, LXW_ALIGN_CENTER);
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "text") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "bodyHeader") {
format_set_bold(style);
format_set_font_size(style, normalSize);
format_set_align(style, LXW_ALIGN_CENTER);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(style, LXW_BORDER_THIN);
format_set_bg_color(style, lxw_color_t(colorTitle));
} else if (name == "result") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "0.0000000");
} else if (name == "time") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "#,##0.000000");
} else if (name == "ints") {
format_set_font_size(style, normalSize);
format_set_num_format(style, "###,##0");
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "floats") {
format_set_border(style, LXW_BORDER_THIN);
format_set_font_size(style, normalSize);
format_set_num_format(style, "#,##0.00");
}
}
void ExcelFile::createFormats()
{
auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" };
lxw_format* style;
for (std::string name : styleNames) {
lxw_format* style = workbook_add_format(workbook);
style = workbook_add_format(workbook);
createStyle(name, style, true);
styles[name + "_odd"] = style;
style = workbook_add_format(workbook);
createStyle(name, style, false);
styles[name + "_even"] = style;
}
// Header 1st line
lxw_format* headerFirst = workbook_add_format(workbook);
format_set_bold(headerFirst);
format_set_font_size(headerFirst, 18);
format_set_align(headerFirst, LXW_ALIGN_CENTER);
format_set_align(headerFirst, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerFirst, LXW_BORDER_THIN);
format_set_bg_color(headerFirst, lxw_color_t(colorTitle));
// Header rest
lxw_format* headerRest = workbook_add_format(workbook);
format_set_bold(headerRest);
format_set_align(headerRest, LXW_ALIGN_CENTER);
format_set_font_size(headerRest, 16);
format_set_align(headerRest, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerRest, LXW_BORDER_THIN);
format_set_bg_color(headerRest, lxw_color_t(colorOdd));
// Header small
lxw_format* headerSmall = workbook_add_format(workbook);
format_set_bold(headerSmall);
format_set_align(headerSmall, LXW_ALIGN_LEFT);
format_set_font_size(headerSmall, 12);
format_set_border(headerSmall, LXW_BORDER_THIN);
format_set_align(headerSmall, LXW_ALIGN_VERTICAL_CENTER);
format_set_bg_color(headerSmall, lxw_color_t(colorOdd));
// Summary style
lxw_format* summaryStyle = workbook_add_format(workbook);
format_set_bold(summaryStyle);
format_set_font_size(summaryStyle, 16);
format_set_border(summaryStyle, LXW_BORDER_THIN);
format_set_align(summaryStyle, LXW_ALIGN_VERTICAL_CENTER);
styles["headerFirst"] = headerFirst;
styles["headerRest"] = headerRest;
styles["headerSmall"] = headerSmall;
styles["summaryStyle"] = summaryStyle;
}
}

View File

@@ -1,43 +0,0 @@
#ifndef EXCELFILE_H
#define EXCELFILE_H
#include <locale>
#include <string>
#include <map>
#include "xlsxwriter.h"
namespace platform {
struct separated : std::numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
std::string do_grouping() const { return "\03"; }
};
class ExcelFile {
public:
ExcelFile();
ExcelFile(lxw_workbook* workbook);
ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet);
lxw_workbook* getWorkbook();
protected:
void setProperties(std::string title);
void writeString(int row, int col, const std::string& text, const std::string& style = "");
void writeInt(int row, int col, const int number, const std::string& style = "");
void writeDouble(int row, int col, const double number, const std::string& style = "");
void createFormats();
void createStyle(const std::string& name, lxw_format* style, bool odd);
void addColor(lxw_format* style, bool odd);
lxw_format* efectiveStyle(const std::string& name);
lxw_workbook* workbook;
lxw_worksheet* worksheet;
std::map<std::string, lxw_format*> styles;
int row;
int normalSize; //font size for report body
uint32_t colorTitle;
uint32_t colorOdd;
uint32_t colorEven;
private:
void setDefault();
};
}
#endif // !EXCELFILE_H

View File

@@ -1,227 +0,0 @@
#include <fstream>
#include "Experiment.h"
#include "Datasets.h"
#include "Models.h"
#include "ReportConsole.h"
#include "Paths.h"
namespace platform {
using json = nlohmann::json;
std::string get_date()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%Y-%m-%d");
return oss.str();
}
std::string get_time()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str();
}
Experiment::Experiment() : hyperparameters(json::parse("{}")) {}
std::string Experiment::get_file_name()
{
std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
return result;
}
json Experiment::build_json()
{
json result;
result["title"] = title;
result["date"] = get_date();
result["time"] = get_time();
result["model"] = model;
result["version"] = model_version;
result["platform"] = platform;
result["score_name"] = score_name;
result["language"] = language;
result["language_version"] = language_version;
result["discretized"] = discretized;
result["stratified"] = stratified;
result["folds"] = nfolds;
result["seeds"] = randomSeeds;
result["duration"] = duration;
result["results"] = json::array();
for (const auto& r : results) {
json j;
j["dataset"] = r.getDataset();
j["hyperparameters"] = r.getHyperparameters();
j["samples"] = r.getSamples();
j["features"] = r.getFeatures();
j["classes"] = r.getClasses();
j["score_train"] = r.getScoreTrain();
j["score_test"] = r.getScoreTest();
j["score"] = r.getScoreTest();
j["score_std"] = r.getScoreTestStd();
j["score_train_std"] = r.getScoreTrainStd();
j["score_test_std"] = r.getScoreTestStd();
j["train_time"] = r.getTrainTime();
j["train_time_std"] = r.getTrainTimeStd();
j["test_time"] = r.getTestTime();
j["test_time_std"] = r.getTestTimeStd();
j["time"] = r.getTestTime() + r.getTrainTime();
j["time_std"] = r.getTestTimeStd() + r.getTrainTimeStd();
j["scores_train"] = r.getScoresTrain();
j["scores_test"] = r.getScoresTest();
j["times_train"] = r.getTimesTrain();
j["times_test"] = r.getTimesTest();
j["nodes"] = r.getNodes();
j["leaves"] = r.getLeaves();
j["depth"] = r.getDepth();
result["results"].push_back(j);
}
return result;
}
void Experiment::save(const std::string& path)
{
json data = build_json();
ofstream file(path + "/" + get_file_name());
file << data;
file.close();
}
void Experiment::report()
{
json data = build_json();
ReportConsole report(data);
report.show();
}
void Experiment::show()
{
json data = build_json();
std::cout << data.dump(4) << std::endl;
}
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet)
{
std::cout << "*** Starting experiment: " << title << " ***" << std::endl;
for (auto fileName : filesToProcess) {
std::cout << "- " << setw(20) << left << fileName << " " << right << flush;
cross_validation(fileName, quiet);
std::cout << std::endl;
}
}
std::string getColor(bayesnet::status_t status)
{
switch (status) {
case bayesnet::NORMAL:
return Colors::GREEN();
case bayesnet::WARNING:
return Colors::YELLOW();
case bayesnet::ERROR:
return Colors::RED();
default:
return Colors::RESET();
}
}
void showProgress(int fold, const std::string& color, const std::string& phase)
{
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
}
void Experiment::cross_validation(const std::string& fileName, bool quiet)
{
auto datasets = platform::Datasets(discretized, Paths::datasets());
// Get dataset
auto [X, y] = datasets.getTensors(fileName);
auto states = datasets.getStates(fileName);
auto features = datasets.getFeatures(fileName);
auto samples = datasets.getNSamples(fileName);
auto className = datasets.getClassName(fileName);
if (!quiet) {
std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
}
// Prepare Result
auto result = Result();
auto [values, counts] = at::_unique(y);
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
result.setHyperparameters(hyperparameters);
// Initialize results std::vectors
int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
auto train_time = torch::zeros({ nResults }, torch::kFloat64);
auto test_time = torch::zeros({ nResults }, torch::kFloat64);
auto nodes = torch::zeros({ nResults }, torch::kFloat64);
auto edges = torch::zeros({ nResults }, torch::kFloat64);
auto num_states = torch::zeros({ nResults }, torch::kFloat64);
Timer train_timer, test_timer;
int item = 0;
for (auto seed : randomSeeds) {
if (!quiet)
std::cout << "(" << seed << ") doing Fold: " << flush;
Fold* fold;
if (stratified)
fold = new StratifiedKFold(nfolds, y, seed);
else
fold = new KFold(nfolds, y.size(0), seed);
for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(model);
setModelVersion(clf->getVersion());
if (hyperparameters.size() != 0) {
clf->setHyperparameters(hyperparameters);
}
// Split train - test dataset
train_timer.start();
auto [train, test] = fold->getFold(nfold);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
auto X_train = X.index({ "...", train_t });
auto y_train = y.index({ train_t });
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "a");
// Train model
clf->fit(X_train, y_train, features, className, states);
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates();
train_time[item] = train_timer.getDuration();
// Score train
auto accuracy_train_value = clf->score(X_train, y_train);
// Test model
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
test_timer.start();
auto accuracy_test_value = clf->score(X_test, y_test);
test_time[item] = test_timer.getDuration();
accuracy_train[item] = accuracy_train_value;
accuracy_test[item] = accuracy_test_value;
if (!quiet)
std::cout << "\b\b\b, " << flush;
// Store results and times in std::vector
result.addScoreTrain(accuracy_train_value);
result.addScoreTest(accuracy_test_value);
result.addTimeTrain(train_time[item].item<double>());
result.addTimeTest(test_time[item].item<double>());
item++;
}
if (!quiet)
std::cout << "end. " << flush;
delete fold;
}
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
result.setDataset(fileName);
addResult(result);
}
}

View File

@@ -1,116 +0,0 @@
#ifndef EXPERIMENT_H
#define EXPERIMENT_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <string>
#include <chrono>
#include "Folding.h"
#include "BaseClassifier.h"
#include "TAN.h"
#include "KDB.h"
#include "AODE.h"
namespace platform {
using json = nlohmann::json;
class Timer {
private:
std::chrono::high_resolution_clock::time_point begin;
public:
Timer() = default;
~Timer() = default;
void start() { begin = std::chrono::high_resolution_clock::now(); }
double getDuration()
{
std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double >> (end - begin);
return time_span.count();
}
};
class Result {
private:
std::string dataset, model_version;
json hyperparameters;
int samples{ 0 }, features{ 0 }, classes{ 0 };
double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
std::vector<double> scores_train, scores_test, times_train, times_test;
public:
Result() = default;
Result& setDataset(const std::string& dataset) { this->dataset = dataset; return *this; }
Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Result& setSamples(int samples) { this->samples = samples; return *this; }
Result& setFeatures(int features) { this->features = features; return *this; }
Result& setClasses(int classes) { this->classes = classes; return *this; }
Result& setScoreTrain(double score) { this->score_train = score; return *this; }
Result& setScoreTest(double score) { this->score_test = score; return *this; }
Result& setScoreTrainStd(double score_std) { this->score_train_std = score_std; return *this; }
Result& setScoreTestStd(double score_std) { this->score_test_std = score_std; return *this; }
Result& setTrainTime(double train_time) { this->train_time = train_time; return *this; }
Result& setTrainTimeStd(double train_time_std) { this->train_time_std = train_time_std; return *this; }
Result& setTestTime(double test_time) { this->test_time = test_time; return *this; }
Result& setTestTimeStd(double test_time_std) { this->test_time_std = test_time_std; return *this; }
Result& setNodes(float nodes) { this->nodes = nodes; return *this; }
Result& setLeaves(float leaves) { this->leaves = leaves; return *this; }
Result& setDepth(float depth) { this->depth = depth; return *this; }
Result& addScoreTrain(double score) { scores_train.push_back(score); return *this; }
Result& addScoreTest(double score) { scores_test.push_back(score); return *this; }
Result& addTimeTrain(double time) { times_train.push_back(time); return *this; }
Result& addTimeTest(double time) { times_test.push_back(time); return *this; }
const float get_score_train() const { return score_train; }
float get_score_test() { return score_test; }
const std::string& getDataset() const { return dataset; }
const json& getHyperparameters() const { return hyperparameters; }
const int getSamples() const { return samples; }
const int getFeatures() const { return features; }
const int getClasses() const { return classes; }
const double getScoreTrain() const { return score_train; }
const double getScoreTest() const { return score_test; }
const double getScoreTrainStd() const { return score_train_std; }
const double getScoreTestStd() const { return score_test_std; }
const double getTrainTime() const { return train_time; }
const double getTrainTimeStd() const { return train_time_std; }
const double getTestTime() const { return test_time; }
const double getTestTimeStd() const { return test_time_std; }
const float getNodes() const { return nodes; }
const float getLeaves() const { return leaves; }
const float getDepth() const { return depth; }
const std::vector<double>& getScoresTrain() const { return scores_train; }
const std::vector<double>& getScoresTest() const { return scores_test; }
const std::vector<double>& getTimesTrain() const { return times_train; }
const std::vector<double>& getTimesTest() const { return times_test; }
};
class Experiment {
private:
std::string title, model, platform, score_name, model_version, language_version, language;
bool discretized{ false }, stratified{ false };
std::vector<Result> results;
std::vector<int> randomSeeds;
json hyperparameters = "{}";
int nfolds{ 0 };
float duration{ 0 };
json build_json();
public:
Experiment();
Experiment& setTitle(const std::string& title) { this->title = title; return *this; }
Experiment& setModel(const std::string& model) { this->model = model; return *this; }
Experiment& setPlatform(const std::string& platform) { this->platform = platform; return *this; }
Experiment& setScoreName(const std::string& score_name) { this->score_name = score_name; return *this; }
Experiment& setModelVersion(const std::string& model_version) { this->model_version = model_version; return *this; }
Experiment& setLanguage(const std::string& language) { this->language = language; return *this; }
Experiment& setLanguageVersion(const std::string& language_version) { this->language_version = language_version; return *this; }
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }
Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; }
Experiment& addResult(Result result) { results.push_back(result); return *this; }
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->duration = duration; return *this; }
Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
std::string get_file_name();
void save(const std::string& path);
void cross_validation(const std::string& fileName, bool quiet);
void go(std::vector<std::string> filesToProcess, bool quiet);
void show();
void report();
};
}
#endif

View File

@@ -1,104 +0,0 @@
#include "Folding.h"
#include <algorithm>
#include <map>
namespace platform {
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
std::random_device rd;
random_seed = std::default_random_engine(seed == -1 ? rd() : seed);
std::srand(seed == -1 ? time(0) : seed);
}
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(std::vector<int>(n))
{
std::iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
shuffle(indices.begin(), indices.end(), random_seed);
}
std::pair<std::vector<int>, std::vector<int>> KFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")");
}
int nTest = n / k;
auto train = std::vector<int>();
auto test = std::vector<int>();
for (int i = 0; i < n; i++) {
if (i >= nTest * nFold && i < nTest * (nFold + 1)) {
test.push_back(indices[i]);
} else {
train.push_back(indices[i]);
}
}
return { train, test };
}
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
n = y.numel();
this->y = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
build();
}
StratifiedKFold::StratifiedKFold(int k, const std::vector<int>& y, int seed)
: Fold(k, y.size(), seed)
{
this->y = y;
n = y.size();
build();
}
void StratifiedKFold::build()
{
stratified_indices = std::vector<std::vector<int>>(k);
int fold_size = n / k;
// Compute class counts and indices
auto class_indices = std::map<int, std::vector<int>>();
std::vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
for (auto i = 0; i < n; ++i) {
class_counts[y[i]]++;
class_indices[y[i]].push_back(i);
}
// Shuffle class indices
for (auto& [cls, indices] : class_indices) {
shuffle(indices.begin(), indices.end(), random_seed);
}
// Assign indices to folds
for (auto label = 0; label < class_counts.size(); ++label) {
auto num_samples_to_take = class_counts.at(label) / k;
if (num_samples_to_take == 0) {
std::cerr << "Warning! The number of samples in class " << label << " (" << class_counts.at(label)
<< ") is less than the number of folds (" << k << ")." << std::endl;
faulty = true;
continue;
}
auto remainder_samples_to_take = class_counts[label] % k;
for (auto fold = 0; fold < k; ++fold) {
auto it = next(class_indices[label].begin(), num_samples_to_take);
move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ##
class_indices[label].erase(class_indices[label].begin(), it);
}
auto chosen = std::vector<bool>(k, false);
while (remainder_samples_to_take > 0) {
int fold = (rand() % static_cast<int>(k));
if (chosen.at(fold)) {
continue;
}
chosen[fold] = true;
auto it = next(class_indices[label].begin(), 1);
stratified_indices[fold].push_back(*class_indices[label].begin());
class_indices[label].erase(class_indices[label].begin(), it);
remainder_samples_to_take--;
}
}
}
std::pair<std::vector<int>, std::vector<int>> StratifiedKFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")");
}
std::vector<int> test_indices = stratified_indices[nFold];
std::vector<int> train_indices;
for (int i = 0; i < k; ++i) {
if (i == nFold) continue;
train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end());
}
return { train_indices, test_indices };
}
}

View File

@@ -1,39 +0,0 @@
#ifndef FOLDING_H
#define FOLDING_H
#include <torch/torch.h>
#include <vector>
#include <random>
namespace platform {
class Fold {
protected:
int k;
int n;
int seed;
std::default_random_engine random_seed;
public:
Fold(int k, int n, int seed = -1);
virtual std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default;
int getNumberOfFolds() { return k; }
};
class KFold : public Fold {
private:
std::vector<int> indices;
public:
KFold(int k, int n, int seed = -1);
std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) override;
};
class StratifiedKFold : public Fold {
private:
std::vector<int> y;
std::vector<std::vector<int>> stratified_indices;
void build();
bool faulty = false; // Only true if the number of samples of any class is less than the number of folds.
public:
StratifiedKFold(int k, const std::vector<int>& y, int seed = -1);
StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) override;
bool isFaulty() { return faulty; }
};
}
#endif

View File

@@ -1,213 +0,0 @@
#include "ManageResults.h"
#include "CommandParser.h"
#include <filesystem>
#include <tuple>
#include "Colors.h"
#include "CLocale.h"
#include "Paths.h"
#include "ReportConsole.h"
#include "ReportExcel.h"
namespace platform {
ManageResults::ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare) :
numFiles{ numFiles }, complete{ complete }, partial{ partial }, compare{ compare }, results(Results(Paths::results(), model, score, complete, partial))
{
indexList = true;
openExcel = false;
workbook = NULL;
if (numFiles == 0) {
this->numFiles = results.size();
}
}
void ManageResults::doMenu()
{
if (results.empty()) {
std::cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl;
return;
}
results.sortDate();
list();
menu();
if (openExcel) {
workbook_close(workbook);
}
std::cout << Colors::RESET() << "Done!" << std::endl;
}
void ManageResults::list()
{
auto temp = ConfigLocale();
std::string suffix = numFiles != results.size() ? " of " + std::to_string(results.size()) : "";
std::stringstream oss;
oss << "Results on screen: " << numFiles << suffix;
std::cout << Colors::GREEN() << oss.str() << std::endl;
std::cout << std::string(oss.str().size(), '-') << std::endl;
if (complete) {
std::cout << Colors::MAGENTA() << "Only listing complete results" << std::endl;
}
if (partial) {
std::cout << Colors::MAGENTA() << "Only listing partial results" << std::endl;
}
auto i = 0;
int maxModel = results.maxModelSize();
std::cout << Colors::GREEN() << " # Date " << std::setw(maxModel) << std::left << "Model" << " Score Name Score C/P Duration Title" << std::endl;
std::cout << "=== ========== " << std::string(maxModel, '=') << " =========== =========== === ========= =============================================================" << std::endl;
bool odd = true;
for (auto& result : results) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
std::cout << result.to_string(maxModel) << std::endl;
if (i == numFiles) {
break;
}
odd = !odd;
}
}
bool ManageResults::confirmAction(const std::string& intent, const std::string& fileName) const
{
std::string color;
if (intent == "delete") {
color = Colors::RED();
} else {
color = Colors::YELLOW();
}
std::string line;
bool finished = false;
while (!finished) {
std::cout << color << "Really want to " << intent << " " << fileName << "? (y/n): ";
getline(std::cin, line);
finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0] == 'n'));
}
if (tolower(line[0]) == 'y') {
return true;
}
std::cout << "Not done!" << std::endl;
return false;
}
void ManageResults::report(const int index, const bool excelReport)
{
std::cout << Colors::YELLOW() << "Reporting " << results.at(index).getFilename() << std::endl;
auto data = results.at(index).load();
if (excelReport) {
ReportExcel reporter(data, compare, workbook);
reporter.show();
openExcel = true;
workbook = reporter.getWorkbook();
std::cout << "Adding sheet to " << Paths::excel() + Paths::excelResults() << std::endl;
} else {
ReportConsole reporter(data, compare);
reporter.show();
}
}
void ManageResults::showIndex(const int index, const int idx)
{
// Show a dataset result inside a report
auto data = results.at(index).load();
std::cout << Colors::YELLOW() << "Showing " << results.at(index).getFilename() << std::endl;
ReportConsole reporter(data, compare, idx);
reporter.show();
}
void ManageResults::sortList()
{
std::cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): ";
std::string line;
char option;
getline(std::cin, line);
if (line.size() == 0)
return;
if (line.size() > 1) {
std::cout << "Invalid option" << std::endl;
return;
}
option = line[0];
switch (option) {
case 'd':
results.sortDate();
break;
case 's':
results.sortScore();
break;
case 'u':
results.sortDuration();
break;
case 'm':
results.sortModel();
break;
default:
std::cout << "Invalid option" << std::endl;
}
}
void ManageResults::menu()
{
char option;
int index, subIndex;
bool finished = false;
std::string filename;
// tuple<Option, digit, requires value>
std::vector<std::tuple<std::string, char, bool>> mainOptions = {
{"quit", 'q', false},
{"list", 'l', false},
{"delete", 'd', true},
{"hide", 'h', true},
{"sort", 's', false},
{"report", 'r', true},
{"excel", 'e', true}
};
std::vector<std::tuple<std::string, char, bool>> listOptions = {
{"report", 'r', true},
{"list", 'l', false},
{"quit", 'q', false}
};
auto parser = CommandParser();
while (!finished) {
if (indexList) {
std::tie(option, index) = parser.parse(Colors::GREEN(), mainOptions, 'r', numFiles - 1);
} else {
std::tie(option, subIndex) = parser.parse(Colors::MAGENTA(), listOptions, 'r', results.at(index).load()["results"].size() - 1);
}
switch (option) {
case 'q':
finished = true;
break;
case 'l':
list();
indexList = true;
break;
case 'd':
filename = results.at(index).getFilename();
if (!confirmAction("delete", filename))
break;
std::cout << "Deleting " << filename << std::endl;
results.deleteResult(index);
std::cout << "File: " + filename + " deleted!" << std::endl;
list();
break;
case 'h':
filename = results.at(index).getFilename();
if (!confirmAction("hide", filename))
break;
filename = results.at(index).getFilename();
std::cout << "Hiding " << filename << std::endl;
results.hideResult(index, Paths::hiddenResults());
std::cout << "File: " + filename + " hidden! (moved to " << Paths::hiddenResults() << ")" << std::endl;
list();
break;
case 's':
sortList();
list();
break;
case 'r':
if (indexList) {
report(index, false);
indexList = false;
} else {
showIndex(index, subIndex);
}
break;
case 'e':
report(index, true);
break;
}
}
}
} /* namespace platform */

View File

@@ -1,31 +0,0 @@
#ifndef MANAGE_RESULTS_H
#define MANAGE_RESULTS_H
#include "Results.h"
#include "xlsxwriter.h"
namespace platform {
class ManageResults {
public:
ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare);
~ManageResults() = default;
void doMenu();
private:
void list();
bool confirmAction(const std::string& intent, const std::string& fileName) const;
void report(const int index, const bool excelReport);
void showIndex(const int index, const int idx);
void sortList();
void menu();
int numFiles;
bool indexList;
bool openExcel;
bool complete;
bool partial;
bool compare;
Results results;
lxw_workbook* workbook;
};
}
#endif /* MANAGE_RESULTS_H */

View File

@@ -1,52 +0,0 @@
#include "Models.h"
namespace platform {
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
Models* Models::factory = nullptr;;
Models* Models::instance()
{
//manages singleton
if (factory == nullptr)
factory = new Models();
return factory;
}
void Models::registerFactoryFunction(const std::string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
// register the class factory function
functionRegistry[name] = classFactoryFunction;
}
shared_ptr<bayesnet::BaseClassifier> Models::create(const std::string& name)
{
bayesnet::BaseClassifier* instance = nullptr;
// find name in the registry and call factory method.
auto it = functionRegistry.find(name);
if (it != functionRegistry.end())
instance = it->second();
// wrap instance in a shared ptr and return
if (instance != nullptr)
return unique_ptr<bayesnet::BaseClassifier>(instance);
else
return nullptr;
}
std::vector<std::string> Models::getNames()
{
std::vector<std::string> names;
transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names),
[](const pair<std::string, function<bayesnet::BaseClassifier* (void)>>& pair) { return pair.first; });
return names;
}
std::string Models::tostring()
{
std::string result = "";
for (const auto& pair : functionRegistry) {
result += pair.first + ", ";
}
return "{" + result.substr(0, result.size() - 2) + "}";
}
Registrar::Registrar(const std::string& name, function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
// register the class factory function
Models::instance()->registerFactoryFunction(name, classFactoryFunction);
}
}

View File

@@ -1,41 +0,0 @@
#ifndef MODELS_H
#define MODELS_H
#include <map>
#include "BaseClassifier.h"
#include "AODE.h"
#include "TAN.h"
#include "KDB.h"
#include "SPODE.h"
#include "TANLd.h"
#include "KDBLd.h"
#include "SPODELd.h"
#include "AODELd.h"
#include "BoostAODE.h"
#include "STree.h"
#include "ODTE.h"
#include "SVC.h"
#include "RandomForest.h"
namespace platform {
class Models {
private:
map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
static Models* factory; //singleton
Models() {};
public:
Models(Models&) = delete;
void operator=(const Models&) = delete;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
static Models* instance();
shared_ptr<bayesnet::BaseClassifier> create(const std::string& name);
void registerFactoryFunction(const std::string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
std::vector<string> getNames();
std::string tostring();
};
class Registrar {
public:
Registrar(const std::string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
};
}
#endif

View File

@@ -1,20 +0,0 @@
#ifndef PATHS_H
#define PATHS_H
#include <string>
#include "DotEnv.h"
namespace platform {
class Paths {
public:
static std::string results() { return "results/"; }
static std::string hiddenResults() { return "hidden_results/"; }
static std::string excel() { return "excel/"; }
static std::string cfs() { return "cfs/"; }
static std::string datasets()
{
auto env = platform::DotEnv();
return env.get("source_data");
}
static std::string excelResults() { return "some_results.xlsx"; }
};
}
#endif

View File

@@ -1,113 +0,0 @@
#include <sstream>
#include <locale>
#include "Datasets.h"
#include "ReportBase.h"
#include "DotEnv.h"
namespace platform {
ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1)
{
std::stringstream oss;
oss << "Better than ZeroR + " << std::setprecision(1) << fixed << margin * 100 << "%";
meaning = {
{Symbols::equal_best, "Equal to best"},
{Symbols::better_best, "Better than best"},
{Symbols::cross, "Less than or equal to ZeroR"},
{Symbols::upward_arrow, oss.str()}
};
}
std::string ReportBase::fromVector(const std::string& key)
{
std::stringstream oss;
std::string sep = "";
oss << "[";
for (auto& item : data[key]) {
oss << sep << item.get<double>();
sep = ", ";
}
oss << "]";
return oss.str();
}
std::string ReportBase::fVector(const std::string& title, const json& data, const int width, const int precision)
{
std::stringstream oss;
std::string sep = "";
oss << title << "[";
for (const auto& item : data) {
oss << sep << fixed << setw(width) << std::setprecision(precision) << item.get<double>();
sep = ", ";
}
oss << "]";
return oss.str();
}
void ReportBase::show()
{
header();
body();
}
std::string ReportBase::compareResult(const std::string& dataset, double result)
{
std::string status = " ";
if (compare) {
double best = bestResult(dataset, data["model"].get<std::string>());
if (result == best) {
status = Symbols::equal_best;
} else if (result > best) {
status = Symbols::better_best;
}
} else {
if (data["score_name"].get<std::string>() == "accuracy") {
auto dt = Datasets(false, Paths::datasets());
dt.loadDataset(dataset);
auto numClasses = dt.getNClasses(dataset);
if (numClasses == 2) {
std::vector<int> distribution = dt.getClassesCounts(dataset);
double nSamples = dt.getNSamples(dataset);
std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
double mark = *maxValue / nSamples * (1 + margin);
if (mark > 1) {
mark = 0.9995;
}
status = result < mark ? Symbols::cross : result > mark ? Symbols::upward_arrow : "=";
}
}
}
if (status != " ") {
auto item = summary.find(status);
if (item != summary.end()) {
summary[status]++;
} else {
summary[status] = 1;
}
}
return status;
}
double ReportBase::bestResult(const std::string& dataset, const std::string& model)
{
double value = 0.0;
if (bestResults.size() == 0) {
// try to load the best results
std::string score = data["score_name"];
replace(score.begin(), score.end(), '_', '-');
std::string fileName = "best_results_" + score + "_" + model + ".json";
ifstream resultData(Paths::results() + "/" + fileName);
if (resultData.is_open()) {
bestResults = json::parse(resultData);
} else {
existBestFile = false;
}
}
try {
value = bestResults.at(dataset).at(0);
}
catch (exception) {
value = 1.0;
}
return value;
}
bool ReportBase::getExistBestFile()
{
return existBestFile;
}
}

View File

@@ -1,36 +0,0 @@
#ifndef REPORTBASE_H
#define REPORTBASE_H
#include <string>
#include <iostream>
#include "Paths.h"
#include "Symbols.h"
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
class ReportBase {
public:
explicit ReportBase(json data_, bool compare);
virtual ~ReportBase() = default;
void show();
protected:
json data;
std::string fromVector(const std::string& key);
std::string fVector(const std::string& title, const json& data, const int width, const int precision);
bool getExistBestFile();
virtual void header() = 0;
virtual void body() = 0;
virtual void showSummary() = 0;
std::string compareResult(const std::string& dataset, double result);
std::map<std::string, int> summary;
double margin;
std::map<std::string, std::string> meaning;
bool compare;
private:
double bestResult(const std::string& dataset, const std::string& model);
json bestResults;
bool existBestFile = true;
};
};
#endif

View File

@@ -1,114 +0,0 @@
#include <iostream>
#include <sstream>
#include <locale>
#include "ReportConsole.h"
#include "BestScore.h"
#include "CLocale.h"
namespace platform {
std::string ReportConsole::headerLine(const std::string& text, int utf = 0)
{
int n = MAXL - text.length() - 3;
n = n < 0 ? 0 : n;
return "* " + text + std::string(n + utf, ' ') + "*\n";
}
void ReportConsole::header()
{
std::stringstream oss;
std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
std::cout << headerLine(
"Report " + data["model"].get<std::string>() + " ver. " + data["version"].get<std::string>()
+ " with " + std::to_string(data["folds"].get<int>()) + " Folds cross validation and " + std::to_string(data["seeds"].size())
+ " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>()
);
std::cout << headerLine(data["title"].get<std::string>());
std::cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
oss << "Execution took " << std::setprecision(2) << std::fixed << data["duration"].get<float>()
<< " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<std::string>();
std::cout << headerLine(oss.str());
std::cout << headerLine("Score is " + data["score_name"].get<std::string>());
std::cout << std::string(MAXL, '*') << std::endl;
std::cout << std::endl;
}
void ReportConsole::body()
{
auto tmp = ConfigLocale();
int maxHyper = 15;
int maxDataset = 7;
for (const auto& r : data["results"]) {
maxHyper = std::max(maxHyper, (int)r["hyperparameters"].dump().size());
maxDataset = std::max(maxDataset, (int)r["dataset"].get<std::string>().size());
}
std::cout << Colors::GREEN() << " # " << std::setw(maxDataset) << std::left << "Dataset" << " Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << std::endl;
std::cout << "=== " << std::string(maxDataset, '=') << " ====== ===== === ========= ========= ========= =============== =================== " << std::string(maxHyper, '=') << std::endl;
json lastResult;
double totalScore = 0.0;
bool odd = true;
int index = 0;
for (const auto& r : data["results"]) {
if (selectedIndex != -1 && index != selectedIndex) {
index++;
continue;
}
auto color = odd ? Colors::CYAN() : Colors::BLUE();
std::cout << color;
std::cout << std::setw(3) << std::right << index++ << " ";
std::cout << std::setw(maxDataset) << std::left << r["dataset"].get<std::string>() << " ";
std::cout << std::setw(6) << std::right << r["samples"].get<int>() << " ";
std::cout << std::setw(5) << std::right << r["features"].get<int>() << " ";
std::cout << std::setw(3) << std::right << r["classes"].get<int>() << " ";
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["nodes"].get<float>() << " ";
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["leaves"].get<float>() << " ";
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["depth"].get<float>() << " ";
std::cout << std::setw(8) << std::right << std::setprecision(6) << std::fixed << r["score"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["score_std"].get<double>();
const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
std::cout << status;
std::cout << std::setw(12) << std::right << std::setprecision(6) << std::fixed << r["time"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["time_std"].get<double>() << " ";
std::cout << r["hyperparameters"].dump();
std::cout << std::endl;
std::cout << std::flush;
lastResult = r;
totalScore += r["score"].get<double>();
odd = !odd;
}
if (data["results"].size() == 1 || selectedIndex != -1) {
std::cout << std::string(MAXL, '*') << std::endl;
std::cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
std::cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
std::cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
std::cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
std::cout << std::string(MAXL, '*') << std::endl;
} else {
footer(totalScore);
}
}
void ReportConsole::showSummary()
{
for (const auto& item : summary) {
std::stringstream oss;
oss << std::setw(3) << std::left << item.first;
oss << std::setw(3) << std::right << item.second << " ";
oss << std::left << meaning.at(item.first);
std::cout << headerLine(oss.str(), 2);
}
}
void ReportConsole::footer(double totalScore)
{
std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
showSummary();
auto score = data["score_name"].get<std::string>();
auto best = BestScore::getScore(score);
if (best.first != "") {
std::stringstream oss;
oss << score << " compared to " << best.first << " .: " << totalScore / best.second;
std::cout << headerLine(oss.str());
}
if (!getExistBestFile() && compare) {
std::cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
}
std::cout << std::string(MAXL, '*') << std::endl << Colors::RESET();
}
}

View File

@@ -1,22 +0,0 @@
#ifndef REPORTCONSOLE_H
#define REPORTCONSOLE_H
#include <string>
#include "ReportBase.h"
#include "Colors.h"
namespace platform {
const int MAXL = 133;
class ReportConsole : public ReportBase {
public:
explicit ReportConsole(json data_, bool compare = false, int index = -1) : ReportBase(data_, compare), selectedIndex(index) {};
virtual ~ReportConsole() = default;
private:
int selectedIndex;
std::string headerLine(const std::string& text, int utf);
void header() override;
void body() override;
void footer(double totalScore);
void showSummary() override;
};
};
#endif

View File

@@ -1,180 +0,0 @@
#include <sstream>
#include <locale>
#include "ReportExcel.h"
#include "BestScore.h"
namespace platform {
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet) : ReportBase(data_, compare), ExcelFile(workbook, worksheet)
{
createFile();
}
void ReportExcel::formatColumns()
{
worksheet_freeze_panes(worksheet, 6, 1);
std::vector<int> columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 };
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
void ReportExcel::createWorksheet()
{
const std::string name = data["model"].get<std::string>();
std::string suffix = "";
std::string efectiveName;
int num = 1;
// Create a sheet with the name of the model
while (true) {
efectiveName = name + suffix;
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
suffix = std::to_string(++num);
} else {
worksheet = workbook_add_worksheet(workbook, efectiveName.c_str());
break;
}
if (num > 100) {
throw std::invalid_argument("Couldn't create sheet " + efectiveName);
}
}
}
void ReportExcel::createFile()
{
if (workbook == NULL) {
workbook = workbook_new((Paths::excel() + Paths::excelResults()).c_str());
}
if (worksheet == NULL) {
createWorksheet();
}
setProperties(data["title"].get<std::string>());
createFormats();
formatColumns();
}
void ReportExcel::closeFile()
{
workbook_close(workbook);
}
void ReportExcel::header()
{
std::locale mylocale(std::cout.getloc(), new separated);
std::locale::global(mylocale);
std::cout.imbue(mylocale);
std::stringstream oss;
std::string message = data["model"].get<std::string>() + " ver. " + data["version"].get<std::string>() + " " +
data["language"].get<std::string>() + " ver. " + data["language_version"].get<std::string>() +
" with " + std::to_string(data["folds"].get<int>()) + " Folds cross validation and " + std::to_string(data["seeds"].size()) +
" random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>();
worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<std::string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<std::string>()).c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]);
oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() << " s";
worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
oss.str("");
oss.clear();
oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() / 3600 << " h";
worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<std::string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Stratified: " << (data["stratified"].get<bool>() ? "True" : "False");
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False");
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
}
void ReportExcel::body()
{
auto head = std::vector<std::string>(
{ "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time",
"Time Std.", "Hyperparameters" });
int col = 0;
for (const auto& item : head) {
writeString(5, col++, item, "bodyHeader");
}
row = 6;
col = 0;
int hypSize = 22;
json lastResult;
double totalScore = 0.0;
std::string hyperparameters;
for (const auto& r : data["results"]) {
writeString(row, col, r["dataset"].get<std::string>(), "text");
writeInt(row, col + 1, r["samples"].get<int>(), "ints");
writeInt(row, col + 2, r["features"].get<int>(), "ints");
writeInt(row, col + 3, r["classes"].get<int>(), "ints");
writeDouble(row, col + 4, r["nodes"].get<float>(), "floats");
writeDouble(row, col + 5, r["leaves"].get<float>(), "floats");
writeDouble(row, col + 6, r["depth"].get<double>(), "floats");
writeDouble(row, col + 7, r["score"].get<double>(), "result");
writeDouble(row, col + 8, r["score_std"].get<double>(), "result");
const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
writeString(row, col + 9, status, "textCentered");
writeDouble(row, col + 10, r["time"].get<double>(), "time");
writeDouble(row, col + 11, r["time_std"].get<double>(), "time");
hyperparameters = r["hyperparameters"].dump();
if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
writeString(row, col + 12, hyperparameters, "text");
lastResult = r;
totalScore += r["score"].get<double>();
row++;
}
// Set the right column width of hyperparameters with the maximum length
worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL);
// Show totals if only one dataset is present in the result
if (data["results"].size() == 1) {
for (const std::string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
row++;
col = 1;
writeString(row, col, group, "text");
for (double item : lastResult[group]) {
std::string style = group.find("scores") != std::string::npos ? "result" : "time";
writeDouble(row, ++col, item, style);
}
}
// Set with of columns to show those totals completely
worksheet_set_column(worksheet, 1, 1, 12, NULL);
for (int i = 2; i < 7; ++i) {
// doesn't work with from col to col, so...
worksheet_set_column(worksheet, i, i, 15, NULL);
}
} else {
footer(totalScore, row);
}
}
void ReportExcel::showSummary()
{
for (const auto& item : summary) {
worksheet_write_string(worksheet, row + 2, 1, item.first.c_str(), styles["summaryStyle"]);
worksheet_write_number(worksheet, row + 2, 2, item.second, styles["summaryStyle"]);
worksheet_merge_range(worksheet, row + 2, 3, row + 2, 5, meaning.at(item.first).c_str(), styles["summaryStyle"]);
row += 1;
}
}
void ReportExcel::footer(double totalScore, int row)
{
showSummary();
row += 4 + summary.size();
auto score = data["score_name"].get<std::string>();
auto best = BestScore::getScore(score);
if (best.first != "") {
worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + best.first + " .:").c_str(), efectiveStyle("text"));
writeDouble(row, 6, totalScore / best.second, "result");
}
if (!getExistBestFile() && compare) {
worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]);
}
}
}

View File

@@ -1,24 +0,0 @@
#ifndef REPORTEXCEL_H
#define REPORTEXCEL_H
#include<map>
#include "xlsxwriter.h"
#include "ReportBase.h"
#include "ExcelFile.h"
#include "Colors.h"
namespace platform {
class ReportExcel : public ReportBase, public ExcelFile {
public:
explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL);
private:
void formatColumns();
void createFile();
void createWorksheet();
void closeFile();
void header() override;
void body() override;
void showSummary() override;
void footer(double totalScore, int row);
};
};
#endif // !REPORTEXCEL_H

View File

@@ -1,58 +0,0 @@
#include "Result.h"
#include "BestScore.h"
#include <filesystem>
#include <fstream>
#include <sstream>
#include "Colors.h"
#include "DotEnv.h"
#include "CLocale.h"
namespace platform {
Result::Result(const std::string& path, const std::string& filename)
: path(path)
, filename(filename)
{
auto data = load();
date = data["date"];
score = 0;
for (const auto& result : data["results"]) {
score += result["score"].get<double>();
}
scoreName = data["score_name"];
auto best = BestScore::getScore(scoreName);
if (best.first != "") {
score /= best.second;
}
title = data["title"];
duration = data["duration"];
model = data["model"];
complete = data["results"].size() > 1;
}
json Result::load() const
{
std::ifstream resultData(path + "/" + filename);
if (resultData.is_open()) {
json data = json::parse(resultData);
return data;
}
throw std::invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
}
std::string Result::to_string(int maxModel) const
{
auto tmp = ConfigLocale();
std::stringstream oss;
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
oss << date << " ";
oss << std::setw(maxModel) << std::left << model << " ";
oss << std::setw(11) << std::left << scoreName << " ";
oss << std::right << std::setw(11) << std::setprecision(7) << std::fixed << score << " ";
auto completeString = isComplete() ? "C" : "P";
oss << std::setw(1) << " " << completeString << " ";
oss << std::setw(7) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " ";
oss << std::setw(50) << std::left << title << " ";
return oss.str();
}
}

View File

@@ -1,36 +0,0 @@
#ifndef RESULT_H
#define RESULT_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
namespace platform {
using json = nlohmann::json;
class Result {
public:
Result(const std::string& path, const std::string& filename);
json load() const;
std::string to_string(int maxModel) const;
std::string getFilename() const { return filename; };
std::string getDate() const { return date; };
double getScore() const { return score; };
std::string getTitle() const { return title; };
double getDuration() const { return duration; };
std::string getModel() const { return model; };
std::string getScoreName() const { return scoreName; };
bool isComplete() const { return complete; };
private:
std::string path;
std::string filename;
std::string date;
double score;
std::string title;
double duration;
std::string model;
std::string scoreName;
bool complete;
};
};
#endif

View File

@@ -1,74 +0,0 @@
#include "Results.h"
#include <algorithm>
namespace platform {
Results::Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial) :
path(path), model(model), scoreName(score), complete(complete), partial(partial)
{
load();
if (!files.empty()) {
maxModel = (*max_element(files.begin(), files.end(), [](const Result& a, const Result& b) { return a.getModel().size() < b.getModel().size(); })).getModel().size();
} else {
maxModel = 0;
}
};
void Results::load()
{
using std::filesystem::directory_iterator;
for (const auto& file : directory_iterator(path)) {
auto filename = file.path().filename().string();
if (filename.find(".json") != std::string::npos && filename.find("results_") == 0) {
auto result = Result(path, filename);
bool addResult = true;
if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName() || complete && !result.isComplete() || partial && result.isComplete())
addResult = false;
if (addResult)
files.push_back(result);
}
}
}
void Results::hideResult(int index, const std::string& pathHidden)
{
auto filename = files.at(index).getFilename();
rename((path + "/" + filename).c_str(), (pathHidden + "/" + filename).c_str());
files.erase(files.begin() + index);
}
void Results::deleteResult(int index)
{
auto filename = files.at(index).getFilename();
remove((path + "/" + filename).c_str());
files.erase(files.begin() + index);
}
int Results::size() const
{
return files.size();
}
void Results::sortDate()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getDate() > b.getDate();
});
}
void Results::sortModel()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getModel() > b.getModel();
});
}
void Results::sortDuration()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getDuration() > b.getDuration();
});
}
void Results::sortScore()
{
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
return a.getScore() > b.getScore();
});
}
bool Results::empty() const
{
return files.empty();
}
}

View File

@@ -1,38 +0,0 @@
#ifndef RESULTS_H
#define RESULTS_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
#include "Result.h"
namespace platform {
using json = nlohmann::json;
class Results {
public:
Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial);
void sortDate();
void sortScore();
void sortModel();
void sortDuration();
int maxModelSize() const { return maxModel; };
void hideResult(int index, const std::string& pathHidden);
void deleteResult(int index);
int size() const;
bool empty() const;
std::vector<Result>::iterator begin() { return files.begin(); };
std::vector<Result>::iterator end() { return files.end(); };
Result& at(int index) { return files.at(index); };
private:
std::string path;
std::string model;
std::string scoreName;
bool complete;
bool partial;
int maxModel;
std::vector<Result> files;
void load(); // Loads the list of results
};
};
#endif

View File

@@ -1,252 +0,0 @@
#include <sstream>
#include "Statistics.h"
#include "Colors.h"
#include "Symbols.h"
#include <boost/math/distributions/chi_squared.hpp>
#include <boost/math/distributions/normal.hpp>
#include "CLocale.h"
namespace platform {
Statistics::Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance, bool output) :
models(models), datasets(datasets), data(data), significance(significance), output(output)
{
nModels = models.size();
nDatasets = datasets.size();
auto temp = ConfigLocale();
};
void Statistics::fit()
{
if (nModels < 3 || nDatasets < 3) {
std::cerr << "nModels: " << nModels << std::endl;
std::cerr << "nDatasets: " << nDatasets << std::endl;
throw std::runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
}
ranksModels.clear();
computeRanks();
// Set the control model as the one with the lowest average rank
controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
computeWTL();
maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
maxDatasetName = (*std::max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
fitted = true;
}
std::map<std::string, float> assignRanks(std::vector<std::pair<std::string, double>>& ranksOrder)
{
// sort the ranksOrder std::vector by value
std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair<std::string, double>& a, const std::pair<std::string, double>& b) {
return a.second > b.second;
});
//Assign ranks to values and if they are the same they share the same averaged rank
std::map<std::string, float> ranks;
for (int i = 0; i < ranksOrder.size(); i++) {
ranks[ranksOrder[i].first] = i + 1.0;
}
int i = 0;
while (i < static_cast<int>(ranksOrder.size())) {
int j = i + 1;
int sumRanks = ranks[ranksOrder[i].first];
while (j < static_cast<int>(ranksOrder.size()) && ranksOrder[i].second == ranksOrder[j].second) {
sumRanks += ranks[ranksOrder[j++].first];
}
if (j > i + 1) {
float averageRank = (float)sumRanks / (j - i);
for (int k = i; k < j; k++) {
ranks[ranksOrder[k].first] = averageRank;
}
}
i = j;
}
return ranks;
}
void Statistics::computeRanks()
{
std::map<std::string, float> ranksLine;
for (const auto& dataset : datasets) {
std::vector<std::pair<std::string, double>> ranksOrder;
for (const auto& model : models) {
double value = data[model].at(dataset).at(0).get<double>();
ranksOrder.push_back({ model, value });
}
// Assign the ranks
ranksLine = assignRanks(ranksOrder);
// Store the ranks of the dataset
ranksModels[dataset] = ranksLine;
if (ranks.size() == 0) {
ranks = ranksLine;
} else {
for (const auto& rank : ranksLine) {
ranks[rank.first] += rank.second;
}
}
}
// Average the ranks
for (const auto& rank : ranks) {
ranks[rank.first] /= nDatasets;
}
}
void Statistics::computeWTL()
{
// Compute the WTL matrix
for (int i = 0; i < nModels; ++i) {
wtl[i] = { 0, 0, 0 };
}
json origin = data.begin().value();
for (auto const& item : origin.items()) {
auto controlModel = models.at(controlIdx);
double controlValue = data[controlModel].at(item.key()).at(0).get<double>();
for (int i = 0; i < nModels; ++i) {
if (i == controlIdx) {
continue;
}
double value = data[models[i]].at(item.key()).at(0).get<double>();
if (value < controlValue) {
wtl[i].win++;
} else if (value == controlValue) {
wtl[i].tie++;
} else {
wtl[i].loss++;
}
}
}
}
void Statistics::postHocHolmTest(bool friedmanResult)
{
if (!fitted) {
fit();
}
std::stringstream oss;
// Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
// Post-hoc Holm test
// Calculate the p-value for the models paired with the control model
std::map<int, double> stats; // p-value of each model paired with the control model
boost::math::normal dist(0.0, 1.0);
double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
for (int i = 0; i < nModels; i++) {
if (i == controlIdx) {
stats[i] = 0.0;
continue;
}
double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
double p_value = (long double)2 * (1 - cdf(dist, z));
stats[i] = p_value;
}
// Sort the models by p-value
std::vector<std::pair<int, double>> statsOrder;
for (const auto& stat : stats) {
statsOrder.push_back({ stat.first, stat.second });
}
std::sort(statsOrder.begin(), statsOrder.end(), [](const std::pair<int, double>& a, const std::pair<int, double>& b) {
return a.second < b.second;
});
// Holm adjustment
for (int i = 0; i < statsOrder.size(); ++i) {
auto item = statsOrder.at(i);
double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
double p_value = std::min((double)1.0, item.second * (nModels - i));
p_value = std::max(before, p_value);
statsOrder[i] = { item.first, p_value };
}
holmResult.model = models.at(controlIdx);
auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
oss << color;
oss << " *************************************************************************************************************" << std::endl;
oss << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl;
oss << " Control model: " << models.at(controlIdx) << std::endl;
oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value rank win tie loss Status" << std::endl;
oss << " " << std::string(maxModelName, '=') << " ============ ========= === === ==== =============" << std::endl;
// sort ranks from lowest to highest
std::vector<std::pair<std::string, float>> ranksOrder;
for (const auto& rank : ranks) {
ranksOrder.push_back({ rank.first, rank.second });
}
std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair<std::string, float>& a, const std::pair<std::string, float>& b) {
return a.second < b.second;
});
// Show the control model info.
oss << " " << Colors::BLUE() << std::left << std::setw(maxModelName) << ranksOrder.at(0).first << " ";
oss << std::setw(12) << " " << std::setprecision(7) << std::fixed << " " << ranksOrder.at(0).second << std::endl;
for (const auto& item : ranksOrder) {
auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
double pvalue = 0.0;
for (const auto& stat : statsOrder) {
if (stat.first == idx) {
pvalue = stat.second;
}
}
holmResult.holmLines.push_back({ item.first, pvalue, item.second, wtl.at(idx), pvalue < significance });
if (item.first == models.at(controlIdx)) {
continue;
}
auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
oss << " " << colorStatus << std::left << std::setw(maxModelName) << item.first << " ";
oss << std::setprecision(6) << std::scientific << pvalue << std::setprecision(7) << std::fixed << " " << item.second;
oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss;
oss << " " << status << textStatus << std::endl;
}
oss << color << " *************************************************************************************************************" << std::endl;
oss << Colors::RESET();
if (output) {
std::cout << oss.str();
}
}
bool Statistics::friedmanTest()
{
if (!fitted) {
fit();
}
std::stringstream oss;
// Friedman test
// Calculate the Friedman statistic
oss << Colors::BLUE() << std::endl;
oss << "***************************************************************************************************************" << std::endl;
oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << std::endl;
double degreesOfFreedom = nModels - 1.0;
double sumSquared = 0;
for (const auto& rank : ranks) {
sumSquared += pow(rank.second, 2);
}
// Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
// Calculate the critical value
boost::math::chi_squared chiSquared(degreesOfFreedom);
long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
double criticalValue = quantile(chiSquared, 1 - significance);
oss << "Friedman statistic: " << friedmanQ << std::endl;
oss << "Critical χ2 Value for df=" << std::fixed << (int)degreesOfFreedom
<< " and alpha=" << std::setprecision(2) << std::fixed << significance << ": " << std::setprecision(7) << std::scientific << criticalValue << std::endl;
oss << "p-value: " << std::scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << std::setprecision(2) << std::fixed << significance << std::endl;
bool result;
if (p_value < significance) {
oss << Colors::GREEN() << "The null hypothesis H0 is rejected." << std::endl;
result = true;
} else {
oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << std::endl;
result = false;
}
oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << std::endl;
if (output) {
std::cout << oss.str();
}
friedmanResult = { friedmanQ, criticalValue, p_value, result };
return result;
}
FriedmanResult& Statistics::getFriedmanResult()
{
return friedmanResult;
}
HolmResult& Statistics::getHolmResult()
{
return holmResult;
}
std::map<std::string, std::map<std::string, float>>& Statistics::getRanks()
{
return ranksModels;
}
} // namespace platform

View File

@@ -1,63 +0,0 @@
#ifndef STATISTICS_H
#define STATISTICS_H
#include <iostream>
#include <vector>
#include <map>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
struct WTL {
int win;
int tie;
int loss;
};
struct FriedmanResult {
double statistic;
double criticalValue;
long double pvalue;
bool reject;
};
struct HolmLine {
std::string model;
long double pvalue;
double rank;
WTL wtl;
bool reject;
};
struct HolmResult {
std::string model;
std::vector<HolmLine> holmLines;
};
class Statistics {
public:
Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance = 0.05, bool output = true);
bool friedmanTest();
void postHocHolmTest(bool friedmanResult);
FriedmanResult& getFriedmanResult();
HolmResult& getHolmResult();
std::map<std::string, std::map<std::string, float>>& getRanks();
private:
void fit();
void computeRanks();
void computeWTL();
const std::vector<std::string>& models;
const std::vector<std::string>& datasets;
const json& data;
double significance;
bool output;
bool fitted = false;
int nModels = 0;
int nDatasets = 0;
int controlIdx = 0;
std::map<int, WTL> wtl;
std::map<std::string, float> ranks;
int maxModelName = 0;
int maxDatasetName = 0;
FriedmanResult friedmanResult;
HolmResult holmResult;
std::map<std::string, std::map<std::string, float>> ranksModels;
};
}
#endif // !STATISTICS_H

View File

@@ -1,17 +0,0 @@
#ifndef SYMBOLS_H
#define SYMBOLS_H
#include <string>
namespace platform {
class Symbols {
public:
inline static const std::string check_mark{ "\u2714" };
inline static const std::string exclamation{ "\u2757" };
inline static const std::string black_star{ "\u2605" };
inline static const std::string cross{ "\u2717" };
inline static const std::string upward_arrow{ "\u27B6" };
inline static const std::string down_arrow{ "\u27B4" };
inline static const std::string equal_best{ check_mark };
inline static const std::string better_best{ black_star };
};
}
#endif // !SYMBOLS_H

View File

@@ -1,30 +0,0 @@
#ifndef UTILS_H
#define UTILS_H
#include <sstream>
#include <string>
#include <vector>
namespace platform {
//static std::vector<std::string> split(const std::string& text, char delimiter);
static std::vector<std::string> split(const std::string& text, char delimiter)
{
std::vector<std::string> result;
std::stringstream ss(text);
std::string token;
while (std::getline(ss, token, delimiter)) {
result.push_back(token);
}
return result;
}
static std::string trim(const std::string& str)
{
std::string result = str;
result.erase(result.begin(), std::find_if(result.begin(), result.end(), [](int ch) {
return !std::isspace(ch);
}));
result.erase(std::find_if(result.rbegin(), result.rend(), [](int ch) {
return !std::isspace(ch);
}).base(), result.end());
return result;
}
}
#endif

View File

@@ -1,86 +0,0 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "Paths.h"
#include "BestResults.h"
#include "Colors.h"
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
argparse::ArgumentParser program("best");
program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model) (any for all models)");
program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true);
program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const std::string& value) {
try {
auto k = std::stod(value);
if (k < 0.01 || k > 0.15) {
throw std::runtime_error("Significance level hast to be a number in [0.01, 0.15]");
}
return k;
}
catch (const std::runtime_error& err) {
throw std::runtime_error(err.what());
}
catch (...) {
throw std::runtime_error("Number of folds must be an decimal number");
}});
return program;
}
int main(int argc, char** argv)
{
auto program = manageArguments(argc, argv);
std::string model, score;
bool build, report, friedman, excel;
double level;
try {
program.parse_args(argc, argv);
model = program.get<std::string>("model");
score = program.get<std::string>("score");
build = program.get<bool>("build");
report = program.get<bool>("report");
friedman = program.get<bool>("friedman");
excel = program.get<bool>("excel");
level = program.get<double>("level");
if (model == "" || score == "") {
throw std::runtime_error("Model and score name must be supplied");
}
if (friedman && model != "any") {
std::cerr << "Friedman test can only be used with all models" << std::endl;
std::cerr << program;
exit(1);
}
if (!report && !build) {
std::cerr << "Either build, report or both, have to be selected to do anything!" << std::endl;
std::cerr << program;
exit(1);
}
}
catch (const std::exception& err) {
std::cerr << err.what() << std::endl;
std::cerr << program;
exit(1);
}
// Generate report
auto results = platform::BestResults(platform::Paths::results(), score, model, friedman, level);
if (build) {
if (model == "any") {
results.buildAll();
} else {
std::string fileName = results.build();
std::cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << std::endl;
}
}
if (report) {
if (model == "any") {
results.reportAll(excel);
} else {
results.reportSingle(excel);
}
}
return 0;
}

View File

@@ -1,56 +0,0 @@
#include <iostream>
#include <locale>
#include "Paths.h"
#include "Colors.h"
#include "Datasets.h"
const int BALANCE_LENGTH = 75;
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
std::string do_grouping() const { return "\03"; }
};
void outputBalance(const std::string& balance)
{
auto temp = std::string(balance);
while (temp.size() > BALANCE_LENGTH - 1) {
auto part = temp.substr(0, BALANCE_LENGTH);
std::cout << part << std::endl;
std::cout << setw(48) << " ";
temp = temp.substr(BALANCE_LENGTH);
}
std::cout << temp << std::endl;
}
int main(int argc, char** argv)
{
auto data = platform::Datasets(false, platform::Paths::datasets());
locale mylocale(std::cout.getloc(), new separated);
locale::global(mylocale);
std::cout.imbue(mylocale);
std::cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << std::endl;
std::string balanceBars = std::string(BALANCE_LENGTH, '=');
std::cout << "============================== ====== ===== === " << balanceBars << std::endl;
bool odd = true;
for (const auto& dataset : data.getNames()) {
auto color = odd ? Colors::CYAN() : Colors::BLUE();
std::cout << color << setw(30) << left << dataset << " ";
data.loadDataset(dataset);
auto nSamples = data.getNSamples(dataset);
std::cout << setw(6) << right << nSamples << " ";
std::cout << setw(5) << right << data.getFeatures(dataset).size() << " ";
std::cout << setw(3) << right << data.getNClasses(dataset) << " ";
std::stringstream oss;
std::string sep = "";
for (auto number : data.getClassesCounts(dataset)) {
oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")";
sep = " / ";
}
outputBalance(oss.str());
odd = !odd;
}
std::cout << Colors::RESET() << std::endl;
return 0;
}

View File

@@ -1,122 +0,0 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "Experiment.h"
#include "Datasets.h"
#include "DotEnv.h"
#include "Models.h"
#include "modelRegister.h"
#include "Paths.h"
using json = nlohmann::json;
argparse::ArgumentParser manageArguments()
{
auto env = platform::DotEnv();
argparse::ArgumentParser program("main");
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparamters passed to the model in Experiment");
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->tostring())
.action([](const std::string& value) {
static const std::vector<std::string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring());
}
);
program.add_argument("--title").default_value("").help("Experiment title");
program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true);
program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
try {
auto k = stoi(value);
if (k < 2) {
throw std::runtime_error("Number of folds must be greater than 1");
}
return k;
}
catch (const runtime_error& err) {
throw std::runtime_error(err.what());
}
catch (...) {
throw std::runtime_error("Number of folds must be an integer");
}});
auto seed_values = env.getSeeds();
program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
return program;
}
int main(int argc, char** argv)
{
std::string file_name, model_name, title;
json hyperparameters_json;
bool discretize_dataset, stratified, saveResults, quiet;
std::vector<int> seeds;
std::vector<std::string> filesToTest;
int n_folds;
auto program = manageArguments();
try {
program.parse_args(argc, argv);
file_name = program.get<std::string>("dataset");
model_name = program.get<std::string>("model");
discretize_dataset = program.get<bool>("discretize");
stratified = program.get<bool>("stratified");
quiet = program.get<bool>("quiet");
n_folds = program.get<int>("folds");
seeds = program.get<std::vector<int>>("seeds");
auto hyperparameters = program.get<std::string>("hyperparameters");
hyperparameters_json = json::parse(hyperparameters);
title = program.get<std::string>("title");
if (title == "" && file_name == "") {
throw runtime_error("title is mandatory if dataset is not provided");
}
saveResults = program.get<bool>("save");
}
catch (const exception& err) {
cerr << err.what() << std::endl;
cerr << program;
exit(1);
}
auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets());
if (file_name != "") {
if (!datasets.isDataset(file_name)) {
cerr << "Dataset " << file_name << " not found" << std::endl;
exit(1);
}
if (title == "") {
title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds";
}
filesToTest.push_back(file_name);
} else {
filesToTest = datasets.getNames();
saveResults = true;
}
/*
* Begin Processing
*/
auto env = platform::DotEnv();
auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
experiment.setHyperparameters(hyperparameters_json);
for (auto seed : seeds) {
experiment.addRandomSeed(seed);
}
platform::Timer timer;
timer.start();
experiment.go(filesToTest, quiet);
experiment.setDuration(timer.getDuration());
if (saveResults) {
experiment.save(platform::Paths::results());
}
if (!quiet)
experiment.report();
std::cout << "Done!" << std::endl;
return 0;
}

View File

@@ -1,49 +0,0 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "ManageResults.h"
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
argparse::ArgumentParser program("manage");
program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>();
program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)");
program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true);
program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true);
program.add_argument("--compare").help("Compare with best results").default_value(false).implicit_value(true);
try {
program.parse_args(argc, argv);
auto number = program.get<int>("number");
if (number < 0) {
throw std::runtime_error("Number of results must be greater than or equal to 0");
}
auto model = program.get<std::string>("model");
auto score = program.get<std::string>("score");
auto complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare");
}
catch (const std::exception& err) {
std::cerr << err.what() << std::endl;
std::cerr << program;
exit(1);
}
return program;
}
int main(int argc, char** argv)
{
auto program = manageArguments(argc, argv);
int number = program.get<int>("number");
std::string model = program.get<std::string>("model");
std::string score = program.get<std::string>("score");
auto complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare");
if (complete)
partial = false;
auto manager = platform::ManageResults(number, model, score, complete, partial, compare);
manager.doMenu();
return 0;
}

View File

@@ -1,29 +0,0 @@
#ifndef MODEL_REGISTER_H
#define MODEL_REGISTER_H
static platform::Registrar registrarT("TAN",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();});
static platform::Registrar registrarTLD("TANLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANLd();});
static platform::Registrar registrarS("SPODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);});
static platform::Registrar registrarSLD("SPODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2);});
static platform::Registrar registrarK("KDB",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);});
static platform::Registrar registrarKLD("KDBLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2);});
static platform::Registrar registrarA("AODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();});
static platform::Registrar registrarALD("AODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
static platform::Registrar registrarBA("BoostAODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();});
static platform::Registrar registrarSt("STree",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::STree();});
static platform::Registrar registrarOdte("Odte",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::ODTE();});
static platform::Registrar registrarSvc("SVC",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::SVC();});
static platform::Registrar registrarRaF("RandomForest",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::RandomForest();});
#endif

View File

@@ -1,9 +0,0 @@
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${Python3_INCLUDE_DIRS})
include_directories(${TORCH_INCLUDE_DIRS})
add_library(PyWrap SHARED PyWrap.cc STree.cc ODTE.cc SVC.cc RandomForest.cc PyClassifier.cc)
#target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy xgboost::xgboost ArffFiles)
target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles)

View File

@@ -1,15 +0,0 @@
#include "ODTE.h"
namespace pywrap {
std::string ODTE::graph()
{
return callMethodString("graph");
}
void ODTE::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "n_jobs", "n_estimators", "random_state" };
checkHyperparameters(validKeys, hyperparameters);
this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */

View File

@@ -1,15 +0,0 @@
#ifndef ODTE_H
#define ODTE_H
#include "nlohmann/json.hpp"
#include "PyClassifier.h"
namespace pywrap {
class ODTE : public PyClassifier {
public:
ODTE() : PyClassifier("odte", "Odte") {};
~ODTE() = default;
std::string graph();
void setHyperparameters(nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* ODTE_H */

View File

@@ -1,99 +0,0 @@
#include "PyClassifier.h"
namespace pywrap {
namespace bp = boost::python;
namespace np = boost::python::numpy;
PyClassifier::PyClassifier(const std::string& module, const std::string& className, bool sklearn) : module(module), className(className), sklearn(sklearn), fitted(false)
{
// This id allows to have more than one instance of the same module/class
id = reinterpret_cast<clfId_t>(this);
pyWrap = PyWrap::GetInstance();
pyWrap->importClass(id, module, className);
}
PyClassifier::~PyClassifier()
{
pyWrap->clean(id);
}
np::ndarray tensor2numpy(torch::Tensor& X)
{
int m = X.size(0);
int n = X.size(1);
auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin<float>(), bp::make_tuple(m, n), bp::make_tuple(sizeof(X.dtype()) * 2 * n, sizeof(X.dtype()) * 2), bp::object());
Xn = Xn.transpose();
return Xn;
}
std::pair<np::ndarray, np::ndarray> tensors2numpy(torch::Tensor& X, torch::Tensor& y)
{
int n = X.size(1);
auto yn = np::from_data(y.data_ptr(), np::dtype::get_builtin<int32_t>(), bp::make_tuple(n), bp::make_tuple(sizeof(y.dtype()) * 2), bp::object());
return { tensor2numpy(X), yn };
}
std::string PyClassifier::version()
{
if (sklearn) {
return pyWrap->sklearnVersion();
}
return pyWrap->version(id);
}
std::string PyClassifier::callMethodString(const std::string& method)
{
return pyWrap->callMethodString(id, method);
}
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y)
{
if (!fitted && hyperparameters.size() > 0) {
pyWrap->setHyperparameters(id, hyperparameters);
}
auto [Xn, yn] = tensors2numpy(X, y);
CPyObject Xp = bp::incref(bp::object(Xn).ptr());
CPyObject yp = bp::incref(bp::object(yn).ptr());
pyWrap->fit(id, Xp, yp);
fitted = true;
return *this;
}
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
return fit(X, y);
}
torch::Tensor PyClassifier::predict(torch::Tensor& X)
{
int dimension = X.size(1);
auto Xn = tensor2numpy(X);
CPyObject Xp = bp::incref(bp::object(Xn).ptr());
PyObject* incoming = pyWrap->predict(id, Xp);
bp::handle<> handle(incoming);
bp::object object(handle);
np::ndarray prediction = np::from_object(object);
if (PyErr_Occurred()) {
PyErr_Print();
throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
}
int* data = reinterpret_cast<int*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
}
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{
auto [Xn, yn] = tensors2numpy(X, y);
CPyObject Xp = bp::incref(bp::object(Xn).ptr());
CPyObject yp = bp::incref(bp::object(yn).ptr());
float result = pyWrap->score(id, Xp, yp);
return result;
}
void PyClassifier::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid, default is no hyperparameters
const std::vector<std::string> validKeys = { };
checkHyperparameters(validKeys, hyperparameters);
this->hyperparameters = hyperparameters;
}
void PyClassifier::checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters)
{
for (const auto& item : hyperparameters.items()) {
if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid");
}
}
}
} /* namespace pywrap */

View File

@@ -1,55 +0,0 @@
#ifndef PYCLASSIFIER_H
#define PYCLASSIFIER_H
#include "boost/python/detail/wrap_python.hpp"
#include <boost/python/numpy.hpp>
#include <nlohmann/json.hpp>
#include <string>
#include <map>
#include <vector>
#include <utility>
#include <torch/torch.h>
#include "PyWrap.h"
#include "Classifier.h"
#include "TypeId.h"
namespace pywrap {
class PyClassifier : public bayesnet::BaseClassifier {
public:
PyClassifier(const std::string& module, const std::string& className, const bool sklearn = false);
virtual ~PyClassifier();
PyClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override { return *this; };
// X is nxm tensor, y is nx1 tensor
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y);
PyClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override { return *this; };
PyClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override { return *this; };
torch::Tensor predict(torch::Tensor& X) override;
std::vector<int> predict(std::vector<std::vector<int >>& X) override { return std::vector<int>(); };
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override { return 0.0; };
float score(torch::Tensor& X, torch::Tensor& y) override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
std::string version();
std::string callMethodString(const std::string& method);
std::string getVersion() override { return this->version(); };
int getNumberOfNodes()const override { return 0; };
int getNumberOfEdges()const override { return 0; };
int getNumberOfStates() const override { return 0; };
std::vector<std::string> show() const override { return std::vector<std::string>(); }
std::vector<std::string> graph(const std::string& title = "") const override { return std::vector<std::string>(); }
bayesnet::status_t getStatus() const override { return bayesnet::NORMAL; };
std::vector<std::string> topological_order() override { return std::vector<std::string>(); }
void dump_cpt() const override {};
protected:
void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters);
nlohmann::json hyperparameters;
void trainModel(const torch::Tensor& weights) override {};
private:
PyWrap* pyWrap;
std::string module;
std::string className;
bool sklearn;
clfId_t id;
bool fitted;
};
} /* namespace pywrap */
#endif /* PYCLASSIFIER_H */

View File

@@ -1,15 +0,0 @@
#ifndef PYCLF_H
#define PYCLF_H
#include <string>
#include "DotEnv.h"
namespace PyClassifiers {
class PyClf {
public:
PyClf(const std::string& name);
virtual ~PyClf();
private:
std::string name;
};
} /* namespace PyClassifiers */
#endif /* PYCLF_H */

View File

@@ -1,87 +0,0 @@
#ifndef PYHELPER_HPP
#define PYHELPER_HPP
#pragma once
// Code taken and adapted from
// https ://www.codeproject.com/Articles/820116/Embedding-Python-program-in-a-C-Cplusplus-code
#include "boost/python/detail/wrap_python.hpp"
#include <boost/python/numpy.hpp>
#include <iostream>
namespace pywrap {
namespace p = boost::python;
namespace np = boost::python::numpy;
class CPyInstance {
public:
CPyInstance()
{
Py_Initialize();
np::initialize();
}
~CPyInstance()
{
Py_Finalize();
}
};
class CPyObject {
private:
PyObject* p;
public:
CPyObject() : p(NULL)
{
}
CPyObject(PyObject* _p) : p(_p)
{
}
~CPyObject()
{
Release();
}
PyObject* getObject()
{
return p;
}
PyObject* setObject(PyObject* _p)
{
return (p = _p);
}
PyObject* AddRef()
{
if (p) {
Py_INCREF(p);
}
return p;
}
void Release()
{
if (p) {
Py_XDECREF(p);
}
p = NULL;
}
PyObject* operator ->()
{
return p;
}
bool is()
{
return p ? true : false;
}
operator PyObject* ()
{
return p;
}
PyObject* operator = (PyObject* pp)
{
p = pp;
return p;
}
operator bool()
{
return p ? true : false;
}
};
} /* namespace pywrap */
#endif

View File

@@ -1,192 +0,0 @@
#define PY_SSIZE_T_CLEAN
#include <stdexcept>
#include "PyWrap.h"
#include <string>
#include <map>
#include <sstream>
#include <boost/python/numpy.hpp>
#include <iostream>
namespace pywrap {
namespace np = boost::python::numpy;
PyWrap* PyWrap::wrapper = nullptr;
std::mutex PyWrap::mutex;
CPyInstance* PyWrap::pyInstance = nullptr;
auto moduleClassMap = std::map<std::pair<std::string, std::string>, std::tuple<PyObject*, PyObject*, PyObject*>>();
PyWrap* PyWrap::GetInstance()
{
std::lock_guard<std::mutex> lock(mutex);
if (wrapper == nullptr) {
wrapper = new PyWrap();
pyInstance = new CPyInstance();
PyRun_SimpleString("import warnings;warnings.filterwarnings('ignore')");
}
return wrapper;
}
void PyWrap::RemoveInstance()
{
if (wrapper != nullptr) {
if (pyInstance != nullptr) {
delete pyInstance;
}
pyInstance = nullptr;
if (wrapper != nullptr) {
delete wrapper;
}
wrapper = nullptr;
}
}
void PyWrap::importClass(const clfId_t id, const std::string& moduleName, const std::string& className)
{
std::lock_guard<std::mutex> lock(mutex);
auto result = moduleClassMap.find(id);
if (result != moduleClassMap.end()) {
return;
}
PyObject* module = PyImport_ImportModule(moduleName.c_str());
if (PyErr_Occurred()) {
errorAbort("Couldn't import module " + moduleName);
}
PyObject* classObject = PyObject_GetAttrString(module, className.c_str());
if (PyErr_Occurred()) {
errorAbort("Couldn't find class " + className);
}
PyObject* instance = PyObject_CallObject(classObject, NULL);
if (PyErr_Occurred()) {
errorAbort("Couldn't create instance of class " + className);
}
moduleClassMap.insert({ id, { module, classObject, instance } });
}
void PyWrap::clean(const clfId_t id)
{
// Remove Python interpreter if no more modules imported left
std::lock_guard<std::mutex> lock(mutex);
auto result = moduleClassMap.find(id);
if (result == moduleClassMap.end()) {
return;
}
Py_DECREF(std::get<0>(result->second));
Py_DECREF(std::get<1>(result->second));
Py_DECREF(std::get<2>(result->second));
moduleClassMap.erase(result);
if (PyErr_Occurred()) {
PyErr_Print();
errorAbort("Error cleaning module ");
}
// With boost you can't remove the interpreter
// https://www.boost.org/doc/libs/1_83_0/libs/python/doc/html/tutorial/tutorial/embedding.html#tutorial.embedding.getting_started
// if (moduleClassMap.empty()) {
// RemoveInstance();
// }
}
void PyWrap::errorAbort(const std::string& message)
{
std::cerr << message << std::endl;
PyErr_Print();
RemoveInstance();
exit(1);
}
PyObject* PyWrap::getClass(const clfId_t id)
{
auto item = moduleClassMap.find(id);
if (item == moduleClassMap.end()) {
errorAbort("Module not found");
}
return std::get<2>(item->second);
}
std::string PyWrap::callMethodString(const clfId_t id, const std::string& method)
{
PyObject* instance = getClass(id);
PyObject* result;
try {
if (!(result = PyObject_CallMethod(instance, method.c_str(), NULL)))
errorAbort("Couldn't call method " + method);
}
catch (const std::exception& e) {
errorAbort(e.what());
}
std::string value = PyUnicode_AsUTF8(result);
Py_XDECREF(result);
return value;
}
std::string PyWrap::sklearnVersion()
{
return "1.0";
// CPyObject data = PyRun_SimpleString("import sklearn;return sklearn.__version__");
// std::string result = PyUnicode_AsUTF8(data);
// return result;
}
std::string PyWrap::version(const clfId_t id)
{
return callMethodString(id, "version");
}
void PyWrap::setHyperparameters(const clfId_t id, const json& hyperparameters)
{
// Set hyperparameters as attributes of the class
PyObject* pValue;
PyObject* instance = getClass(id);
for (const auto& [key, value] : hyperparameters.items()) {
std::stringstream oss;
oss << value.type_name();
if (oss.str() == "string") {
pValue = Py_BuildValue("s", value.get<std::string>().c_str());
} else {
if (value.is_number_integer()) {
pValue = Py_BuildValue("i", value.get<int>());
} else {
pValue = Py_BuildValue("f", value.get<double>());
}
}
int res = PyObject_SetAttrString(instance, key.c_str(), pValue);
if (res == -1 && PyErr_Occurred()) {
Py_XDECREF(pValue);
errorAbort("Couldn't set attribute " + key + "=" + value.dump());
}
Py_XDECREF(pValue);
}
}
void PyWrap::fit(const clfId_t id, CPyObject& X, CPyObject& y)
{
PyObject* instance = getClass(id);
CPyObject result;
CPyObject method = PyUnicode_FromString("fit");
try {
if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL)))
errorAbort("Couldn't call method fit");
}
catch (const std::exception& e) {
errorAbort(e.what());
}
}
PyObject* PyWrap::predict(const clfId_t id, CPyObject& X)
{
PyObject* instance = getClass(id);
PyObject* result;
CPyObject method = PyUnicode_FromString("predict");
try {
if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), NULL)))
errorAbort("Couldn't call method predict");
}
catch (const std::exception& e) {
errorAbort(e.what());
}
Py_INCREF(result);
return result; // Caller must free this object
}
double PyWrap::score(const clfId_t id, CPyObject& X, CPyObject& y)
{
PyObject* instance = getClass(id);
CPyObject result;
CPyObject method = PyUnicode_FromString("score");
try {
if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL)))
errorAbort("Couldn't call method score");
}
catch (const std::exception& e) {
errorAbort(e.what());
}
double resultValue = PyFloat_AsDouble(result);
return resultValue;
}
}

View File

@@ -1,47 +0,0 @@
#ifndef PYWRAP_H
#define PYWRAP_H
#include "boost/python/detail/wrap_python.hpp"
#include <string>
#include <map>
#include <tuple>
#include <mutex>
#include <nlohmann/json.hpp>
#include "PyHelper.hpp"
#include "TypeId.h"
#pragma once
namespace pywrap {
/*
Singleton class to handle Python/numpy interpreter.
*/
using json = nlohmann::json;
class PyWrap {
public:
PyWrap() = default;
PyWrap(PyWrap& other) = delete;
static PyWrap* GetInstance();
void operator=(const PyWrap&) = delete;
~PyWrap() = default;
std::string callMethodString(const clfId_t id, const std::string& method);
std::string sklearnVersion();
std::string version(const clfId_t id);
void setHyperparameters(const clfId_t id, const json& hyperparameters);
void fit(const clfId_t id, CPyObject& X, CPyObject& y);
PyObject* predict(const clfId_t id, CPyObject& X);
double score(const clfId_t id, CPyObject& X, CPyObject& y);
void clean(const clfId_t id);
void importClass(const clfId_t id, const std::string& moduleName, const std::string& className);
PyObject* getClass(const clfId_t id);
private:
// Only call RemoveInstance from clean method
static void RemoveInstance();
void errorAbort(const std::string& message);
// No need to use static map here, since this class is a singleton
std::map<clfId_t, std::tuple<PyObject*, PyObject*, PyObject*>> moduleClassMap;
static CPyInstance* pyInstance;
static PyWrap* wrapper;
static std::mutex mutex;
};
} /* namespace pywrap */
#endif /* PYWRAP_H */

View File

@@ -1,18 +0,0 @@
#include "PyClf.h"
namespace PyClassifiers {
PyClf::PyClf(const std::std::string& name) : name(name)
{
env = platform::DotEnv();
}
PyClf::~PyClf()
{
}
} /* namespace PyClassifiers */

View File

@@ -1,11 +0,0 @@
#include "RandomForest.h"
namespace pywrap {
void RandomForest::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "n_estimators", "n_jobs", "random_state" };
checkHyperparameters(validKeys, hyperparameters);
this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */

View File

@@ -1,13 +0,0 @@
#ifndef RANDOMFOREST_H
#define RANDOMFOREST_H
#include "PyClassifier.h"
namespace pywrap {
class RandomForest : public PyClassifier {
public:
RandomForest() : PyClassifier("sklearn.ensemble", "RandomForestClassifier", true) {};
~RandomForest() = default;
void setHyperparameters(nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* RANDOMFOREST_H */

View File

@@ -1,15 +0,0 @@
#include "STree.h"
namespace pywrap {
std::string STree::graph()
{
return callMethodString("graph");
}
void STree::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "C", "kernel", "max_iter", "max_depth", "random_state", "multiclass_strategy" };
checkHyperparameters(validKeys, hyperparameters);
this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */

View File

@@ -1,15 +0,0 @@
#ifndef STREE_H
#define STREE_H
#include "nlohmann/json.hpp"
#include "PyClassifier.h"
namespace pywrap {
class STree : public PyClassifier {
public:
STree() : PyClassifier("stree", "Stree") {};
~STree() = default;
std::string graph();
void setHyperparameters(nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* STREE_H */

View File

@@ -1,11 +0,0 @@
#include "SVC.h"
namespace pywrap {
void SVC::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const std::vector<std::string> validKeys = { "C", "gamma", "kernel", "random_state" };
checkHyperparameters(validKeys, hyperparameters);
this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */

View File

@@ -1,14 +0,0 @@
#ifndef SVC_H
#define SVC_H
#include "PyClassifier.h"
namespace pywrap {
class SVC : public PyClassifier {
public:
SVC() : PyClassifier("sklearn.svm", "SVC", true) {};
~SVC() = default;
void setHyperparameters(nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* STREE_H */

View File

@@ -1,6 +0,0 @@
#ifndef TYPEDEF_H
#define TYPEDEF_H
namespace pywrap {
typedef uint64_t clfId_t;
}
#endif /* TYPEDEF_H */

View File

@@ -1,18 +0,0 @@
#include "XGBoost.h"
See https ://stackoverflow.com/questions/36071672/using-xgboost-in-c
namespace pywrap {
std::string XGBoost::version()
{
return callMethodString("1.0");
}
} /* namespace pywrap */

View File

@@ -1,13 +0,0 @@
#ifndef XGBOOST_H
#define XGBOOST_H
#include "PyClassifier.h"
namespace pywrap {
class XGBoost : public PyClassifier {
public:
XGBoost() : PyClassifier("xgboost", "XGBClassifier") {};
~XGBoost() = default;
std::string version();
};
} /* namespace pywrap */
#endif /* XGBOOST_H */

View File

@@ -1,18 +1,16 @@
if(ENABLE_TESTING)
set(TEST_BAYESNET "unit_tests_bayesnet")
set(TEST_PLATFORM "unit_tests_platform")
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCES})
set(TEST_SOURCES_PLATFORM TestFolding.cc TestUtils.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc)
include_directories(
${BayesNet_SOURCE_DIR}/src/BayesNet
${BayesNet_SOURCE_DIR}/src/Platform
${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/mdlp
${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/json/include
${CMAKE_BINARY_DIR}/configured_files/include
)
set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCES})
add_executable(${TEST_BAYESNET} ${TEST_SOURCES_BAYESNET})
add_executable(${TEST_PLATFORM} ${TEST_SOURCES_PLATFORM})
target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain)
target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain)
add_test(NAME ${TEST_BAYESNET} COMMAND ${TEST_BAYESNET})
add_test(NAME ${TEST_PLATFORM} COMMAND ${TEST_PLATFORM})
endif(ENABLE_TESTING)

View File

@@ -2,9 +2,9 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <std::vector>
#include <vector>
#include <map>
#include <std::string>
#include <string>
#include "KDB.h"
#include "TAN.h"
#include "SPODE.h"
@@ -16,6 +16,11 @@
#include "AODELd.h"
#include "TestUtils.h"
TEST_CASE("Library check version", "[BayesNet]")
{
auto clf = bayesnet::KDB(2);
REQUIRE(clf.getVersion() == "1.0.1");
}
TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
{
map <pair<std::string, std::string>, float> scores = {
@@ -126,7 +131,7 @@ TEST_CASE("Models features", "[BayesNet]")
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::TAN();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 6);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
REQUIRE(clf.graph("Test") == graph);
@@ -136,6 +141,36 @@ TEST_CASE("Get num features & num edges", "[BayesNet]")
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::KDB(2);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 6);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 8);
}
}
TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "CFS"} });
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("BoostAODE test used features in train note", "[BayesNet]")
{
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({
{"ascending",true},
{"convergence", true},
{"repeatSparent",true},
{"select_features","CFS"}
});
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 72);
REQUIRE(clf.getNumberOfEdges() == 120);
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8");
REQUIRE(clf.getNotes()[2] == "Number of models: 8");
}

View File

@@ -1,11 +1,11 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <std::string>
#include <string>
#include "TestUtils.h"
#include "Network.h"
void buildModel(bayesnet::Network& net, const std::vector<std::string>& features, const std::std::string& className)
void buildModel(bayesnet::Network& net, const std::vector<std::string>& features, const std::string& className)
{
std::vector<pair<int, int>> network = { {0, 1}, {0, 2}, {1, 3} };
for (const auto& feature : features) {
@@ -25,6 +25,7 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
auto raw = RawDatasets("iris", true);
auto net = bayesnet::Network();
double threshold = 1e-4;
SECTION("Test get features")
{
@@ -167,97 +168,44 @@ TEST_CASE("Test Bayesian Network", "[BayesNet]")
REQUIRE(str[5] == "C [shape=circle] \n");
REQUIRE(str[6] == "}\n");
}
// SECTION("Test predict")
// {
// auto net = bayesnet::Network();
// net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
// std::vector<std::vector<int>> test = { {1, 2, 0, 1}, {0, 1, 2, 0}, {1, 1, 1, 1}, {0, 0, 0, 0}, {2, 2, 2, 2} };
// std::vector<int> y_test = { 0, 1, 1, 0, 2 };
// auto y_pred = net.predict(test);
// REQUIRE(y_pred == y_test);
// }
// SECTION("Test predict_proba")
// {
// auto net = bayesnet::Network();
// net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
// std::vector<std::vector<int>> test = { {1, 2, 0, 1}, {0, 1, 2, 0}, {1, 1, 1, 1}, {0, 0, 0, 0}, {2, 2, 2, 2} };
// auto y_test = { 0, 1, 1, 0, 2 };
// auto y_pred = net.predict(test);
// REQUIRE(y_pred == y_test);
// }
}
// SECTION("Test score")
// {
// auto net = bayesnet::Network();
// net.fit(Xd, y, weights, features, className, states);
// auto test = { {1, 2, 0, 1}, {0, 1, 2, 0}, {1, 1, 1, 1}, {0, 0, 0, 0}, {2, 2, 2, 2} };
// auto score = net.score(X, y);
// REQUIRE(score == Catch::Approx();
// }
//
//
// SECTION("Test graph")
// {
// auto net = bayesnet::Network();
// net.addNode("A");
// net.addNode("B");
// net.addNode("C");
// net.addEdge("A", "B");
// net.addEdge("A", "C");
// auto str = net.graph("Test Graph");
// REQUIRE(str.size() == 6);
// REQUIRE(str[0] == "digraph \"Test Graph\" {");
// REQUIRE(str[1] == " A -> B;");
// REQUIRE(str[2] == " A -> C;");
// REQUIRE(str[3] == " B [shape=ellipse];");
// REQUIRE(str[4] == " C [shape=ellipse];");
// REQUIRE(str[5] == "}");
// }
// SECTION("Test initialize")
// {
// auto net = bayesnet::Network();
// net.addNode("A");
// net.addNode("B");
// net.addNode("C");
// net.addEdge("A", "B");
// net.addEdge("A", "C");
// net.initialize();
// REQUIRE(net.getNodes().size() == 0);
// REQUIRE(net.getEdges().size() == 0);
// REQUIRE(net.getFeatures().size() == 0);
// REQUIRE(net.getClassNumStates() == 0);
// REQUIRE(net.getClassName().empty());
// REQUIRE(net.getStates() == 0);
// REQUIRE(net.getSamples().numel() == 0);
// }
// SECTION("Test dump_cpt")
// {
// auto net = bayesnet::Network();
// net.addNode("A");
// net.addNode("B");
// net.addNode("C");
// net.addEdge("A", "B");
// net.addEdge("A", "C");
// net.setClassName("C");
// net.setStates({ {"A", {0, 1}}, {"B", {0, 1}}, {"C", {0, 1, 2}} });
// net.fit({ {0, 0}, {0, 1}, {1, 0}, {1, 1} }, { 0, 1, 1, 2 }, {}, { "A", "B" }, "C", { {"A", {0, 1}}, {"B", {0, 1}}, {"C", {0, 1, 2}} });
// net.dump_cpt();
// // TODO: Check that the file was created and contains the expected data
// }
// SECTION("Test version")
// {
// auto net = bayesnet::Network();
// REQUIRE(net.version() == "0.2.0");
// }
// }
// }
SECTION("Test predict")
{
auto net = bayesnet::Network();
buildModel(net, raw.featuresv, raw.classNamev);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
std::vector<int> y_test = { 2, 2, 0, 2, 1 };
auto y_pred = net.predict(test);
REQUIRE(y_pred == y_test);
}
SECTION("Test predict_proba")
{
auto net = bayesnet::Network();
buildModel(net, raw.featuresv, raw.classNamev);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
std::vector<std::vector<double>> y_test = {
{0.450237, 0.0866621, 0.463101},
{0.244443, 0.0925922, 0.662964},
{0.913441, 0.0125857, 0.0739732},
{0.450237, 0.0866621, 0.463101},
{0.0135226, 0.971726, 0.0147519}
};
auto y_pred = net.predict_proba(test);
REQUIRE(y_pred.size() == 5);
REQUIRE(y_pred[0].size() == 3);
for (int i = 0; i < y_pred.size(); ++i) {
for (int j = 0; j < y_pred[i].size(); ++j) {
REQUIRE(y_pred[i][j] == Catch::Approx(y_test[i][j]).margin(threshold));
}
}
}
SECTION("Test score")
{
auto net = bayesnet::Network();
buildModel(net, raw.featuresv, raw.classNamev);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = net.score(raw.Xv, raw.yv);
REQUIRE(score == Catch::Approx(0.97333333).margin(threshold));
}
}

View File

@@ -1,95 +0,0 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "TestUtils.h"
#include "Folding.h"
TEST_CASE("KFold Test", "[Platform][KFold]")
{
// Initialize a KFold object with k=5 and a seed of 19.
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
auto raw = RawDatasets(file_name, true);
int nFolds = 5;
platform::KFold kfold(nFolds, raw.nSamples, 19);
int number = raw.nSamples * (kfold.getNumberOfFolds() - 1) / kfold.getNumberOfFolds();
SECTION("Number of Folds")
{
REQUIRE(kfold.getNumberOfFolds() == nFolds);
}
SECTION("Fold Test")
{
// Test each fold's size and contents.
for (int i = 0; i < nFolds; ++i) {
auto [train_indices, test_indices] = kfold.getFold(i);
bool result = train_indices.size() == number || train_indices.size() == number + 1;
REQUIRE(result);
REQUIRE(train_indices.size() + test_indices.size() == raw.nSamples);
}
}
}
map<int, int> counts(std::vector<int> y, std::vector<int> indices)
{
map<int, int> result;
for (auto i = 0; i < indices.size(); ++i) {
result[y[indices[i]]]++;
}
return result;
}
TEST_CASE("StratifiedKFold Test", "[Platform][StratifiedKFold]")
{
// Initialize a StratifiedKFold object with k=3, using the y std::vector, and a seed of 17.
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
int nFolds = GENERATE(3, 5, 10);
auto raw = RawDatasets(file_name, true);
platform::StratifiedKFold stratified_kfoldt(nFolds, raw.yt, 17);
platform::StratifiedKFold stratified_kfoldv(nFolds, raw.yv, 17);
int number = raw.nSamples * (stratified_kfoldt.getNumberOfFolds() - 1) / stratified_kfoldt.getNumberOfFolds();
SECTION("Stratified Number of Folds")
{
REQUIRE(stratified_kfoldt.getNumberOfFolds() == nFolds);
}
SECTION("Stratified Fold Test")
{
// Test each fold's size and contents.
auto counts = map<int, std::vector<int>>();
// Initialize the counts per Fold
for (int i = 0; i < nFolds; ++i) {
counts[i] = std::vector<int>(raw.classNumStates, 0);
}
// Check fold and compute counts of each fold
for (int fold = 0; fold < nFolds; ++fold) {
auto [train_indicest, test_indicest] = stratified_kfoldt.getFold(fold);
auto [train_indicesv, test_indicesv] = stratified_kfoldv.getFold(fold);
REQUIRE(train_indicest == train_indicesv);
REQUIRE(test_indicest == test_indicesv);
// In the worst case scenario, the number of samples in the training set is number + raw.classNumStates
// because in that fold can come one remainder sample from each class.
REQUIRE(train_indicest.size() <= number + raw.classNumStates);
// If the number of samples in any class is less than the number of folds, then the fold is faulty.
// and the number of samples in the training set + test set will be less than nSamples
if (!stratified_kfoldt.isFaulty()) {
REQUIRE(train_indicest.size() + test_indicest.size() == raw.nSamples);
} else {
REQUIRE(train_indicest.size() + test_indicest.size() <= raw.nSamples);
}
auto train_t = torch::tensor(train_indicest);
auto ytrain = raw.yt.index({ train_t });
// Check that the class labels have been equally assign to each fold
for (const auto& idx : train_indicest) {
counts[fold][raw.yt[idx].item<int>()]++;
}
}
// Test the fold counting of every class
for (int fold = 0; fold < nFolds; ++fold) {
for (int j = 1; j < nFolds - 1; ++j) {
for (int k = 0; k < raw.classNumStates; ++k) {
REQUIRE(abs(counts.at(fold).at(k) - counts.at(j).at(k)) <= 1);
}
}
}
}
}

Some files were not shown because too many files have changed in this diff Show More