Compare commits

...

173 Commits

Author SHA1 Message Date
65a96851ef Check min number of nested folds 2024-01-04 11:01:59 +01:00
722da7f781 Keep only mpi b_grid compute 2024-01-04 01:21:56 +01:00
b1833a5feb Add reset color to final progress bar 2024-01-03 22:45:16 +01:00
41a0bd4ddd fix dataset name mistakes 2024-01-03 17:15:57 +01:00
9ab4fc7d76 Fix some mistakes in methods 2024-01-03 11:53:46 +01:00
beadb7465f Complete first approach 2023-12-31 12:02:13 +01:00
652e5f623f Add todo comments 2023-12-28 23:32:24 +01:00
b7fef9a99d Remove kk file 2023-12-28 23:24:59 +01:00
343269d48c Fix syntax errors 2023-12-28 23:21:50 +01:00
21c4c6df51 Fix first mistakes in structure 2023-12-25 19:33:52 +01:00
702f086706 Update miniconda instructions 2023-12-23 19:54:00 +01:00
981bc8f98b Fix install message in readme 2023-12-23 01:00:55 +01:00
e0b7b2d316 Set structure & protocol of producer-consumer 2023-12-22 12:47:13 +01:00
9b9e91e856 Merge pull request 'mpi_grid' (#14) from mpi_grid into main
Reviewed-on: #14
2023-12-18 09:05:55 +00:00
18e8e84284 Add openmpi instructions for Oracle Linux 2023-12-17 12:19:50 +01:00
7de11b0e6d Fix format of duration 2023-12-17 01:45:04 +01:00
9b8db37a4b Fix duration of task not set 2023-12-16 19:31:45 +01:00
49b26bd04b fix duration output 2023-12-16 12:53:25 +01:00
b5b5b48864 Update grid progress bar output 2023-12-15 18:09:17 +01:00
19586a3a5a Fix pesky error allocating memory in workers 2023-12-15 01:54:13 +01:00
ffe6d37436 Add messages to control trace 2023-12-14 21:06:43 +01:00
b73f4be146 First try with complete algorithm 2023-12-14 15:55:08 +01:00
dbf2f35502 First compiling version 2023-12-12 18:57:57 +01:00
db9e80a70e Create build tasks 2023-12-12 12:15:22 +01:00
40ae4ad7f9 Include mpi in CMakeLists 2023-12-11 09:06:05 +01:00
234342f2de Add mpi parameter to b_grid 2023-12-10 22:33:17 +01:00
aa0936abd1 Add --exclude parameter to b_grid to exclude datasets 2023-12-08 12:09:08 +01:00
f0d6f0cc38 Fix sample building 2023-12-04 19:12:44 +01:00
cc316bb8d3 Add colors to results of gridsearch 2023-12-04 17:34:00 +01:00
0723564e66 Fix some output in gridsearch 2023-12-03 17:55:44 +01:00
2e95e8999d Complete nested gridsearch 2023-12-03 12:37:25 +01:00
fb9b395748 Begin output nested grid 2023-12-02 13:19:12 +01:00
03e4437fea refactor gridsearch to have only one go method 2023-12-02 10:59:05 +01:00
33cd32c639 Add header to grid output and report 2023-12-01 10:30:53 +01:00
c460ef46ed Refactor gridsearch method 2023-11-30 11:01:37 +01:00
dee9c674da Refactor grid input hyperparameter file 2023-11-29 18:24:34 +01:00
e3f6dc1e0b Fix tolerance hyperp error & gridsearch 2023-11-29 12:33:50 +01:00
460d20a402 Add reports to gridsearch 2023-11-29 00:26:48 +01:00
8dbbb65a2f Add only parameter to gridsearch 2023-11-28 10:08:40 +01:00
d06bf187b2 Implement Random Forest nodes/leaves/depth 2023-11-28 00:35:38 +01:00
4addaefb47 Implement sklearn version in PyWrap 2023-11-27 22:34:34 +01:00
82964190f6 Add nodes/leaves/depth to STree & ODTE 2023-11-27 10:57:57 +01:00
4fefe9a1d2 Add grid input info to grid output 2023-11-26 16:07:32 +01:00
7c12dd25e5 Fix upper case typo 2023-11-26 10:55:32 +01:00
c713c0b1df Add continue from parameter to gridsearch 2023-11-26 10:36:09 +01:00
64069a6cb7 Adapt b_main to the new hyperparam file format 2023-11-25 16:52:25 +01:00
ba2a3f9523 Merge pull request 'gridsearch' (#13) from gridsearch into main
Reviewed-on: #13
2023-11-25 11:16:13 +00:00
f94e2d6a27 Add quiet parameter 2023-11-24 21:16:20 +01:00
2121ba9b98 Refactor input grid parameters to json file 2023-11-24 09:57:29 +01:00
8b7b59d42b Complete first step 2023-11-23 12:59:21 +01:00
bbe5302ab1 Add info to output 2023-11-22 16:38:50 +01:00
c2eb727fc7 Complete output interface of gridsearch 2023-11-22 16:30:04 +01:00
fb347ed5b9 Begin gridsearch implementation 2023-11-22 12:22:30 +01:00
b657762c0c Generate combinations sample 2023-11-22 00:18:24 +01:00
495d8a8528 Begin implementing grid combinations 2023-11-21 13:11:14 +01:00
4628e48d3c Build gridsearch structure 2023-11-20 23:32:34 +01:00
5876be4b24 Add more install instructions of Boost to README 2023-11-20 20:39:22 +01:00
dc3400197f Add coment todo impelemt number of nodes 2023-11-20 01:14:13 +01:00
26d3a57782 Add info to invalid hyperparameter exception 2023-11-19 23:02:28 +01:00
4f3a04058f Refactor Hyperparameters management 2023-11-19 22:36:27 +01:00
89c4613591 Implement hyperparameters with json file 2023-11-18 11:56:10 +01:00
28f3d87e32 Add Python Classifiers
Add STree, Odte, SVC & RandomForest Classifiers
Remove using namespace ... in project
2023-11-17 11:11:05 +01:00
e8d2c9fc0b Set intolerant convergence 2023-11-17 10:26:25 +01:00
d3cb580387 Remove n_jobs from STree 2023-11-17 10:10:31 +01:00
f088df14fd Restore the Creation model position in experiment 2023-11-17 01:10:46 +01:00
e2249eace7 Disable Warning messages in python clfs
Disable removing Python env
2023-11-16 22:38:46 +01:00
64f5a7f14a Fix header in example 2023-11-16 17:03:40 +01:00
408db2aad5 Mark override fit funtcion 2023-11-14 18:59:41 +01:00
e03efb5f63 set tolerance=0 if feature selection in BoostAODE 2023-11-14 10:12:02 +01:00
f617886133 Add new models to example 2023-11-14 09:12:25 +01:00
69ad660040 Refactor version method in PyClassifier 2023-11-13 13:59:06 +01:00
431b3a3aa5 Fit PyWrap into BayesNet 2023-11-13 11:13:32 +01:00
6a23e2cc26 Add CMakelist integration 2023-11-12 22:14:29 +01:00
f6e00530be Add Pywrap sources 2023-11-12 21:43:07 +01:00
f9258e43b9 Remove using namespace from Library 2023-11-08 18:45:35 +01:00
92820555da Simple fix 2023-10-28 10:56:47 +02:00
5a3af51826 Activate best score in odte 2023-10-25 10:23:42 +02:00
a8f9800631 Fix mistake when no results in manage 2023-10-24 19:44:23 +02:00
84cec0c1e0 Add results files affected in best results excel 2023-10-24 16:18:52 +02:00
130139f644 Update formulas to use letters in ranges in excel 2023-10-24 13:06:31 +02:00
651f84b562 Fix mistake in conditional format in bestresults 2023-10-24 11:18:19 +02:00
553ab0fa22 Add conditional format to BestResults Excel 2023-10-24 10:56:41 +02:00
4975feabff Fix mistake in node count 2023-10-23 22:46:10 +02:00
32293af69f Fix header in manage 2023-10-23 17:04:59 +02:00
858664be2d Add total number of results in manage 2023-10-23 16:22:15 +02:00
1f705f6018 Refactor BestScore and add experiment to .env 2023-10-23 16:12:52 +02:00
7bcd2eed06 Add variable width of dataset name in reports 2023-10-22 22:58:52 +02:00
833acefbb3 Fix index limits mistake in manage 2023-10-22 20:21:50 +02:00
26b649ebae Refactor ManageResults and CommandParser 2023-10-22 20:03:34 +02:00
080eddf9cd Fix hyperparameters output in b_best 2023-10-20 22:52:48 +02:00
04e754b2f5 Adjust filename and hyperparameters in reports 2023-10-20 11:12:46 +02:00
38423048bd Add excel to best report of model 2023-10-19 18:12:55 +02:00
64fc97b892 Rename utilities sources to match final names 2023-10-19 09:57:04 +02:00
2c2159f192 Add quiet mode to b_main
Reduce output when --quiet is set, not showing fold info
2023-10-17 21:51:53 +02:00
6765552a7c Update submodule versions 2023-10-16 19:21:57 +02:00
f72aa5b9a6 Merge pull request 'Create Boost_CFS' (#11) from Boost_CFS into main
Add hyper parameter to BoostAODE. This hyper parameter decides if we select features with cfs/fcbf/iwss before start building models and build a Spode with the selected features.
The hyperparameter is select_features
2023-10-15 09:22:14 +00:00
fa7fe081ad Fix xlsx library finding 2023-10-15 11:19:58 +02:00
660e783517 Update validation for feature selection 2023-10-14 13:32:09 +02:00
b35532dd9e Implement IWSS and FCBF too for BoostAODE 2023-10-14 13:12:04 +02:00
6ef49385ea Remove unneeded method declaration FeatureSelect 2023-10-14 11:30:32 +02:00
6d5a25cdc8 Refactor CFS class creating abstract base class 2023-10-14 11:27:46 +02:00
d00b08cbe8 Fix Header for Linux 2023-10-13 14:26:47 +02:00
977ff6fddb Update CMakeLists for Linux 2023-10-13 14:01:52 +02:00
54b8939f35 Prepare BoostAODE first try 2023-10-13 13:46:22 +02:00
5022a4dc90 Complete CFS tested with Python mufs 2023-10-13 12:29:25 +02:00
40d1dad5d8 Begin CFS implementation 2023-10-11 21:17:26 +02:00
47e2b138c5 Complete first working cfs 2023-10-11 11:33:29 +02:00
e7ded68267 First cfs working version 2023-10-10 23:00:38 +02:00
ca833a34f5 try openssl sha256 2023-10-10 18:16:43 +02:00
df9b4c48d2 Begin CFS initialization 2023-10-10 13:39:11 +02:00
f288bbd6fa Begin adding cfs to BoostAODE 2023-10-10 11:52:39 +02:00
7d8aca4f59 Add Locale shared config to reports 2023-10-09 19:41:29 +02:00
8fdad78a8c Continue Test Network 2023-10-09 11:25:30 +02:00
e3ae073333 Continue test Network 2023-10-08 15:54:58 +02:00
4b732e76c2 MST change unordered_set to list 2023-10-07 19:08:13 +02:00
fe5fead27e Begin Fix Test MST 2023-10-07 01:43:26 +02:00
8c3864f3c8 Complete Folding Test 2023-10-07 01:23:36 +02:00
1287160c47 Refactor makefile to use variables 2023-10-07 00:16:25 +02:00
2f58807322 Begin refactor CMakeLists debug/release paths 2023-10-06 19:32:29 +02:00
17e079edd5 Begin Test Folding 2023-10-06 17:08:54 +02:00
b9e0028e9d Refactor Makefile 2023-10-06 01:28:27 +02:00
e0d39fe631 Fix BayesMetrics Test 2023-10-06 01:14:55 +02:00
36b0277576 Add Maximum Spanning Tree test 2023-10-05 15:45:36 +02:00
da8d018ec4 Refactor Makefile 2023-10-05 11:45:00 +02:00
5f0676691c Add First BayesMetrics Tests 2023-10-05 01:14:16 +02:00
3448fb1299 Refactor Tests and add BayesMetrics test 2023-10-04 23:19:23 +02:00
5e938d5cca Add ranks sheet to excel best results 2023-10-04 16:26:57 +02:00
55e742438f Add constant references to Statistics 2023-10-04 13:40:45 +02:00
c4ae3fe429 Add Control model rank info to report 2023-10-04 12:42:35 +02:00
93e4ff94db Add significance level as parameter in best 2023-10-02 15:46:40 +02:00
57c27f739c Remove unused code in BestResults 2023-10-02 15:31:02 +02:00
a434d7f1ae Add a Linux config in launch.json 2023-09-30 18:44:21 +02:00
294666c516 Fix a Linux problem in Datasets 2023-09-30 18:43:47 +02:00
fd04e78ad9 Restore sample.cc 2023-09-29 18:50:25 +02:00
66ec1b343b Remove platformUtils and split Datasets & Dataset 2023-09-29 18:20:46 +02:00
bb423da42f Add csv and R_dat files to platform 2023-09-29 13:52:50 +02:00
db17c14042 Change names of executables to b_... 2023-09-29 09:17:50 +02:00
a4401cb78f Linux CMakeLists.txt adjustment 2023-09-29 00:30:47 +02:00
9d3d9cc6c6 Complete Excel output for bestResults with Friedman test 2023-09-28 18:52:37 +02:00
cfcf3c16df Add best results Excel 2023-09-28 17:12:04 +02:00
85202260f3 Separate specific Excel methods to ExcelFile 2023-09-28 13:07:11 +02:00
82acb3cab5 Enhance output of Best results reports 2023-09-28 12:08:56 +02:00
623ceed396 Merge pull request 'Add Friedman Test & post hoc tests to BestResults' (#10) from boost into main
Reviewed-on: #10
2023-09-28 07:44:55 +00:00
926de2bebd Add boost info to README 2023-09-28 09:44:33 +02:00
71704e3547 Enhance output info in Statistics 2023-09-28 01:27:18 +02:00
3b06534327 Remove duplicated code in BestResults 2023-09-28 00:59:34 +02:00
ac89a451e3 Duplicate statistics tests in class 2023-09-28 00:45:15 +02:00
00c6cf663b Fix order of output in posthoc 2023-09-27 19:11:47 +02:00
5043c12be8 Complete posthoc with Holm adjust 2023-09-27 18:34:16 +02:00
11320e2cc7 Complete friedman test as in exreport 2023-09-27 12:36:03 +02:00
ce66483b65 Update boost version requirement for Linux 2023-09-26 14:12:53 +02:00
cab8e14b2d Add friedman hyperparameter 2023-09-26 11:26:59 +02:00
f0d0abe891 Add boost library link to linux build 2023-09-26 01:07:50 +02:00
dcba146e12 Begin adding Friedman test to BestResults 2023-09-26 01:04:59 +02:00
3ea0285119 Fix ranks to match friedman test ranks 2023-09-25 18:38:12 +02:00
e3888e1503 Merge pull request 'bestResults' (#9) from bestResults into main
Reviewed-on: https://gitea.rmontanana.es:3000/rmontanana/BayesNet/pulls/9

Add best results management, build, report, build all & report all
2023-09-25 12:02:17 +00:00
06de13df98 Add date/time to header of report best 2023-09-25 10:04:53 +02:00
de4fa6a04f Add color to totals 2023-09-23 10:30:39 +02:00
3a7bf4e672 Fix ranking order mistake 2023-09-23 01:33:23 +02:00
cd0bc02a74 Add report/build all with totals and ranks 2023-09-23 01:14:02 +02:00
c8597a794e Begin report all models 2023-09-22 18:13:32 +02:00
b30416364d Fix mistake in best results file name 2023-09-22 14:14:39 +02:00
3a16589220 Add best config for debug in vscode 2023-09-22 01:04:36 +02:00
c4f9187e2a Complete best build and report 2023-09-22 01:03:55 +02:00
c4d0a5b4e6 Split Result from Results 2023-09-21 23:30:17 +02:00
7bfafe555f Begin BestResults build 2023-09-21 23:04:11 +02:00
337b6f7e79 Rename BestResult to BestScore 2023-09-21 19:30:07 +02:00
5fa0b957dd Fix mistake in idx range in manage 2023-09-20 19:12:07 +02:00
67252fc41d Fix CMakeLists libxlsxwriter for Linux 2023-09-20 19:02:53 +02:00
94ae9456a0 Fix libxslxwriter linking problem 2023-09-20 18:50:11 +02:00
781993e326 Resolve some warnings 2023-09-20 17:54:15 +02:00
8257a6ae39 Add message of not exist Best Results 2023-09-20 13:50:34 +02:00
fc81730dfc Merge pull request 'Exchange OpenXLSX to libxlsxwriter' (#8) from libxlsxwriter into main
Add multiple sheets to excel file
Add format and color to sheets
Add comparison with ZeroR
Add comparison with Best Results
Separate contextual menu from general in manage
2023-09-20 11:17:16 +00:00
146 changed files with 7551 additions and 2259 deletions

3
.gitignore vendored
View File

@@ -31,7 +31,8 @@
*.exe
*.out
*.app
build/
build/**
build_*/**
*.dSYM/**
cmake-build*/**
.idea

10
.gitmodules vendored
View File

@@ -1,15 +1,25 @@
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"]
path = lib/catch2
main = v2.x
update = merge
url = https://github.com/catchorg/Catch2.git
[submodule "lib/argparse"]
path = lib/argparse
url = https://github.com/p-ranav/argparse
master = master
update = merge
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
master = master
update = merge
[submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git
main = main
update = merge

18
.vscode/c_cpp_properties.json vendored Normal file
View File

@@ -0,0 +1,18 @@
{
"configurations": [
{
"name": "Mac",
"includePath": [
"${workspaceFolder}/**"
],
"defines": [],
"macFrameworkPath": [
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks"
],
"cStandard": "c17",
"cppStandard": "c++17",
"compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json"
}
],
"version": 4
}

85
.vscode/launch.json vendored
View File

@@ -5,7 +5,7 @@
"type": "lldb",
"request": "launch",
"name": "sample",
"program": "${workspaceFolder}/build/sample/BayesNetSample",
"program": "${workspaceFolder}/build_debug/sample/BayesNetSample",
"args": [
"-d",
"iris",
@@ -21,46 +21,103 @@
{
"type": "lldb",
"request": "launch",
"name": "experiment",
"program": "${workspaceFolder}/build/src/Platform/main",
"name": "experimentPy",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"BoostAODE",
"-p",
"/Users/rmontanana/Code/discretizbench/datasets",
"STree",
"--stratified",
"-d",
"mfeat-morphological",
"--discretize"
"iris",
//"--discretize"
// "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "gridsearch",
"program": "${workspaceFolder}/build_debug/src/Platform/b_grid",
"args": [
"-m",
"KDB",
"--discretize",
"--continue",
"glass",
"--only",
"--compute"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "experimentBayes",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"TAN",
"--stratified",
"--discretize",
"-d",
"iris",
"--hyperparameters",
"{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "/home/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "best",
"program": "${workspaceFolder}/build_debug/src/Platform/b_best",
"args": [
"-m",
"BoostAODE",
"-s",
"accuracy",
"--build",
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "manage",
"program": "${workspaceFolder}/build/src/Platform/manage",
"program": "${workspaceFolder}/build_debug/src/Platform/b_manage",
"args": [
"-n",
"20"
],
"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "list",
"program": "${workspaceFolder}/build/src/Platform/list",
"program": "${workspaceFolder}/build_debug/src/Platform/b_list",
"args": [],
"cwd": "/Users/rmontanana/Code/discretizbench",
//"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "test",
"program": "${workspaceFolder}/build_debug/tests/unit_tests",
"args": [
"-c=\"Metrics Test\"",
// "-s",
],
"cwd": "${workspaceFolder}/build/tests",
},
{
"name": "Build & debug active file",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/bayesnet",
"program": "${workspaceFolder}/build_debug/bayesnet",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",

View File

@@ -24,18 +24,39 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# Options
# -------
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
option(MPI_ENABLED "Enable MPI options" ON)
if (MPI_ENABLED)
find_package(MPI REQUIRED)
message("MPI_CXX_LIBRARIES=${MPI_CXX_LIBRARIES}")
message("MPI_CXX_INCLUDE_DIRS=${MPI_CXX_INCLUDE_DIRS}")
endif (MPI_ENABLED)
# Boost Library
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost 1.66.0 REQUIRED COMPONENTS python3 numpy3)
if(Boost_FOUND)
message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
include_directories(${Boost_INCLUDE_DIRS})
endif()
# Python
find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED)
message("Python3_LIBRARIES=${Python3_LIBRARIES}")
# CMakes modules
# --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
include(AddGitSubmodule)
if (CODE_COVERAGE)
enable_testing()
include(CodeCoverage)
@@ -55,15 +76,21 @@ add_git_submodule("lib/mdlp")
add_git_submodule("lib/argparse")
add_git_submodule("lib/json")
find_library(XLSXWRITER_LIB NAMES libxlsxwriter.dylib libxlsxwriter.so PATHS ${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/lib)
message("XLSXWRITER_LIB=${XLSXWRITER_LIB}")
# Subdirectories
# --------------
add_subdirectory(config)
add_subdirectory(lib/Files)
add_subdirectory(src/BayesNet)
add_subdirectory(src/Platform)
add_subdirectory(src/PyClassifiers)
add_subdirectory(sample)
file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.hpp)
file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h)
file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cc ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cpp)
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform/*.cc ${BayesNet_SOURCE_DIR}/src/Platform/*.cpp)

121
Makefile
View File

@@ -1,6 +1,26 @@
SHELL := /bin/bash
.DEFAULT_GOAL := help
.PHONY: coverage setup help build test
.PHONY: coverage setup help build test clean debug release
f_release = build_release
f_debug = build_debug
app_targets = b_best b_list b_main b_manage b_grid
test_targets = unit_tests_bayesnet unit_tests_platform
n_procs = -j 16
define ClearTests
@for t in $(test_targets); do \
if [ -f $(f_debug)/tests/$$t ]; then \
echo ">>> Cleaning $$t..." ; \
rm -f $(f_debug)/tests/$$t ; \
fi ; \
done
@nfiles="$(find . -name "*.gcda" -print0)" ; \
if test "${nfiles}" != "" ; then \
find . -name "*.gcda" -print0 | xargs -0 rm 2>/dev/null ;\
fi ;
endef
setup: ## Install dependencies for tests and coverage
@if [ "$(shell uname)" = "Darwin" ]; then \
@@ -11,62 +31,87 @@ setup: ## Install dependencies for tests and coverage
pip install gcovr; \
fi
dest ?= ../discretizbench
copy: ## Copy binary files to selected folder
dest ?= ${HOME}/bin
install: ## Copy binary files to bin folder
@echo "Destination folder: $(dest)"
make build
make buildr
@echo "*******************************************"
@echo ">>> Copying files to $(dest)"
@cp build/src/Platform/main $(dest)
@cp build/src/Platform/list $(dest)
@cp build/src/Platform/manage $(dest)
@echo ">>> Done"
@echo "*******************************************"
@for item in $(app_targets); do \
echo ">>> Copying $$item" ; \
cp $(f_release)/src/Platform/$$item $(dest) ; \
done
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
@echo ">>> Creating dependency graph diagram of the project...";
$(MAKE) debug
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
build: ## Build the main and BayesNetSample
cmake --build build -t main -t BayesNetSample -t manage -t list -j 32
buildd: ## Build the debug targets
cmake --build $(f_debug) -t $(app_targets) BayesNetSample $(n_procs)
clean: ## Clean the debug info
@echo ">>> Cleaning Debug BayesNet ...";
find . -name "*.gcda" -print0 | xargs -0 rm
buildr: ## Build the release targets
cmake --build $(f_release) -t $(app_targets) BayesNetSample $(n_procs)
clean: ## Clean the tests info
@echo ">>> Cleaning Debug BayesNet tests...";
$(call ClearTests)
@echo ">>> Done";
clang-uml: ## Create uml class and sequence diagrams
clang-uml -p --add-compile-flag -I /usr/lib/gcc/x86_64-redhat-linux/8/include/
debug: ## Build a debug version of the project
@echo ">>> Building Debug BayesNet ...";
@if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \
cmake --build build -t main -t BayesNetSample -t manage -t list unit_tests -j 32;
@echo ">>> Building Debug BayesNet...";
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
@mkdir $(f_debug);
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON
@echo ">>> Done";
release: ## Build a Release version of the project
@echo ">>> Building Release BayesNet ...";
@if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \
cmake --build build -t main -t BayesNetSample -t manage -t list -j 32;
@echo ">>> Building Release BayesNet...";
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
@mkdir $(f_release);
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
@echo ">>> Done";
test: ## Run tests
@echo "* Running tests...";
find . -name "*.gcda" -print0 | xargs -0 rm
@cd build; \
cmake --build . --target unit_tests ;
@cd build/tests; \
./unit_tests;
opt = ""
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet & Platform tests...";
@$(MAKE) clean
@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
@for t in $(test_targets); do \
if [ -f $(f_debug)/tests/$$t ]; then \
cd $(f_debug)/tests ; \
./$$t $(opt) ; \
fi ; \
done
@echo ">>> Done";
opt = ""
testp: ## Run platform tests (opt="-s") to verbose output the tests, (opt="-c='Stratified Fold Test'") to run only that section
@echo ">>> Running Platform tests...";
@$(MAKE) clean
@cmake --build $(f_debug) --target unit_tests_platform $(n_procs)
@if [ -f $(f_debug)/tests/unit_tests_platform ]; then cd $(f_debug)/tests ; ./unit_tests_platform $(opt) ; fi ;
@echo ">>> Done";
opt = ""
testb: ## Run BayesNet tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet tests...";
@$(MAKE) clean
@cmake --build $(f_debug) --target unit_tests_bayesnet $(n_procs)
@if [ -f $(f_debug)/tests/unit_tests_bayesnet ]; then cd $(f_debug)/tests ; ./unit_tests_bayesnet $(opt) ; fi ;
@echo ">>> Done";
coverage: ## Run tests and generate coverage report (build/index.html)
@echo "*Building tests...";
find . -name "*.gcda" -print0 | xargs -0 rm
@cd build; \
cmake --build . --target unit_tests ;
@cd build/tests; \
./unit_tests;
gcovr ;
@echo ">>> Building tests with coverage...";
@$(MAKE) test
@cd $(f_debug) ; \
gcovr --config ../gcovr.cfg tests ;
@echo ">>> Done";
help: ## Show help message
@IFS=$$'\n' ; \

View File

@@ -1,21 +1,75 @@
# BayesNet
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
Bayesian Network Classifier with libtorch from scratch
## 0. Setup
### libxlswriter
Before compiling BayesNet.
### Miniconda
To be able to run Python Classifiers such as STree, ODTE, SVC, etc. it is needed to install Miniconda. To do so, download the installer from [Miniconda](https://docs.conda.io/en/latest/miniconda.html) and run it. It is recommended to install it in the home folder.
In Linux sometimes the library libstdc++ is mistaken from the miniconda installation and produces the next message when running the b_xxxx executables:
```bash
libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by b_xxxx)
```
The solution is to erase the libstdc++ library from the miniconda installation:
### MPI
In Linux just install openmpi & openmpi-devel packages. Only if cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
```bash
export MPI_HOME="/usr/lib64/openmpi"
```
In Mac OS X, install mpich with brew and if cmake doesn't find it, edit mpicxx wrapper to remove the ",-commons,use_dylibs" from final_ldflags
```bash
vi /opt/homebrew/bin/mpicx
```
### boost library
[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
The best option is install the packages that the Linux distribution have in its repository. If this is the case:
```bash
sudo dnf install boost-devel
```
If this is not possible and the compressed packaged is installed, the following environment variable has to be set pointing to the folder where it was unzipped to:
```bash
export BOOST_ROOT=/path/to/library/
```
In some cases, it is needed to build the library, to do so:
```bash
cd /path/to/library
mkdir own
./bootstrap.sh --prefix=/path/to/library/own
./b2 install
export BOOST_ROOT=/path/to/library/own/
```
Don't forget to add the export BOOST_ROOT statement to .bashrc or wherever it is meant to be.
### libxlswriter
```bash
cd lib/libxlsxwriter
make
sudo make install
make install DESTDIR=/home/rmontanana/Code PREFIX=
```
It has to be installed in /usr/local/lib otherwise CMakeLists.txt has to be modified accordingly
Environment variable has to be set:
```bash

162
grid_stree.json Normal file
View File

@@ -0,0 +1,162 @@
{
"balance-scale": {
"C": 10000.0,
"gamma": 0.1,
"kernel": "rbf",
"max_iter": 10000
},
"balloons": {
"C": 7,
"gamma": 0.1,
"kernel": "rbf",
"max_iter": 10000
},
"breast-cancer-wisc-diag": {
"C": 0.2,
"max_iter": 10000
},
"breast-cancer-wisc-prog": {
"C": 0.2,
"max_iter": 10000
},
"breast-cancer-wisc": {},
"breast-cancer": {},
"cardiotocography-10clases": {},
"cardiotocography-3clases": {},
"conn-bench-sonar-mines-rocks": {},
"cylinder-bands": {},
"dermatology": {
"C": 55,
"max_iter": 10000
},
"echocardiogram": {
"C": 7,
"gamma": 0.1,
"kernel": "poly",
"max_features": "auto",
"max_iter": 10000
},
"fertility": {
"C": 0.05,
"max_features": "auto",
"max_iter": 10000
},
"haberman-survival": {},
"heart-hungarian": {
"C": 0.05,
"max_iter": 10000
},
"hepatitis": {
"C": 7,
"gamma": 0.1,
"kernel": "rbf",
"max_iter": 10000
},
"ilpd-indian-liver": {},
"ionosphere": {
"C": 7,
"gamma": 0.1,
"kernel": "rbf",
"max_iter": 10000
},
"iris": {},
"led-display": {},
"libras": {
"C": 0.08,
"max_iter": 10000
},
"low-res-spect": {
"C": 0.05,
"max_iter": 10000
},
"lymphography": {
"C": 0.05,
"max_iter": 10000
},
"mammographic": {},
"molec-biol-promoter": {
"C": 0.05,
"gamma": 0.1,
"kernel": "poly",
"max_iter": 10000
},
"musk-1": {
"C": 0.05,
"gamma": 0.1,
"kernel": "poly",
"max_iter": 10000
},
"oocytes_merluccius_nucleus_4d": {
"C": 8.25,
"gamma": 0.1,
"kernel": "poly"
},
"oocytes_merluccius_states_2f": {},
"oocytes_trisopterus_nucleus_2f": {},
"oocytes_trisopterus_states_5b": {
"C": 0.11,
"max_iter": 10000
},
"parkinsons": {},
"pima": {},
"pittsburg-bridges-MATERIAL": {
"C": 7,
"gamma": 0.1,
"kernel": "rbf",
"max_iter": 10000
},
"pittsburg-bridges-REL-L": {},
"pittsburg-bridges-SPAN": {
"C": 0.05,
"max_iter": 10000
},
"pittsburg-bridges-T-OR-D": {},
"planning": {
"C": 7,
"gamma": 10.0,
"kernel": "rbf",
"max_iter": 10000
},
"post-operative": {
"C": 55,
"degree": 5,
"gamma": 0.1,
"kernel": "poly",
"max_iter": 10000
},
"seeds": {
"C": 10000.0,
"max_iter": 10000
},
"statlog-australian-credit": {
"C": 0.05,
"max_features": "auto",
"max_iter": 10000
},
"statlog-german-credit": {},
"statlog-heart": {},
"statlog-image": {
"C": 7,
"max_iter": 10000
},
"statlog-vehicle": {},
"synthetic-control": {
"C": 0.55,
"max_iter": 10000
},
"tic-tac-toe": {
"C": 0.2,
"gamma": 0.1,
"kernel": "poly",
"max_iter": 10000
},
"vertebral-column-2clases": {},
"wine": {
"C": 0.55,
"max_iter": 10000
},
"zoo": {
"C": 0.1,
"max_iter": 10000
}
}

View File

@@ -4,11 +4,9 @@
#include <map>
#include <iostream>
using namespace std;
ArffFiles::ArffFiles() = default;
vector<string> ArffFiles::getLines() const
std::vector<std::string> ArffFiles::getLines() const
{
return lines;
}
@@ -18,48 +16,48 @@ unsigned long int ArffFiles::getSize() const
return lines.size();
}
vector<pair<string, string>> ArffFiles::getAttributes() const
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
{
return attributes;
}
string ArffFiles::getClassName() const
std::string ArffFiles::getClassName() const
{
return className;
}
string ArffFiles::getClassType() const
std::string ArffFiles::getClassType() const
{
return classType;
}
vector<vector<float>>& ArffFiles::getX()
std::vector<std::vector<float>>& ArffFiles::getX()
{
return X;
}
vector<int>& ArffFiles::getY()
std::vector<int>& ArffFiles::getY()
{
return y;
}
void ArffFiles::loadCommon(string fileName)
void ArffFiles::loadCommon(std::string fileName)
{
ifstream file(fileName);
std::ifstream file(fileName);
if (!file.is_open()) {
throw invalid_argument("Unable to open file");
throw std::invalid_argument("Unable to open file");
}
string line;
string keyword;
string attribute;
string type;
string type_w;
std::string line;
std::string keyword;
std::string attribute;
std::string type;
std::string type_w;
while (getline(file, line)) {
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue;
}
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
stringstream ss(line);
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
std::stringstream ss(line);
ss >> keyword >> attribute;
type = "";
while (ss >> type_w)
@@ -74,35 +72,35 @@ void ArffFiles::loadCommon(string fileName)
}
file.close();
if (attributes.empty())
throw invalid_argument("No attributes found");
throw std::invalid_argument("No attributes found");
}
void ArffFiles::load(const string& fileName, bool classLast)
void ArffFiles::load(const std::string& fileName, bool classLast)
{
int labelIndex;
loadCommon(fileName);
if (classLast) {
className = get<0>(attributes.back());
classType = get<1>(attributes.back());
className = std::get<0>(attributes.back());
classType = std::get<1>(attributes.back());
attributes.pop_back();
labelIndex = static_cast<int>(attributes.size());
} else {
className = get<0>(attributes.front());
classType = get<1>(attributes.front());
className = std::get<0>(attributes.front());
classType = std::get<1>(attributes.front());
attributes.erase(attributes.begin());
labelIndex = 0;
}
generateDataset(labelIndex);
}
void ArffFiles::load(const string& fileName, const string& name)
void ArffFiles::load(const std::string& fileName, const std::string& name)
{
int labelIndex;
loadCommon(fileName);
bool found = false;
for (int i = 0; i < attributes.size(); ++i) {
if (attributes[i].first == name) {
className = get<0>(attributes[i]);
classType = get<1>(attributes[i]);
className = std::get<0>(attributes[i]);
classType = std::get<1>(attributes[i]);
attributes.erase(attributes.begin() + i);
labelIndex = i;
found = true;
@@ -110,19 +108,19 @@ void ArffFiles::load(const string& fileName, const string& name)
}
}
if (!found) {
throw invalid_argument("Class name not found");
throw std::invalid_argument("Class name not found");
}
generateDataset(labelIndex);
}
void ArffFiles::generateDataset(int labelIndex)
{
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
auto yy = vector<string>(lines.size(), "");
auto removeLines = vector<int>(); // Lines with missing values
X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
auto yy = std::vector<std::string>(lines.size(), "");
auto removeLines = std::vector<int>(); // Lines with missing values
for (size_t i = 0; i < lines.size(); i++) {
stringstream ss(lines[i]);
string value;
std::stringstream ss(lines[i]);
std::string value;
int pos = 0;
int xIndex = 0;
while (getline(ss, value, ',')) {
@@ -146,21 +144,21 @@ void ArffFiles::generateDataset(int labelIndex)
y = factorize(yy);
}
string ArffFiles::trim(const string& source)
std::string ArffFiles::trim(const std::string& source)
{
string s(source);
std::string s(source);
s.erase(0, s.find_first_not_of(" '\n\r\t"));
s.erase(s.find_last_not_of(" '\n\r\t") + 1);
return s;
}
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{
vector<int> yy;
std::vector<int> yy;
yy.reserve(labels_t.size());
map<string, int> labelMap;
std::map<std::string, int> labelMap;
int i = 0;
for (const string& label : labels_t) {
for (const std::string& label : labels_t) {
if (labelMap.find(label) == labelMap.end()) {
labelMap[label] = i++;
}

View File

@@ -4,31 +4,29 @@
#include <string>
#include <vector>
using namespace std;
class ArffFiles {
private:
vector<string> lines;
vector<pair<string, string>> attributes;
string className;
string classType;
vector<vector<float>> X;
vector<int> y;
std::vector<std::string> lines;
std::vector<std::pair<std::string, std::string>> attributes;
std::string className;
std::string classType;
std::vector<std::vector<float>> X;
std::vector<int> y;
void generateDataset(int);
void loadCommon(string);
void loadCommon(std::string);
public:
ArffFiles();
void load(const string&, bool = true);
void load(const string&, const string&);
vector<string> getLines() const;
void load(const std::string&, bool = true);
void load(const std::string&, const std::string&);
std::vector<std::string> getLines() const;
unsigned long int getSize() const;
string getClassName() const;
string getClassType() const;
static string trim(const string&);
vector<vector<float>>& getX();
vector<int>& getY();
vector<pair<string, string>> getAttributes() const;
static vector<int> factorize(const vector<string>& labels_t);
std::string getClassName() const;
std::string getClassType() const;
static std::string trim(const std::string&);
std::vector<std::vector<float>>& getX();
std::vector<int>& getY();
std::vector<std::pair<std::string, std::string>> getAttributes() const;
static std::vector<int> factorize(const std::vector<std::string>& labels_t);
};
#endif

View File

@@ -1,8 +1,10 @@
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
include_directories(${Python3_INCLUDE_DIRS})
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap)

View File

@@ -12,14 +12,12 @@
#include "modelRegister.h"
#include <fstream>
using namespace std;
const std::string PATH = "../../data/";
const string PATH = "../../data/";
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features)
pair<std::vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<std::string> features)
{
vector<mdlp::labels_t>Xd;
map<string, int> maxes;
std::vector<mdlp::labels_t>Xd;
map<std::string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
@@ -40,12 +38,12 @@ bool file_exists(const std::string& name)
return false;
}
}
pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vector<vector<int>> X, vector<int> y)
pair<std::vector<std::vector<int>>, std::vector<int>> extract_indices(std::vector<int> indices, std::vector<std::vector<int>> X, std::vector<int> y)
{
vector<vector<int>> Xr; // nxm
vector<int> yr;
std::vector<std::vector<int>> Xr; // nxm
std::vector<int> yr;
for (int col = 0; col < X.size(); ++col) {
Xr.push_back(vector<int>());
Xr.push_back(std::vector<int>());
}
for (auto index : indices) {
for (int col = 0; col < X.size(); ++col) {
@@ -58,226 +56,180 @@ pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vect
int main(int argc, char** argv)
{
torch::Tensor weights_ = torch::full({ 10 }, 1.0 / 10, torch::kFloat64);
torch::Tensor y_ = torch::tensor({ 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }, torch::kInt32);
torch::Tensor ypred = torch::tensor({ 1, 1, 1, 0, 0, 1, 1, 1, 1, 0 }, torch::kInt32);
cout << "Initial weights_: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << ", ";
map<std::string, bool> datasets = {
{"diabetes", true},
{"ecoli", true},
{"glass", true},
{"iris", true},
{"kdd_JapaneseVowels", false},
{"letter", true},
{"liver-disorders", true},
{"mfeat-factors", true},
};
auto valid_datasets = std::vector<std::string>();
transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
[](const pair<std::string, bool>& pair) { return pair.first; });
argparse::ArgumentParser program("BayesNetSample");
program.add_argument("-d", "--dataset")
.help("Dataset file name")
.action([valid_datasets](const std::string& value) {
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
return value;
}
cout << "end." << endl;
cout << "y_: " << endl;
for (int i = 0; i < 10; i++) {
cout << y_.index({ i }).item<int>() << ", ";
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
}
cout << "end." << endl;
cout << "ypred: " << endl;
for (int i = 0; i < 10; i++) {
cout << ypred.index({ i }).item<int>() << ", ";
);
program.add_argument("-p", "--path")
.help(" folder where the data files are located, default")
.default_value(std::string{ PATH }
);
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->tostring())
.action([](const std::string& value) {
static const std::vector<std::string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
cout << "end." << endl;
auto mask_wrong = ypred != y_;
auto mask_right = ypred == y_;
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
double epsilon_t = masked_weights.sum().item<double>();
cout << "epsilon_t: " << epsilon_t << endl;
double wt = (1 - epsilon_t) / epsilon_t;
cout << "wt: " << wt << endl;
double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
cout << "alpha_t: " << alpha_t << endl;
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
cout << "exp(alpha_t): " << exp(alpha_t) << endl;
cout << "exp(-alpha_t): " << exp(-alpha_t) << endl;
weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
// Step 3.2.2: Update weights of right samples
weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>();
cout << "totalWeights: " << totalWeights << endl;
cout << "Before normalization: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << endl;
throw runtime_error("Model must be one of " + platform::Models::instance()->tostring());
}
weights_ = weights_ / totalWeights;
cout << "After normalization: " << endl;
for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << endl;
);
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const std::string& value) {
try {
auto k = stoi(value);
if (k < 2) {
throw runtime_error("Number of folds must be greater than 1");
}
return k;
}
catch (const runtime_error& err) {
throw runtime_error(err.what());
}
catch (...) {
throw runtime_error("Number of folds must be an integer");
}});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors, dump_cpt;
std::string model_name, file_name, path, complete_file_name;
int nFolds, seed;
try {
program.parse_args(argc, argv);
file_name = program.get<std::string>("dataset");
path = program.get<std::string>("path");
model_name = program.get<std::string>("model");
complete_file_name = path + file_name + ".arff";
stratified = program.get<bool>("stratified");
tensors = program.get<bool>("tensors");
nFolds = program.get<int>("folds");
seed = program.get<int>("seed");
dump_cpt = program.get<bool>("dumpcpt");
class_last = datasets[file_name];
if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
}
}
catch (const exception& err) {
cerr << err.what() << std::endl;
cerr << program;
exit(1);
}
// map<string, bool> datasets = {
// {"diabetes", true},
// {"ecoli", true},
// {"glass", true},
// {"iris", true},
// {"kdd_JapaneseVowels", false},
// {"letter", true},
// {"liver-disorders", true},
// {"mfeat-factors", true},
// };
// auto valid_datasets = vector<string>();
// transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
// [](const pair<string, bool>& pair) { return pair.first; });
// argparse::ArgumentParser program("BayesNetSample");
// program.add_argument("-d", "--dataset")
// .help("Dataset file name")
// .action([valid_datasets](const std::string& value) {
// if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
// return value;
// }
// throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
// }
// );
// program.add_argument("-p", "--path")
// .help(" folder where the data files are located, default")
// .default_value(string{ PATH }
// );
// program.add_argument("-m", "--model")
// .help("Model to use " + platform::Models::instance()->toString())
// .action([](const std::string& value) {
// static const vector<string> choices = platform::Models::instance()->getNames();
// if (find(choices.begin(), choices.end(), value) != choices.end()) {
// return value;
// }
// throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
// }
// );
// program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
// program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
// program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
// program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
// program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
// try {
// auto k = stoi(value);
// if (k < 2) {
// throw runtime_error("Number of folds must be greater than 1");
// }
// return k;
// }
// catch (const runtime_error& err) {
// throw runtime_error(err.what());
// }
// catch (...) {
// throw runtime_error("Number of folds must be an integer");
// }});
// program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
// bool class_last, stratified, tensors, dump_cpt;
// string model_name, file_name, path, complete_file_name;
// int nFolds, seed;
// try {
// program.parse_args(argc, argv);
// file_name = program.get<string>("dataset");
// path = program.get<string>("path");
// model_name = program.get<string>("model");
// complete_file_name = path + file_name + ".arff";
// stratified = program.get<bool>("stratified");
// tensors = program.get<bool>("tensors");
// nFolds = program.get<int>("folds");
// seed = program.get<int>("seed");
// dump_cpt = program.get<bool>("dumpcpt");
// class_last = datasets[file_name];
// if (!file_exists(complete_file_name)) {
// throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
// }
// }
// catch (const exception& err) {
// cerr << err.what() << endl;
// cerr << program;
// exit(1);
// }
/*
* Begin Processing
*/
// auto handler = ArffFiles();
// handler.load(complete_file_name, class_last);
// // Get Dataset X, y
// vector<mdlp::samples_t>& X = handler.getX();
// mdlp::labels_t& y = handler.getY();
// // Get className & Features
// auto className = handler.getClassName();
// vector<string> features;
// auto attributes = handler.getAttributes();
// transform(attributes.begin(), attributes.end(), back_inserter(features),
// [](const pair<string, string>& item) { return item.first; });
// // Discretize Dataset
// auto [Xd, maxes] = discretize(X, y, features);
// maxes[className] = *max_element(y.begin(), y.end()) + 1;
// map<string, vector<int>> states;
// for (auto feature : features) {
// states[feature] = vector<int>(maxes[feature]);
// }
// states[className] = vector<int>(maxes[className]);
// auto clf = platform::Models::instance()->create(model_name);
// clf->fit(Xd, y, features, className, states);
// if (dump_cpt) {
// cout << "--- CPT Tables ---" << endl;
// clf->dump_cpt();
// }
// auto lines = clf->show();
// for (auto line : lines) {
// cout << line << endl;
// }
// cout << "--- Topological Order ---" << endl;
// auto order = clf->topological_order();
// for (auto name : order) {
// cout << name << ", ";
// }
// cout << "end." << endl;
// auto score = clf->score(Xd, y);
// cout << "Score: " << score << endl;
// auto graph = clf->graph();
// auto dot_file = model_name + "_" + file_name;
// ofstream file(dot_file + ".dot");
// file << graph;
// file.close();
// cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
// cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
// string stratified_string = stratified ? " Stratified" : "";
// cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
// cout << "==========================================" << endl;
// torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
// torch::Tensor yt = torch::tensor(y, torch::kInt32);
// for (int i = 0; i < features.size(); ++i) {
// Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
// }
// float total_score = 0, total_score_train = 0, score_train, score_test;
// platform::Fold* fold;
// if (stratified)
// fold = new platform::StratifiedKFold(nFolds, y, seed);
// else
// fold = new platform::KFold(nFolds, y.size(), seed);
// for (auto i = 0; i < nFolds; ++i) {
// auto [train, test] = fold->getFold(i);
// cout << "Fold: " << i + 1 << endl;
// if (tensors) {
// auto ttrain = torch::tensor(train, torch::kInt64);
// auto ttest = torch::tensor(test, torch::kInt64);
// torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
// torch::Tensor ytraint = yt.index({ ttrain });
// torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
// torch::Tensor ytestt = yt.index({ ttest });
// clf->fit(Xtraint, ytraint, features, className, states);
// auto temp = clf->predict(Xtraint);
// score_train = clf->score(Xtraint, ytraint);
// score_test = clf->score(Xtestt, ytestt);
// } else {
// auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
// auto [Xtest, ytest] = extract_indices(test, Xd, y);
// clf->fit(Xtrain, ytrain, features, className, states);
// score_train = clf->score(Xtrain, ytrain);
// score_test = clf->score(Xtest, ytest);
// }
// if (dump_cpt) {
// cout << "--- CPT Tables ---" << endl;
// clf->dump_cpt();
// }
// total_score_train += score_train;
// total_score += score_test;
// cout << "Score Train: " << score_train << endl;
// cout << "Score Test : " << score_test << endl;
// cout << "-------------------------------------------------------------------------------" << endl;
// }
// cout << "**********************************************************************************" << endl;
// cout << "Average Score Train: " << total_score_train / nFolds << endl;
// cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
auto handler = ArffFiles();
handler.load(complete_file_name, class_last);
// Get Dataset X, y
std::vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
std::vector<std::string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features),
[](const pair<std::string, std::string>& item) { return item.first; });
// Discretize Dataset
auto [Xd, maxes] = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1;
map<std::string, std::vector<int>> states;
for (auto feature : features) {
states[feature] = std::vector<int>(maxes[feature]);
}
states[className] = std::vector<int>(maxes[className]);
auto clf = platform::Models::instance()->create(model_name);
clf->fit(Xd, y, features, className, states);
if (dump_cpt) {
std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt();
}
auto lines = clf->show();
for (auto line : lines) {
std::cout << line << std::endl;
}
std::cout << "--- Topological Order ---" << std::endl;
auto order = clf->topological_order();
for (auto name : order) {
std::cout << name << ", ";
}
std::cout << "end." << std::endl;
auto score = clf->score(Xd, y);
std::cout << "Score: " << score << std::endl;
auto graph = clf->graph();
auto dot_file = model_name + "_" + file_name;
ofstream file(dot_file + ".dot");
file << graph;
file.close();
std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
std::string stratified_string = stratified ? " Stratified" : "";
std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
std::cout << "==========================================" << std::endl;
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
torch::Tensor yt = torch::tensor(y, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
}
float total_score = 0, total_score_train = 0, score_train, score_test;
platform::Fold* fold;
if (stratified)
fold = new platform::StratifiedKFold(nFolds, y, seed);
else
fold = new platform::KFold(nFolds, y.size(), seed);
for (auto i = 0; i < nFolds; ++i) {
auto [train, test] = fold->getFold(i);
std::cout << "Fold: " << i + 1 << std::endl;
if (tensors) {
auto ttrain = torch::tensor(train, torch::kInt64);
auto ttest = torch::tensor(test, torch::kInt64);
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
torch::Tensor ytraint = yt.index({ ttrain });
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
torch::Tensor ytestt = yt.index({ ttest });
clf->fit(Xtraint, ytraint, features, className, states);
auto temp = clf->predict(Xtraint);
score_train = clf->score(Xtraint, ytraint);
score_test = clf->score(Xtestt, ytestt);
} else {
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
auto [Xtest, ytest] = extract_indices(test, Xd, y);
clf->fit(Xtrain, ytrain, features, className, states);
score_train = clf->score(Xtrain, ytrain);
score_test = clf->score(Xtest, ytest);
}
if (dump_cpt) {
std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt();
}
total_score_train += score_train;
total_score += score_test;
std::cout << "Score Train: " << score_train << std::endl;
std::cout << "Score Test : " << score_test << std::endl;
std::cout << "-------------------------------------------------------------------------------" << std::endl;
}
std::cout << "**********************************************************************************" << std::endl;
std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl;
std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0;
}

View File

@@ -9,9 +9,9 @@ namespace bayesnet {
models.push_back(std::make_unique<SPODE>(i));
}
n_models = models.size();
significanceModels = vector<double>(n_models, 1.0);
significanceModels = std::vector<double>(n_models, 1.0);
}
vector<string> AODE::graph(const string& title) const
std::vector<std::string> AODE::graph(const std::string& title) const
{
return Ensemble::graph(title);
}

View File

@@ -9,7 +9,7 @@ namespace bayesnet {
public:
AODE();
virtual ~AODE() {};
vector<string> graph(const string& title = "AODE") const override;
std::vector<std::string> graph(const std::string& title = "AODE") const override;
};
}
#endif

View File

@@ -1,17 +1,15 @@
#include "AODELd.h"
#include "Models.h"
namespace bayesnet {
using namespace std;
AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
@@ -26,7 +24,7 @@ namespace bayesnet {
models.push_back(std::make_unique<SPODELd>(i));
}
n_models = models.size();
significanceModels = vector<double>(n_models, 1.0);
significanceModels = std::vector<double>(n_models, 1.0);
}
void AODELd::trainModel(const torch::Tensor& weights)
{
@@ -34,7 +32,7 @@ namespace bayesnet {
model->fit(Xf, y, features, className, states);
}
}
vector<string> AODELd::graph(const string& name) const
std::vector<std::string> AODELd::graph(const std::string& name) const
{
return Ensemble::graph(name);
}

View File

@@ -5,17 +5,16 @@
#include "SPODELd.h"
namespace bayesnet {
using namespace std;
class AODELd : public Ensemble, public Proposal {
protected:
void trainModel(const torch::Tensor& weights) override;
void buildModel(const torch::Tensor& weights) override;
public:
AODELd();
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_) override;
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
virtual ~AODELd() = default;
vector<string> graph(const string& name = "AODELd") const override;
static inline string version() { return "0.0.1"; };
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
static inline std::string version() { return "0.0.1"; };
};
}
#endif // !AODELD_H

View File

@@ -4,33 +4,34 @@
#include <nlohmann/json.hpp>
#include <vector>
namespace bayesnet {
using namespace std;
enum status_t { NORMAL, WARNING, ERROR };
class BaseClassifier {
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
public:
// X is nxm vector, y is nx1 vector
virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
// X is nxm std::vector, y is nx1 std::vector
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
// X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) = 0;
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) = 0;
virtual ~BaseClassifier() = default;
torch::Tensor virtual predict(torch::Tensor& X) = 0;
vector<int> virtual predict(vector<vector<int>>& X) = 0;
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
status_t virtual getStatus() const = 0;
float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
float virtual score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes()const = 0;
int virtual getNumberOfEdges()const = 0;
int virtual getNumberOfStates() const = 0;
vector<string> virtual show() const = 0;
vector<string> virtual graph(const string& title = "") const = 0;
const string inline getVersion() const { return "0.2.0"; };
vector<string> virtual topological_order() = 0;
std::vector<std::string> virtual show() const = 0;
std::vector<std::string> virtual graph(const std::string& title = "") const = 0;
virtual std::string getVersion() = 0;
std::vector<std::string> virtual topological_order() = 0;
void virtual dump_cpt()const = 0;
virtual void setHyperparameters(nlohmann::json& hyperparameters) = 0;
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
std::vector<std::string> validHyperparameters;
};
}
#endif

View File

@@ -1,16 +1,16 @@
#include "BayesMetrics.h"
#include "Mst.h"
namespace bayesnet {
//samples is nxm tensor used to fit the model
Metrics::Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates)
//samples is n+1xm tensor used to fit the model
Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
: samples(samples)
, features(features)
, className(className)
, classNumStates(classNumStates)
{
}
//samples is nxm vector used to fit the model
Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates)
//samples is nxm std::vector used to fit the model
Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
: features(features)
, className(className)
, classNumStates(classNumStates)
@@ -21,7 +21,7 @@ namespace bayesnet {
}
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
}
vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
{
// Return the K Best features
auto n = samples.size(0) - 1;
@@ -56,25 +56,15 @@ namespace bayesnet {
}
return featuresKBest;
}
vector<double> Metrics::getScoresKBest() const
std::vector<double> Metrics::getScoresKBest() const
{
return scoresKBest;
}
vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
{
vector<pair<string, string>> result;
for (int i = 0; i < source.size(); ++i) {
string temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
}
}
return result;
}
torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
{
auto result = vector<double>();
auto source = vector<string>(features);
auto result = std::vector<double>();
auto source = std::vector<std::string>(features);
source.push_back(className);
auto combinations = doCombinations(source);
// Compute class prior
@@ -110,7 +100,7 @@ namespace bayesnet {
return matrix;
}
// To use in Python
vector<float> Metrics::conditionalEdgeWeights(vector<float>& weights_)
std::vector<float> Metrics::conditionalEdgeWeights(std::vector<float>& weights_)
{
const torch::Tensor weights = torch::tensor(weights_);
auto matrix = conditionalEdge(weights);
@@ -131,7 +121,7 @@ namespace bayesnet {
{
int numSamples = firstFeature.sizes()[0];
torch::Tensor featureCounts = secondFeature.bincount(weights);
unordered_map<int, unordered_map<int, double>> jointCounts;
std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
double totalWeight = 0;
for (auto i = 0; i < numSamples; i++) {
jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
@@ -165,7 +155,7 @@ namespace bayesnet {
and the indices of the weights as nodes of this square matrix using
Kruskal algorithm
*/
vector<pair<int, int>> Metrics::maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root)
std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root)
{
auto mst = MST(features, weights, root);
return mst.maximumSpanningTree();

View File

@@ -4,29 +4,46 @@
#include <vector>
#include <string>
namespace bayesnet {
using namespace std;
using namespace torch;
class Metrics {
private:
Tensor samples; // nxm tensor used to fit the model
vector<string> features;
string className;
int classNumStates = 0;
vector<double> scoresKBest;
vector<int> featuresKBest; // sorted indices of the features
double entropy(const Tensor& feature, const Tensor& weights);
double conditionalEntropy(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
vector<pair<string, string>> doCombinations(const vector<string>&);
std::vector<double> scoresKBest;
std::vector<int> featuresKBest; // sorted indices of the features
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
protected:
torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
std::string className;
double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
std::vector<std::string> features;
template <class T>
std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
{
std::vector<std::pair<T, T>> result;
for (int i = 0; i < source.size(); ++i) {
T temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
}
}
return result;
}
template <class T>
T pop_first(std::vector<T>& v)
{
T temp = v[0];
v.erase(v.begin());
return temp;
}
public:
Metrics() = default;
Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates);
Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates);
vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending=false, unsigned k = 0);
vector<double> getScoresKBest() const;
double mutualInformation(const Tensor& firstFeature, const Tensor& secondFeature, const Tensor& weights);
vector<float> conditionalEdgeWeights(vector<float>& weights); // To use in Python
Tensor conditionalEdge(const torch::Tensor& weights);
vector<pair<int, int>> maximumSpanningTree(const vector<string>& features, const Tensor& weights, const int root);
Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
std::vector<double> getScoresKBest() const;
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
torch::Tensor conditionalEdge(const torch::Tensor& weights);
std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
};
}
#endif

View File

@@ -1,36 +1,26 @@
#include "BoostAODE.h"
#include <set>
#include "BayesMetrics.h"
#include <functional>
#include <limits.h>
#include "BoostAODE.h"
#include "Colors.h"
#include "Folding.h"
#include <limits.h>
#include "Paths.h"
#include "CFS.h"
#include "FCBF.h"
#include "IWSS.h"
namespace bayesnet {
BoostAODE::BoostAODE() : Ensemble() {}
BoostAODE::BoostAODE() : Ensemble()
{
validHyperparameters = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features", "tolerance" };
}
void BoostAODE::buildModel(const torch::Tensor& weights)
{
// Models shall be built in trainModel
}
void BoostAODE::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const vector<string> validKeys = { "repeatSparent", "maxModels", "ascending", "convergence" };
checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"];
}
if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"];
}
if (hyperparameters.contains("ascending")) {
ascending = hyperparameters["ascending"];
}
if (hyperparameters.contains("convergence")) {
convergence = hyperparameters["convergence"];
}
}
void BoostAODE::validationInit()
{
models.clear();
n_models = 0;
// Prepare the validation dataset
auto y_ = dataset.index({ -1, "..." });
if (convergence) {
// Prepare train & validation sets from train data
@@ -56,46 +46,117 @@ namespace bayesnet {
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
y_train = y_;
}
}
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
auto hyperparameters = hyperparameters_;
if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"];
hyperparameters.erase("repeatSparent");
}
if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"];
hyperparameters.erase("maxModels");
}
if (hyperparameters.contains("ascending")) {
ascending = hyperparameters["ascending"];
hyperparameters.erase("ascending");
}
if (hyperparameters.contains("convergence")) {
convergence = hyperparameters["convergence"];
hyperparameters.erase("convergence");
}
if (hyperparameters.contains("threshold")) {
threshold = hyperparameters["threshold"];
hyperparameters.erase("threshold");
}
if (hyperparameters.contains("tolerance")) {
tolerance = hyperparameters["tolerance"];
hyperparameters.erase("tolerance");
}
if (hyperparameters.contains("select_features")) {
auto selectedAlgorithm = hyperparameters["select_features"];
std::vector<std::string> algos = { "IWSS", "FCBF", "CFS" };
selectFeatures = true;
algorithm = selectedAlgorithm;
if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
throw std::invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]");
}
hyperparameters.erase("select_features");
}
if (!hyperparameters.empty()) {
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
}
}
std::unordered_set<int> BoostAODE::initializeModels()
{
std::unordered_set<int> featuresUsed;
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
int maxFeatures = 0;
if (algorithm == "CFS") {
featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
} else if (algorithm == "IWSS") {
if (threshold < 0 || threshold >0.5) {
throw std::invalid_argument("Invalid threshold value for IWSS [0, 0.5]");
}
featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
} else if (algorithm == "FCBF") {
if (threshold < 1e-7 || threshold > 1) {
throw std::invalid_argument("Invalid threshold value [1e-7, 1]");
}
featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
}
featureSelector->fit();
auto cfsFeatures = featureSelector->getFeatures();
for (const int& feature : cfsFeatures) {
// std::cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << std::endl;
featuresUsed.insert(feature);
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_);
models.push_back(std::move(model));
significanceModels.push_back(1.0);
n_models++;
}
delete featureSelector;
return featuresUsed;
}
void BoostAODE::trainModel(const torch::Tensor& weights)
{
models.clear();
n_models = 0;
std::unordered_set<int> featuresUsed;
if (selectFeatures) {
featuresUsed = initializeModels();
}
if (maxModels == 0)
maxModels = .1 * n > 10 ? .1 * n : n;
validationInit();
Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
bool exitCondition = false;
unordered_set<int> featuresUsed;
// Variables to control the accuracy finish condition
double priorAccuracy = 0.0;
double delta = 1.0;
double threshold = 1e-4;
int tolerance = 5; // number of times the accuracy can be lower than the threshold
int count = 0; // number of times the accuracy is lower than the threshold
fitted = true; // to enable predict
// Step 0: Set the finish condition
// if not repeatSparent a finish condition is run out of features
// n_models == maxModels
// epsiolon sub t > 0.5 => inverse the weights policy
// epsilon sub t > 0.5 => inverse the weights policy
// validation error is not decreasing
while (!exitCondition) {
// Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
unique_ptr<Classifier> model;
std::unique_ptr<Classifier> model;
auto feature = featureSelection[0];
if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
bool found = false;
for (auto feat : featureSelection) {
if (find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
bool used = true;
for (const auto& feat : featureSelection) {
if (std::find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
continue;
}
found = true;
used = false;
feature = feat;
break;
}
if (!found) {
if (used) {
exitCondition = true;
continue;
}
@@ -135,13 +196,13 @@ namespace bayesnet {
count++;
}
}
exitCondition = n_models == maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance;
exitCondition = n_models >= maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance;
}
if (featuresUsed.size() != features.size()) {
status = WARNING;
}
}
vector<string> BoostAODE::graph(const string& title) const
std::vector<std::string> BoostAODE::graph(const std::string& title) const
{
return Ensemble::graph(title);
}

View File

@@ -1,25 +1,33 @@
#ifndef BOOSTAODE_H
#define BOOSTAODE_H
#include "Ensemble.h"
#include <map>
#include "SPODE.h"
#include "FeatureSelect.h"
namespace bayesnet {
class BoostAODE : public Ensemble {
public:
BoostAODE();
virtual ~BoostAODE() {};
vector<string> graph(const string& title = "BoostAODE") const override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
virtual ~BoostAODE() = default;
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
protected:
void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override;
private:
torch::Tensor dataset_;
torch::Tensor X_train, y_train, X_test, y_test;
void validationInit();
bool repeatSparent = false;
std::unordered_set<int> initializeModels();
// Hyperparameters
bool repeatSparent = false; // if true, a feature can be selected more than once
int maxModels = 0;
int tolerance = 0;
bool ascending = false; //Process KBest features ascending or descending order
bool convergence = false; //if true, stop when the model does not improve
bool selectFeatures = false; // if true, use feature selection
std::string algorithm = ""; // Selected feature selection algorithm
FeatureSelect* featureSelector = nullptr;
double threshold = -1;
};
}
#endif

72
src/BayesNet/CFS.cc Normal file
View File

@@ -0,0 +1,72 @@
#include "CFS.h"
#include <limits>
#include "bayesnetUtils.h"
namespace bayesnet {
void CFS::fit()
{
initialize();
computeSuLabels();
auto featureOrder = argsort(suLabels); // sort descending order
auto continueCondition = true;
auto feature = featureOrder[0];
selectedFeatures.push_back(feature);
selectedScores.push_back(suLabels[feature]);
selectedFeatures.erase(selectedFeatures.begin());
while (continueCondition) {
double merit = std::numeric_limits<double>::lowest();
int bestFeature = -1;
for (auto feature : featureOrder) {
selectedFeatures.push_back(feature);
// Compute merit with selectedFeatures
auto meritNew = computeMeritCFS();
if (meritNew > merit) {
merit = meritNew;
bestFeature = feature;
}
selectedFeatures.pop_back();
}
if (bestFeature == -1) {
// meritNew has to be nan due to constant features
break;
}
selectedFeatures.push_back(bestFeature);
selectedScores.push_back(merit);
featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), bestFeature), featureOrder.end());
continueCondition = computeContinueCondition(featureOrder);
}
fitted = true;
}
bool CFS::computeContinueCondition(const std::vector<int>& featureOrder)
{
if (selectedFeatures.size() == maxFeatures || featureOrder.size() == 0) {
return false;
}
if (selectedScores.size() >= 5) {
/*
"To prevent the best first search from exploring the entire
feature subset search space, a stopping criterion is imposed.
The search will terminate if five consecutive fully expanded
subsets show no improvement over the current best subset."
as stated in Mark A.Hall Thesis
*/
double item_ant = std::numeric_limits<double>::lowest();
int num = 0;
std::vector<double> lastFive(selectedScores.end() - 5, selectedScores.end());
for (auto item : lastFive) {
if (item_ant == std::numeric_limits<double>::lowest()) {
item_ant = item;
}
if (item > item_ant) {
break;
} else {
num++;
item_ant = item;
}
}
if (num == 5) {
return false;
}
}
return true;
}
}

20
src/BayesNet/CFS.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef CFS_H
#define CFS_H
#include <torch/torch.h>
#include <vector>
#include "FeatureSelect.h"
namespace bayesnet {
class CFS : public FeatureSelect {
public:
// dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector
CFS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights)
{
}
virtual ~CFS() {};
void fit() override;
private:
bool computeContinueCondition(const std::vector<int>& featureOrder);
};
}
#endif

View File

@@ -3,7 +3,10 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
include_directories(${Python3_INCLUDE_DIRS})
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
Mst.cc Proposal.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")

View File

@@ -2,10 +2,8 @@
#include "bayesnetUtils.h"
namespace bayesnet {
using namespace torch;
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
Classifier& Classifier::build(const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights)
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
this->features = features;
this->className = className;
@@ -21,7 +19,7 @@ namespace bayesnet {
fitted = true;
return *this;
}
void Classifier::buildDataset(Tensor& ytmp)
void Classifier::buildDataset(torch::Tensor& ytmp)
{
try {
auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1);
@@ -29,8 +27,8 @@ namespace bayesnet {
}
catch (const std::exception& e) {
std::cerr << e.what() << '\n';
cout << "X dimensions: " << dataset.sizes() << "\n";
cout << "y dimensions: " << ytmp.sizes() << "\n";
std::cout << "X dimensions: " << dataset.sizes() << "\n";
std::cout << "y dimensions: " << ytmp.sizes() << "\n";
exit(1);
}
}
@@ -39,7 +37,7 @@ namespace bayesnet {
model.fit(dataset, weights, features, className, states);
}
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states)
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
dataset = X;
buildDataset(y);
@@ -47,24 +45,24 @@ namespace bayesnet {
return build(features, className, states, weights);
}
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states)
Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, kInt32);
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
for (int i = 0; i < X.size(); ++i) {
dataset.index_put_({ i, "..." }, torch::tensor(X[i], kInt32));
dataset.index_put_({ i, "..." }, torch::tensor(X[i], torch::kInt32));
}
auto ytmp = torch::tensor(y, kInt32);
auto ytmp = torch::tensor(y, torch::kInt32);
buildDataset(ytmp);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states)
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
this->dataset = dataset;
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights)
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
this->dataset = dataset;
return build(features, className, states, weights);
@@ -72,57 +70,57 @@ namespace bayesnet {
void Classifier::checkFitParameters()
{
if (torch::is_floating_point(dataset)) {
throw invalid_argument("dataset (X, y) must be of type Integer");
throw std::invalid_argument("dataset (X, y) must be of type Integer");
}
if (n != features.size()) {
throw invalid_argument("Classifier: X " + to_string(n) + " and features " + to_string(features.size()) + " must have the same number of features");
throw std::invalid_argument("Classifier: X " + std::to_string(n) + " and features " + std::to_string(features.size()) + " must have the same number of features");
}
if (states.find(className) == states.end()) {
throw invalid_argument("className not found in states");
throw std::invalid_argument("className not found in states");
}
for (auto feature : features) {
if (states.find(feature) == states.end()) {
throw invalid_argument("feature [" + feature + "] not found in states");
throw std::invalid_argument("feature [" + feature + "] not found in states");
}
}
}
Tensor Classifier::predict(Tensor& X)
torch::Tensor Classifier::predict(torch::Tensor& X)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
throw std::logic_error("Classifier has not been fitted");
}
return model.predict(X);
}
vector<int> Classifier::predict(vector<vector<int>>& X)
std::vector<int> Classifier::predict(std::vector<std::vector<int>>& X)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
throw std::logic_error("Classifier has not been fitted");
}
auto m_ = X[0].size();
auto n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end());
Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
}
auto yp = model.predict(Xd);
return yp;
}
float Classifier::score(Tensor& X, Tensor& y)
float Classifier::score(torch::Tensor& X, torch::Tensor& y)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
throw std::logic_error("Classifier has not been fitted");
}
Tensor y_pred = predict(X);
torch::Tensor y_pred = predict(X);
return (y_pred == y).sum().item<float>() / y.size(0);
}
float Classifier::score(vector<vector<int>>& X, vector<int>& y)
float Classifier::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{
if (!fitted) {
throw logic_error("Classifier has not been fitted");
throw std::logic_error("Classifier has not been fitted");
}
return model.score(X, y);
}
vector<string> Classifier::show() const
std::vector<std::string> Classifier::show() const
{
return model.show();
}
@@ -137,7 +135,7 @@ namespace bayesnet {
int Classifier::getNumberOfNodes() const
{
// Features does not include class
return fitted ? model.getFeatures().size() + 1 : 0;
return fitted ? model.getFeatures().size() : 0;
}
int Classifier::getNumberOfEdges() const
{
@@ -147,7 +145,7 @@ namespace bayesnet {
{
return fitted ? model.getStates() : 0;
}
vector<string> Classifier::topological_order()
std::vector<std::string> Classifier::topological_order()
{
return model.topological_sort();
}
@@ -155,18 +153,8 @@ namespace bayesnet {
{
model.dump_cpt();
}
void Classifier::checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters)
void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
{
for (const auto& item : hyperparameters.items()) {
if (find(validKeys.begin(), validKeys.end(), item.key()) == validKeys.end()) {
throw invalid_argument("Hyperparameter " + item.key() + " is not valid");
}
}
}
void Classifier::setHyperparameters(nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid, default is no hyperparameters
const vector<string> validKeys = { };
checkHyperparameters(validKeys, hyperparameters);
//For classifiers that don't have hyperparameters
}
}

View File

@@ -4,48 +4,46 @@
#include "BaseClassifier.h"
#include "Network.h"
#include "BayesMetrics.h"
using namespace std;
using namespace torch;
namespace bayesnet {
class Classifier : public BaseClassifier {
private:
Classifier& build(const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights);
Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
protected:
bool fitted;
int m, n; // m: number of samples, n: number of features
Network model;
Metrics metrics;
vector<string> features;
string className;
map<string, vector<int>> states;
Tensor dataset; // (n+1)xm tensor
std::vector<std::string> features;
std::string className;
std::map<std::string, std::vector<int>> states;
torch::Tensor dataset; // (n+1)xm tensor
status_t status = NORMAL;
void checkFitParameters();
virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override;
void checkHyperparameters(const vector<string>& validKeys, nlohmann::json& hyperparameters);
void buildDataset(torch::Tensor& y);
public:
Classifier(Network model);
virtual ~Classifier() = default;
Classifier& fit(vector<vector<int>>& X, vector<int>& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states, const torch::Tensor& weights) override;
Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override;
void addNodes();
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
Tensor predict(Tensor& X) override;
torch::Tensor predict(torch::Tensor& X) override;
status_t getStatus() const override { return status; }
vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override;
vector<string> show() const override;
vector<string> topological_order() override;
std::string getVersion() override { return "0.2.0"; };
std::vector<int> predict(std::vector<std::vector<int>>& X) override;
float score(torch::Tensor& X, torch::Tensor& y) override;
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
std::vector<std::string> show() const override;
std::vector<std::string> topological_order() override;
void dump_cpt() const override;
void setHyperparameters(nlohmann::json& hyperparameters) override;
void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
};
}
#endif

View File

@@ -1,7 +1,6 @@
#include "Ensemble.h"
namespace bayesnet {
using namespace torch;
Ensemble::Ensemble() : Classifier(Network()), n_models(0) {}
@@ -9,20 +8,20 @@ namespace bayesnet {
{
n_models = models.size();
for (auto i = 0; i < n_models; ++i) {
// fit with vectors
// fit with std::vectors
models[i]->fit(dataset, features, className, states);
}
}
vector<int> Ensemble::voting(Tensor& y_pred)
std::vector<int> Ensemble::voting(torch::Tensor& y_pred)
{
auto y_pred_ = y_pred.accessor<int, 2>();
vector<int> y_pred_final;
std::vector<int> y_pred_final;
int numClasses = states.at(className).size();
// y_pred is m x n_models with the prediction of every model for each sample
for (int i = 0; i < y_pred.size(0); ++i) {
// votes store in each index (value of class) the significance added by each model
// i.e. votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions
vector<double> votes(numClasses, 0.0);
std::vector<double> votes(numClasses, 0.0);
for (int j = 0; j < n_models; ++j) {
votes[y_pred_[i][j]] += significanceModels.at(j);
}
@@ -32,18 +31,18 @@ namespace bayesnet {
}
return y_pred_final;
}
Tensor Ensemble::predict(Tensor& X)
torch::Tensor Ensemble::predict(torch::Tensor& X)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
throw std::logic_error("Ensemble has not been fitted");
}
Tensor y_pred = torch::zeros({ X.size(1), n_models }, kInt32);
auto threads{ vector<thread>() };
mutex mtx;
torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
auto threads{ std::vector<std::thread>() };
std::mutex mtx;
for (auto i = 0; i < n_models; ++i) {
threads.push_back(thread([&, i]() {
threads.push_back(std::thread([&, i]() {
auto ypredict = models[i]->predict(X);
lock_guard<mutex> lock(mtx);
std::lock_guard<std::mutex> lock(mtx);
y_pred.index_put_({ "...", i }, ypredict);
}));
}
@@ -52,27 +51,27 @@ namespace bayesnet {
}
return torch::tensor(voting(y_pred));
}
vector<int> Ensemble::predict(vector<vector<int>>& X)
std::vector<int> Ensemble::predict(std::vector<std::vector<int>>& X)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
throw std::logic_error("Ensemble has not been fitted");
}
long m_ = X[0].size();
long n_ = X.size();
vector<vector<int>> Xd(n_, vector<int>(m_, 0));
std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = vector<int>(X[i].begin(), X[i].end());
Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
}
Tensor y_pred = torch::zeros({ m_, n_models }, kInt32);
torch::Tensor y_pred = torch::zeros({ m_, n_models }, torch::kInt32);
for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), kInt32));
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), torch::kInt32));
}
return voting(y_pred);
}
float Ensemble::score(Tensor& X, Tensor& y)
float Ensemble::score(torch::Tensor& X, torch::Tensor& y)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
throw std::logic_error("Ensemble has not been fitted");
}
auto y_pred = predict(X);
int correct = 0;
@@ -83,10 +82,10 @@ namespace bayesnet {
}
return (double)correct / y_pred.size(0);
}
float Ensemble::score(vector<vector<int>>& X, vector<int>& y)
float Ensemble::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
throw std::logic_error("Ensemble has not been fitted");
}
auto y_pred = predict(X);
int correct = 0;
@@ -97,20 +96,20 @@ namespace bayesnet {
}
return (double)correct / y_pred.size();
}
vector<string> Ensemble::show() const
std::vector<std::string> Ensemble::show() const
{
auto result = vector<string>();
auto result = std::vector<std::string>();
for (auto i = 0; i < n_models; ++i) {
auto res = models[i]->show();
result.insert(result.end(), res.begin(), res.end());
}
return result;
}
vector<string> Ensemble::graph(const string& title) const
std::vector<std::string> Ensemble::graph(const std::string& title) const
{
auto result = vector<string>();
auto result = std::vector<std::string>();
for (auto i = 0; i < n_models; ++i) {
auto res = models[i]->graph(title + "_" + to_string(i));
auto res = models[i]->graph(title + "_" + std::to_string(i));
result.insert(result.end(), res.begin(), res.end());
}
return result;

View File

@@ -4,34 +4,32 @@
#include "Classifier.h"
#include "BayesMetrics.h"
#include "bayesnetUtils.h"
using namespace std;
using namespace torch;
namespace bayesnet {
class Ensemble : public Classifier {
private:
Ensemble& build(vector<string>& features, string className, map<string, vector<int>>& states);
Ensemble& build(std::vector<std::string>& features, std::string className, std::map<std::string, std::vector<int>>& states);
protected:
unsigned n_models;
vector<unique_ptr<Classifier>> models;
vector<double> significanceModels;
std::vector<std::unique_ptr<Classifier>> models;
std::vector<double> significanceModels;
void trainModel(const torch::Tensor& weights) override;
vector<int> voting(Tensor& y_pred);
std::vector<int> voting(torch::Tensor& y_pred);
public:
Ensemble();
virtual ~Ensemble() = default;
Tensor predict(Tensor& X) override;
vector<int> predict(vector<vector<int>>& X) override;
float score(Tensor& X, Tensor& y) override;
float score(vector<vector<int>>& X, vector<int>& y) override;
torch::Tensor predict(torch::Tensor& X) override;
std::vector<int> predict(std::vector<std::vector<int>>& X) override;
float score(torch::Tensor& X, torch::Tensor& y) override;
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
vector<string> show() const override;
vector<string> graph(const string& title) const override;
vector<string> topological_order() override
std::vector<std::string> show() const override;
std::vector<std::string> graph(const std::string& title) const override;
std::vector<std::string> topological_order() override
{
return vector<string>();
return std::vector<std::string>();
}
void dump_cpt() const override
{

44
src/BayesNet/FCBF.cc Normal file
View File

@@ -0,0 +1,44 @@
#include "bayesnetUtils.h"
#include "FCBF.h"
namespace bayesnet {
FCBF::FCBF(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) :
FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold)
{
if (threshold < 1e-7) {
throw std::invalid_argument("Threshold cannot be less than 1e-7");
}
}
void FCBF::fit()
{
initialize();
computeSuLabels();
auto featureOrder = argsort(suLabels); // sort descending order
auto featureOrderCopy = featureOrder;
for (const auto& feature : featureOrder) {
// Don't self compare
featureOrderCopy.erase(featureOrderCopy.begin());
if (suLabels.at(feature) == 0.0) {
// The feature has been removed from the list
continue;
}
if (suLabels.at(feature) < threshold) {
break;
}
// Remove redundant features
for (const auto& featureCopy : featureOrderCopy) {
double value = computeSuFeatures(feature, featureCopy);
if (value >= suLabels.at(featureCopy)) {
// Remove feature from list
suLabels[featureCopy] = 0.0;
}
}
selectedFeatures.push_back(feature);
selectedScores.push_back(suLabels[feature]);
if (selectedFeatures.size() == maxFeatures) {
break;
}
}
fitted = true;
}
}

17
src/BayesNet/FCBF.h Normal file
View File

@@ -0,0 +1,17 @@
#ifndef FCBF_H
#define FCBF_H
#include <torch/torch.h>
#include <vector>
#include "FeatureSelect.h"
namespace bayesnet {
class FCBF : public FeatureSelect {
public:
// dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector
FCBF(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
virtual ~FCBF() {};
void fit() override;
private:
double threshold = -1;
};
}
#endif

View File

@@ -0,0 +1,79 @@
#include "FeatureSelect.h"
#include <limits>
#include "bayesnetUtils.h"
namespace bayesnet {
FeatureSelect::FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights)
{
}
void FeatureSelect::initialize()
{
selectedFeatures.clear();
selectedScores.clear();
}
double FeatureSelect::symmetricalUncertainty(int a, int b)
{
/*
Compute symmetrical uncertainty. Normalize* information gain (mutual
information) with the entropies of the features in order to compensate
the bias due to high cardinality features. *Range [0, 1]
(https://www.sciencedirect.com/science/article/pii/S0020025519303603)
*/
auto x = samples.index({ a, "..." });
auto y = samples.index({ b, "..." });
auto mu = mutualInformation(x, y, weights);
auto hx = entropy(x, weights);
auto hy = entropy(y, weights);
return 2.0 * mu / (hx + hy);
}
void FeatureSelect::computeSuLabels()
{
// Compute Simmetrical Uncertainty between features and labels
// https://en.wikipedia.org/wiki/Symmetric_uncertainty
for (int i = 0; i < features.size(); ++i) {
suLabels.push_back(symmetricalUncertainty(i, -1));
}
}
double FeatureSelect::computeSuFeatures(const int firstFeature, const int secondFeature)
{
// Compute Simmetrical Uncertainty between features
// https://en.wikipedia.org/wiki/Symmetric_uncertainty
try {
return suFeatures.at({ firstFeature, secondFeature });
}
catch (const std::out_of_range& e) {
double result = symmetricalUncertainty(firstFeature, secondFeature);
suFeatures[{firstFeature, secondFeature}] = result;
return result;
}
}
double FeatureSelect::computeMeritCFS()
{
double result;
double rcf = 0;
for (auto feature : selectedFeatures) {
rcf += suLabels[feature];
}
double rff = 0;
int n = selectedFeatures.size();
for (const auto& item : doCombinations(selectedFeatures)) {
rff += computeSuFeatures(item.first, item.second);
}
return rcf / sqrt(n + (n * n - n) * rff);
}
std::vector<int> FeatureSelect::getFeatures() const
{
if (!fitted) {
throw std::runtime_error("FeatureSelect not fitted");
}
return selectedFeatures;
}
std::vector<double> FeatureSelect::getScores() const
{
if (!fitted) {
throw std::runtime_error("FeatureSelect not fitted");
}
return selectedScores;
}
}

View File

@@ -0,0 +1,30 @@
#ifndef FEATURE_SELECT_H
#define FEATURE_SELECT_H
#include <torch/torch.h>
#include <vector>
#include "BayesMetrics.h"
namespace bayesnet {
class FeatureSelect : public Metrics {
public:
// dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector
FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights);
virtual ~FeatureSelect() {};
virtual void fit() = 0;
std::vector<int> getFeatures() const;
std::vector<double> getScores() const;
protected:
void initialize();
void computeSuLabels();
double computeSuFeatures(const int a, const int b);
double symmetricalUncertainty(int a, int b);
double computeMeritCFS();
const torch::Tensor& weights;
int maxFeatures;
std::vector<int> selectedFeatures;
std::vector<double> selectedScores;
std::vector<double> suLabels;
std::map<std::pair<int, int>, double> suFeatures;
bool fitted = false;
};
}
#endif

47
src/BayesNet/IWSS.cc Normal file
View File

@@ -0,0 +1,47 @@
#include "IWSS.h"
#include <limits>
#include "bayesnetUtils.h"
namespace bayesnet {
IWSS::IWSS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) :
FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold)
{
if (threshold < 0 || threshold > .5) {
throw std::invalid_argument("Threshold has to be in [0, 0.5]");
}
}
void IWSS::fit()
{
initialize();
computeSuLabels();
auto featureOrder = argsort(suLabels); // sort descending order
auto featureOrderCopy = featureOrder;
// Add first and second features to result
// First with its own score
auto first_feature = pop_first(featureOrderCopy);
selectedFeatures.push_back(first_feature);
selectedScores.push_back(suLabels.at(first_feature));
// Second with the score of the candidates
selectedFeatures.push_back(pop_first(featureOrderCopy));
auto merit = computeMeritCFS();
selectedScores.push_back(merit);
for (const auto feature : featureOrderCopy) {
selectedFeatures.push_back(feature);
// Compute merit with selectedFeatures
auto meritNew = computeMeritCFS();
double delta = merit != 0.0 ? abs(merit - meritNew) / merit : 0.0;
if (meritNew > merit || delta < threshold) {
if (meritNew > merit) {
merit = meritNew;
}
selectedScores.push_back(meritNew);
} else {
selectedFeatures.pop_back();
break;
}
if (selectedFeatures.size() == maxFeatures) {
break;
}
}
fitted = true;
}
}

17
src/BayesNet/IWSS.h Normal file
View File

@@ -0,0 +1,17 @@
#ifndef IWSS_H
#define IWSS_H
#include <torch/torch.h>
#include <vector>
#include "FeatureSelect.h"
namespace bayesnet {
class IWSS : public FeatureSelect {
public:
// dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector
IWSS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
virtual ~IWSS() {};
void fit() override;
private:
double threshold = -1;
};
}
#endif

View File

@@ -1,14 +1,13 @@
#include "KDB.h"
namespace bayesnet {
using namespace torch;
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta) {}
void KDB::setHyperparameters(nlohmann::json& hyperparameters)
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta)
{
validHyperparameters = { "k", "theta" };
}
void KDB::setHyperparameters(const nlohmann::json& hyperparameters)
{
// Check if hyperparameters are valid
const vector<string> validKeys = { "k", "theta" };
checkHyperparameters(validKeys, hyperparameters);
if (hyperparameters.contains("k")) {
k = hyperparameters["k"];
}
@@ -40,16 +39,16 @@ namespace bayesnet {
// 1. For each feature Xi, compute mutual information, I(X;C),
// where C is the class.
addNodes();
const Tensor& y = dataset.index({ -1, "..." });
vector<double> mi;
const torch::Tensor& y = dataset.index({ -1, "..." });
std::vector<double> mi;
for (auto i = 0; i < features.size(); i++) {
Tensor firstFeature = dataset.index({ i, "..." });
torch::Tensor firstFeature = dataset.index({ i, "..." });
mi.push_back(metrics.mutualInformation(firstFeature, y, weights));
}
// 2. Compute class conditional mutual information I(Xi;XjIC), f or each
auto conditionalEdgeWeights = metrics.conditionalEdge(weights);
// 3. Let the used variable list, S, be empty.
vector<int> S;
std::vector<int> S;
// 4. Let the DAG network being constructed, BN, begin with a single
// class node, C.
// 5. Repeat until S includes all domain features
@@ -67,9 +66,9 @@ namespace bayesnet {
S.push_back(idx);
}
}
void KDB::add_m_edges(int idx, vector<int>& S, Tensor& weights)
void KDB::add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights)
{
auto n_edges = min(k, static_cast<int>(S.size()));
auto n_edges = std::min(k, static_cast<int>(S.size()));
auto cond_w = clone(weights);
bool exit_cond = k == 0;
int num = 0;
@@ -81,7 +80,7 @@ namespace bayesnet {
model.addEdge(features[max_minfo], features[idx]);
num++;
}
catch (const invalid_argument& e) {
catch (const std::invalid_argument& e) {
// Loops are not allowed
}
}
@@ -91,11 +90,11 @@ namespace bayesnet {
exit_cond = num == n_edges || candidates.size(0) == 0;
}
}
vector<string> KDB::graph(const string& title) const
std::vector<std::string> KDB::graph(const std::string& title) const
{
string header{ title };
std::string header{ title };
if (title == "KDB") {
header += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")";
header += " (k=" + std::to_string(k) + ", theta=" + std::to_string(theta) + ")";
}
return model.graph(header);
}

View File

@@ -4,20 +4,18 @@
#include "Classifier.h"
#include "bayesnetUtils.h"
namespace bayesnet {
using namespace std;
using namespace torch;
class KDB : public Classifier {
private:
int k;
float theta;
void add_m_edges(int idx, vector<int>& S, Tensor& weights);
void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
protected:
void buildModel(const torch::Tensor& weights) override;
public:
explicit KDB(int k, float theta = 0.03);
virtual ~KDB() {};
void setHyperparameters(nlohmann::json& hyperparameters) override;
vector<string> graph(const string& name = "KDB") const override;
virtual ~KDB() = default;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& name = "KDB") const override;
};
}
#endif

View File

@@ -1,16 +1,15 @@
#include "KDBLd.h"
namespace bayesnet {
using namespace std;
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
@@ -18,12 +17,12 @@ namespace bayesnet {
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor KDBLd::predict(Tensor& X)
torch::Tensor KDBLd::predict(torch::Tensor& X)
{
auto Xt = prepareX(X);
return KDB::predict(Xt);
}
vector<string> KDBLd::graph(const string& name) const
std::vector<std::string> KDBLd::graph(const std::string& name) const
{
return KDB::graph(name);
}

View File

@@ -4,16 +4,15 @@
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class KDBLd : public KDB, public Proposal {
private:
public:
explicit KDBLd(int k);
virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "KDB") const override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
std::vector<std::string> graph(const std::string& name = "KDB") const override;
torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; };
};
}
#endif // !KDBLD_H

View File

@@ -1,13 +1,13 @@
#include "Mst.h"
#include <vector>
#include <list>
/*
Based on the code from https://www.softwaretestinghelp.com/minimum-spanning-tree-tutorial/
*/
namespace bayesnet {
using namespace std;
Graph::Graph(int V) : V(V), parent(vector<int>(V))
Graph::Graph(int V) : V(V), parent(std::vector<int>(V))
{
for (int i = 0; i < V; i++)
parent[i] = i;
@@ -34,36 +34,45 @@ namespace bayesnet {
void Graph::kruskal_algorithm()
{
// sort the edges ordered on decreasing weight
sort(G.begin(), G.end(), [](const auto& left, const auto& right) {return left.first > right.first;});
stable_sort(G.begin(), G.end(), [](const auto& left, const auto& right) {return left.first > right.first;});
for (int i = 0; i < G.size(); i++) {
int uSt, vEd;
uSt = find_set(G[i].second.first);
vEd = find_set(G[i].second.second);
if (uSt != vEd) {
T.push_back(G[i]); // add to mst vector
T.push_back(G[i]); // add to mst std::vector
union_set(uSt, vEd);
}
}
}
void Graph::display_mst()
{
cout << "Edge :" << " Weight" << endl;
std::cout << "Edge :" << " Weight" << std::endl;
for (int i = 0; i < T.size(); i++) {
cout << T[i].second.first << " - " << T[i].second.second << " : "
std::cout << T[i].second.first << " - " << T[i].second.second << " : "
<< T[i].first;
cout << endl;
std::cout << std::endl;
}
}
vector<pair<int, int>> reorder(vector<pair<float, pair<int, int>>> T, int root_original)
void insertElement(std::list<int>& variables, int variable)
{
auto result = vector<pair<int, int>>();
auto visited = vector<int>();
auto nextVariables = unordered_set<int>();
nextVariables.emplace(root_original);
if (std::find(variables.begin(), variables.end(), variable) == variables.end()) {
variables.push_front(variable);
}
}
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
{
// Create the edges of a DAG from the MST
// replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted
auto result = std::vector<std::pair<int, int>>();
auto visited = std::vector<int>();
auto nextVariables = std::list<int>();
nextVariables.push_front(root_original);
while (nextVariables.size() > 0) {
int root = *nextVariables.begin();
nextVariables.erase(nextVariables.begin());
int root = nextVariables.front();
nextVariables.pop_front();
for (int i = 0; i < T.size(); ++i) {
auto [weight, edge] = T[i];
auto [from, to] = edge;
@@ -71,10 +80,10 @@ namespace bayesnet {
visited.insert(visited.begin(), i);
if (from == root) {
result.push_back({ from, to });
nextVariables.emplace(to);
insertElement(nextVariables, to);
} else {
result.push_back({ to, from });
nextVariables.emplace(from);
insertElement(nextVariables, from);
}
}
}
@@ -94,12 +103,11 @@ namespace bayesnet {
return result;
}
MST::MST(const vector<string>& features, const Tensor& weights, const int root) : features(features), weights(weights), root(root) {}
vector<pair<int, int>> MST::maximumSpanningTree()
MST::MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root) : features(features), weights(weights), root(root) {}
std::vector<std::pair<int, int>> MST::maximumSpanningTree()
{
auto num_features = features.size();
Graph g(num_features);
// Make a complete graph
for (int i = 0; i < num_features - 1; ++i) {
for (int j = i + 1; j < num_features; ++j) {

View File

@@ -4,24 +4,22 @@
#include <vector>
#include <string>
namespace bayesnet {
using namespace std;
using namespace torch;
class MST {
private:
Tensor weights;
vector<string> features;
torch::Tensor weights;
std::vector<std::string> features;
int root = 0;
public:
MST() = default;
MST(const vector<string>& features, const Tensor& weights, const int root);
vector<pair<int, int>> maximumSpanningTree();
MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
std::vector<std::pair<int, int>> maximumSpanningTree();
};
class Graph {
private:
int V; // number of nodes in graph
vector <pair<float, pair<int, int>>> G; // vector for graph
vector <pair<float, pair<int, int>>> T; // vector for mst
vector<int> parent;
std::vector <std::pair<float, std::pair<int, int>>> G; // std::vector for graph
std::vector <std::pair<float, std::pair<int, int>>> T; // std::vector for mst
std::vector<int> parent;
public:
explicit Graph(int V);
void addEdge(int u, int v, float wt);
@@ -29,7 +27,7 @@ namespace bayesnet {
void union_set(int u, int v);
void kruskal_algorithm();
void display_mst();
vector <pair<float, pair<int, int>>> get_mst() { return T; }
std::vector <std::pair<float, std::pair<int, int>>> get_mst() { return T; }
};
}
#endif

View File

@@ -3,18 +3,18 @@
#include "Network.h"
#include "bayesnetUtils.h"
namespace bayesnet {
Network::Network() : features(vector<string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
Network::Network(float maxT) : features(vector<string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
Network::Network() : features(std::vector<std::string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
Network::Network(float maxT) : features(std::vector<std::string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.
getmaxThreads()), fitted(other.fitted)
{
for (const auto& pair : other.nodes) {
nodes[pair.first] = std::make_unique<Node>(*pair.second);
for (const auto& node : other.nodes) {
nodes[node.first] = std::make_unique<Node>(*node.second);
}
}
void Network::initialize()
{
features = vector<string>();
features = std::vector<std::string>();
className = "";
classNumStates = 0;
fitted = false;
@@ -29,10 +29,10 @@ namespace bayesnet {
{
return samples;
}
void Network::addNode(const string& name)
void Network::addNode(const std::string& name)
{
if (name == "") {
throw invalid_argument("Node name cannot be empty");
throw std::invalid_argument("Node name cannot be empty");
}
if (nodes.find(name) != nodes.end()) {
return;
@@ -42,7 +42,7 @@ namespace bayesnet {
}
nodes[name] = std::make_unique<Node>(name);
}
vector<string> Network::getFeatures() const
std::vector<std::string> Network::getFeatures() const
{
return features;
}
@@ -58,11 +58,11 @@ namespace bayesnet {
}
return result;
}
string Network::getClassName() const
std::string Network::getClassName() const
{
return className;
}
bool Network::isCyclic(const string& nodeId, unordered_set<string>& visited, unordered_set<string>& recStack)
bool Network::isCyclic(const std::string& nodeId, std::unordered_set<std::string>& visited, std::unordered_set<std::string>& recStack)
{
if (visited.find(nodeId) == visited.end()) // if node hasn't been visited yet
{
@@ -78,78 +78,78 @@ namespace bayesnet {
recStack.erase(nodeId); // remove node from recursion stack before function ends
return false;
}
void Network::addEdge(const string& parent, const string& child)
void Network::addEdge(const std::string& parent, const std::string& child)
{
if (nodes.find(parent) == nodes.end()) {
throw invalid_argument("Parent node " + parent + " does not exist");
throw std::invalid_argument("Parent node " + parent + " does not exist");
}
if (nodes.find(child) == nodes.end()) {
throw invalid_argument("Child node " + child + " does not exist");
throw std::invalid_argument("Child node " + child + " does not exist");
}
// Temporarily add edge to check for cycles
nodes[parent]->addChild(nodes[child].get());
nodes[child]->addParent(nodes[parent].get());
unordered_set<string> visited;
unordered_set<string> recStack;
std::unordered_set<std::string> visited;
std::unordered_set<std::string> recStack;
if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle
{
// remove problematic edge
nodes[parent]->removeChild(nodes[child].get());
nodes[child]->removeParent(nodes[parent].get());
throw invalid_argument("Adding this edge forms a cycle in the graph.");
throw std::invalid_argument("Adding this edge forms a cycle in the graph.");
}
}
map<string, std::unique_ptr<Node>>& Network::getNodes()
std::map<std::string, std::unique_ptr<Node>>& Network::getNodes()
{
return nodes;
}
void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights)
void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
if (weights.size(0) != n_samples) {
throw invalid_argument("Weights (" + to_string(weights.size(0)) + ") must have the same number of elements as samples (" + to_string(n_samples) + ") in Network::fit");
throw std::invalid_argument("Weights (" + std::to_string(weights.size(0)) + ") must have the same number of elements as samples (" + std::to_string(n_samples) + ") in Network::fit");
}
if (n_samples != n_samples_y) {
throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")");
throw std::invalid_argument("X and y must have the same number of samples in Network::fit (" + std::to_string(n_samples) + " != " + std::to_string(n_samples_y) + ")");
}
if (n_features != featureNames.size()) {
throw invalid_argument("X and features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(featureNames.size()) + ")");
throw std::invalid_argument("X and features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(featureNames.size()) + ")");
}
if (n_features != features.size() - 1) {
throw invalid_argument("X and local features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(features.size() - 1) + ")");
throw std::invalid_argument("X and local features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(features.size() - 1) + ")");
}
if (find(features.begin(), features.end(), className) == features.end()) {
throw invalid_argument("className not found in Network::features");
throw std::invalid_argument("className not found in Network::features");
}
for (auto& feature : featureNames) {
if (find(features.begin(), features.end(), feature) == features.end()) {
throw invalid_argument("Feature " + feature + " not found in Network::features");
throw std::invalid_argument("Feature " + feature + " not found in Network::features");
}
if (states.find(feature) == states.end()) {
throw invalid_argument("Feature " + feature + " not found in states");
throw std::invalid_argument("Feature " + feature + " not found in states");
}
}
}
void Network::setStates(const map<string, vector<int>>& states)
void Network::setStates(const std::map<std::string, std::vector<int>>& states)
{
// Set states to every Node in the network
for_each(features.begin(), features.end(), [this, &states](const string& feature) {
for_each(features.begin(), features.end(), [this, &states](const std::string& feature) {
nodes.at(feature)->setNumStates(states.at(feature).size());
});
classNumStates = nodes.at(className)->getNumStates();
}
// X comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
this->className = className;
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
torch::Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X , ytmp }, 0);
for (int i = 0; i < featureNames.size(); ++i) {
auto row_feature = X.index({ i, "..." });
}
completeFit(states, weights);
}
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className;
@@ -157,7 +157,7 @@ namespace bayesnet {
completeFit(states, weights);
}
// input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<float>& weights_, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states)
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
@@ -170,11 +170,11 @@ namespace bayesnet {
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(states, weights);
}
void Network::completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights)
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
vector<thread> threads;
std::vector<std::thread> threads;
for (auto& node : nodes) {
threads.emplace_back([this, &node, &weights]() {
node.second->computeCPT(samples, features, laplaceSmoothing, weights);
@@ -188,12 +188,12 @@ namespace bayesnet {
torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict()");
throw std::logic_error("You must call fit() before calling predict()");
}
torch::Tensor result;
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
for (int i = 0; i < samples.size(1); ++i) {
const Tensor sample = samples.index({ "...", i });
const torch::Tensor sample = samples.index({ "...", i });
auto psample = predict_sample(sample);
auto temp = torch::tensor(psample, torch::kFloat64);
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64));
@@ -201,36 +201,35 @@ namespace bayesnet {
}
if (proba)
return result;
else
return result.argmax(1);
}
// Return mxn tensor of probabilities
Tensor Network::predict_proba(const Tensor& samples)
torch::Tensor Network::predict_proba(const torch::Tensor& samples)
{
return predict_tensor(samples, true);
}
// Return mxn tensor of probabilities
Tensor Network::predict(const Tensor& samples)
torch::Tensor Network::predict(const torch::Tensor& samples)
{
return predict_tensor(samples, false);
}
// Return mx1 vector of predictions
// tsamples is nxm vector of samples
vector<int> Network::predict(const vector<vector<int>>& tsamples)
// Return mx1 std::vector of predictions
// tsamples is nxm std::vector of samples
std::vector<int> Network::predict(const std::vector<std::vector<int>>& tsamples)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict()");
throw std::logic_error("You must call fit() before calling predict()");
}
vector<int> predictions;
vector<int> sample;
std::vector<int> predictions;
std::vector<int> sample;
for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear();
for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]);
}
vector<double> classProbabilities = predict_sample(sample);
std::vector<double> classProbabilities = predict_sample(sample);
// Find the class with the maximum posterior probability
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
int predictedClass = distance(classProbabilities.begin(), maxElem);
@@ -238,14 +237,14 @@ namespace bayesnet {
}
return predictions;
}
// Return mxn vector of probabilities
vector<vector<double>> Network::predict_proba(const vector<vector<int>>& tsamples)
// Return mxn std::vector of probabilities
std::vector<std::vector<double>> Network::predict_proba(const std::vector<std::vector<int>>& tsamples)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict_proba()");
throw std::logic_error("You must call fit() before calling predict_proba()");
}
vector<vector<double>> predictions;
vector<int> sample;
std::vector<std::vector<double>> predictions;
std::vector<int> sample;
for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear();
for (int col = 0; col < tsamples.size(); ++col) {
@@ -255,9 +254,9 @@ namespace bayesnet {
}
return predictions;
}
double Network::score(const vector<vector<int>>& tsamples, const vector<int>& labels)
double Network::score(const std::vector<std::vector<int>>& tsamples, const std::vector<int>& labels)
{
vector<int> y_pred = predict(tsamples);
std::vector<int> y_pred = predict(tsamples);
int correct = 0;
for (int i = 0; i < y_pred.size(); ++i) {
if (y_pred[i] == labels[i]) {
@@ -266,35 +265,35 @@ namespace bayesnet {
}
return (double)correct / y_pred.size();
}
// Return 1xn vector of probabilities
vector<double> Network::predict_sample(const vector<int>& sample)
// Return 1xn std::vector of probabilities
std::vector<double> Network::predict_sample(const std::vector<int>& sample)
{
// Ensure the sample size is equal to the number of features
if (sample.size() != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size()) +
") does not match the number of features (" + to_string(features.size() - 1) + ")");
throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
map<string, int> evidence;
std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(); ++i) {
evidence[features[i]] = sample[i];
}
return exactInference(evidence);
}
// Return 1xn vector of probabilities
vector<double> Network::predict_sample(const Tensor& sample)
// Return 1xn std::vector of probabilities
std::vector<double> Network::predict_sample(const torch::Tensor& sample)
{
// Ensure the sample size is equal to the number of features
if (sample.size(0) != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size(0)) +
") does not match the number of features (" + to_string(features.size() - 1) + ")");
throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
map<string, int> evidence;
std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(0); ++i) {
evidence[features[i]] = sample[i].item<int>();
}
return exactInference(evidence);
}
double Network::computeFactor(map<string, int>& completeEvidence)
double Network::computeFactor(std::map<std::string, int>& completeEvidence)
{
double result = 1.0;
for (auto& node : getNodes()) {
@@ -302,17 +301,17 @@ namespace bayesnet {
}
return result;
}
vector<double> Network::exactInference(map<string, int>& evidence)
std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
{
vector<double> result(classNumStates, 0.0);
vector<thread> threads;
mutex mtx;
std::vector<double> result(classNumStates, 0.0);
std::vector<std::thread> threads;
std::mutex mtx;
for (int i = 0; i < classNumStates; ++i) {
threads.emplace_back([this, &result, &evidence, i, &mtx]() {
auto completeEvidence = map<string, int>(evidence);
auto completeEvidence = std::map<std::string, int>(evidence);
completeEvidence[getClassName()] = i;
double factor = computeFactor(completeEvidence);
lock_guard<mutex> lock(mtx);
std::lock_guard<std::mutex> lock(mtx);
result[i] = factor;
});
}
@@ -324,12 +323,12 @@ namespace bayesnet {
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result;
}
vector<string> Network::show() const
std::vector<std::string> Network::show() const
{
vector<string> result;
std::vector<std::string> result;
// Draw the network
for (auto& node : nodes) {
string line = node.first + " -> ";
std::string line = node.first + " -> ";
for (auto child : node.second->getChildren()) {
line += child->getName() + ", ";
}
@@ -337,12 +336,12 @@ namespace bayesnet {
}
return result;
}
vector<string> Network::graph(const string& title) const
std::vector<std::string> Network::graph(const std::string& title) const
{
auto output = vector<string>();
auto output = std::vector<std::string>();
auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
auto suffix = ">\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n";
string header = prefix + title + suffix;
std::string header = prefix + title + suffix;
output.push_back(header);
for (auto& node : nodes) {
auto result = node.second->graph(className);
@@ -351,9 +350,9 @@ namespace bayesnet {
output.push_back("}\n");
return output;
}
vector<pair<string, string>> Network::getEdges() const
std::vector<std::pair<std::string, std::string>> Network::getEdges() const
{
auto edges = vector<pair<string, string>>();
auto edges = std::vector<std::pair<std::string, std::string>>();
for (const auto& node : nodes) {
auto head = node.first;
for (const auto& child : node.second->getChildren()) {
@@ -367,7 +366,7 @@ namespace bayesnet {
{
return getEdges().size();
}
vector<string> Network::topological_sort()
std::vector<std::string> Network::topological_sort()
{
/* Check if al the fathers of every node are before the node */
auto result = features;
@@ -394,10 +393,10 @@ namespace bayesnet {
ending = false;
}
} else {
throw logic_error("Error in topological sort because of node " + feature + " is not in result");
throw std::logic_error("Error in topological sort because of node " + feature + " is not in result");
}
} else {
throw logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
throw std::logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
}
}
}
@@ -407,8 +406,8 @@ namespace bayesnet {
void Network::dump_cpt() const
{
for (auto& node : nodes) {
cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl;
cout << node.second->getCPT() << endl;
std::cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
std::cout << node.second->getCPT() << std::endl;
}
}
}

View File

@@ -7,22 +7,22 @@
namespace bayesnet {
class Network {
private:
map<string, unique_ptr<Node>> nodes;
std::map<std::string, std::unique_ptr<Node>> nodes;
bool fitted;
float maxThreads = 0.95;
int classNumStates;
vector<string> features; // Including classname
string className;
std::vector<std::string> features; // Including classname
std::string className;
double laplaceSmoothing;
torch::Tensor samples; // nxm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
vector<double> predict_sample(const vector<int>&);
vector<double> predict_sample(const torch::Tensor&);
vector<double> exactInference(map<string, int>&);
double computeFactor(map<string, int>&);
void completeFit(const map<string, vector<int>>& states, const torch::Tensor& weights);
void checkFitData(int n_features, int n_samples, int n_samples_y, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states, const torch::Tensor& weights);
void setStates(const map<string, vector<int>>&);
std::vector<double> predict_sample(const std::vector<int>&);
std::vector<double> predict_sample(const torch::Tensor&);
std::vector<double> exactInference(std::map<std::string, int>&);
double computeFactor(std::map<std::string, int>&);
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void setStates(const std::map<std::string, std::vector<int>>&);
public:
Network();
explicit Network(float);
@@ -30,30 +30,33 @@ namespace bayesnet {
~Network() = default;
torch::Tensor& getSamples();
float getmaxThreads();
void addNode(const string&);
void addEdge(const string&, const string&);
map<string, std::unique_ptr<Node>>& getNodes();
vector<string> getFeatures() const;
void addNode(const std::string&);
void addEdge(const std::string&, const std::string&);
std::map<std::string, std::unique_ptr<Node>>& getNodes();
std::vector<std::string> getFeatures() const;
int getStates() const;
vector<pair<string, string>> getEdges() const;
std::vector<std::pair<std::string, std::string>> getEdges() const;
int getNumEdges() const;
int getClassNumStates() const;
string getClassName() const;
void fit(const vector<vector<int>>& input_data, const vector<int>& labels, const vector<float>& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const vector<string>& featureNames, const string& className, const map<string, vector<int>>& states);
vector<int> predict(const vector<vector<int>>&); // Return mx1 vector of predictions
std::string getClassName() const;
/*
Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
*/
void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
vector<vector<double>> predict_proba(const vector<vector<int>>&); // Return mxn vector of probabilities
std::vector<std::vector<double>> predict_proba(const std::vector<std::vector<int>>&); // Return mxn std::vector of probabilities
torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
double score(const vector<vector<int>>&, const vector<int>&);
vector<string> topological_sort();
vector<string> show() const;
vector<string> graph(const string& title) const; // Returns a vector of strings representing the graph in graphviz format
double score(const std::vector<std::vector<int>>&, const std::vector<int>&);
std::vector<std::string> topological_sort();
std::vector<std::string> show() const;
std::vector<std::string> graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format
void initialize();
void dump_cpt() const;
inline string version() { return "0.2.0"; }
inline std::string version() { return "0.2.0"; }
};
}
#endif

View File

@@ -3,7 +3,7 @@
namespace bayesnet {
Node::Node(const std::string& name)
: name(name), numStates(0), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
: name(name), numStates(0), cpTable(torch::Tensor()), parents(std::vector<Node*>()), children(std::vector<Node*>())
{
}
void Node::clear()
@@ -14,7 +14,7 @@ namespace bayesnet {
dimensions.clear();
numStates = 0;
}
string Node::getName() const
std::string Node::getName() const
{
return name;
}
@@ -34,11 +34,11 @@ namespace bayesnet {
{
children.push_back(child);
}
vector<Node*>& Node::getParents()
std::vector<Node*>& Node::getParents()
{
return parents;
}
vector<Node*>& Node::getChildren()
std::vector<Node*>& Node::getChildren()
{
return children;
}
@@ -63,28 +63,28 @@ namespace bayesnet {
*/
unsigned Node::minFill()
{
unordered_set<string> neighbors;
std::unordered_set<std::string> neighbors;
for (auto child : children) {
neighbors.emplace(child->getName());
}
for (auto parent : parents) {
neighbors.emplace(parent->getName());
}
auto source = vector<string>(neighbors.begin(), neighbors.end());
auto source = std::vector<std::string>(neighbors.begin(), neighbors.end());
return combinations(source).size();
}
vector<pair<string, string>> Node::combinations(const vector<string>& source)
std::vector<std::pair<std::string, std::string>> Node::combinations(const std::vector<std::string>& source)
{
vector<pair<string, string>> result;
std::vector<std::pair<std::string, std::string>> result;
for (int i = 0; i < source.size(); ++i) {
string temp = source[i];
std::string temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
}
}
return result;
}
void Node::computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
{
dimensions.clear();
// Get dimensions of the CPT
@@ -96,7 +96,7 @@ namespace bayesnet {
// Fill table with counts
auto pos = find(features.begin(), features.end(), name);
if (pos == features.end()) {
throw logic_error("Feature " + name + " not found in dataset");
throw std::logic_error("Feature " + name + " not found in dataset");
}
int name_index = pos - features.begin();
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
@@ -105,7 +105,7 @@ namespace bayesnet {
for (auto parent : parents) {
pos = find(features.begin(), features.end(), parent->getName());
if (pos == features.end()) {
throw logic_error("Feature parent " + parent->getName() + " not found in dataset");
throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
}
int parent_index = pos - features.begin();
coordinates.push_back(dataset.index({ parent_index, n_sample }));
@@ -116,17 +116,17 @@ namespace bayesnet {
// Normalize the counts
cpTable = cpTable / cpTable.sum(0);
}
float Node::getFactorValue(map<string, int>& evidence)
float Node::getFactorValue(std::map<std::string, int>& evidence)
{
c10::List<c10::optional<at::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(at::tensor(evidence[name]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
return cpTable.index({ coordinates }).item<float>();
}
vector<string> Node::graph(const string& className)
std::vector<std::string> Node::graph(const std::string& className)
{
auto output = vector<string>();
auto output = std::vector<std::string>();
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
output.push_back(name + " [shape=circle" + suffix + "] \n");
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); });

View File

@@ -5,33 +5,32 @@
#include <vector>
#include <string>
namespace bayesnet {
using namespace std;
class Node {
private:
string name;
vector<Node*> parents;
vector<Node*> children;
std::string name;
std::vector<Node*> parents;
std::vector<Node*> children;
int numStates; // number of states of the variable
torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
vector<int64_t> dimensions; // dimensions of the cpTable
std::vector<int64_t> dimensions; // dimensions of the cpTable
std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
public:
vector<pair<string, string>> combinations(const vector<string>&);
explicit Node(const string&);
explicit Node(const std::string&);
void clear();
void addParent(Node*);
void addChild(Node*);
void removeParent(Node*);
void removeChild(Node*);
string getName() const;
vector<Node*>& getParents();
vector<Node*>& getChildren();
std::string getName() const;
std::vector<Node*>& getParents();
std::vector<Node*>& getChildren();
torch::Tensor& getCPT();
void computeCPT(const torch::Tensor& dataset, const vector<string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
int getNumStates() const;
void setNumStates(int);
unsigned minFill();
vector<string> graph(const string& clasName); // Returns a vector of strings representing the graph in graphviz format
float getFactorValue(map<string, int>&);
std::vector<std::string> graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format
float getFactorValue(std::map<std::string, int>&);
};
}
#endif

View File

@@ -2,7 +2,7 @@
#include "ArffFiles.h"
namespace bayesnet {
Proposal::Proposal(torch::Tensor& dataset_, vector<string>& features_, string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {}
Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {}
Proposal::~Proposal()
{
for (auto& [key, value] : discretizers) {
@@ -18,14 +18,14 @@ namespace bayesnet {
throw std::invalid_argument("y must be an integer tensor");
}
}
map<string, vector<int>> Proposal::localDiscretizationProposal(const map<string, vector<int>>& oldStates, Network& model)
map<std::string, std::vector<int>> Proposal::localDiscretizationProposal(const map<std::string, std::vector<int>>& oldStates, Network& model)
{
// order of local discretization is important. no good 0, 1, 2...
// although we rediscretize features after the local discretization of every feature
auto order = model.topological_sort();
auto& nodes = model.getNodes();
map<string, vector<int>> states = oldStates;
vector<int> indicesToReDiscretize;
map<std::string, std::vector<int>> states = oldStates;
std::vector<int> indicesToReDiscretize;
bool upgrade = false; // Flag to check if we need to upgrade the model
for (auto feature : order) {
auto nodeParents = nodes[feature]->getParents();
@@ -33,16 +33,16 @@ namespace bayesnet {
upgrade = true;
int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin();
indicesToReDiscretize.push_back(index); // We need to re-discretize this feature
vector<string> parents;
std::vector<std::string> parents;
transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); });
// Remove class as parent as it will be added later
parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end());
// Get the indices of the parents
vector<int> indices;
std::vector<int> indices;
indices.push_back(-1); // Add class index
transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); });
// Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y)
vector<string> yJoinParents(Xf.size(1));
std::vector<std::string> yJoinParents(Xf.size(1));
for (auto idx : indices) {
for (int i = 0; i < Xf.size(1); ++i) {
yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
@@ -51,16 +51,16 @@ namespace bayesnet {
auto arff = ArffFiles();
auto yxv = arff.factorize(yJoinParents);
auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
auto xvf = vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
discretizers[feature]->fit(xvf, yxv);
}
if (upgrade) {
// Discretize again X (only the affected indices) with the new fitted discretizers
for (auto index : indicesToReDiscretize) {
auto Xt_ptr = Xf.index({ index }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt)));
auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
auto xStates = std::vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
//Update new states of the feature/node
states[pFeatures[index]] = xStates;
@@ -70,28 +70,28 @@ namespace bayesnet {
}
return states;
}
map<string, vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
{
// Discretize the continuous input data and build pDataset (Classifier::dataset)
int m = Xf.size(1);
int n = Xf.size(0);
map<string, vector<int>> states;
pDataset = torch::zeros({ n + 1, m }, kInt32);
auto yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
map<std::string, std::vector<int>> states;
pDataset = torch::zeros({ n + 1, m }, torch::kInt32);
auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
// discretize input data by feature(row)
for (auto i = 0; i < pFeatures.size(); ++i) {
auto* discretizer = new mdlp::CPPFImdlp();
auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
discretizer->fit(Xt, yv);
pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
auto xStates = vector<int>(discretizer->getCutPoints().size() + 1);
auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
states[pFeatures[i]] = xStates;
discretizers[pFeatures[i]] = discretizer;
}
int n_classes = torch::max(y).item<int>() + 1;
auto yStates = vector<int>(n_classes);
auto yStates = std::vector<int>(n_classes);
iota(yStates.begin(), yStates.end(), 0);
states[pClassName] = yStates;
pDataset.index_put_({ n, "..." }, y);
@@ -101,7 +101,7 @@ namespace bayesnet {
{
auto Xtd = torch::zeros_like(X, torch::kInt32);
for (int i = 0; i < X.size(0); ++i) {
auto Xt = vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
auto Xt = std::vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
auto Xd = discretizers[pFeatures[i]]->transform(Xt);
Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
}

View File

@@ -10,20 +10,20 @@
namespace bayesnet {
class Proposal {
public:
Proposal(torch::Tensor& pDataset, vector<string>& features_, string& className_);
Proposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_);
virtual ~Proposal();
protected:
void checkInput(const torch::Tensor& X, const torch::Tensor& y);
torch::Tensor prepareX(torch::Tensor& X);
map<string, vector<int>> localDiscretizationProposal(const map<string, vector<int>>& states, Network& model);
map<string, vector<int>> fit_local_discretization(const torch::Tensor& y);
map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
torch::Tensor Xf; // X continuous nxm tensor
torch::Tensor y; // y discrete nx1 tensor
map<string, mdlp::CPPFImdlp*> discretizers;
map<std::string, mdlp::CPPFImdlp*> discretizers;
private:
torch::Tensor& pDataset; // (n+1)xm tensor
vector<string>& pFeatures;
string& pClassName;
std::vector<std::string>& pFeatures;
std::string& pClassName;
};
}

View File

@@ -17,7 +17,7 @@ namespace bayesnet {
}
}
}
vector<string> SPODE::graph(const string& name) const
std::vector<std::string> SPODE::graph(const std::string& name) const
{
return model.graph(name);
}

View File

@@ -10,8 +10,8 @@ namespace bayesnet {
void buildModel(const torch::Tensor& weights) override;
public:
explicit SPODE(int root);
virtual ~SPODE() {};
vector<string> graph(const string& name = "SPODE") const override;
virtual ~SPODE() = default;
std::vector<std::string> graph(const std::string& name = "SPODE") const override;
};
}
#endif

View File

@@ -1,16 +1,15 @@
#include "SPODELd.h"
namespace bayesnet {
using namespace std;
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
@@ -18,7 +17,7 @@ namespace bayesnet {
states = localDiscretizationProposal(states, model);
return *this;
}
SPODELd& SPODELd::fit(torch::Tensor& dataset, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
if (!torch::is_floating_point(dataset)) {
throw std::runtime_error("Dataset must be a floating point tensor");
@@ -27,7 +26,7 @@ namespace bayesnet {
y = dataset.index({ -1, "..." }).clone();
features = features_;
className = className_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
@@ -36,12 +35,12 @@ namespace bayesnet {
return *this;
}
Tensor SPODELd::predict(Tensor& X)
torch::Tensor SPODELd::predict(torch::Tensor& X)
{
auto Xt = prepareX(X);
return SPODE::predict(Xt);
}
vector<string> SPODELd::graph(const string& name) const
std::vector<std::string> SPODELd::graph(const std::string& name) const
{
return SPODE::graph(name);
}

View File

@@ -4,16 +4,15 @@
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class SPODELd : public SPODE, public Proposal {
public:
explicit SPODELd(int root);
virtual ~SPODELd() = default;
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
SPODELd& fit(torch::Tensor& dataset, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "SPODE") const override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
std::vector<std::string> graph(const std::string& name = "SPODE") const override;
torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; };
};
}
#endif // !SPODELD_H

View File

@@ -1,8 +1,6 @@
#include "TAN.h"
namespace bayesnet {
using namespace torch;
TAN::TAN() : Classifier(Network()) {}
void TAN::buildModel(const torch::Tensor& weights)
@@ -11,10 +9,10 @@ namespace bayesnet {
addNodes();
// 1. Compute mutual information between each feature and the class and set the root node
// as the highest mutual information with the class
auto mi = vector <pair<int, float >>();
Tensor class_dataset = dataset.index({ -1, "..." });
auto mi = std::vector <std::pair<int, float >>();
torch::Tensor class_dataset = dataset.index({ -1, "..." });
for (int i = 0; i < static_cast<int>(features.size()); ++i) {
Tensor feature_dataset = dataset.index({ i, "..." });
torch::Tensor feature_dataset = dataset.index({ i, "..." });
auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights);
mi.push_back({ i, mi_value });
}
@@ -34,7 +32,7 @@ namespace bayesnet {
model.addEdge(className, feature);
}
}
vector<string> TAN::graph(const string& title) const
std::vector<std::string> TAN::graph(const std::string& title) const
{
return model.graph(title);
}

View File

@@ -2,15 +2,14 @@
#define TAN_H
#include "Classifier.h"
namespace bayesnet {
using namespace std;
class TAN : public Classifier {
private:
protected:
void buildModel(const torch::Tensor& weights) override;
public:
TAN();
virtual ~TAN() {};
vector<string> graph(const string& name = "TAN") const override;
virtual ~TAN() = default;
std::vector<std::string> graph(const std::string& name = "TAN") const override;
};
}
#endif

View File

@@ -1,16 +1,15 @@
#include "TANLd.h"
namespace bayesnet {
using namespace std;
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const vector<string>& features_, const string& className_, map<string, vector<int>>& states_)
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
@@ -19,12 +18,12 @@ namespace bayesnet {
return *this;
}
Tensor TANLd::predict(Tensor& X)
torch::Tensor TANLd::predict(torch::Tensor& X)
{
auto Xt = prepareX(X);
return TAN::predict(Xt);
}
vector<string> TANLd::graph(const string& name) const
std::vector<std::string> TANLd::graph(const std::string& name) const
{
return TAN::graph(name);
}

View File

@@ -4,16 +4,15 @@
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class TANLd : public TAN, public Proposal {
private:
public:
TANLd();
virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const vector<string>& features, const string& className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "TAN") const override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
std::vector<std::string> graph(const std::string& name = "TAN") const override;
torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; };
};
}
#endif // !TANLD_H

View File

@@ -1,25 +1,23 @@
#include "bayesnetUtils.h"
namespace bayesnet {
using namespace std;
using namespace torch;
// Return the indices in descending order
vector<int> argsort(vector<double>& nums)
std::vector<int> argsort(std::vector<double>& nums)
{
int n = nums.size();
vector<int> indices(n);
std::vector<int> indices(n);
iota(indices.begin(), indices.end(), 0);
sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
return indices;
}
vector<vector<int>> tensorToVector(Tensor& tensor)
std::vector<std::vector<int>> tensorToVector(torch::Tensor& tensor)
{
// convert mxn tensor to nxm vector
vector<vector<int>> result;
// convert mxn tensor to nxm std::vector
std::vector<std::vector<int>> result;
// Iterate over cols
for (int i = 0; i < tensor.size(1); ++i) {
auto col_tensor = tensor.index({ "...", i });
auto col = vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + tensor.size(0));
auto col = std::vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + tensor.size(0));
result.push_back(col);
}
return result;

View File

@@ -3,9 +3,7 @@
#include <torch/torch.h>
#include <vector>
namespace bayesnet {
using namespace std;
using namespace torch;
vector<int> argsort(vector<double>& nums);
vector<vector<int>> tensorToVector(Tensor& tensor);
std::vector<int> argsort(std::vector<double>& nums);
std::vector<std::vector<int>> tensorToVector(torch::Tensor& tensor);
}
#endif //BAYESNET_UTILS_H

View File

@@ -1,10 +0,0 @@
#ifndef BESTRESULT_H
#define BESTRESULT_H
#include <string>
class BestResult {
public:
static std::string title() { return "STree_default (linear-ovo)"; }
static double score() { return 22.109799; }
static std::string scoreName() { return "accuracy"; }
};
#endif

343
src/Platform/BestResults.cc Normal file
View File

@@ -0,0 +1,343 @@
#include <filesystem>
#include <set>
#include <fstream>
#include <iostream>
#include <sstream>
#include <algorithm>
#include "BestResults.h"
#include "Result.h"
#include "Colors.h"
#include "Statistics.h"
#include "BestResultsExcel.h"
#include "CLocale.h"
namespace fs = std::filesystem;
// function ftime_to_std::string, Code taken from
// https://stackoverflow.com/a/58237530/1389271
template <typename TP>
std::string ftime_to_string(TP tp)
{
auto sctp = std::chrono::time_point_cast<std::chrono::system_clock::duration>(tp - TP::clock::now()
+ std::chrono::system_clock::now());
auto tt = std::chrono::system_clock::to_time_t(sctp);
std::tm* gmt = std::gmtime(&tt);
std::stringstream buffer;
buffer << std::put_time(gmt, "%Y-%m-%d %H:%M");
return buffer.str();
}
namespace platform {
std::string BestResults::build()
{
auto files = loadResultFiles();
if (files.size() == 0) {
std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl;
exit(1);
}
json bests;
for (const auto& file : files) {
auto result = Result(path, file);
auto data = result.load();
for (auto const& item : data.at("results")) {
bool update = false;
// Check if results file contains only one dataset
auto datasetName = item.at("dataset").get<std::string>();
if (bests.contains(datasetName)) {
if (item.at("score").get<double>() > bests[datasetName].at(0).get<double>()) {
update = true;
}
} else {
update = true;
}
if (update) {
bests[datasetName] = { item.at("score").get<double>(), item.at("hyperparameters"), file };
}
}
}
std::string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
std::cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << std::endl;
}
std::ofstream file(bestFileName);
file << bests;
file.close();
return bestFileName;
}
std::string BestResults::bestResultFile()
{
return "best_results_" + score + "_" + model + ".json";
}
std::pair<std::string, std::string> getModelScore(std::string name)
{
// results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
int i = 0;
auto pos = name.find("_");
auto pos2 = name.find("_", pos + 1);
std::string score = name.substr(pos + 1, pos2 - pos - 1);
pos = name.find("_", pos2 + 1);
std::string model = name.substr(pos2 + 1, pos - pos2 - 1);
return { model, score };
}
std::vector<std::string> BestResults::loadResultFiles()
{
std::vector<std::string> files;
using std::filesystem::directory_iterator;
std::string fileModel, fileScore;
for (const auto& file : directory_iterator(path)) {
auto fileName = file.path().filename().string();
if (fileName.find(".json") != std::string::npos && fileName.find("results_") == 0) {
tie(fileModel, fileScore) = getModelScore(fileName);
if (score == fileScore && (model == fileModel || model == "any")) {
files.push_back(fileName);
}
}
}
return files;
}
json BestResults::loadFile(const std::string& fileName)
{
std::ifstream resultData(fileName);
if (resultData.is_open()) {
json data = json::parse(resultData);
return data;
}
throw std::invalid_argument("Unable to open result file. [" + fileName + "]");
}
std::vector<std::string> BestResults::getModels()
{
std::set<std::string> models;
std::vector<std::string> result;
auto files = loadResultFiles();
if (files.size() == 0) {
std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl;
exit(1);
}
std::string fileModel, fileScore;
for (const auto& file : files) {
// extract the model from the file name
tie(fileModel, fileScore) = getModelScore(file);
// add the model to the std::vector of models
models.insert(fileModel);
}
result = std::vector<std::string>(models.begin(), models.end());
return result;
}
std::vector<std::string> BestResults::getDatasets(json table)
{
std::vector<std::string> datasets;
for (const auto& dataset : table.items()) {
datasets.push_back(dataset.key());
}
return datasets;
}
void BestResults::buildAll()
{
auto models = getModels();
for (const auto& model : models) {
std::cout << "Building best results for model: " << model << std::endl;
this->model = model;
build();
}
model = "any";
}
void BestResults::listFile()
{
std::string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
} else {
std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl;
exit(1);
}
auto temp = ConfigLocale();
auto date = ftime_to_string(std::filesystem::last_write_time(bestFileName));
auto data = loadFile(bestFileName);
auto datasets = getDatasets(data);
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
int maxFileName = 0;
int maxHyper = 15;
for (auto const& item : data.items()) {
maxHyper = std::max(maxHyper, (int)item.value().at(1).dump().size());
maxFileName = std::max(maxFileName, (int)item.value().at(2).get<std::string>().size());
}
std::stringstream oss;
oss << Colors::GREEN() << "Best results for " << model << " as of " << date << std::endl;
std::cout << oss.str();
std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << "Dataset" << "Score " << std::setw(maxFileName) << "File" << " Hyperparameters" << std::endl;
std::cout << "=== " << std::string(maxDatasetName, '=') << " =========== " << std::string(maxFileName, '=') << " " << std::string(maxHyper, '=') << std::endl;
auto i = 0;
bool odd = true;
double total = 0;
for (auto const& item : data.items()) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
double value = item.value().at(0).get<double>();
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
std::cout << std::setw(maxDatasetName) << std::left << item.key() << " ";
std::cout << std::setw(11) << std::setprecision(9) << std::fixed << value << " ";
std::cout << std::setw(maxFileName) << item.value().at(2).get<std::string>() << " ";
std::cout << item.value().at(1) << " ";
std::cout << std::endl;
total += value;
odd = !odd;
}
std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ===========" << std::endl;
std::cout << std::setw(5 + maxDatasetName) << "Total.................. " << std::setw(11) << std::setprecision(8) << std::fixed << total << std::endl;
}
json BestResults::buildTableResults(std::vector<std::string> models)
{
json table;
auto maxDate = std::filesystem::file_time_type::max();
for (const auto& model : models) {
this->model = model;
std::string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
} else {
std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl;
exit(1);
}
auto dateWrite = std::filesystem::last_write_time(bestFileName);
if (dateWrite < maxDate) {
maxDate = dateWrite;
}
auto data = loadFile(bestFileName);
table[model] = data;
}
table["dateTable"] = ftime_to_string(maxDate);
return table;
}
void BestResults::printTableResults(std::vector<std::string> models, json table)
{
std::stringstream oss;
oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<std::string>() << std::endl;
std::cout << oss.str();
std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset");
for (const auto& model : models) {
std::cout << std::setw(maxModelName) << std::left << model << " ";
}
std::cout << std::endl;
std::cout << "=== " << std::string(maxDatasetName, '=') << " ";
for (const auto& model : models) {
std::cout << std::string(maxModelName, '=') << " ";
}
std::cout << std::endl;
auto i = 0;
bool odd = true;
std::map<std::string, double> totals;
int nDatasets = table.begin().value().size();
for (const auto& model : models) {
totals[model] = 0.0;
}
auto datasets = getDatasets(table.begin().value());
for (auto const& dataset : datasets) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
std::cout << std::setw(maxDatasetName) << std::left << dataset << " ";
double maxValue = 0;
// Find out the max value for this dataset
for (const auto& model : models) {
double value = table[model].at(dataset).at(0).get<double>();
if (value > maxValue) {
maxValue = value;
}
}
// Print the row with red colors on max values
for (const auto& model : models) {
std::string efectiveColor = color;
double value = table[model].at(dataset).at(0).get<double>();
if (value == maxValue) {
efectiveColor = Colors::RED();
}
totals[model] += value;
std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " ";
}
std::cout << std::endl;
odd = !odd;
}
std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ";
for (const auto& model : models) {
std::cout << std::string(maxModelName, '=') << " ";
}
std::cout << std::endl;
std::cout << Colors::GREEN() << std::setw(5 + maxDatasetName) << " Totals...................";
double max = 0.0;
for (const auto& total : totals) {
if (total.second > max) {
max = total.second;
}
}
for (const auto& model : models) {
std::string efectiveColor = Colors::GREEN();
if (totals[model] == max) {
efectiveColor = Colors::RED();
}
std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << totals[model] << " ";
}
std::cout << std::endl;
}
void BestResults::reportSingle(bool excel)
{
listFile();
if (excel) {
auto models = getModels();
// Build the table of results
json table = buildTableResults(models);
std::vector<std::string> datasets = getDatasets(table.begin().value());
BestResultsExcel excel(score, datasets);
excel.reportSingle(model, path + bestResultFile());
messageExcelFile(excel.getFileName());
}
}
void BestResults::reportAll(bool excel)
{
auto models = getModels();
// Build the table of results
json table = buildTableResults(models);
std::vector<std::string> datasets = getDatasets(table.begin().value());
maxModelName = (*max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
maxModelName = std::max(12, maxModelName);
maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
maxDatasetName = std::max(25, maxDatasetName);
// Print the table of results
printTableResults(models, table);
// Compute the Friedman test
std::map<std::string, std::map<std::string, float>> ranksModels;
if (friedman) {
Statistics stats(models, datasets, table, significance);
auto result = stats.friedmanTest();
stats.postHocHolmTest(result);
ranksModels = stats.getRanks();
}
if (excel) {
BestResultsExcel excel(score, datasets);
excel.reportAll(models, table, ranksModels, friedman, significance);
if (friedman) {
int idx = -1;
double min = 2000;
// Find out the control model
auto totals = std::vector<double>(models.size(), 0.0);
for (const auto& dataset : datasets) {
for (int i = 0; i < models.size(); ++i) {
totals[i] += ranksModels[dataset][models[i]];
}
}
for (int i = 0; i < models.size(); ++i) {
if (totals[i] < min) {
min = totals[i];
idx = i;
}
}
model = models.at(idx);
excel.reportSingle(model, path + bestResultFile());
}
messageExcelFile(excel.getFileName());
}
}
void BestResults::messageExcelFile(const std::string& fileName)
{
std::cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << std::endl;
}
}

View File

@@ -0,0 +1,36 @@
#ifndef BESTRESULTS_H
#define BESTRESULTS_H
#include <string>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
class BestResults {
public:
explicit BestResults(const std::string& path, const std::string& score, const std::string& model, bool friedman, double significance = 0.05)
: path(path), score(score), model(model), friedman(friedman), significance(significance)
{
}
std::string build();
void reportSingle(bool excel);
void reportAll(bool excel);
void buildAll();
private:
std::vector<std::string> getModels();
std::vector<std::string> getDatasets(json table);
std::vector<std::string> loadResultFiles();
void messageExcelFile(const std::string& fileName);
json buildTableResults(std::vector<std::string> models);
void printTableResults(std::vector<std::string> models, json table);
std::string bestResultFile();
json loadFile(const std::string& fileName);
void listFile();
std::string path;
std::string score;
std::string model;
bool friedman;
double significance;
int maxModelName = 0;
int maxDatasetName = 0;
};
}
#endif //BESTRESULTS_H

View File

@@ -0,0 +1,300 @@
#include <sstream>
#include "BestResultsExcel.h"
#include "Paths.h"
#include <map>
#include <nlohmann/json.hpp>
#include "Statistics.h"
#include "ReportExcel.h"
namespace platform {
json loadResultData(const std::string& fileName)
{
json data;
std::ifstream resultData(fileName);
if (resultData.is_open()) {
data = json::parse(resultData);
} else {
throw std::invalid_argument("Unable to open result file. [" + fileName + "]");
}
return data;
}
std::string getColumnName(int colNum)
{
std::string columnName = "";
if (colNum == 0)
return "A";
while (colNum > 0) {
int modulo = colNum % 26;
columnName = char(65 + modulo) + columnName;
colNum = (int)((colNum - modulo) / 26);
}
return columnName;
}
BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets) : score(score), datasets(datasets)
{
workbook = workbook_new((Paths::excel() + fileName).c_str());
setProperties("Best Results");
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
datasetNameSize = std::max(datasetNameSize, maxDatasetName);
createFormats();
}
void BestResultsExcel::reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance)
{
this->table = table;
this->models = models;
ranksModels = ranks;
this->friedman = friedman;
this->significance = significance;
worksheet = workbook_add_worksheet(workbook, "Best Results");
int maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
modelNameSize = std::max(modelNameSize, maxModelName);
formatColumns();
build();
}
void BestResultsExcel::reportSingle(const std::string& model, const std::string& fileName)
{
worksheet = workbook_add_worksheet(workbook, "Report");
if (FILE* fileTest = fopen(fileName.c_str(), "r")) {
fclose(fileTest);
} else {
std::cerr << "File " << fileName << " doesn't exist." << std::endl;
exit(1);
}
json data = loadResultData(fileName);
std::string title = "Best results for " + model;
worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]);
// Body header
row = 3;
int col = 1;
writeString(row, 0, "", "bodyHeader");
writeString(row, 1, "Dataset", "bodyHeader");
writeString(row, 2, "Score", "bodyHeader");
writeString(row, 3, "File", "bodyHeader");
writeString(row, 4, "Hyperparameters", "bodyHeader");
auto i = 0;
std::string hyperparameters;
int hypSize = 22;
std::map<std::string, std::string> files; // map of files imported and their tabs
for (auto const& item : data.items()) {
row++;
writeInt(row, 0, i++, "ints");
writeString(row, 1, item.key().c_str(), "text");
writeDouble(row, 2, item.value().at(0).get<double>(), "result");
auto fileName = item.value().at(2).get<std::string>();
std::string hyperlink = "";
try {
hyperlink = files.at(fileName);
}
catch (const std::out_of_range& oor) {
auto tabName = "table_" + std::to_string(i);
auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str());
json data = loadResultData(Paths::results() + fileName);
auto report = ReportExcel(data, false, workbook, worksheetNew);
report.show();
hyperlink = "#table_" + std::to_string(i);
files[fileName] = hyperlink;
}
hyperlink += "!H" + std::to_string(i + 6);
std::string fileNameText = "=HYPERLINK(\"" + hyperlink + "\",\"" + fileName + "\")";
worksheet_write_formula(worksheet, row, 3, fileNameText.c_str(), efectiveStyle("text"));
hyperparameters = item.value().at(1).dump();
if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
writeString(row, 4, hyperparameters, "text");
}
row++;
// Set Totals
writeString(row, 1, "Total", "bodyHeader");
std::stringstream oss;
auto colName = getColumnName(2);
oss << "=sum(" << colName << "5:" << colName << row << ")";
worksheet_write_formula(worksheet, row, 2, oss.str().c_str(), styles["bodyHeader_odd"]);
// Set format
worksheet_freeze_panes(worksheet, 4, 2);
std::vector<int> columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 };
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
BestResultsExcel::~BestResultsExcel()
{
workbook_close(workbook);
}
void BestResultsExcel::formatColumns()
{
worksheet_freeze_panes(worksheet, 4, 2);
std::vector<int> columns_sizes = { 5, datasetNameSize };
for (int i = 0; i < models.size(); ++i) {
columns_sizes.push_back(modelNameSize);
}
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
void BestResultsExcel::addConditionalFormat(std::string formula)
{
// Add conditional format for max/min values in scores/ranks sheets
lxw_format* custom_format = workbook_add_format(workbook);
format_set_bg_color(custom_format, 0xFFC7CE);
format_set_font_color(custom_format, 0x9C0006);
// Create a conditional format object. A static object would also work.
lxw_conditional_format* conditional_format = (lxw_conditional_format*)calloc(1, sizeof(lxw_conditional_format));
conditional_format->type = LXW_CONDITIONAL_TYPE_FORMULA;
std::string col = getColumnName(models.size() + 1);
std::stringstream oss;
oss << "=C5=" << formula << "($C5:$" << col << "5)";
auto formulaValue = oss.str();
conditional_format->value_string = formulaValue.c_str();
conditional_format->format = custom_format;
worksheet_conditional_format_range(worksheet, 4, 2, datasets.size() + 3, models.size() + 1, conditional_format);
}
void BestResultsExcel::build()
{
// Create Sheet with scores
header(false);
body(false);
// Add conditional format for max values
addConditionalFormat("max");
footer(false);
if (friedman) {
// Create Sheet with ranks
worksheet = workbook_add_worksheet(workbook, "Ranks");
formatColumns();
header(true);
body(true);
addConditionalFormat("min");
footer(true);
// Create Sheet with Friedman Test
doFriedman();
}
}
std::string BestResultsExcel::getFileName()
{
return Paths::excel() + fileName;
}
void BestResultsExcel::header(bool ranks)
{
row = 0;
std::string message = ranks ? "Ranks for score " + score : "Best results for " + score;
worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), message.c_str(), styles["headerFirst"]);
// Body header
row = 3;
int col = 1;
writeString(row, 0, "", "bodyHeader");
writeString(row, 1, "Dataset", "bodyHeader");
for (const auto& model : models) {
writeString(row, ++col, model.c_str(), "bodyHeader");
}
}
void BestResultsExcel::body(bool ranks)
{
row = 4;
int i = 0;
json origin = table.begin().value();
for (auto const& item : origin.items()) {
writeInt(row, 0, i++, "ints");
writeString(row, 1, item.key().c_str(), "text");
int col = 1;
for (const auto& model : models) {
double value = ranks ? ranksModels[item.key()][model] : table[model].at(item.key()).at(0).get<double>();
writeDouble(row, ++col, value, "result");
}
++row;
}
}
void BestResultsExcel::footer(bool ranks)
{
// Set Totals
writeString(row, 1, "Total", "bodyHeader");
int col = 1;
for (const auto& model : models) {
std::stringstream oss;
auto colName = getColumnName(col + 1);
oss << "=SUM(" << colName << "5:" << colName << row << ")";
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
}
if (ranks) {
row++;
writeString(row, 1, "Average ranks", "bodyHeader");
int col = 1;
for (const auto& model : models) {
auto colName = getColumnName(col + 1);
std::stringstream oss;
oss << "=SUM(" << colName << "5:" << colName << row - 1 << ")/" << datasets.size();
worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]);
}
}
}
void BestResultsExcel::doFriedman()
{
worksheet = workbook_add_worksheet(workbook, "Friedman");
std::vector<int> columns_sizes = { 5, datasetNameSize };
for (int i = 0; i < models.size(); ++i) {
columns_sizes.push_back(modelNameSize);
}
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), "Friedman Test", styles["headerFirst"]);
row = 2;
Statistics stats(models, datasets, table, significance, false);
auto result = stats.friedmanTest();
stats.postHocHolmTest(result);
auto friedmanResult = stats.getFriedmanResult();
auto holmResult = stats.getHolmResult();
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between all the classifiers.'", styles["headerSmall"]);
row += 2;
writeString(row, 1, "Friedman Q", "bodyHeader");
writeDouble(row, 2, friedmanResult.statistic, "bodyHeader");
row++;
writeString(row, 1, "Critical χ2 value", "bodyHeader");
writeDouble(row, 2, friedmanResult.criticalValue, "bodyHeader");
row++;
writeString(row, 1, "p-value", "bodyHeader");
writeDouble(row, 2, friedmanResult.pvalue, "bodyHeader");
writeString(row, 3, friedmanResult.reject ? "<" : ">", "bodyHeader");
writeDouble(row, 4, significance, "bodyHeader");
writeString(row, 5, friedmanResult.reject ? "Reject H0" : "Accept H0", "bodyHeader");
row += 3;
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Holm Test", styles["headerFirst"]);
row += 2;
worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between the control model and the other models.'", styles["headerSmall"]);
row += 2;
std::string controlModel = "Control Model: " + holmResult.model;
worksheet_merge_range(worksheet, row, 1, row, 7, controlModel.c_str(), styles["bodyHeader_odd"]);
row++;
writeString(row, 1, "Model", "bodyHeader");
writeString(row, 2, "p-value", "bodyHeader");
writeString(row, 3, "Rank", "bodyHeader");
writeString(row, 4, "Win", "bodyHeader");
writeString(row, 5, "Tie", "bodyHeader");
writeString(row, 6, "Loss", "bodyHeader");
writeString(row, 7, "Reject H0", "bodyHeader");
row++;
bool first = true;
for (const auto& item : holmResult.holmLines) {
writeString(row, 1, item.model, "text");
if (first) {
// Control model info
first = false;
writeString(row, 2, "", "text");
writeDouble(row, 3, item.rank, "result");
writeString(row, 4, "", "text");
writeString(row, 5, "", "text");
writeString(row, 6, "", "text");
writeString(row, 7, "", "textCentered");
} else {
// Rest of the models info
writeDouble(row, 2, item.pvalue, "result");
writeDouble(row, 3, item.rank, "result");
writeInt(row, 4, item.wtl.win, "ints");
writeInt(row, 5, item.wtl.tie, "ints");
writeInt(row, 6, item.wtl.loss, "ints");
writeString(row, 7, item.reject ? "Yes" : "No", "textCentered");
}
row++;
}
}
}

View File

@@ -0,0 +1,39 @@
#ifndef BESTRESULTS_EXCEL_H
#define BESTRESULTS_EXCEL_H
#include "ExcelFile.h"
#include <vector>
#include <map>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
class BestResultsExcel : ExcelFile {
public:
BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets);
~BestResultsExcel();
void reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance);
void reportSingle(const std::string& model, const std::string& fileName);
std::string getFileName();
private:
void build();
void header(bool ranks);
void body(bool ranks);
void footer(bool ranks);
void formatColumns();
void doFriedman();
void addConditionalFormat(std::string formula);
const std::string fileName = "BestResults.xlsx";
std::string score;
std::vector<std::string> models;
std::vector<std::string> datasets;
json table;
std::map<std::string, std::map<std::string, float>> ranksModels;
bool friedman;
double significance;
int modelNameSize = 12; // Min size of the column
int datasetNameSize = 25; // Min size of the column
};
}
#endif //BESTRESULTS_EXCEL_H

28
src/Platform/BestScore.h Normal file
View File

@@ -0,0 +1,28 @@
#ifndef BESTSCORE_H
#define BESTSCORE_H
#include <string>
#include <map>
#include <utility>
#include "DotEnv.h"
namespace platform {
class BestScore {
public:
static std::pair<std::string, double> getScore(const std::string& metric)
{
static std::map<std::pair<std::string, std::string>, std::pair<std::string, double>> data = {
{{"discretiz", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}},
{{"odte", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}},
};
auto env = platform::DotEnv();
std::string experiment = env.get("experiment");
try {
return data[{experiment, metric}];
}
catch (...) {
return { "", 0.0 };
}
}
};
}
#endif

22
src/Platform/CLocale.h Normal file
View File

@@ -0,0 +1,22 @@
#ifndef LOCALE_H
#define LOCALE_H
#include <locale>
#include <iostream>
#include <string>
namespace platform {
struct separation : std::numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
std::string do_grouping() const { return "\03"; }
};
class ConfigLocale {
public:
explicit ConfigLocale()
{
std::locale mylocale(std::cout.getloc(), new separation);
std::locale::global(mylocale);
std::cout.imbue(mylocale);
}
};
}
#endif

View File

@@ -1,16 +1,22 @@
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
add_executable(list list.cc platformUtils Datasets.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
else()
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp)
endif()
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")
include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
include_directories(${Python3_INCLUDE_DIRS})
include_directories(${MPI_CXX_INCLUDE_DIRS})
add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc HyperParameters.cc Folding.cc Datasets.cc Dataset.cc)
add_executable(b_list b_list.cc Datasets.cc Dataset.cc)
add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc)
add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc)
target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp)
target_link_libraries(b_grid BayesNet PyWrap ${MPI_CXX_LIBRARIES})
target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}")
target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap)
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)

View File

@@ -9,6 +9,7 @@ public:
static std::string YELLOW() { return "\033[1;33m"; }
static std::string RED() { return "\033[1;31m"; }
static std::string WHITE() { return "\033[1;37m"; }
static std::string IBLUE() { return "\033[0;94m"; }
static std::string RESET() { return "\033[0m"; }
};
#endif // COLORS_H

View File

@@ -0,0 +1,87 @@
#include "CommandParser.h"
#include <iostream>
#include <sstream>
#include <algorithm>
#include "Colors.h"
#include "Utils.h"
namespace platform {
void CommandParser::messageError(const std::string& message)
{
std::cout << Colors::RED() << message << Colors::RESET() << std::endl;
}
std::pair<char, int> CommandParser::parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int maxIndex)
{
bool finished = false;
while (!finished) {
std::stringstream oss;
std::string line;
oss << color << "Choose option (";
bool first = true;
for (auto& option : options) {
if (first) {
first = false;
} else {
oss << ", ";
}
oss << std::get<char>(option) << "=" << std::get<std::string>(option);
}
oss << "): ";
std::cout << oss.str();
getline(std::cin, line);
std::cout << Colors::RESET();
line = trim(line);
if (line.size() == 0)
continue;
if (all_of(line.begin(), line.end(), ::isdigit)) {
command = defaultCommand;
index = stoi(line);
if (index > maxIndex || index < 0) {
messageError("Index out of range");
continue;
}
finished = true;
break;
}
bool found = false;
for (auto& option : options) {
if (line[0] == std::get<char>(option)) {
found = true;
// it's a match
line.erase(line.begin());
line = trim(line);
if (std::get<bool>(option)) {
// The option requires a value
if (line.size() == 0) {
messageError("Option " + std::get<std::string>(option) + " requires a value");
break;
}
try {
index = stoi(line);
if (index > maxIndex || index < 0) {
messageError("Index out of range");
break;
}
}
catch (const std::invalid_argument& ia) {
messageError("Invalid value: " + line);
break;
}
} else {
if (line.size() > 0) {
messageError("option " + std::get<std::string>(option) + " doesn't accept values");
break;
}
}
command = std::get<char>(option);
finished = true;
break;
}
}
if (!found) {
messageError("I don't know " + line);
}
}
return { command, index };
}
} /* namespace platform */

View File

@@ -0,0 +1,20 @@
#ifndef COMMAND_PARSER_H
#define COMMAND_PARSER_H
#include <string>
#include <vector>
#include <tuple>
namespace platform {
class CommandParser {
public:
CommandParser() = default;
std::pair<char, int> parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int maxIndex);
char getCommand() const { return command; };
int getIndex() const { return index; };
private:
void messageError(const std::string& message);
char command;
int index;
};
} /* namespace platform */
#endif /* COMMAND_PARSER_H */

215
src/Platform/Dataset.cc Normal file
View File

@@ -0,0 +1,215 @@
#include "Dataset.h"
#include "ArffFiles.h"
#include <fstream>
namespace platform {
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
{
}
std::string Dataset::getName() const
{
return name;
}
std::string Dataset::getClassName() const
{
return className;
}
std::vector<std::string> Dataset::getFeatures() const
{
if (loaded) {
return features;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNFeatures() const
{
if (loaded) {
return n_features;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNSamples() const
{
if (loaded) {
return n_samples;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
std::map<std::string, std::vector<int>> Dataset::getStates() const
{
if (loaded) {
return states;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<std::vector<std::vector<float>>&, std::vector<int>&> Dataset::getVectors()
{
if (loaded) {
return { Xv, yv };
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<std::vector<std::vector<int>>&, std::vector<int>&> Dataset::getVectorsDiscretized()
{
if (loaded) {
return { Xd, yv };
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
{
if (loaded) {
buildTensors();
return { X, y };
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
void Dataset::load_csv()
{
ifstream file(path + "/" + name + ".csv");
if (file.is_open()) {
std::string line;
getline(file, line);
std::vector<std::string> tokens = split(line, ',');
features = std::vector<std::string>(tokens.begin(), tokens.end() - 1);
if (className == "-1") {
className = tokens.back();
}
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(std::vector<float>());
}
while (getline(file, line)) {
tokens = split(line, ',');
for (auto i = 0; i < features.size(); ++i) {
Xv[i].push_back(stof(tokens[i]));
}
yv.push_back(stoi(tokens.back()));
}
file.close();
} else {
throw std::invalid_argument("Unable to open dataset file.");
}
}
void Dataset::computeStates()
{
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = std::vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
}
states[className] = std::vector<int>(*max_element(yv.begin(), yv.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0);
}
void Dataset::load_arff()
{
auto arff = ArffFiles();
arff.load(path + "/" + name + ".arff", className);
// Get Dataset X, y
Xv = arff.getX();
yv = arff.getY();
// Get className & Features
className = arff.getClassName();
auto attributes = arff.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
}
std::vector<std::string> tokenize(std::string line)
{
std::vector<std::string> tokens;
for (auto i = 0; i < line.size(); ++i) {
if (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') {
std::string token = line.substr(0, i);
tokens.push_back(token);
line.erase(line.begin(), line.begin() + i + 1);
i = 0;
while (line[i] == ' ' || line[i] == '\t' || line[i] == '\n')
line.erase(line.begin(), line.begin() + i + 1);
}
}
if (line.size() > 0) {
tokens.push_back(line);
}
return tokens;
}
void Dataset::load_rdata()
{
ifstream file(path + "/" + name + "_R.dat");
if (file.is_open()) {
std::string line;
getline(file, line);
line = ArffFiles::trim(line);
std::vector<std::string> tokens = tokenize(line);
transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
if (className == "-1") {
className = ArffFiles::trim(tokens.back());
}
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(std::vector<float>());
}
while (getline(file, line)) {
tokens = tokenize(line);
// We have to skip the first token, which is the instance number.
for (auto i = 1; i < features.size() + 1; ++i) {
const float value = stof(tokens[i]);
Xv[i - 1].push_back(value);
}
yv.push_back(stoi(tokens.back()));
}
file.close();
} else {
throw std::invalid_argument("Unable to open dataset file.");
}
}
void Dataset::load()
{
if (loaded) {
return;
}
if (fileType == CSV) {
load_csv();
} else if (fileType == ARFF) {
load_arff();
} else if (fileType == RDATA) {
load_rdata();
}
if (discretize) {
Xd = discretizeDataset(Xv, yv);
computeStates();
}
n_samples = Xv[0].size();
n_features = Xv.size();
loaded = true;
}
void Dataset::buildTensors()
{
if (discretize) {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
} else {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32);
}
for (int i = 0; i < features.size(); ++i) {
if (discretize) {
X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
} else {
X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
}
}
y = torch::tensor(yv, torch::kInt32);
}
std::vector<mdlp::labels_t> Dataset::discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
std::vector<mdlp::labels_t> Xd;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
Xd.push_back(xd);
}
return Xd;
}
}

78
src/Platform/Dataset.h Normal file
View File

@@ -0,0 +1,78 @@
#ifndef DATASET_H
#define DATASET_H
#include <torch/torch.h>
#include <map>
#include <vector>
#include <string>
#include "CPPFImdlp.h"
#include "Utils.h"
namespace platform {
enum fileType_t { CSV, ARFF, RDATA };
class SourceData {
public:
SourceData(std::string source)
{
if (source == "Surcov") {
path = "datasets/";
fileType = CSV;
} else if (source == "Arff") {
path = "datasets/";
fileType = ARFF;
} else if (source == "Tanveer") {
path = "data/";
fileType = RDATA;
} else {
throw std::invalid_argument("Unknown source.");
}
}
std::string getPath()
{
return path;
}
fileType_t getFileType()
{
return fileType;
}
private:
std::string path;
fileType_t fileType;
};
class Dataset {
private:
std::string path;
std::string name;
fileType_t fileType;
std::string className;
int n_samples{ 0 }, n_features{ 0 };
std::vector<std::string> features;
std::map<std::string, std::vector<int>> states;
bool loaded;
bool discretize;
torch::Tensor X, y;
std::vector<std::vector<float>> Xv;
std::vector<std::vector<int>> Xd;
std::vector<int> yv;
void buildTensors();
void load_csv();
void load_arff();
void load_rdata();
void computeStates();
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y);
public:
Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
explicit Dataset(const Dataset&);
std::string getName() const;
std::string getClassName() const;
std::vector<string> getFeatures() const;
std::map<std::string, std::vector<int>> getStates() const;
std::pair<vector<std::vector<float>>&, std::vector<int>&> getVectors();
std::pair<vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized();
std::pair<torch::Tensor&, torch::Tensor&> getTensors();
int getNFeatures() const;
int getNSamples() const;
void load();
const bool inline isLoaded() const { return loaded; };
};
};
#endif

View File

@@ -1,47 +1,56 @@
#include "Datasets.h"
#include "platformUtils.h"
#include "ArffFiles.h"
#include <fstream>
namespace platform {
void Datasets::load()
{
ifstream catalog(path + "/all.txt");
auto sd = SourceData(sfileType);
fileType = sd.getFileType();
path = sd.getPath();
ifstream catalog(path + "all.txt");
if (catalog.is_open()) {
string line;
std::string line;
while (getline(catalog, line)) {
vector<string> tokens = split(line, ',');
string name = tokens[0];
string className = tokens[1];
if (line.empty() || line[0] == '#') {
continue;
}
std::vector<std::string> tokens = split(line, ',');
std::string name = tokens[0];
std::string className;
if (tokens.size() == 1) {
className = "-1";
} else {
className = tokens[1];
}
datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType);
}
catalog.close();
} else {
throw invalid_argument("Unable to open catalog file. [" + path + "/all.txt" + "]");
throw std::invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]");
}
}
vector<string> Datasets::getNames()
std::vector<std::string> Datasets::getNames()
{
vector<string> result;
std::vector<std::string> result;
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
return result;
}
vector<string> Datasets::getFeatures(const string& name) const
std::vector<std::string> Datasets::getFeatures(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getFeatures();
} else {
throw invalid_argument("Dataset not loaded.");
throw std::invalid_argument("Dataset not loaded.");
}
}
map<string, vector<int>> Datasets::getStates(const string& name) const
map<std::string, std::vector<int>> Datasets::getStates(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getStates();
} else {
throw invalid_argument("Dataset not loaded.");
throw std::invalid_argument("Dataset not loaded.");
}
}
void Datasets::loadDataset(const string& name) const
void Datasets::loadDataset(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return;
@@ -49,23 +58,23 @@ namespace platform {
datasets.at(name)->load();
}
}
string Datasets::getClassName(const string& name) const
std::string Datasets::getClassName(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getClassName();
} else {
throw invalid_argument("Dataset not loaded.");
throw std::invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNSamples(const string& name) const
int Datasets::getNSamples(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getNSamples();
} else {
throw invalid_argument("Dataset not loaded.");
throw std::invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNClasses(const string& name)
int Datasets::getNClasses(const std::string& name)
{
if (datasets.at(name)->isLoaded()) {
auto className = datasets.at(name)->getClassName();
@@ -74,195 +83,47 @@ namespace platform {
return states.at(className).size();
}
auto [Xv, yv] = getVectors(name);
return *max_element(yv.begin(), yv.end()) + 1;
return *std::max_element(yv.begin(), yv.end()) + 1;
} else {
throw invalid_argument("Dataset not loaded.");
throw std::invalid_argument("Dataset not loaded.");
}
}
vector<int> Datasets::getClassesCounts(const string& name) const
std::vector<int> Datasets::getClassesCounts(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
auto [Xv, yv] = datasets.at(name)->getVectors();
vector<int> counts(*max_element(yv.begin(), yv.end()) + 1);
std::vector<int> counts(*std::max_element(yv.begin(), yv.end()) + 1);
for (auto y : yv) {
counts[y]++;
}
return counts;
} else {
throw invalid_argument("Dataset not loaded.");
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(const string& name)
pair<std::vector<std::vector<float>>&, std::vector<int>&> Datasets::getVectors(const std::string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectors();
}
pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(const string& name)
pair<std::vector<std::vector<int>>&, std::vector<int>&> Datasets::getVectorsDiscretized(const std::string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectorsDiscretized();
}
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const string& name)
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const std::string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getTensors();
}
bool Datasets::isDataset(const string& name) const
bool Datasets::isDataset(const std::string& name) const
{
return datasets.find(name) != datasets.end();
}
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
{
}
string Dataset::getName() const
{
return name;
}
string Dataset::getClassName() const
{
return className;
}
vector<string> Dataset::getFeatures() const
{
if (loaded) {
return features;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNFeatures() const
{
if (loaded) {
return n_features;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNSamples() const
{
if (loaded) {
return n_samples;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
map<string, vector<int>> Dataset::getStates() const
{
if (loaded) {
return states;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<float>>&, vector<int>&> Dataset::getVectors()
{
if (loaded) {
return { Xv, yv };
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<int>>&, vector<int>&> Dataset::getVectorsDiscretized()
{
if (loaded) {
return { Xd, yv };
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
{
if (loaded) {
buildTensors();
return { X, y };
} else {
throw invalid_argument("Dataset not loaded.");
}
}
void Dataset::load_csv()
{
ifstream file(path + "/" + name + ".csv");
if (file.is_open()) {
string line;
getline(file, line);
vector<string> tokens = split(line, ',');
features = vector<string>(tokens.begin(), tokens.end() - 1);
className = tokens.back();
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(vector<float>());
}
while (getline(file, line)) {
tokens = split(line, ',');
for (auto i = 0; i < features.size(); ++i) {
Xv[i].push_back(stof(tokens[i]));
}
yv.push_back(stoi(tokens.back()));
}
file.close();
} else {
throw invalid_argument("Unable to open dataset file.");
}
}
void Dataset::computeStates()
{
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
}
states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0);
}
void Dataset::load_arff()
{
auto arff = ArffFiles();
arff.load(path + "/" + name + ".arff", className);
// Get Dataset X, y
Xv = arff.getX();
yv = arff.getY();
// Get className & Features
className = arff.getClassName();
auto attributes = arff.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
}
void Dataset::load()
{
if (loaded) {
return;
}
if (fileType == CSV) {
load_csv();
} else if (fileType == ARFF) {
load_arff();
}
if (discretize) {
Xd = discretizeDataset(Xv, yv);
computeStates();
}
n_samples = Xv[0].size();
n_features = Xv.size();
loaded = true;
}
void Dataset::buildTensors()
{
if (discretize) {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
} else {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32);
}
for (int i = 0; i < features.size(); ++i) {
if (discretize) {
X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
} else {
X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
}
}
y = torch::tensor(yv, torch::kInt32);
}
}

View File

@@ -1,67 +1,29 @@
#ifndef DATASETS_H
#define DATASETS_H
#include <torch/torch.h>
#include <map>
#include <vector>
#include <string>
#include "Dataset.h"
namespace platform {
using namespace std;
enum fileType_t { CSV, ARFF };
class Dataset {
private:
string path;
string name;
fileType_t fileType;
string className;
int n_samples{ 0 }, n_features{ 0 };
vector<string> features;
map<string, vector<int>> states;
bool loaded;
bool discretize;
torch::Tensor X, y;
vector<vector<float>> Xv;
vector<vector<int>> Xd;
vector<int> yv;
void buildTensors();
void load_csv();
void load_arff();
void computeStates();
public:
Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
explicit Dataset(const Dataset&);
string getName() const;
string getClassName() const;
vector<string> getFeatures() const;
map<string, vector<int>> getStates() const;
pair<vector<vector<float>>&, vector<int>&> getVectors();
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized();
pair<torch::Tensor&, torch::Tensor&> getTensors();
int getNFeatures() const;
int getNSamples() const;
void load();
const bool inline isLoaded() const { return loaded; };
};
class Datasets {
private:
string path;
std::string path;
fileType_t fileType;
map<string, unique_ptr<Dataset>> datasets;
std::string sfileType;
std::map<std::string, std::unique_ptr<Dataset>> datasets;
bool discretize;
void load(); // Loads the list of datasets
public:
explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); };
vector<string> getNames();
vector<string> getFeatures(const string& name) const;
int getNSamples(const string& name) const;
string getClassName(const string& name) const;
int getNClasses(const string& name);
vector<int> getClassesCounts(const string& name) const;
map<string, vector<int>> getStates(const string& name) const;
pair<vector<vector<float>>&, vector<int>&> getVectors(const string& name);
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(const string& name);
pair<torch::Tensor&, torch::Tensor&> getTensors(const string& name);
bool isDataset(const string& name) const;
void loadDataset(const string& name) const;
explicit Datasets(bool discretize, std::string sfileType) : discretize(discretize), sfileType(sfileType) { load(); };
std::vector<string> getNames();
std::vector<string> getFeatures(const std::string& name) const;
int getNSamples(const std::string& name) const;
std::string getClassName(const std::string& name) const;
int getNClasses(const std::string& name);
std::vector<int> getClassesCounts(const std::string& name) const;
std::map<std::string, std::vector<int>> getStates(const std::string& name) const;
std::pair<std::vector<std::vector<float>>&, std::vector<int>&> getVectors(const std::string& name);
std::pair<std::vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized(const std::string& name);
std::pair<torch::Tensor&, torch::Tensor&> getTensors(const std::string& name);
bool isDataset(const std::string& name) const;
void loadDataset(const std::string& name) const;
};
};

View File

@@ -4,22 +4,15 @@
#include <map>
#include <fstream>
#include <sstream>
#include "platformUtils.h"
#include <algorithm>
#include <iostream>
#include "Utils.h"
//#include "Dataset.h"
namespace platform {
class DotEnv {
private:
std::map<std::string, std::string> env;
std::string trim(const std::string& str)
{
std::string result = str;
result.erase(result.begin(), std::find_if(result.begin(), result.end(), [](int ch) {
return !std::isspace(ch);
}));
result.erase(std::find_if(result.rbegin(), result.rend(), [](int ch) {
return !std::isspace(ch);
}).base(), result.end());
return result;
}
public:
DotEnv()
{
@@ -43,7 +36,7 @@ namespace platform {
}
std::string get(const std::string& key)
{
return env[key];
return env.at(key);
}
std::vector<int> getSeeds()
{

168
src/Platform/ExcelFile.cc Normal file
View File

@@ -0,0 +1,168 @@
#include "ExcelFile.h"
namespace platform {
ExcelFile::ExcelFile()
{
setDefault();
}
ExcelFile::ExcelFile(lxw_workbook* workbook) : workbook(workbook)
{
setDefault();
}
ExcelFile::ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet) : workbook(workbook), worksheet(worksheet)
{
setDefault();
}
void ExcelFile::setDefault()
{
normalSize = 14; //font size for report body
row = 0;
colorTitle = 0xB1A0C7;
colorOdd = 0xDCE6F1;
colorEven = 0xFDE9D9;
}
lxw_workbook* ExcelFile::getWorkbook()
{
return workbook;
}
void ExcelFile::setProperties(std::string title)
{
char line[title.size() + 1];
strcpy(line, title.c_str());
lxw_doc_properties properties = {
.title = line,
.subject = (char*)"Machine learning results",
.author = (char*)"Ricardo Montañana Gómez",
.manager = (char*)"Dr. J. A. Gámez, Dr. J. M. Puerta",
.company = (char*)"UCLM",
.comments = (char*)"Created with libxlsxwriter and c++",
};
workbook_set_properties(workbook, &properties);
}
lxw_format* ExcelFile::efectiveStyle(const std::string& style)
{
lxw_format* efectiveStyle = NULL;
if (style != "") {
std::string suffix = row % 2 ? "_odd" : "_even";
try {
efectiveStyle = styles.at(style + suffix);
}
catch (const std::out_of_range& oor) {
try {
efectiveStyle = styles.at(style);
}
catch (const std::out_of_range& oor) {
throw std::invalid_argument("Style " + style + " not found");
}
}
}
return efectiveStyle;
}
void ExcelFile::writeString(int row, int col, const std::string& text, const std::string& style)
{
worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style));
}
void ExcelFile::writeInt(int row, int col, const int number, const std::string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ExcelFile::writeDouble(int row, int col, const double number, const std::string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ExcelFile::addColor(lxw_format* style, bool odd)
{
uint32_t efectiveColor = odd ? colorEven : colorOdd;
format_set_bg_color(style, lxw_color_t(efectiveColor));
}
void ExcelFile::createStyle(const std::string& name, lxw_format* style, bool odd)
{
addColor(style, odd);
if (name == "textCentered") {
format_set_align(style, LXW_ALIGN_CENTER);
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "text") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "bodyHeader") {
format_set_bold(style);
format_set_font_size(style, normalSize);
format_set_align(style, LXW_ALIGN_CENTER);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(style, LXW_BORDER_THIN);
format_set_bg_color(style, lxw_color_t(colorTitle));
} else if (name == "result") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "0.0000000");
} else if (name == "time") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "#,##0.000000");
} else if (name == "ints") {
format_set_font_size(style, normalSize);
format_set_num_format(style, "###,##0");
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "floats") {
format_set_border(style, LXW_BORDER_THIN);
format_set_font_size(style, normalSize);
format_set_num_format(style, "#,##0.00");
}
}
void ExcelFile::createFormats()
{
auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" };
lxw_format* style;
for (std::string name : styleNames) {
lxw_format* style = workbook_add_format(workbook);
style = workbook_add_format(workbook);
createStyle(name, style, true);
styles[name + "_odd"] = style;
style = workbook_add_format(workbook);
createStyle(name, style, false);
styles[name + "_even"] = style;
}
// Header 1st line
lxw_format* headerFirst = workbook_add_format(workbook);
format_set_bold(headerFirst);
format_set_font_size(headerFirst, 18);
format_set_align(headerFirst, LXW_ALIGN_CENTER);
format_set_align(headerFirst, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerFirst, LXW_BORDER_THIN);
format_set_bg_color(headerFirst, lxw_color_t(colorTitle));
// Header rest
lxw_format* headerRest = workbook_add_format(workbook);
format_set_bold(headerRest);
format_set_align(headerRest, LXW_ALIGN_CENTER);
format_set_font_size(headerRest, 16);
format_set_align(headerRest, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerRest, LXW_BORDER_THIN);
format_set_bg_color(headerRest, lxw_color_t(colorOdd));
// Header small
lxw_format* headerSmall = workbook_add_format(workbook);
format_set_bold(headerSmall);
format_set_align(headerSmall, LXW_ALIGN_LEFT);
format_set_font_size(headerSmall, 12);
format_set_border(headerSmall, LXW_BORDER_THIN);
format_set_align(headerSmall, LXW_ALIGN_VERTICAL_CENTER);
format_set_bg_color(headerSmall, lxw_color_t(colorOdd));
// Summary style
lxw_format* summaryStyle = workbook_add_format(workbook);
format_set_bold(summaryStyle);
format_set_font_size(summaryStyle, 16);
format_set_border(summaryStyle, LXW_BORDER_THIN);
format_set_align(summaryStyle, LXW_ALIGN_VERTICAL_CENTER);
styles["headerFirst"] = headerFirst;
styles["headerRest"] = headerRest;
styles["headerSmall"] = headerSmall;
styles["summaryStyle"] = summaryStyle;
}
}

43
src/Platform/ExcelFile.h Normal file
View File

@@ -0,0 +1,43 @@
#ifndef EXCELFILE_H
#define EXCELFILE_H
#include <locale>
#include <string>
#include <map>
#include "xlsxwriter.h"
namespace platform {
struct separated : std::numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
std::string do_grouping() const { return "\03"; }
};
class ExcelFile {
public:
ExcelFile();
ExcelFile(lxw_workbook* workbook);
ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet);
lxw_workbook* getWorkbook();
protected:
void setProperties(std::string title);
void writeString(int row, int col, const std::string& text, const std::string& style = "");
void writeInt(int row, int col, const int number, const std::string& style = "");
void writeDouble(int row, int col, const double number, const std::string& style = "");
void createFormats();
void createStyle(const std::string& name, lxw_format* style, bool odd);
void addColor(lxw_format* style, bool odd);
lxw_format* efectiveStyle(const std::string& name);
lxw_workbook* workbook;
lxw_worksheet* worksheet;
std::map<std::string, lxw_format*> styles;
int row;
int normalSize; //font size for report body
uint32_t colorTitle;
uint32_t colorOdd;
uint32_t colorEven;
private:
void setDefault();
};
}
#endif // !EXCELFILE_H

View File

@@ -1,11 +1,12 @@
#include <fstream>
#include "Experiment.h"
#include "Datasets.h"
#include "Models.h"
#include "ReportConsole.h"
#include <fstream>
#include "Paths.h"
namespace platform {
using json = nlohmann::json;
string get_date()
std::string get_date()
{
time_t rawtime;
tm* timeinfo;
@@ -15,7 +16,7 @@ namespace platform {
oss << std::put_time(timeinfo, "%Y-%m-%d");
return oss.str();
}
string get_time()
std::string get_time()
{
time_t rawtime;
tm* timeinfo;
@@ -25,10 +26,9 @@ namespace platform {
oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str();
}
Experiment::Experiment() : hyperparameters(json::parse("{}")) {}
string Experiment::get_file_name()
std::string Experiment::get_file_name()
{
string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
return result;
}
@@ -80,7 +80,7 @@ namespace platform {
}
return result;
}
void Experiment::save(const string& path)
void Experiment::save(const std::string& path)
{
json data = build_json();
ofstream file(path + "/" + get_file_name());
@@ -98,20 +98,20 @@ namespace platform {
void Experiment::show()
{
json data = build_json();
cout << data.dump(4) << endl;
std::cout << data.dump(4) << std::endl;
}
void Experiment::go(vector<string> filesToProcess, const string& path)
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet)
{
cout << "*** Starting experiment: " << title << " ***" << endl;
std::cout << "*** Starting experiment: " << title << " ***" << std::endl;
for (auto fileName : filesToProcess) {
cout << "- " << setw(20) << left << fileName << " " << right << flush;
cross_validation(path, fileName);
cout << endl;
std::cout << "- " << setw(20) << left << fileName << " " << right << flush;
cross_validation(fileName, quiet);
std::cout << std::endl;
}
}
string getColor(bayesnet::status_t status)
std::string getColor(bayesnet::status_t status)
{
switch (status) {
case bayesnet::NORMAL:
@@ -125,28 +125,30 @@ namespace platform {
}
}
void showProgress(int fold, const string& color, const string& phase)
void showProgress(int fold, const std::string& color, const std::string& phase)
{
string prefix = phase == "a" ? "" : "\b\b\b\b";
cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
}
void Experiment::cross_validation(const string& path, const string& fileName)
void Experiment::cross_validation(const std::string& fileName, bool quiet)
{
auto datasets = platform::Datasets(path, discretized, platform::ARFF);
auto datasets = Datasets(discretized, Paths::datasets());
// Get dataset
auto [X, y] = datasets.getTensors(fileName);
auto states = datasets.getStates(fileName);
auto features = datasets.getFeatures(fileName);
auto samples = datasets.getNSamples(fileName);
auto className = datasets.getClassName(fileName);
cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
if (!quiet) {
std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
}
// Prepare Result
auto result = Result();
auto [values, counts] = at::_unique(y);
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
result.setHyperparameters(hyperparameters);
// Initialize results vectors
result.setHyperparameters(hyperparameters.get(fileName));
// Initialize results std::vectors
int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
@@ -158,7 +160,8 @@ namespace platform {
Timer train_timer, test_timer;
int item = 0;
for (auto seed : randomSeeds) {
cout << "(" << seed << ") doing Fold: " << flush;
if (!quiet)
std::cout << "(" << seed << ") doing Fold: " << flush;
Fold* fold;
if (stratified)
fold = new StratifiedKFold(nfolds, y, seed);
@@ -167,9 +170,9 @@ namespace platform {
for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(model);
setModelVersion(clf->getVersion());
if (hyperparameters.size() != 0) {
clf->setHyperparameters(hyperparameters);
}
auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, fileName);
clf->setHyperparameters(hyperparameters.get(fileName));
// Split train - test dataset
train_timer.start();
auto [train, test] = fold->getFold(nfold);
@@ -179,9 +182,11 @@ namespace platform {
auto y_train = y.index({ train_t });
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "a");
// Train model
clf->fit(X_train, y_train, features, className, states);
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges();
@@ -190,22 +195,24 @@ namespace platform {
// Score train
auto accuracy_train_value = clf->score(X_train, y_train);
// Test model
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
test_timer.start();
auto accuracy_test_value = clf->score(X_test, y_test);
test_time[item] = test_timer.getDuration();
accuracy_train[item] = accuracy_train_value;
accuracy_test[item] = accuracy_test_value;
cout << "\b\b\b, " << flush;
// Store results and times in vector
if (!quiet)
std::cout << "\b\b\b, " << flush;
// Store results and times in std::vector
result.addScoreTrain(accuracy_train_value);
result.addScoreTest(accuracy_test_value);
result.addTimeTrain(train_time[item].item<double>());
result.addTimeTest(test_time[item].item<double>());
item++;
clf.reset();
}
cout << "end. " << flush;
if (!quiet)
std::cout << "end. " << flush;
delete fold;
}
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());

View File

@@ -3,41 +3,27 @@
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <string>
#include <chrono>
#include "Folding.h"
#include "BaseClassifier.h"
#include "HyperParameters.h"
#include "TAN.h"
#include "KDB.h"
#include "AODE.h"
#include "Timer.h"
using namespace std;
namespace platform {
using json = nlohmann::json;
class Timer {
private:
chrono::high_resolution_clock::time_point begin;
public:
Timer() = default;
~Timer() = default;
void start() { begin = chrono::high_resolution_clock::now(); }
double getDuration()
{
chrono::high_resolution_clock::time_point end = chrono::high_resolution_clock::now();
chrono::duration<double> time_span = chrono::duration_cast<chrono::duration<double>>(end - begin);
return time_span.count();
}
};
class Result {
private:
string dataset, model_version;
std::string dataset, model_version;
json hyperparameters;
int samples{ 0 }, features{ 0 }, classes{ 0 };
double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
vector<double> scores_train, scores_test, times_train, times_test;
std::vector<double> scores_train, scores_test, times_train, times_test;
public:
Result() = default;
Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; }
Result& setDataset(const std::string& dataset) { this->dataset = dataset; return *this; }
Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Result& setSamples(int samples) { this->samples = samples; return *this; }
Result& setFeatures(int features) { this->features = features; return *this; }
@@ -59,7 +45,7 @@ namespace platform {
Result& addTimeTest(double time) { times_test.push_back(time); return *this; }
const float get_score_train() const { return score_train; }
float get_score_test() { return score_test; }
const string& getDataset() const { return dataset; }
const std::string& getDataset() const { return dataset; }
const json& getHyperparameters() const { return hyperparameters; }
const int getSamples() const { return samples; }
const int getFeatures() const { return features; }
@@ -75,43 +61,43 @@ namespace platform {
const float getNodes() const { return nodes; }
const float getLeaves() const { return leaves; }
const float getDepth() const { return depth; }
const vector<double>& getScoresTrain() const { return scores_train; }
const vector<double>& getScoresTest() const { return scores_test; }
const vector<double>& getTimesTrain() const { return times_train; }
const vector<double>& getTimesTest() const { return times_test; }
const std::vector<double>& getScoresTrain() const { return scores_train; }
const std::vector<double>& getScoresTest() const { return scores_test; }
const std::vector<double>& getTimesTrain() const { return times_train; }
const std::vector<double>& getTimesTest() const { return times_test; }
};
class Experiment {
private:
string title, model, platform, score_name, model_version, language_version, language;
bool discretized{ false }, stratified{ false };
vector<Result> results;
vector<int> randomSeeds;
json hyperparameters = "{}";
int nfolds{ 0 };
float duration{ 0 };
json build_json();
public:
Experiment();
Experiment& setTitle(const string& title) { this->title = title; return *this; }
Experiment& setModel(const string& model) { this->model = model; return *this; }
Experiment& setPlatform(const string& platform) { this->platform = platform; return *this; }
Experiment& setScoreName(const string& score_name) { this->score_name = score_name; return *this; }
Experiment& setModelVersion(const string& model_version) { this->model_version = model_version; return *this; }
Experiment& setLanguage(const string& language) { this->language = language; return *this; }
Experiment& setLanguageVersion(const string& language_version) { this->language_version = language_version; return *this; }
Experiment() = default;
Experiment& setTitle(const std::string& title) { this->title = title; return *this; }
Experiment& setModel(const std::string& model) { this->model = model; return *this; }
Experiment& setPlatform(const std::string& platform) { this->platform = platform; return *this; }
Experiment& setScoreName(const std::string& score_name) { this->score_name = score_name; return *this; }
Experiment& setModelVersion(const std::string& model_version) { this->model_version = model_version; return *this; }
Experiment& setLanguage(const std::string& language) { this->language = language; return *this; }
Experiment& setLanguageVersion(const std::string& language_version) { this->language_version = language_version; return *this; }
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }
Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; }
Experiment& addResult(Result result) { results.push_back(result); return *this; }
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->duration = duration; return *this; }
Experiment& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
string get_file_name();
void save(const string& path);
void cross_validation(const string& path, const string& fileName);
void go(vector<string> filesToProcess, const string& path);
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
std::string get_file_name();
void save(const std::string& path);
void cross_validation(const std::string& fileName, bool quiet);
void go(std::vector<std::string> filesToProcess, bool quiet);
void show();
void report();
private:
std::string title, model, platform, score_name, model_version, language_version, language;
bool discretized{ false }, stratified{ false };
std::vector<Result> results;
std::vector<int> randomSeeds;
HyperParameters hyperparameters;
int nfolds{ 0 };
float duration{ 0 };
json build_json();
};
}
#endif

View File

@@ -4,23 +4,23 @@
namespace platform {
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
random_device rd;
random_seed = default_random_engine(seed == -1 ? rd() : seed);
srand(seed == -1 ? time(0) : seed);
std::random_device rd;
random_seed = std::default_random_engine(seed == -1 ? rd() : seed);
std::srand(seed == -1 ? time(0) : seed);
}
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(vector<int>(n))
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(std::vector<int>(n))
{
iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
std::iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
shuffle(indices.begin(), indices.end(), random_seed);
}
pair<vector<int>, vector<int>> KFold::getFold(int nFold)
std::pair<std::vector<int>, std::vector<int>> KFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")");
}
int nTest = n / k;
auto train = vector<int>();
auto test = vector<int>();
auto train = std::vector<int>();
auto test = std::vector<int>();
for (int i = 0; i < n; i++) {
if (i >= nTest * nFold && i < nTest * (nFold + 1)) {
test.push_back(indices[i]);
@@ -33,10 +33,10 @@ namespace platform {
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
n = y.numel();
this->y = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
this->y = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
build();
}
StratifiedKFold::StratifiedKFold(int k, const vector<int>& y, int seed)
StratifiedKFold::StratifiedKFold(int k, const std::vector<int>& y, int seed)
: Fold(k, y.size(), seed)
{
this->y = y;
@@ -45,11 +45,12 @@ namespace platform {
}
void StratifiedKFold::build()
{
stratified_indices = vector<vector<int>>(k);
stratified_indices = std::vector<std::vector<int>>(k);
int fold_size = n / k;
// Compute class counts and indices
auto class_indices = map<int, vector<int>>();
vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
auto class_indices = std::map<int, std::vector<int>>();
std::vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
for (auto i = 0; i < n; ++i) {
class_counts[y[i]]++;
class_indices[y[i]].push_back(i);
@@ -60,20 +61,26 @@ namespace platform {
}
// Assign indices to folds
for (auto label = 0; label < class_counts.size(); ++label) {
auto num_samples_to_take = class_counts[label] / k;
if (num_samples_to_take == 0)
auto num_samples_to_take = class_counts.at(label) / k;
if (num_samples_to_take == 0) {
std::cerr << "Warning! The number of samples in class " << label << " (" << class_counts.at(label)
<< ") is less than the number of folds (" << k << ")." << std::endl;
faulty = true;
continue;
}
auto remainder_samples_to_take = class_counts[label] % k;
for (auto fold = 0; fold < k; ++fold) {
auto it = next(class_indices[label].begin(), num_samples_to_take);
move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ##
class_indices[label].erase(class_indices[label].begin(), it);
}
auto chosen = std::vector<bool>(k, false);
while (remainder_samples_to_take > 0) {
int fold = (rand() % static_cast<int>(k));
if (stratified_indices[fold].size() == fold_size + 1) {
if (chosen.at(fold)) {
continue;
}
chosen[fold] = true;
auto it = next(class_indices[label].begin(), 1);
stratified_indices[fold].push_back(*class_indices[label].begin());
class_indices[label].erase(class_indices[label].begin(), it);
@@ -81,13 +88,13 @@ namespace platform {
}
}
}
pair<vector<int>, vector<int>> StratifiedKFold::getFold(int nFold)
std::pair<std::vector<int>, std::vector<int>> StratifiedKFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")");
}
vector<int> test_indices = stratified_indices[nFold];
vector<int> train_indices;
std::vector<int> test_indices = stratified_indices[nFold];
std::vector<int> train_indices;
for (int i = 0; i < k; ++i) {
if (i == nFold) continue;
train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end());

View File

@@ -3,36 +3,37 @@
#include <torch/torch.h>
#include <vector>
#include <random>
using namespace std;
namespace platform {
class Fold {
protected:
int k;
int n;
int seed;
default_random_engine random_seed;
std::default_random_engine random_seed;
public:
Fold(int k, int n, int seed = -1);
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
virtual std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default;
int getNumberOfFolds() { return k; }
};
class KFold : public Fold {
private:
vector<int> indices;
std::vector<int> indices;
public:
KFold(int k, int n, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override;
std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) override;
};
class StratifiedKFold : public Fold {
private:
vector<int> y;
vector<vector<int>> stratified_indices;
std::vector<int> y;
std::vector<std::vector<int>> stratified_indices;
void build();
bool faulty = false; // Only true if the number of samples of any class is less than the number of folds.
public:
StratifiedKFold(int k, const vector<int>& y, int seed = -1);
StratifiedKFold(int k, const std::vector<int>& y, int seed = -1);
StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override;
std::pair<std::vector<int>, std::vector<int>> getFold(int nFold) override;
bool isFaulty() { return faulty; }
};
}
#endif

75
src/Platform/GridData.cc Normal file
View File

@@ -0,0 +1,75 @@
#include "GridData.h"
#include <fstream>
namespace platform {
GridData::GridData(const std::string& fileName)
{
json grid_file;
std::ifstream resultData(fileName);
if (resultData.is_open()) {
grid_file = json::parse(resultData);
} else {
throw std::invalid_argument("Unable to open input file. [" + fileName + "]");
}
for (const auto& item : grid_file.items()) {
auto key = item.key();
auto value = item.value();
grid[key] = value;
}
}
int GridData::computeNumCombinations(const json& line)
{
int numCombinations = 1;
for (const auto& item : line.items()) {
numCombinations *= item.value().size();
}
return numCombinations;
}
int GridData::getNumCombinations(const std::string& dataset)
{
int numCombinations = 0;
auto selected = decide_dataset(dataset);
for (const auto& line : grid.at(selected)) {
numCombinations += computeNumCombinations(line);
}
return numCombinations;
}
json GridData::generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination)
{
if (index == last) {
// If we reached the end of input, store the current combination
output.push_back(currentCombination);
return currentCombination;
}
const auto& key = index.key();
const auto& values = index.value();
for (const auto& value : values) {
auto combination = currentCombination;
combination[key] = value;
json::iterator nextIndex = index;
generateCombinations(++nextIndex, last, output, combination);
}
return currentCombination;
}
std::vector<json> GridData::getGrid(const std::string& dataset)
{
auto selected = decide_dataset(dataset);
auto result = std::vector<json>();
for (json line : grid.at(selected)) {
generateCombinations(line.begin(), line.end(), result, json({}));
}
return result;
}
json& GridData::getInputGrid(const std::string& dataset)
{
auto selected = decide_dataset(dataset);
return grid.at(selected);
}
std::string GridData::decide_dataset(const std::string& dataset)
{
if (grid.find(dataset) != grid.end())
return dataset;
return ALL_DATASETS;
}
} /* namespace platform */

26
src/Platform/GridData.h Normal file
View File

@@ -0,0 +1,26 @@
#ifndef GRIDDATA_H
#define GRIDDATA_H
#include <string>
#include <vector>
#include <map>
#include <nlohmann/json.hpp>
namespace platform {
using json = nlohmann::json;
const std::string ALL_DATASETS = "all";
class GridData {
public:
explicit GridData(const std::string& fileName);
~GridData() = default;
std::vector<json> getGrid(const std::string& dataset = ALL_DATASETS);
int getNumCombinations(const std::string& dataset = ALL_DATASETS);
json& getInputGrid(const std::string& dataset = ALL_DATASETS);
std::map<std::string, json>& getGridFile() { return grid; }
private:
std::string decide_dataset(const std::string& dataset);
json generateCombinations(json::iterator index, const json::iterator last, std::vector<json>& output, json currentCombination);
int computeNumCombinations(const json& line);
std::map<std::string, json> grid;
};
} /* namespace platform */
#endif /* GRIDDATA_H */

441
src/Platform/GridSearch.cc Normal file
View File

@@ -0,0 +1,441 @@
#include <iostream>
#include <cstddef>
#include <torch/torch.h>
#include "GridSearch.h"
#include "Models.h"
#include "Paths.h"
#include "Folding.h"
#include "Colors.h"
namespace platform {
std::string get_date()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%Y-%m-%d");
return oss.str();
}
std::string get_time()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str();
}
std::string get_color_rank(int rank)
{
auto colors = { Colors::WHITE(), Colors::RED(), Colors::GREEN(), Colors::BLUE(), Colors::MAGENTA(), Colors::CYAN() };
return *(colors.begin() + rank % colors.size());
}
GridSearch::GridSearch(struct ConfigGrid& config) : config(config)
{
}
json GridSearch::loadResults()
{
std::ifstream file(Paths::grid_output(config.model));
if (file.is_open()) {
return json::parse(file);
}
return json();
}
std::vector<std::string> GridSearch::filterDatasets(Datasets& datasets) const
{
// Load datasets
auto datasets_names = datasets.getNames();
if (config.continue_from != NO_CONTINUE()) {
// Continue previous execution:
if (std::find(datasets_names.begin(), datasets_names.end(), config.continue_from) == datasets_names.end()) {
throw std::invalid_argument("Dataset " + config.continue_from + " not found");
}
// Remove datasets already processed
std::vector<string>::iterator it = datasets_names.begin();
while (it != datasets_names.end()) {
if (*it != config.continue_from) {
it = datasets_names.erase(it);
} else {
if (config.only)
++it;
else
break;
}
}
}
// Exclude datasets
for (const auto& name : config.excluded) {
auto dataset = name.get<std::string>();
auto it = std::find(datasets_names.begin(), datasets_names.end(), dataset);
if (it == datasets_names.end()) {
throw std::invalid_argument("Dataset " + dataset + " already excluded or doesn't exist!");
}
datasets_names.erase(it);
}
return datasets_names;
}
json GridSearch::build_tasks_mpi(int rank)
{
auto tasks = json::array();
auto grid = GridData(Paths::grid_input(config.model));
auto datasets = Datasets(false, Paths::datasets());
auto all_datasets = datasets.getNames();
auto datasets_names = filterDatasets(datasets);
for (int idx_dataset = 0; idx_dataset < datasets_names.size(); ++idx_dataset) {
auto dataset = datasets_names[idx_dataset];
for (const auto& seed : config.seeds) {
auto combinations = grid.getGrid(dataset);
for (int n_fold = 0; n_fold < config.n_folds; n_fold++) {
json task = {
{ "dataset", dataset },
{ "idx_dataset", idx_dataset},
{ "seed", seed },
{ "fold", n_fold},
};
tasks.push_back(task);
}
}
}
// Shuffle the array so heavy datasets are spread across the workers
std::mt19937 g{ 271 }; // Use fixed seed to obtain the same shuffle
std::shuffle(tasks.begin(), tasks.end(), g);
std::cout << get_color_rank(rank) << "* Number of tasks: " << tasks.size() << std::endl;
std::cout << "|";
for (int i = 0; i < tasks.size(); ++i) {
std::cout << (i + 1) % 10;
}
std::cout << "|" << std::endl << "|" << std::flush;
return tasks;
}
void process_task_mpi_consumer(struct ConfigGrid& config, struct ConfigMPI& config_mpi, json& tasks, int n_task, Datasets& datasets, Task_Result* result)
{
// initialize
Timer timer;
timer.start();
json task = tasks[n_task];
auto model = config.model;
auto grid = GridData(Paths::grid_input(model));
auto dataset = task["dataset"].get<std::string>();
auto idx_dataset = task["idx_dataset"].get<int>();
auto seed = task["seed"].get<int>();
auto n_fold = task["fold"].get<int>();
bool stratified = config.stratified;
// Generate the hyperparamters combinations
auto combinations = grid.getGrid(dataset);
auto [X, y] = datasets.getTensors(dataset);
auto states = datasets.getStates(dataset);
auto features = datasets.getFeatures(dataset);
auto className = datasets.getClassName(dataset);
//
// Start working on task
//
Fold* fold;
if (stratified)
fold = new StratifiedKFold(config.n_folds, y, seed);
else
fold = new KFold(config.n_folds, y.size(0), seed);
auto [train, test] = fold->getFold(n_fold);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
auto X_train = X.index({ "...", train_t });
auto y_train = y.index({ train_t });
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
double best_fold_score = 0.0;
int best_idx_combination = -1;
json best_fold_hyper;
for (int idx_combination = 0; idx_combination < combinations.size(); ++idx_combination) {
auto hyperparam_line = combinations[idx_combination];
auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line);
Fold* nested_fold;
if (config.stratified)
nested_fold = new StratifiedKFold(config.nested, y_train, seed);
else
nested_fold = new KFold(config.nested, y_train.size(0), seed);
double score = 0.0;
for (int n_nested_fold = 0; n_nested_fold < config.nested; n_nested_fold++) {
// Nested level fold
auto [train_nested, test_nested] = nested_fold->getFold(n_nested_fold);
auto train_nested_t = torch::tensor(train_nested);
auto test_nested_t = torch::tensor(test_nested);
auto X_nested_train = X_train.index({ "...", train_nested_t });
auto y_nested_train = y_train.index({ train_nested_t });
auto X_nested_test = X_train.index({ "...", test_nested_t });
auto y_nested_test = y_train.index({ test_nested_t });
// Build Classifier with selected hyperparameters
auto clf = Models::instance()->create(config.model);
auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, dataset);
clf->setHyperparameters(hyperparameters.get(dataset));
// Train model
clf->fit(X_nested_train, y_nested_train, features, className, states);
// Test model
score += clf->score(X_nested_test, y_nested_test);
}
delete nested_fold;
score /= config.nested;
if (score > best_fold_score) {
best_fold_score = score;
best_idx_combination = idx_combination;
best_fold_hyper = hyperparam_line;
}
}
delete fold;
// Build Classifier with the best hyperparameters to obtain the best score
auto hyperparameters = platform::HyperParameters(datasets.getNames(), best_fold_hyper);
auto clf = Models::instance()->create(config.model);
auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, dataset);
clf->setHyperparameters(best_fold_hyper);
clf->fit(X_train, y_train, features, className, states);
best_fold_score = clf->score(X_test, y_test);
// Return the result
result->idx_dataset = task["idx_dataset"].get<int>();
result->idx_combination = best_idx_combination;
result->score = best_fold_score;
result->n_fold = n_fold;
result->time = timer.getDuration();
// Update progress bar
std::cout << get_color_rank(config_mpi.rank) << "*" << std::flush;
}
json store_result(std::vector<std::string>& names, Task_Result& result, json& results)
{
json json_result = {
{ "score", result.score },
{ "combination", result.idx_combination },
{ "fold", result.n_fold },
{ "time", result.time },
{ "dataset", result.idx_dataset }
};
auto name = names[result.idx_dataset];
if (!results.contains(name)) {
results[name] = json::array();
}
results[name].push_back(json_result);
return results;
}
json producer(std::vector<std::string>& names, json& tasks, struct ConfigMPI& config_mpi, MPI_Datatype& MPI_Result)
{
Task_Result result;
json results;
int num_tasks = tasks.size();
//
// 2a.1 Producer will loop to send all the tasks to the consumers and receive the results
//
for (int i = 0; i < num_tasks; ++i) {
MPI_Status status;
MPI_Recv(&result, 1, MPI_Result, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
if (status.MPI_TAG == TAG_RESULT) {
//Store result
store_result(names, result, results);
}
MPI_Send(&i, 1, MPI_INT, status.MPI_SOURCE, TAG_TASK, MPI_COMM_WORLD);
}
//
// 2a.2 Producer will send the end message to all the consumers
//
for (int i = 0; i < config_mpi.n_procs - 1; ++i) {
MPI_Status status;
MPI_Recv(&result, 1, MPI_Result, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
if (status.MPI_TAG == TAG_RESULT) {
//Store result
store_result(names, result, results);
}
MPI_Send(&i, 1, MPI_INT, status.MPI_SOURCE, TAG_END, MPI_COMM_WORLD);
}
return results;
}
void select_best_results_folds(json& results, json& all_results, std::string& model)
{
Timer timer;
auto grid = GridData(Paths::grid_input(model));
//
// Select the best result of the computed outer folds
//
for (const auto& result : all_results.items()) {
// each result has the results of all the outer folds as each one were a different task
double best_score = 0.0;
json best;
for (const auto& result_fold : result.value()) {
double score = result_fold["score"].get<double>();
if (score > best_score) {
best_score = score;
best = result_fold;
}
}
auto dataset = result.key();
auto combinations = grid.getGrid(dataset);
json json_best = {
{ "score", best_score },
{ "hyperparameters", combinations[best["combination"].get<int>()] },
{ "date", get_date() + " " + get_time() },
{ "grid", grid.getInputGrid(dataset) },
{ "duration", timer.translate2String(best["time"].get<double>()) }
};
results[dataset] = json_best;
}
}
void consumer(Datasets& datasets, json& tasks, struct ConfigGrid& config, struct ConfigMPI& config_mpi, MPI_Datatype& MPI_Result)
{
Task_Result result;
//
// 2b.1 Consumers announce to the producer that they are ready to receive a task
//
MPI_Send(&result, 1, MPI_Result, config_mpi.manager, TAG_QUERY, MPI_COMM_WORLD);
int task;
while (true) {
MPI_Status status;
//
// 2b.2 Consumers receive the task from the producer and process it
//
MPI_Recv(&task, 1, MPI_INT, config_mpi.manager, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
if (status.MPI_TAG == TAG_END) {
break;
}
process_task_mpi_consumer(config, config_mpi, tasks, task, datasets, &result);
//
// 2b.3 Consumers send the result to the producer
//
MPI_Send(&result, 1, MPI_Result, config_mpi.manager, TAG_RESULT, MPI_COMM_WORLD);
}
}
void GridSearch::go(struct ConfigMPI& config_mpi)
{
/*
* Each task is a json object with the following structure:
* {
* "dataset": "dataset_name",
* "idx_dataset": idx_dataset, // used to identify the dataset in the results
* // this index is relative to the used datasets in the actual run not to the whole datasets
* "seed": # of seed to use,
* "Fold": # of fold to process
* }
*
* The overall process consists in these steps:
* 0. Create the MPI result type & tasks
* 0.1 Create the MPI result type
* 0.2 Manager creates the tasks
* 1. Manager will broadcast the tasks to all the processes
* 1.1 Broadcast the number of tasks
* 1.2 Broadcast the length of the following string
* 1.2 Broadcast the tasks as a char* string
* 2a. Producer delivers the tasks to the consumers
* 2a.1 Producer will loop to send all the tasks to the consumers and receive the results
* 2a.2 Producer will send the end message to all the consumers
* 2b. Consumers process the tasks and send the results to the producer
* 2b.1 Consumers announce to the producer that they are ready to receive a task
* 2b.2 Consumers receive the task from the producer and process it
* 2b.3 Consumers send the result to the producer
* 3. Manager select the bests sccores for each dataset
* 3.1 Loop thru all the results obtained from each outer fold (task) and select the best
* 3.2 Save the results
*/
//
// 0.1 Create the MPI result type
//
Task_Result result;
int tasks_size;
MPI_Datatype MPI_Result;
MPI_Datatype type[5] = { MPI_UNSIGNED, MPI_UNSIGNED, MPI_INT, MPI_DOUBLE, MPI_DOUBLE };
int blocklen[5] = { 1, 1, 1, 1, 1 };
MPI_Aint disp[5];
disp[0] = offsetof(Task_Result, idx_dataset);
disp[1] = offsetof(Task_Result, idx_combination);
disp[2] = offsetof(Task_Result, n_fold);
disp[3] = offsetof(Task_Result, score);
disp[4] = offsetof(Task_Result, time);
MPI_Type_create_struct(5, blocklen, disp, type, &MPI_Result);
MPI_Type_commit(&MPI_Result);
//
// 0.2 Manager creates the tasks
//
char* msg;
json tasks;
if (config_mpi.rank == config_mpi.manager) {
timer.start();
tasks = build_tasks_mpi(config_mpi.rank);
auto tasks_str = tasks.dump();
tasks_size = tasks_str.size();
msg = new char[tasks_size + 1];
strcpy(msg, tasks_str.c_str());
}
//
// 1. Manager will broadcast the tasks to all the processes
//
MPI_Bcast(&tasks_size, 1, MPI_INT, config_mpi.manager, MPI_COMM_WORLD);
if (config_mpi.rank != config_mpi.manager) {
msg = new char[tasks_size + 1];
}
MPI_Bcast(msg, tasks_size + 1, MPI_CHAR, config_mpi.manager, MPI_COMM_WORLD);
tasks = json::parse(msg);
delete[] msg;
auto datasets = Datasets(config.discretize, Paths::datasets());
if (config_mpi.rank == config_mpi.manager) {
//
// 2a. Producer delivers the tasks to the consumers
//
auto datasets_names = filterDatasets(datasets);
json all_results = producer(datasets_names, tasks, config_mpi, MPI_Result);
std::cout << get_color_rank(config_mpi.rank) << "|" << std::endl;
//
// 3. Manager select the bests sccores for each dataset
//
auto results = initializeResults();
select_best_results_folds(results, all_results, config.model);
//
// 3.2 Save the results
//
save(results);
} else {
//
// 2b. Consumers process the tasks and send the results to the producer
//
consumer(datasets, tasks, config, config_mpi, MPI_Result);
}
}
json GridSearch::initializeResults()
{
// Load previous results if continue is set
json results;
if (config.continue_from != NO_CONTINUE()) {
if (!config.quiet)
std::cout << "* Loading previous results" << std::endl;
try {
std::ifstream file(Paths::grid_output(config.model));
if (file.is_open()) {
results = json::parse(file);
results = results["results"];
}
}
catch (const std::exception& e) {
std::cerr << "* There were no previous results" << std::endl;
std::cerr << "* Initizalizing new results" << std::endl;
results = json();
}
}
return results;
}
void GridSearch::save(json& results)
{
std::ofstream file(Paths::grid_output(config.model));
json output = {
{ "model", config.model },
{ "score", config.score },
{ "discretize", config.discretize },
{ "stratified", config.stratified },
{ "n_folds", config.n_folds },
{ "seeds", config.seeds },
{ "date", get_date() + " " + get_time()},
{ "nested", config.nested},
{ "platform", config.platform },
{ "duration", timer.getDurationString(true)},
{ "results", results }
};
file << output.dump(4);
}
} /* namespace platform */

60
src/Platform/GridSearch.h Normal file
View File

@@ -0,0 +1,60 @@
#ifndef GRIDSEARCH_H
#define GRIDSEARCH_H
#include <string>
#include <map>
#include <mpi.h>
#include <nlohmann/json.hpp>
#include "Datasets.h"
#include "HyperParameters.h"
#include "GridData.h"
#include "Timer.h"
namespace platform {
using json = nlohmann::json;
struct ConfigGrid {
std::string model;
std::string score;
std::string continue_from;
std::string platform;
bool quiet;
bool only; // used with continue_from to only compute that dataset
bool discretize;
bool stratified;
int nested;
int n_folds;
json excluded;
std::vector<int> seeds;
};
struct ConfigMPI {
int rank;
int n_procs;
int manager;
};
typedef struct {
uint idx_dataset;
uint idx_combination;
int n_fold;
double score;
double time;
} Task_Result;
const int TAG_QUERY = 1;
const int TAG_RESULT = 2;
const int TAG_TASK = 3;
const int TAG_END = 4;
class GridSearch {
public:
explicit GridSearch(struct ConfigGrid& config);
void go(struct ConfigMPI& config_mpi);
~GridSearch() = default;
json loadResults();
static inline std::string NO_CONTINUE() { return "NO_CONTINUE"; }
private:
void save(json& results);
json initializeResults();
std::vector<std::string> filterDatasets(Datasets& datasets) const;
struct ConfigGrid config;
json build_tasks_mpi(int rank);
Timer timer; // used to measure the time of the whole process
};
} /* namespace platform */
#endif /* GRIDSEARCH_H */

View File

@@ -0,0 +1,55 @@
#include "HyperParameters.h"
#include <fstream>
#include <sstream>
#include <iostream>
namespace platform {
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_)
{
// Initialize all datasets with the given hyperparameters
for (const auto& item : datasets) {
hyperparameters[item] = hyperparameters_;
}
}
// https://www.techiedelight.com/implode-a-vector-of-strings-into-a-comma-separated-string-in-cpp/
std::string join(std::vector<std::string> const& strings, std::string delim)
{
std::stringstream ss;
std::copy(strings.begin(), strings.end(),
std::ostream_iterator<std::string>(ss, delim.c_str()));
return ss.str();
}
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file)
{
// Check if file exists
std::ifstream file(hyperparameters_file);
if (!file.is_open()) {
throw std::runtime_error("File " + hyperparameters_file + " not found");
}
// Check if file is a json
json input_hyperparameters = json::parse(file);
// Check if hyperparameters are valid
for (const auto& dataset : datasets) {
if (!input_hyperparameters.contains(dataset)) {
std::cerr << "*Warning: Dataset " << dataset << " not found in hyperparameters file" << " assuming default hyperparameters" << std::endl;
hyperparameters[dataset] = json({});
continue;
}
hyperparameters[dataset] = input_hyperparameters[dataset]["hyperparameters"].get<json>();
}
}
void HyperParameters::check(const std::vector<std::string>& valid, const std::string& fileName)
{
json result = hyperparameters.at(fileName);
for (const auto& item : result.items()) {
if (find(valid.begin(), valid.end(), item.key()) == valid.end()) {
throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid. Passed Hyperparameters are: "
+ result.dump(4) + "\n Valid hyperparameters are: {" + join(valid, ",") + "}");
}
}
}
json HyperParameters::get(const std::string& fileName)
{
return hyperparameters.at(fileName);
}
} /* namespace platform */

View File

@@ -0,0 +1,23 @@
#ifndef HYPERPARAMETERS_H
#define HYPERPARAMETERS_H
#include <string>
#include <map>
#include <vector>
#include <nlohmann/json.hpp>
namespace platform {
using json = nlohmann::json;
class HyperParameters {
public:
HyperParameters() = default;
explicit HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_);
explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file);
~HyperParameters() = default;
bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); }
void check(const std::vector<std::string>& valid, const std::string& fileName);
json get(const std::string& fileName);
private:
std::map<std::string, json> hyperparameters;
};
} /* namespace platform */
#endif /* HYPERPARAMETERS_H */

View File

@@ -0,0 +1,213 @@
#include "ManageResults.h"
#include "CommandParser.h"
#include <filesystem>
#include <tuple>
#include "Colors.h"
#include "CLocale.h"
#include "Paths.h"
#include "ReportConsole.h"
#include "ReportExcel.h"
namespace platform {
ManageResults::ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare) :
numFiles{ numFiles }, complete{ complete }, partial{ partial }, compare{ compare }, results(Results(Paths::results(), model, score, complete, partial))
{
indexList = true;
openExcel = false;
workbook = NULL;
if (numFiles == 0) {
this->numFiles = results.size();
}
}
void ManageResults::doMenu()
{
if (results.empty()) {
std::cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl;
return;
}
results.sortDate();
list();
menu();
if (openExcel) {
workbook_close(workbook);
}
std::cout << Colors::RESET() << "Done!" << std::endl;
}
void ManageResults::list()
{
auto temp = ConfigLocale();
std::string suffix = numFiles != results.size() ? " of " + std::to_string(results.size()) : "";
std::stringstream oss;
oss << "Results on screen: " << numFiles << suffix;
std::cout << Colors::GREEN() << oss.str() << std::endl;
std::cout << std::string(oss.str().size(), '-') << std::endl;
if (complete) {
std::cout << Colors::MAGENTA() << "Only listing complete results" << std::endl;
}
if (partial) {
std::cout << Colors::MAGENTA() << "Only listing partial results" << std::endl;
}
auto i = 0;
int maxModel = results.maxModelSize();
std::cout << Colors::GREEN() << " # Date " << std::setw(maxModel) << std::left << "Model" << " Score Name Score C/P Duration Title" << std::endl;
std::cout << "=== ========== " << std::string(maxModel, '=') << " =========== =========== === ========= =============================================================" << std::endl;
bool odd = true;
for (auto& result : results) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
std::cout << result.to_string(maxModel) << std::endl;
if (i == numFiles) {
break;
}
odd = !odd;
}
}
bool ManageResults::confirmAction(const std::string& intent, const std::string& fileName) const
{
std::string color;
if (intent == "delete") {
color = Colors::RED();
} else {
color = Colors::YELLOW();
}
std::string line;
bool finished = false;
while (!finished) {
std::cout << color << "Really want to " << intent << " " << fileName << "? (y/n): ";
getline(std::cin, line);
finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0] == 'n'));
}
if (tolower(line[0]) == 'y') {
return true;
}
std::cout << "Not done!" << std::endl;
return false;
}
void ManageResults::report(const int index, const bool excelReport)
{
std::cout << Colors::YELLOW() << "Reporting " << results.at(index).getFilename() << std::endl;
auto data = results.at(index).load();
if (excelReport) {
ReportExcel reporter(data, compare, workbook);
reporter.show();
openExcel = true;
workbook = reporter.getWorkbook();
std::cout << "Adding sheet to " << Paths::excel() + Paths::excelResults() << std::endl;
} else {
ReportConsole reporter(data, compare);
reporter.show();
}
}
void ManageResults::showIndex(const int index, const int idx)
{
// Show a dataset result inside a report
auto data = results.at(index).load();
std::cout << Colors::YELLOW() << "Showing " << results.at(index).getFilename() << std::endl;
ReportConsole reporter(data, compare, idx);
reporter.show();
}
void ManageResults::sortList()
{
std::cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): ";
std::string line;
char option;
getline(std::cin, line);
if (line.size() == 0)
return;
if (line.size() > 1) {
std::cout << "Invalid option" << std::endl;
return;
}
option = line[0];
switch (option) {
case 'd':
results.sortDate();
break;
case 's':
results.sortScore();
break;
case 'u':
results.sortDuration();
break;
case 'm':
results.sortModel();
break;
default:
std::cout << "Invalid option" << std::endl;
}
}
void ManageResults::menu()
{
char option;
int index, subIndex;
bool finished = false;
std::string filename;
// tuple<Option, digit, requires value>
std::vector<std::tuple<std::string, char, bool>> mainOptions = {
{"quit", 'q', false},
{"list", 'l', false},
{"delete", 'd', true},
{"hide", 'h', true},
{"sort", 's', false},
{"report", 'r', true},
{"excel", 'e', true}
};
std::vector<std::tuple<std::string, char, bool>> listOptions = {
{"report", 'r', true},
{"list", 'l', false},
{"quit", 'q', false}
};
auto parser = CommandParser();
while (!finished) {
if (indexList) {
std::tie(option, index) = parser.parse(Colors::GREEN(), mainOptions, 'r', numFiles - 1);
} else {
std::tie(option, subIndex) = parser.parse(Colors::MAGENTA(), listOptions, 'r', results.at(index).load()["results"].size() - 1);
}
switch (option) {
case 'q':
finished = true;
break;
case 'l':
list();
indexList = true;
break;
case 'd':
filename = results.at(index).getFilename();
if (!confirmAction("delete", filename))
break;
std::cout << "Deleting " << filename << std::endl;
results.deleteResult(index);
std::cout << "File: " + filename + " deleted!" << std::endl;
list();
break;
case 'h':
filename = results.at(index).getFilename();
if (!confirmAction("hide", filename))
break;
filename = results.at(index).getFilename();
std::cout << "Hiding " << filename << std::endl;
results.hideResult(index, Paths::hiddenResults());
std::cout << "File: " + filename + " hidden! (moved to " << Paths::hiddenResults() << ")" << std::endl;
list();
break;
case 's':
sortList();
list();
break;
case 'r':
if (indexList) {
report(index, false);
indexList = false;
} else {
showIndex(index, subIndex);
}
break;
case 'e':
report(index, true);
break;
}
}
}
} /* namespace platform */

View File

@@ -0,0 +1,31 @@
#ifndef MANAGE_RESULTS_H
#define MANAGE_RESULTS_H
#include "Results.h"
#include "xlsxwriter.h"
namespace platform {
class ManageResults {
public:
ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare);
~ManageResults() = default;
void doMenu();
private:
void list();
bool confirmAction(const std::string& intent, const std::string& fileName) const;
void report(const int index, const bool excelReport);
void showIndex(const int index, const int idx);
void sortList();
void menu();
int numFiles;
bool indexList;
bool openExcel;
bool complete;
bool partial;
bool compare;
Results results;
lxw_workbook* workbook;
};
}
#endif /* MANAGE_RESULTS_H */

View File

@@ -1,6 +1,5 @@
#include "Models.h"
namespace platform {
using namespace std;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
Models* Models::factory = nullptr;;
Models* Models::instance()
@@ -10,13 +9,13 @@ namespace platform {
factory = new Models();
return factory;
}
void Models::registerFactoryFunction(const string& name,
void Models::registerFactoryFunction(const std::string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
// register the class factory function
functionRegistry[name] = classFactoryFunction;
}
shared_ptr<bayesnet::BaseClassifier> Models::create(const string& name)
shared_ptr<bayesnet::BaseClassifier> Models::create(const std::string& name)
{
bayesnet::BaseClassifier* instance = nullptr;
@@ -30,23 +29,22 @@ namespace platform {
else
return nullptr;
}
vector<string> Models::getNames()
std::vector<std::string> Models::getNames()
{
vector<string> names;
std::vector<std::string> names;
transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names),
[](const pair<string, function<bayesnet::BaseClassifier* (void)>>& pair) { return pair.first; });
[](const pair<std::string, function<bayesnet::BaseClassifier* (void)>>& pair) { return pair.first; });
return names;
}
string Models::toString()
std::string Models::tostring()
{
string result = "";
std::string result = "";
for (const auto& pair : functionRegistry) {
result += pair.first + ", ";
}
return "{" + result.substr(0, result.size() - 2) + "}";
}
Registrar::Registrar(const string& name, function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
Registrar::Registrar(const std::string& name, function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
// register the class factory function
Models::instance()->registerFactoryFunction(name, classFactoryFunction);

View File

@@ -11,10 +11,14 @@
#include "SPODELd.h"
#include "AODELd.h"
#include "BoostAODE.h"
#include "STree.h"
#include "ODTE.h"
#include "SVC.h"
#include "RandomForest.h"
namespace platform {
class Models {
private:
map<string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
static Models* factory; //singleton
Models() {};
public:
@@ -22,16 +26,16 @@ namespace platform {
void operator=(const Models&) = delete;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
static Models* instance();
shared_ptr<bayesnet::BaseClassifier> create(const string& name);
void registerFactoryFunction(const string& name,
shared_ptr<bayesnet::BaseClassifier> create(const std::string& name);
void registerFactoryFunction(const std::string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
vector<string> getNames();
string toString();
std::vector<string> getNames();
std::string tostring();
};
class Registrar {
public:
Registrar(const string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
Registrar(const std::string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
};
}
#endif

View File

@@ -1,12 +1,39 @@
#ifndef PATHS_H
#define PATHS_H
#include <string>
#include <filesystem>
#include "DotEnv.h"
namespace platform {
class Paths {
public:
static std::string datasets() { return "datasets/"; }
static std::string results() { return "results/"; }
static std::string hiddenResults() { return "hidden_results/"; }
static std::string excel() { return "excel/"; }
static std::string grid() { return "grid/"; }
static std::string datasets()
{
auto env = platform::DotEnv();
return env.get("source_data");
}
static void createPath(const std::string& path)
{
// Create directory if it does not exist
try {
std::filesystem::create_directory(path);
}
catch (std::exception& e) {
throw std::runtime_error("Could not create directory " + path);
}
}
static std::string excelResults() { return "some_results.xlsx"; }
static std::string grid_input(const std::string& model)
{
return grid() + "grid_" + model + "_input.json";
}
static std::string grid_output(const std::string& model)
{
return grid() + "grid_" + model + "_output.json";
}
};
}
#endif

View File

@@ -2,14 +2,13 @@
#include <locale>
#include "Datasets.h"
#include "ReportBase.h"
#include "BestResult.h"
#include "DotEnv.h"
namespace platform {
ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1)
{
stringstream oss;
oss << "Better than ZeroR + " << setprecision(1) << fixed << margin * 100 << "%";
std::stringstream oss;
oss << "Better than ZeroR + " << std::setprecision(1) << fixed << margin * 100 << "%";
meaning = {
{Symbols::equal_best, "Equal to best"},
{Symbols::better_best, "Better than best"},
@@ -17,10 +16,10 @@ namespace platform {
{Symbols::upward_arrow, oss.str()}
};
}
string ReportBase::fromVector(const string& key)
std::string ReportBase::fromVector(const std::string& key)
{
stringstream oss;
string sep = "";
std::stringstream oss;
std::string sep = "";
oss << "[";
for (auto& item : data[key]) {
oss << sep << item.get<double>();
@@ -29,13 +28,13 @@ namespace platform {
oss << "]";
return oss.str();
}
string ReportBase::fVector(const string& title, const json& data, const int width, const int precision)
std::string ReportBase::fVector(const std::string& title, const json& data, const int width, const int precision)
{
stringstream oss;
string sep = "";
std::stringstream oss;
std::string sep = "";
oss << title << "[";
for (const auto& item : data) {
oss << sep << fixed << setw(width) << setprecision(precision) << item.get<double>();
oss << sep << fixed << setw(width) << std::setprecision(precision) << item.get<double>();
sep = ", ";
}
oss << "]";
@@ -46,25 +45,25 @@ namespace platform {
header();
body();
}
string ReportBase::compareResult(const string& dataset, double result)
std::string ReportBase::compareResult(const std::string& dataset, double result)
{
string status = " ";
std::string status = " ";
if (compare) {
double best = bestResult(dataset, data["model"].get<string>());
double best = bestResult(dataset, data["model"].get<std::string>());
if (result == best) {
status = Symbols::equal_best;
} else if (result > best) {
status = Symbols::better_best;
}
} else {
if (data["score_name"].get<string>() == "accuracy") {
auto dt = Datasets(Paths::datasets(), false);
if (data["score_name"].get<std::string>() == "accuracy") {
auto dt = Datasets(false, Paths::datasets());
dt.loadDataset(dataset);
auto numClasses = dt.getNClasses(dataset);
if (numClasses == 2) {
vector<int> distribution = dt.getClassesCounts(dataset);
std::vector<int> distribution = dt.getClassesCounts(dataset);
double nSamples = dt.getNSamples(dataset);
vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
double mark = *maxValue / nSamples * (1 + margin);
if (mark > 1) {
mark = 0.9995;
@@ -83,17 +82,19 @@ namespace platform {
}
return status;
}
double ReportBase::bestResult(const string& dataset, const string& model)
double ReportBase::bestResult(const std::string& dataset, const std::string& model)
{
double value = 0.0;
if (bestResults.size() == 0) {
// try to load the best results
string score = data["score_name"];
std::string score = data["score_name"];
replace(score.begin(), score.end(), '_', '-');
string fileName = "best_results_" + score + "_" + model + ".json";
std::string fileName = "best_results_" + score + "_" + model + ".json";
ifstream resultData(Paths::results() + "/" + fileName);
if (resultData.is_open()) {
bestResults = json::parse(resultData);
} else {
existBestFile = false;
}
}
try {
@@ -101,7 +102,12 @@ namespace platform {
}
catch (exception) {
value = 1.0;
}
return value;
}
bool ReportBase::getExistBestFile()
{
return existBestFile;
}
}

View File

@@ -3,22 +3,12 @@
#include <string>
#include <iostream>
#include "Paths.h"
#include "Symbols.h"
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
using namespace std;
class Symbols {
public:
inline static const string check_mark{ "\u2714" };
inline static const string exclamation{ "\u2757" };
inline static const string black_star{ "\u2605" };
inline static const string cross{ "\u2717" };
inline static const string upward_arrow{ "\u27B6" };
inline static const string down_arrow{ "\u27B4" };
inline static const string equal_best{ check_mark };
inline static const string better_best{ black_star };
};
class ReportBase {
public:
explicit ReportBase(json data_, bool compare);
@@ -26,19 +16,21 @@ namespace platform {
void show();
protected:
json data;
string fromVector(const string& key);
string fVector(const string& title, const json& data, const int width, const int precision);
std::string fromVector(const std::string& key);
std::string fVector(const std::string& title, const json& data, const int width, const int precision);
bool getExistBestFile();
virtual void header() = 0;
virtual void body() = 0;
virtual void showSummary() = 0;
string compareResult(const string& dataset, double result);
map<string, int> summary;
std::string compareResult(const std::string& dataset, double result);
std::map<std::string, int> summary;
double margin;
map<string, string> meaning;
private:
double bestResult(const string& dataset, const string& model);
std::map<std::string, std::string> meaning;
bool compare;
private:
double bestResult(const std::string& dataset, const std::string& model);
json bestResults;
bool existBestFile = true;
};
};
#endif

View File

@@ -1,43 +1,48 @@
#include <iostream>
#include <sstream>
#include <locale>
#include "ReportConsole.h"
#include "BestResult.h"
#include "BestScore.h"
#include "CLocale.h"
namespace platform {
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; }
};
string ReportConsole::headerLine(const string& text, int utf = 0)
std::string ReportConsole::headerLine(const std::string& text, int utf = 0)
{
int n = MAXL - text.length() - 3;
n = n < 0 ? 0 : n;
return "* " + text + string(n + utf, ' ') + "*\n";
return "* " + text + std::string(n + utf, ' ') + "*\n";
}
void ReportConsole::header()
{
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
stringstream oss;
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
cout << headerLine("Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>());
cout << headerLine(data["title"].get<string>());
cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
oss << "Execution took " << setprecision(2) << fixed << data["duration"].get<float>() << " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<string>();
cout << headerLine(oss.str());
cout << headerLine("Score is " + data["score_name"].get<string>());
cout << string(MAXL, '*') << endl;
cout << endl;
std::stringstream oss;
std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
std::cout << headerLine(
"Report " + data["model"].get<std::string>() + " ver. " + data["version"].get<std::string>()
+ " with " + std::to_string(data["folds"].get<int>()) + " Folds cross validation and " + std::to_string(data["seeds"].size())
+ " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>()
);
std::cout << headerLine(data["title"].get<std::string>());
std::cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
oss << "Execution took " << std::setprecision(2) << std::fixed << data["duration"].get<float>()
<< " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<std::string>();
std::cout << headerLine(oss.str());
std::cout << headerLine("Score is " + data["score_name"].get<std::string>());
std::cout << std::string(MAXL, '*') << std::endl;
std::cout << std::endl;
}
void ReportConsole::body()
{
cout << Colors::GREEN() << " # Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl;
cout << "=== ========================= ====== ===== === ========= ========= ========= =============== =================== ====================" << endl;
auto tmp = ConfigLocale();
int maxHyper = 15;
int maxDataset = 7;
for (const auto& r : data["results"]) {
maxHyper = std::max(maxHyper, (int)r["hyperparameters"].dump().size());
maxDataset = std::max(maxDataset, (int)r["dataset"].get<std::string>().size());
}
std::cout << Colors::GREEN() << " # " << std::setw(maxDataset) << std::left << "Dataset" << " Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << std::endl;
std::cout << "=== " << std::string(maxDataset, '=') << " ====== ===== === ========= ========= ========= =============== =================== " << std::string(maxHyper, '=') << std::endl;
json lastResult;
double totalScore = 0.0;
bool odd = true;
@@ -48,37 +53,33 @@ namespace platform {
continue;
}
auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color;
cout << setw(3) << index++ << " ";
cout << setw(25) << left << r["dataset"].get<string>() << " ";
cout << setw(6) << right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["nodes"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["leaves"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get<double>();
const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>());
cout << status;
cout << setw(12) << right << setprecision(6) << fixed << r["time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get<double>() << " ";
try {
cout << r["hyperparameters"].get<string>();
}
catch (const exception& err) {
cout << r["hyperparameters"];
}
cout << endl;
std::cout << color;
std::cout << std::setw(3) << std::right << index++ << " ";
std::cout << std::setw(maxDataset) << std::left << r["dataset"].get<std::string>() << " ";
std::cout << std::setw(6) << std::right << r["samples"].get<int>() << " ";
std::cout << std::setw(5) << std::right << r["features"].get<int>() << " ";
std::cout << std::setw(3) << std::right << r["classes"].get<int>() << " ";
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["nodes"].get<float>() << " ";
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["leaves"].get<float>() << " ";
std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["depth"].get<float>() << " ";
std::cout << std::setw(8) << std::right << std::setprecision(6) << std::fixed << r["score"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["score_std"].get<double>();
const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
std::cout << status;
std::cout << std::setw(12) << std::right << std::setprecision(6) << std::fixed << r["time"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["time_std"].get<double>() << " ";
std::cout << r["hyperparameters"].dump();
std::cout << std::endl;
std::cout << std::flush;
lastResult = r;
totalScore += r["score"].get<double>();
odd = !odd;
}
if (data["results"].size() == 1 || selectedIndex != -1) {
cout << string(MAXL, '*') << endl;
cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
cout << string(MAXL, '*') << endl;
std::cout << std::string(MAXL, '*') << std::endl;
std::cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
std::cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
std::cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
std::cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
std::cout << std::string(MAXL, '*') << std::endl;
} else {
footer(totalScore);
}
@@ -86,24 +87,28 @@ namespace platform {
void ReportConsole::showSummary()
{
for (const auto& item : summary) {
stringstream oss;
oss << setw(3) << left << item.first;
oss << setw(3) << right << item.second << " ";
oss << left << meaning.at(item.first);
cout << headerLine(oss.str(), 2);
std::stringstream oss;
oss << std::setw(3) << std::left << item.first;
oss << std::setw(3) << std::right << item.second << " ";
oss << std::left << meaning.at(item.first);
std::cout << headerLine(oss.str(), 2);
}
}
void ReportConsole::footer(double totalScore)
{
cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
showSummary();
auto score = data["score_name"].get<string>();
if (score == BestResult::scoreName()) {
stringstream oss;
oss << score << " compared to " << BestResult::title() << " .: " << totalScore / BestResult::score();
cout << headerLine(oss.str());
auto score = data["score_name"].get<std::string>();
auto best = BestScore::getScore(score);
if (best.first != "") {
std::stringstream oss;
oss << score << " compared to " << best.first << " .: " << totalScore / best.second;
std::cout << headerLine(oss.str());
}
cout << string(MAXL, '*') << endl << Colors::RESET();
if (!getExistBestFile() && compare) {
std::cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
}
std::cout << std::string(MAXL, '*') << std::endl << Colors::RESET();
}
}

View File

@@ -1,12 +1,10 @@
#ifndef REPORTCONSOLE_H
#define REPORTCONSOLE_H
#include <string>
#include <iostream>
#include "ReportBase.h"
#include "Colors.h"
namespace platform {
using namespace std;
const int MAXL = 133;
class ReportConsole : public ReportBase {
public:
@@ -14,11 +12,11 @@ namespace platform {
virtual ~ReportConsole() = default;
private:
int selectedIndex;
string headerLine(const string& text, int utf);
std::string headerLine(const std::string& text, int utf);
void header() override;
void body() override;
void footer(double totalScore);
void showSummary();
void showSummary() override;
};
};
#endif

View File

@@ -1,200 +1,54 @@
#include <sstream>
#include <locale>
#include "ReportExcel.h"
#include "BestResult.h"
#include "BestScore.h"
namespace platform {
struct separated : numpunct<char> {
char do_decimal_point() const { return ','; }
char do_thousands_sep() const { return '.'; }
string do_grouping() const { return "\03"; }
};
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook) : ReportBase(data_, compare), row(0), workbook(workbook)
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet) : ReportBase(data_, compare), ExcelFile(workbook, worksheet)
{
normalSize = 14; //font size for report body
colorTitle = 0xB1A0C7;
colorOdd = 0xDCE6F1;
colorEven = 0xFDE9D9;
createFile();
}
lxw_workbook* ReportExcel::getWorkbook()
{
return workbook;
}
lxw_format* ReportExcel::efectiveStyle(const string& style)
{
lxw_format* efectiveStyle;
if (style == "") {
efectiveStyle = NULL;
} else {
string suffix = row % 2 ? "_odd" : "_even";
efectiveStyle = styles.at(style + suffix);
}
return efectiveStyle;
}
void ReportExcel::writeString(int row, int col, const string& text, const string& style)
{
worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style));
}
void ReportExcel::writeInt(int row, int col, const int number, const string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ReportExcel::writeDouble(int row, int col, const double number, const string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ReportExcel::formatColumns()
{
worksheet_freeze_panes(worksheet, 6, 1);
vector<int> columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 };
std::vector<int> columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 };
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
void ReportExcel::addColor(lxw_format* style, bool odd)
void ReportExcel::createWorksheet()
{
uint32_t efectiveColor = odd ? colorEven : colorOdd;
format_set_bg_color(style, lxw_color_t(efectiveColor));
}
void ReportExcel::createStyle(const string& name, lxw_format* style, bool odd)
{
addColor(style, odd);
if (name == "textCentered") {
format_set_align(style, LXW_ALIGN_CENTER);
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "text") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "bodyHeader") {
format_set_bold(style);
format_set_font_size(style, normalSize);
format_set_align(style, LXW_ALIGN_CENTER);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(style, LXW_BORDER_THIN);
format_set_bg_color(style, lxw_color_t(colorTitle));
} else if (name == "result") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "0.0000000");
} else if (name == "time") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "#,##0.000000");
} else if (name == "ints") {
format_set_font_size(style, normalSize);
format_set_num_format(style, "###,##0");
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "floats") {
format_set_border(style, LXW_BORDER_THIN);
format_set_font_size(style, normalSize);
format_set_num_format(style, "#,##0.00");
}
}
void ReportExcel::createFormats()
{
auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" };
lxw_format* style;
for (string name : styleNames) {
lxw_format* style = workbook_add_format(workbook);
style = workbook_add_format(workbook);
createStyle(name, style, true);
styles[name + "_odd"] = style;
style = workbook_add_format(workbook);
createStyle(name, style, false);
styles[name + "_even"] = style;
}
// Header 1st line
lxw_format* headerFirst = workbook_add_format(workbook);
format_set_bold(headerFirst);
format_set_font_size(headerFirst, 18);
format_set_align(headerFirst, LXW_ALIGN_CENTER);
format_set_align(headerFirst, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerFirst, LXW_BORDER_THIN);
format_set_bg_color(headerFirst, lxw_color_t(colorTitle));
// Header rest
lxw_format* headerRest = workbook_add_format(workbook);
format_set_bold(headerRest);
format_set_align(headerRest, LXW_ALIGN_CENTER);
format_set_font_size(headerRest, 16);
format_set_align(headerRest, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerRest, LXW_BORDER_THIN);
format_set_bg_color(headerRest, lxw_color_t(colorOdd));
// Header small
lxw_format* headerSmall = workbook_add_format(workbook);
format_set_bold(headerSmall);
format_set_align(headerSmall, LXW_ALIGN_LEFT);
format_set_font_size(headerSmall, 12);
format_set_border(headerSmall, LXW_BORDER_THIN);
format_set_align(headerSmall, LXW_ALIGN_VERTICAL_CENTER);
format_set_bg_color(headerSmall, lxw_color_t(colorOdd));
// Summary style
lxw_format* summaryStyle = workbook_add_format(workbook);
format_set_bold(summaryStyle);
format_set_font_size(summaryStyle, 16);
format_set_border(summaryStyle, LXW_BORDER_THIN);
format_set_align(summaryStyle, LXW_ALIGN_VERTICAL_CENTER);
styles["headerFirst"] = headerFirst;
styles["headerRest"] = headerRest;
styles["headerSmall"] = headerSmall;
styles["summaryStyle"] = summaryStyle;
}
void ReportExcel::setProperties()
{
char line[data["title"].get<string>().size() + 1];
strcpy(line, data["title"].get<string>().c_str());
lxw_doc_properties properties = {
.title = line,
.subject = "Machine learning results",
.author = "Ricardo Montañana Gómez",
.manager = "Dr. J. A. Gámez, Dr. J. M. Puerta",
.company = "UCLM",
.comments = "Created with libxlsxwriter and c++",
};
workbook_set_properties(workbook, &properties);
}
void ReportExcel::createFile()
{
if (workbook == NULL) {
workbook = workbook_new((Paths::excel() + fileName).c_str());
}
const string name = data["model"].get<string>();
string suffix = "";
string efectiveName;
const std::string name = data["model"].get<std::string>();
std::string suffix = "";
std::string efectiveName;
int num = 1;
// Create a sheet with the name of the model
while (true) {
efectiveName = name + suffix;
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
suffix = to_string(++num);
suffix = std::to_string(++num);
} else {
worksheet = workbook_add_worksheet(workbook, efectiveName.c_str());
break;
}
if (num > 100) {
throw invalid_argument("Couldn't create sheet " + efectiveName);
throw std::invalid_argument("Couldn't create sheet " + efectiveName);
}
}
cout << "Adding sheet " << efectiveName << " to " << Paths::excel() + fileName << endl;
setProperties();
}
void ReportExcel::createFile()
{
if (workbook == NULL) {
workbook = workbook_new((Paths::excel() + Paths::excelResults()).c_str());
}
if (worksheet == NULL) {
createWorksheet();
}
setProperties(data["title"].get<std::string>());
createFormats();
formatColumns();
}
@@ -206,26 +60,26 @@ namespace platform {
void ReportExcel::header()
{
locale mylocale(cout.getloc(), new separated);
locale::global(mylocale);
cout.imbue(mylocale);
stringstream oss;
string message = data["model"].get<string>() + " ver. " + data["version"].get<string>() + " " +
data["language"].get<string>() + " ver. " + data["language_version"].get<string>() +
" with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) +
" random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>();
std::locale mylocale(std::cout.getloc(), new separated);
std::locale::global(mylocale);
std::cout.imbue(mylocale);
std::stringstream oss;
std::string message = data["model"].get<std::string>() + " ver. " + data["version"].get<std::string>() + " " +
data["language"].get<std::string>() + " ver. " + data["language_version"].get<std::string>() +
" with " + std::to_string(data["folds"].get<int>()) + " Folds cross validation and " + std::to_string(data["seeds"].size()) +
" random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>();
worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<string>()).c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<std::string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<std::string>()).c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]);
oss << setprecision(2) << fixed << data["duration"].get<float>() << " s";
oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() << " s";
worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
oss.str("");
oss.clear();
oss << setprecision(2) << fixed << data["duration"].get<float>() / 3600 << " h";
oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() / 3600 << " h";
worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<std::string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
@@ -239,7 +93,7 @@ namespace platform {
void ReportExcel::body()
{
auto head = vector<string>(
auto head = std::vector<std::string>(
{ "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time",
"Time Std.", "Hyperparameters" });
int col = 0;
@@ -251,9 +105,9 @@ namespace platform {
int hypSize = 22;
json lastResult;
double totalScore = 0.0;
string hyperparameters;
std::string hyperparameters;
for (const auto& r : data["results"]) {
writeString(row, col, r["dataset"].get<string>(), "text");
writeString(row, col, r["dataset"].get<std::string>(), "text");
writeInt(row, col + 1, r["samples"].get<int>(), "ints");
writeInt(row, col + 2, r["features"].get<int>(), "ints");
writeInt(row, col + 3, r["classes"].get<int>(), "ints");
@@ -262,18 +116,11 @@ namespace platform {
writeDouble(row, col + 6, r["depth"].get<double>(), "floats");
writeDouble(row, col + 7, r["score"].get<double>(), "result");
writeDouble(row, col + 8, r["score_std"].get<double>(), "result");
const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>());
const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
writeString(row, col + 9, status, "textCentered");
writeDouble(row, col + 10, r["time"].get<double>(), "time");
writeDouble(row, col + 11, r["time_std"].get<double>(), "time");
try {
hyperparameters = r["hyperparameters"].get<string>();
}
catch (const exception& err) {
stringstream oss;
oss << r["hyperparameters"];
hyperparameters = oss.str();
}
hyperparameters = r["hyperparameters"].dump();
if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
@@ -281,18 +128,17 @@ namespace platform {
lastResult = r;
totalScore += r["score"].get<double>();
row++;
}
// Set the right column width of hyperparameters with the maximum length
worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL);
// Show totals if only one dataset is present in the result
if (data["results"].size() == 1) {
for (const string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
for (const std::string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
row++;
col = 1;
writeString(row, col, group, "text");
for (double item : lastResult[group]) {
string style = group.find("scores") != string::npos ? "result" : "time";
std::string style = group.find("scores") != std::string::npos ? "result" : "time";
writeDouble(row, ++col, item, style);
}
}
@@ -321,10 +167,14 @@ namespace platform {
{
showSummary();
row += 4 + summary.size();
auto score = data["score_name"].get<string>();
if (score == BestResult::scoreName()) {
worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestResult::title() + " .:").c_str(), efectiveStyle("text"));
writeDouble(row, 6, totalScore / BestResult::score(), "result");
auto score = data["score_name"].get<std::string>();
auto best = BestScore::getScore(score);
if (best.first != "") {
worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + best.first + " .:").c_str(), efectiveStyle("text"));
writeDouble(row, 6, totalScore / best.second, "result");
}
if (!getExistBestFile() && compare) {
worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]);
}
}
}

View File

@@ -3,40 +3,22 @@
#include<map>
#include "xlsxwriter.h"
#include "ReportBase.h"
#include "ExcelFile.h"
#include "Colors.h"
namespace platform {
using namespace std;
const int MAXLL = 128;
class ReportExcel : public ReportBase {
class ReportExcel : public ReportBase, public ExcelFile {
public:
explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook);
lxw_workbook* getWorkbook();
explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL);
private:
void writeString(int row, int col, const string& text, const string& style = "");
void writeInt(int row, int col, const int number, const string& style = "");
void writeDouble(int row, int col, const double number, const string& style = "");
void formatColumns();
void createFormats();
void setProperties();
void createFile();
void createWorksheet();
void closeFile();
void showSummary();
lxw_workbook* workbook;
lxw_worksheet* worksheet;
map<string, lxw_format*> styles;
int row;
int normalSize; //font size for report body
uint32_t colorTitle;
uint32_t colorOdd;
uint32_t colorEven;
const string fileName = "some_results.xlsx";
void header() override;
void body() override;
void showSummary() override;
void footer(double totalScore, int row);
void createStyle(const string& name, lxw_format* style, bool odd);
void addColor(lxw_format* style, bool odd);
lxw_format* efectiveStyle(const string& name);
};
};
#endif // !REPORTEXCEL_H

58
src/Platform/Result.cc Normal file
View File

@@ -0,0 +1,58 @@
#include "Result.h"
#include "BestScore.h"
#include <filesystem>
#include <fstream>
#include <sstream>
#include "Colors.h"
#include "DotEnv.h"
#include "CLocale.h"
namespace platform {
Result::Result(const std::string& path, const std::string& filename)
: path(path)
, filename(filename)
{
auto data = load();
date = data["date"];
score = 0;
for (const auto& result : data["results"]) {
score += result["score"].get<double>();
}
scoreName = data["score_name"];
auto best = BestScore::getScore(scoreName);
if (best.first != "") {
score /= best.second;
}
title = data["title"];
duration = data["duration"];
model = data["model"];
complete = data["results"].size() > 1;
}
json Result::load() const
{
std::ifstream resultData(path + "/" + filename);
if (resultData.is_open()) {
json data = json::parse(resultData);
return data;
}
throw std::invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
}
std::string Result::to_string(int maxModel) const
{
auto tmp = ConfigLocale();
std::stringstream oss;
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
oss << date << " ";
oss << std::setw(maxModel) << std::left << model << " ";
oss << std::setw(11) << std::left << scoreName << " ";
oss << std::right << std::setw(11) << std::setprecision(7) << std::fixed << score << " ";
auto completeString = isComplete() ? "C" : "P";
oss << std::setw(1) << " " << completeString << " ";
oss << std::setw(7) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " ";
oss << std::setw(50) << std::left << title << " ";
return oss.str();
}
}

Some files were not shown because too many files have changed in this diff Show More