From 55af0714cd2ec37815c48408324a5bf8acc560cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sun, 7 Jan 2024 19:58:22 +0100 Subject: [PATCH] Remove other projects' sources --- .clang-uml | 31 - .gitmodules | 8 +- CMakeLists.txt | 32 +- Makefile | 46 +- README.md | 77 +- config/config.h.in | 1 + data/glass.net | 25 - data/mfeat-factors-kdb2.net | 645 -------------- data/mfeat-factors-kdb3.net | 859 ------------------- data/mfeat-factors.net | 859 ------------------- gcovr.cfg | 2 +- grid_stree.json | 162 ---- lib/folding | 1 + lib/libxlsxwriter | 1 - sample/CMakeLists.txt | 10 - sample/sample.cc | 235 ----- src/BayesNet/BoostAODE.cc | 6 +- src/BayesNet/CMakeLists.txt | 7 +- src/BayesNet/Network.h | 3 +- src/Platform/BestResults.cc | 343 -------- src/Platform/BestResults.h | 36 - src/Platform/BestResultsExcel.cc | 300 ------- src/Platform/BestResultsExcel.h | 39 - src/Platform/BestScore.h | 28 - src/Platform/CLocale.h | 22 - src/Platform/CMakeLists.txt | 25 - src/Platform/Colors.h | 15 - src/Platform/CommandParser.cc | 87 -- src/Platform/CommandParser.h | 20 - src/Platform/Dataset.cc | 215 ----- src/Platform/Dataset.h | 78 -- src/Platform/Datasets.cc | 129 --- src/Platform/Datasets.h | 30 - src/Platform/DotEnv.h | 55 -- src/Platform/ExcelFile.cc | 168 ---- src/Platform/ExcelFile.h | 43 - src/Platform/Experiment.cc | 226 ----- src/Platform/Experiment.h | 103 --- src/Platform/Folding.cc | 104 --- src/Platform/Folding.h | 39 - src/Platform/GridData.cc | 75 -- src/Platform/GridData.h | 26 - src/Platform/GridSearch.cc | 441 ---------- src/Platform/GridSearch.h | 60 -- src/Platform/HyperParameters.cc | 55 -- src/Platform/HyperParameters.h | 23 - src/Platform/ManageResults.cc | 213 ----- src/Platform/ManageResults.h | 31 - src/Platform/Models.cc | 52 -- src/Platform/Models.h | 41 - src/Platform/Paths.h | 39 - src/Platform/ReportBase.cc | 113 --- src/Platform/ReportBase.h | 36 - src/Platform/ReportConsole.cc | 114 --- src/Platform/ReportConsole.h | 22 - src/Platform/ReportExcel.cc | 180 ---- src/Platform/ReportExcel.h | 24 - src/Platform/Result.cc | 58 -- src/Platform/Result.h | 35 - src/Platform/Results.cc | 74 -- src/Platform/Results.h | 36 - src/Platform/Statistics.cc | 252 ------ src/Platform/Statistics.h | 63 -- src/Platform/Symbols.h | 17 - src/Platform/Timer.h | 43 - src/Platform/Utils.h | 30 - src/Platform/b_best.cc | 85 -- src/Platform/b_grid.cc | 232 ----- src/Platform/b_list.cc | 56 -- src/Platform/b_main.cc | 135 --- src/Platform/b_manage.cc | 49 -- src/Platform/modelRegister.h | 29 - src/PyClassifiers/CMakeLists.txt | 10 - src/PyClassifiers/ODTE.cc | 24 - src/PyClassifiers/ODTE.h | 17 - src/PyClassifiers/PyClassifier.cc | 96 --- src/PyClassifiers/PyClassifier.h | 56 -- src/PyClassifiers/PyClf.h | 15 - src/PyClassifiers/PyHelper.hpp | 87 -- src/PyClassifiers/PyWrap.cc | 255 ------ src/PyClassifiers/PyWrap.h | 49 -- src/PyClassifiers/Pyclf.cc | 18 - src/PyClassifiers/RandomForest.cc | 20 - src/PyClassifiers/RandomForest.h | 15 - src/PyClassifiers/STree.cc | 24 - src/PyClassifiers/STree.h | 17 - src/PyClassifiers/SVC.cc | 8 - src/PyClassifiers/SVC.h | 13 - src/PyClassifiers/TypeId.h | 6 - src/PyClassifiers/XGBoost.cc | 18 - src/PyClassifiers/XGBoost.h | 13 - stree_results.json | 835 ------------------ tests/CMakeLists.txt | 23 +- tests/TestBayesModels.cc | 8 +- tests/TestBayesNetwork.cc | 4 +- tests/TestFolding.cc | 95 -- tests/TestUtils.cc | 9 +- tests/TestUtils.h | 4 +- {data => tests/data}/diabetes.arff | 0 {data => tests/data}/ecoli.arff | 0 {data => tests/data}/glass.arff | 0 {data => tests/data}/iris.arff | 0 {data => tests/data}/iris.net | 0 {data => tests/data}/kdd_JapaneseVowels.arff | 0 {data => tests/data}/letter.arff | 0 {data => tests/data}/liver-disorders.arff | 0 {data => tests/data}/mfeat-factors.arff | 0 107 files changed, 49 insertions(+), 9544 deletions(-) delete mode 100644 .clang-uml delete mode 100644 data/glass.net delete mode 100644 data/mfeat-factors-kdb2.net delete mode 100644 data/mfeat-factors-kdb3.net delete mode 100644 data/mfeat-factors.net delete mode 100644 grid_stree.json create mode 160000 lib/folding delete mode 160000 lib/libxlsxwriter delete mode 100644 sample/CMakeLists.txt delete mode 100644 sample/sample.cc delete mode 100644 src/Platform/BestResults.cc delete mode 100644 src/Platform/BestResults.h delete mode 100644 src/Platform/BestResultsExcel.cc delete mode 100644 src/Platform/BestResultsExcel.h delete mode 100644 src/Platform/BestScore.h delete mode 100644 src/Platform/CLocale.h delete mode 100644 src/Platform/CMakeLists.txt delete mode 100644 src/Platform/Colors.h delete mode 100644 src/Platform/CommandParser.cc delete mode 100644 src/Platform/CommandParser.h delete mode 100644 src/Platform/Dataset.cc delete mode 100644 src/Platform/Dataset.h delete mode 100644 src/Platform/Datasets.cc delete mode 100644 src/Platform/Datasets.h delete mode 100644 src/Platform/DotEnv.h delete mode 100644 src/Platform/ExcelFile.cc delete mode 100644 src/Platform/ExcelFile.h delete mode 100644 src/Platform/Experiment.cc delete mode 100644 src/Platform/Experiment.h delete mode 100644 src/Platform/Folding.cc delete mode 100644 src/Platform/Folding.h delete mode 100644 src/Platform/GridData.cc delete mode 100644 src/Platform/GridData.h delete mode 100644 src/Platform/GridSearch.cc delete mode 100644 src/Platform/GridSearch.h delete mode 100644 src/Platform/HyperParameters.cc delete mode 100644 src/Platform/HyperParameters.h delete mode 100644 src/Platform/ManageResults.cc delete mode 100644 src/Platform/ManageResults.h delete mode 100644 src/Platform/Models.cc delete mode 100644 src/Platform/Models.h delete mode 100644 src/Platform/Paths.h delete mode 100644 src/Platform/ReportBase.cc delete mode 100644 src/Platform/ReportBase.h delete mode 100644 src/Platform/ReportConsole.cc delete mode 100644 src/Platform/ReportConsole.h delete mode 100644 src/Platform/ReportExcel.cc delete mode 100644 src/Platform/ReportExcel.h delete mode 100644 src/Platform/Result.cc delete mode 100644 src/Platform/Result.h delete mode 100644 src/Platform/Results.cc delete mode 100644 src/Platform/Results.h delete mode 100644 src/Platform/Statistics.cc delete mode 100644 src/Platform/Statistics.h delete mode 100644 src/Platform/Symbols.h delete mode 100644 src/Platform/Timer.h delete mode 100644 src/Platform/Utils.h delete mode 100644 src/Platform/b_best.cc delete mode 100644 src/Platform/b_grid.cc delete mode 100644 src/Platform/b_list.cc delete mode 100644 src/Platform/b_main.cc delete mode 100644 src/Platform/b_manage.cc delete mode 100644 src/Platform/modelRegister.h delete mode 100644 src/PyClassifiers/CMakeLists.txt delete mode 100644 src/PyClassifiers/ODTE.cc delete mode 100644 src/PyClassifiers/ODTE.h delete mode 100644 src/PyClassifiers/PyClassifier.cc delete mode 100644 src/PyClassifiers/PyClassifier.h delete mode 100644 src/PyClassifiers/PyClf.h delete mode 100644 src/PyClassifiers/PyHelper.hpp delete mode 100644 src/PyClassifiers/PyWrap.cc delete mode 100644 src/PyClassifiers/PyWrap.h delete mode 100644 src/PyClassifiers/Pyclf.cc delete mode 100644 src/PyClassifiers/RandomForest.cc delete mode 100644 src/PyClassifiers/RandomForest.h delete mode 100644 src/PyClassifiers/STree.cc delete mode 100644 src/PyClassifiers/STree.h delete mode 100644 src/PyClassifiers/SVC.cc delete mode 100644 src/PyClassifiers/SVC.h delete mode 100644 src/PyClassifiers/TypeId.h delete mode 100644 src/PyClassifiers/XGBoost.cc delete mode 100644 src/PyClassifiers/XGBoost.h delete mode 100644 stree_results.json delete mode 100644 tests/TestFolding.cc rename {data => tests/data}/diabetes.arff (100%) rename {data => tests/data}/ecoli.arff (100%) rename {data => tests/data}/glass.arff (100%) rename {data => tests/data}/iris.arff (100%) rename {data => tests/data}/iris.net (100%) rename {data => tests/data}/kdd_JapaneseVowels.arff (100%) rename {data => tests/data}/letter.arff (100%) rename {data => tests/data}/liver-disorders.arff (100%) rename {data => tests/data}/mfeat-factors.arff (100%) diff --git a/.clang-uml b/.clang-uml deleted file mode 100644 index a94c7c5..0000000 --- a/.clang-uml +++ /dev/null @@ -1,31 +0,0 @@ -compilation_database_dir: build -output_directory: puml -diagrams: - BayesNet: - type: class - glob: - - src/BayesNet/*.cc - - src/Platform/*.cc - using_namespace: bayesnet - include: - namespaces: - - bayesnet - - platform - plantuml: - after: - - "note left of {{ alias(\"MyProjectMain\") }}: Main class of myproject library." - sequence: - type: sequence - glob: - - src/Platform/main.cc - combine_free_functions_into_file_participants: true - using_namespace: - - std - - bayesnet - - platform - include: - paths: - - src/BayesNet - - src/Platform - start_from: - - function: main(int,const char **) diff --git a/.gitmodules b/.gitmodules index 6be5a87..57c8e07 100644 --- a/.gitmodules +++ b/.gitmodules @@ -18,8 +18,6 @@ url = https://github.com/nlohmann/json.git master = master update = merge -[submodule "lib/libxlsxwriter"] - path = lib/libxlsxwriter - url = https://github.com/jmcnamara/libxlsxwriter.git - main = main - update = merge +[submodule "lib/folding"] + path = lib/folding + url = https://github.com/rmontanana/folding diff --git a/CMakeLists.txt b/CMakeLists.txt index e33b67c..fc89da2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.20) project(BayesNet - VERSION 0.2.0 + VERSION 1.0.0 DESCRIPTION "Bayesian Network and basic classifiers Library." HOMEPAGE_URL "https://github.com/rmontanana/bayesnet" LANGUAGES CXX @@ -30,27 +30,6 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF) option(ENABLE_TESTING "Unit testing build" OFF) option(CODE_COVERAGE "Collect coverage from test library" OFF) -option(MPI_ENABLED "Enable MPI options" ON) - -if (MPI_ENABLED) - find_package(MPI REQUIRED) - message("MPI_CXX_LIBRARIES=${MPI_CXX_LIBRARIES}") - message("MPI_CXX_INCLUDE_DIRS=${MPI_CXX_INCLUDE_DIRS}") -endif (MPI_ENABLED) - -# Boost Library -set(Boost_USE_STATIC_LIBS OFF) -set(Boost_USE_MULTITHREADED ON) -set(Boost_USE_STATIC_RUNTIME OFF) -find_package(Boost 1.66.0 REQUIRED COMPONENTS python3 numpy3) -if(Boost_FOUND) - message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}") - include_directories(${Boost_INCLUDE_DIRS}) -endif() - -# Python -find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED) -message("Python3_LIBRARIES=${Python3_LIBRARIES}") # CMakes modules # -------------- @@ -76,23 +55,14 @@ add_git_submodule("lib/mdlp") add_git_submodule("lib/argparse") add_git_submodule("lib/json") - -find_library(XLSXWRITER_LIB NAMES libxlsxwriter.dylib libxlsxwriter.so PATHS ${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/lib) -message("XLSXWRITER_LIB=${XLSXWRITER_LIB}") - - # Subdirectories # -------------- add_subdirectory(config) add_subdirectory(lib/Files) add_subdirectory(src/BayesNet) -add_subdirectory(src/Platform) -add_subdirectory(src/PyClassifiers) -add_subdirectory(sample) file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h) file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cc ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cpp) -file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform/*.cc ${BayesNet_SOURCE_DIR}/src/Platform/*.cpp) # Testing # ------- diff --git a/Makefile b/Makefile index 0886104..2cda612 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,11 @@ SHELL := /bin/bash .DEFAULT_GOAL := help -.PHONY: coverage setup help build test clean debug release +.PHONY: coverage setup help buildr buildd test clean debug release f_release = build_release f_debug = build_debug -app_targets = b_best b_list b_main b_manage b_grid -test_targets = unit_tests_bayesnet unit_tests_platform +app_targets = BayesNet +test_targets = unit_tests_bayesnet n_procs = -j 16 define ClearTests @@ -31,37 +31,22 @@ setup: ## Install dependencies for tests and coverage pip install gcovr; \ fi -dest ?= ${HOME}/bin -install: ## Copy binary files to bin folder - @echo "Destination folder: $(dest)" - make buildr - @echo "*******************************************" - @echo ">>> Copying files to $(dest)" - @echo "*******************************************" - @for item in $(app_targets); do \ - echo ">>> Copying $$item" ; \ - cp $(f_release)/src/Platform/$$item $(dest) ; \ - done - dependency: ## Create a dependency graph diagram of the project (build/dependency.png) @echo ">>> Creating dependency graph diagram of the project..."; $(MAKE) debug cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png buildd: ## Build the debug targets - cmake --build $(f_debug) -t $(app_targets) BayesNetSample $(n_procs) + cmake --build $(f_debug) -t $(app_targets) $(n_procs) buildr: ## Build the release targets - cmake --build $(f_release) -t $(app_targets) BayesNetSample $(n_procs) + cmake --build $(f_release) -t $(app_targets) $(n_procs) clean: ## Clean the tests info @echo ">>> Cleaning Debug BayesNet tests..."; $(call ClearTests) @echo ">>> Done"; -clang-uml: ## Create uml class and sequence diagrams - clang-uml -p --add-compile-flag -I /usr/lib/gcc/x86_64-redhat-linux/8/include/ - debug: ## Build a debug version of the project @echo ">>> Building Debug BayesNet..."; @if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi @@ -89,27 +74,10 @@ test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximu done @echo ">>> Done"; -opt = "" -testp: ## Run platform tests (opt="-s") to verbose output the tests, (opt="-c='Stratified Fold Test'") to run only that section - @echo ">>> Running Platform tests..."; - @$(MAKE) clean - @cmake --build $(f_debug) --target unit_tests_platform $(n_procs) - @if [ -f $(f_debug)/tests/unit_tests_platform ]; then cd $(f_debug)/tests ; ./unit_tests_platform $(opt) ; fi ; - @echo ">>> Done"; - -opt = "" -testb: ## Run BayesNet tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section - @echo ">>> Running BayesNet tests..."; - @$(MAKE) clean - @cmake --build $(f_debug) --target unit_tests_bayesnet $(n_procs) - @if [ -f $(f_debug)/tests/unit_tests_bayesnet ]; then cd $(f_debug)/tests ; ./unit_tests_bayesnet $(opt) ; fi ; - @echo ">>> Done"; - coverage: ## Run tests and generate coverage report (build/index.html) - @echo ">>> Building tests with coverage..."; + @echo ">>> Building tests with coverage..." @$(MAKE) test - @cd $(f_debug) ; \ - gcovr --config ../gcovr.cfg tests ; + @gcovr $(f_debug)/tests @echo ">>> Done"; diff --git a/README.md b/README.md index a3a4f6a..ee21fa4 100644 --- a/README.md +++ b/README.md @@ -2,90 +2,21 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -Bayesian Network Classifier with libtorch from scratch - -## 0. Setup - -Before compiling BayesNet. - -### Miniconda - -To be able to run Python Classifiers such as STree, ODTE, SVC, etc. it is needed to install Miniconda. To do so, download the installer from [Miniconda](https://docs.conda.io/en/latest/miniconda.html) and run it. It is recommended to install it in the home folder. - -In Linux sometimes the library libstdc++ is mistaken from the miniconda installation and produces the next message when running the b_xxxx executables: - -```bash -libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by b_xxxx) -``` - -The solution is to erase the libstdc++ library from the miniconda installation: - -### MPI - -In Linux just install openmpi & openmpi-devel packages. Only if cmake can't find openmpi installation (like in Oracle Linux) set the following variable: - -```bash -export MPI_HOME="/usr/lib64/openmpi" -``` - -In Mac OS X, install mpich with brew and if cmake doesn't find it, edit mpicxx wrapper to remove the ",-commons,use_dylibs" from final_ldflags - -```bash -vi /opt/homebrew/bin/mpicx -``` - -### boost library - -[Getting Started]() - -The best option is install the packages that the Linux distribution have in its repository. If this is the case: - -```bash -sudo dnf install boost-devel -``` - -If this is not possible and the compressed packaged is installed, the following environment variable has to be set pointing to the folder where it was unzipped to: - -```bash -export BOOST_ROOT=/path/to/library/ -``` - -In some cases, it is needed to build the library, to do so: - -```bash -cd /path/to/library -mkdir own -./bootstrap.sh --prefix=/path/to/library/own -./b2 install -export BOOST_ROOT=/path/to/library/own/ -``` - -Don't forget to add the export BOOST_ROOT statement to .bashrc or wherever it is meant to be. - -### libxlswriter - -```bash -cd lib/libxlsxwriter -make -make install DESTDIR=/home/rmontanana/Code PREFIX= -``` - -Environment variable has to be set: - -```bash - export LD_LIBRARY_PATH=/usr/local/lib - ``` +Bayesian Network Classifiers using libtorch from scratch ### Release ```bash make release +make buildr ``` ### Debug & Tests ```bash make debug +make test +make coverage ``` ## 1. Introduction diff --git a/config/config.h.in b/config/config.h.in index ad83bda..832c3a5 100644 --- a/config/config.h.in +++ b/config/config.h.in @@ -11,3 +11,4 @@ static constexpr std::string_view project_name = "@PROJECT_NAME@"; static constexpr std::string_view project_version = "@PROJECT_VERSION@"; static constexpr std::string_view project_description = "@PROJECT_DESCRIPTION@"; static constexpr std::string_view git_sha = "@GIT_SHA@"; +static constexpr std::string_view data_path = "@BayesNet_SOURCE_DIR@/tests/data/"; \ No newline at end of file diff --git a/data/glass.net b/data/glass.net deleted file mode 100644 index 39592f8..0000000 --- a/data/glass.net +++ /dev/null @@ -1,25 +0,0 @@ -Type Si -Type Fe -Type RI -Type Na -Type Ba -Type Ca -Type Al -Type K -Type Mg -Fe RI -Fe Ba -Fe Ca -RI Na -RI Ba -RI Ca -RI Al -RI K -RI Mg -Ba Ca -Ba Al -Ca Al -Ca K -Ca Mg -Al K -K Mg \ No newline at end of file diff --git a/data/mfeat-factors-kdb2.net b/data/mfeat-factors-kdb2.net deleted file mode 100644 index c96508e..0000000 --- a/data/mfeat-factors-kdb2.net +++ /dev/null @@ -1,645 +0,0 @@ -class att215 -class att25 -class att131 -class att95 -class att122 -class att17 -class att28 -class att5 -class att121 -class att214 -class att197 -class att116 -class att182 -class att60 -class att168 -class att178 -class att206 -class att89 -class att77 -class att209 -class att73 -class att126 -class att16 -class att74 -class att27 -class att61 -class att20 -class att101 -class att85 -class att76 -class att137 -class att211 -class att143 -class att14 -class att40 -class att210 -class att155 -class att170 -class att160 -class att23 -class att162 -class att203 -class att164 -class att107 -class att62 -class att42 -class att71 -class att128 -class att138 -class att83 -class att171 -class att92 -class att163 -class att49 -class att161 -class att158 -class att176 -class att11 -class att145 -class att4 -class att172 -class att196 -class att58 -class att68 -class att169 -class att80 -class att32 -class att175 -class att87 -class att88 -class att159 -class att18 -class att52 -class att98 -class att136 -class att150 -class att156 -class att110 -class att100 -class att63 -class att148 -class att90 -class att167 -class att35 -class att205 -class att51 -class att21 -class att142 -class att46 -class att134 -class att39 -class att102 -class att208 -class att130 -class att149 -class att96 -class att75 -class att118 -class att78 -class att213 -class att112 -class att38 -class att174 -class att189 -class att70 -class att179 -class att59 -class att79 -class att15 -class att47 -class att124 -class att34 -class att54 -class att191 -class att86 -class att56 -class att151 -class att66 -class att173 -class att44 -class att198 -class att139 -class att216 -class att129 -class att152 -class att69 -class att81 -class att50 -class att153 -class att41 -class att204 -class att188 -class att26 -class att13 -class att117 -class att114 -class att10 -class att64 -class att200 -class att9 -class att3 -class att119 -class att45 -class att104 -class att140 -class att30 -class att183 -class att146 -class att141 -class att202 -class att194 -class att24 -class att147 -class att8 -class att212 -class att123 -class att166 -class att187 -class att127 -class att190 -class att105 -class att106 -class att184 -class att82 -class att2 -class att135 -class att154 -class att111 -class att115 -class att99 -class att22 -class att84 -class att207 -class att94 -class att177 -class att103 -class att93 -class att201 -class att43 -class att36 -class att12 -class att125 -class att165 -class att180 -class att195 -class att157 -class att48 -class att6 -class att113 -class att193 -class att91 -class att72 -class att31 -class att132 -class att33 -class att57 -class att144 -class att192 -class att185 -class att37 -class att53 -class att120 -class att186 -class att199 -class att65 -class att108 -class att133 -class att29 -class att19 -class att7 -class att97 -class att67 -class att55 -class att1 -class att109 -class att181 -att215 att25 -att215 att131 -att215 att95 -att25 att131 -att25 att121 -att25 att73 -att25 att61 -att25 att85 -att25 att169 -att25 att13 -att131 att95 -att131 att122 -att131 att17 -att131 att28 -att131 att121 -att131 att214 -att131 att116 -att131 att126 -att131 att143 -att95 att122 -att95 att17 -att95 att28 -att95 att5 -att95 att214 -att95 att116 -att95 att60 -att95 att143 -att95 att155 -att95 att71 -att122 att182 -att122 att170 -att17 att5 -att17 att197 -att17 att89 -att17 att77 -att17 att161 -att28 att206 -att28 att16 -att28 att76 -att28 att172 -att28 att124 -att28 att64 -att5 att197 -att5 att89 -att5 att209 -att121 att73 -att214 att178 -att214 att58 -att214 att142 -att197 att209 -att197 att101 -att116 att182 -att116 att60 -att116 att168 -att116 att178 -att116 att206 -att116 att126 -att116 att16 -att116 att27 -att116 att20 -att116 att211 -att116 att164 -att116 att128 -att182 att27 -att182 att14 -att60 att168 -att60 att156 -att168 att156 -att168 att96 -att178 att20 -att178 att58 -att178 att142 -att178 att130 -att206 att74 -att206 att170 -att206 att158 -att89 att77 -att89 att137 -att89 att149 -att89 att173 -att77 att137 -att77 att161 -att209 att101 -att209 att41 -att73 att61 -att73 att157 -att126 att162 -att126 att138 -att126 att150 -att16 att74 -att16 att76 -att16 att40 -att16 att4 -att74 att14 -att74 att62 -att27 att171 -att61 att85 -att61 att169 -att20 att211 -att20 att210 -att20 att164 -att20 att176 -att101 att41 -att85 att13 -att76 att40 -att76 att160 -att137 att149 -att211 att210 -att211 att162 -att211 att171 -att211 att163 -att211 att175 -att211 att79 -att143 att155 -att143 att23 -att143 att71 -att143 att83 -att143 att11 -att14 att98 -att40 att160 -att40 att4 -att40 att196 -att40 att52 -att210 att42 -att210 att114 -att155 att23 -att155 att203 -att155 att107 -att155 att11 -att170 att158 -att160 att52 -att23 att203 -att162 att138 -att162 att18 -att162 att150 -att162 att90 -att162 att174 -att203 att107 -att203 att49 -att203 att59 -att203 att191 -att203 att119 -att164 att62 -att164 att42 -att164 att128 -att164 att92 -att164 att163 -att164 att176 -att164 att145 -att164 att68 -att164 att80 -att164 att98 -att164 att110 -att164 att205 -att164 att21 -att164 att213 -att164 att112 -att164 att38 -att164 att56 -att164 att44 -att107 att59 -att107 att47 -att107 att191 -att71 att83 -att71 att167 -att71 att35 -att128 att92 -att138 att18 -att83 att167 -att171 att87 -att171 att159 -att171 att63 -att171 att51 -att171 att39 -att171 att75 -att163 att49 -att163 att175 -att163 att87 -att163 att79 -att163 att151 -att163 att139 -att163 att187 -att163 att91 -att161 att173 -att176 att145 -att176 att172 -att176 att68 -att176 att80 -att176 att32 -att176 att110 -att176 att205 -att176 att21 -att176 att134 -att176 att56 -att4 att196 -att4 att88 -att4 att136 -att4 att100 -att4 att148 -att4 att208 -att172 att112 -att172 att184 -att196 att88 -att196 att136 -att196 att100 -att196 att208 -att58 att46 -att68 att32 -att32 att200 -att87 att159 -att87 att63 -att87 att75 -att87 att15 -att87 att99 -att159 att195 -att18 att90 -att18 att102 -att18 att78 -att18 att198 -att52 att124 -att98 att86 -att150 att174 -att150 att66 -att156 att96 -att156 att216 -att156 att204 -att156 att24 -att156 att84 -att100 att148 -att63 att51 -att63 att3 -att63 att183 -att90 att102 -att90 att78 -att167 att35 -att167 att179 -att35 att179 -att51 att39 -att51 att3 -att21 att134 -att21 att213 -att21 att38 -att21 att189 -att21 att129 -att21 att81 -att21 att117 -att21 att9 -att142 att46 -att142 att130 -att142 att118 -att142 att10 -att142 att202 -att142 att190 -att142 att106 -att46 att70 -att46 att34 -att46 att166 -att134 att2 -att102 att54 -att130 att118 -att130 att10 -att130 att202 -att149 att125 -att96 att216 -att96 att24 -att75 att15 -att75 att99 -att118 att70 -att78 att198 -att213 att189 -att38 att50 -att38 att26 -att174 att54 -att174 att66 -att174 att30 -att189 att86 -att189 att129 -att189 att69 -att189 att81 -att189 att153 -att189 att117 -att189 att9 -att189 att45 -att189 att105 -att70 att34 -att59 att47 -att79 att151 -att79 att139 -att79 att187 -att79 att127 -att79 att103 -att79 att43 -att79 att91 -att79 att19 -att124 att64 -att54 att114 -att54 att30 -att191 att119 -att86 att194 -att56 att44 -att56 att152 -att56 att50 -att56 att188 -att56 att26 -att56 att104 -att56 att140 -att56 att146 -att56 att194 -att56 att8 -att56 att2 -att56 att133 -att56 att1 -att173 att125 -att173 att113 -att44 att152 -att44 att188 -att44 att200 -att44 att212 -att44 att1 -att139 att103 -att139 att43 -att139 att31 -att139 att199 -att139 att7 -att216 att204 -att216 att36 -att216 att12 -att216 att180 -att216 att108 -att129 att69 -att152 att140 -att69 att153 -att81 att45 -att153 att141 -att41 att53 -att204 att12 -att13 att157 -att114 att6 -att114 att186 -att10 att190 -att64 att184 -att200 att104 -att9 att146 -att9 att141 -att9 att177 -att9 att37 -att9 att133 -att9 att109 -att9 att181 -att3 att183 -att3 att147 -att3 att123 -att3 att135 -att3 att111 -att45 att105 -att45 att177 -att45 att93 -att45 att201 -att45 att193 -att45 att37 -att45 att97 -att140 att8 -att30 att6 -att183 att147 -att183 att123 -att202 att166 -att202 att106 -att202 att82 -att24 att84 -att24 att36 -att147 att135 -att8 att212 -att166 att82 -att187 att127 -att187 att115 -att127 att115 -att105 att93 -att106 att154 -att82 att154 -att82 att22 -att135 att111 -att135 att207 -att154 att22 -att154 att94 -att111 att207 -att22 att94 -att84 att48 -att177 att165 -att103 att195 -att103 att109 -att93 att201 -att93 att165 -att93 att193 -att93 att33 -att201 att33 -att201 att57 -att36 att180 -att36 att72 -att36 att132 -att36 att144 -att125 att113 -att125 att185 -att125 att65 -att125 att29 -att180 att48 -att180 att72 -att180 att192 -att180 att108 -att6 att186 -att113 att185 -att113 att53 -att193 att97 -att91 att31 -att91 att19 -att72 att132 -att72 att192 -att31 att199 -att31 att67 -att132 att144 -att132 att120 -att33 att57 -att144 att120 -att185 att65 -att199 att7 -att199 att67 -att199 att55 -att65 att29 -att67 att55 -att109 att181 diff --git a/data/mfeat-factors-kdb3.net b/data/mfeat-factors-kdb3.net deleted file mode 100644 index d5b6d19..0000000 --- a/data/mfeat-factors-kdb3.net +++ /dev/null @@ -1,859 +0,0 @@ -class att215 -class att25 -class att131 -class att95 -class att122 -class att17 -class att28 -class att5 -class att121 -class att214 -class att197 -class att116 -class att182 -class att60 -class att168 -class att178 -class att206 -class att89 -class att77 -class att209 -class att73 -class att126 -class att16 -class att74 -class att27 -class att61 -class att20 -class att101 -class att85 -class att76 -class att137 -class att211 -class att143 -class att14 -class att40 -class att210 -class att155 -class att170 -class att160 -class att23 -class att162 -class att203 -class att164 -class att107 -class att62 -class att42 -class att71 -class att128 -class att138 -class att83 -class att171 -class att92 -class att163 -class att49 -class att161 -class att158 -class att176 -class att11 -class att145 -class att4 -class att172 -class att196 -class att58 -class att68 -class att169 -class att80 -class att32 -class att175 -class att87 -class att88 -class att159 -class att18 -class att52 -class att98 -class att136 -class att150 -class att156 -class att110 -class att100 -class att63 -class att148 -class att90 -class att167 -class att35 -class att205 -class att51 -class att21 -class att142 -class att46 -class att134 -class att39 -class att102 -class att208 -class att130 -class att149 -class att96 -class att75 -class att118 -class att78 -class att213 -class att112 -class att38 -class att174 -class att189 -class att70 -class att179 -class att59 -class att79 -class att15 -class att47 -class att124 -class att34 -class att54 -class att191 -class att86 -class att56 -class att151 -class att66 -class att173 -class att44 -class att198 -class att139 -class att216 -class att129 -class att152 -class att69 -class att81 -class att50 -class att153 -class att41 -class att204 -class att188 -class att26 -class att13 -class att117 -class att114 -class att10 -class att64 -class att200 -class att9 -class att3 -class att119 -class att45 -class att104 -class att140 -class att30 -class att183 -class att146 -class att141 -class att202 -class att194 -class att24 -class att147 -class att8 -class att212 -class att123 -class att166 -class att187 -class att127 -class att190 -class att105 -class att106 -class att184 -class att82 -class att2 -class att135 -class att154 -class att111 -class att115 -class att99 -class att22 -class att84 -class att207 -class att94 -class att177 -class att103 -class att93 -class att201 -class att43 -class att36 -class att12 -class att125 -class att165 -class att180 -class att195 -class att157 -class att48 -class att6 -class att113 -class att193 -class att91 -class att72 -class att31 -class att132 -class att33 -class att57 -class att144 -class att192 -class att185 -class att37 -class att53 -class att120 -class att186 -class att199 -class att65 -class att108 -class att133 -class att29 -class att19 -class att7 -class att97 -class att67 -class att55 -class att1 -class att109 -class att181 -att215 att25 -att215 att131 -att215 att95 -att215 att17 -att215 att214 -att215 att143 -att25 att131 -att25 att95 -att25 att122 -att25 att121 -att25 att73 -att25 att61 -att25 att85 -att25 att169 -att25 att13 -att25 att157 -att131 att95 -att131 att122 -att131 att17 -att131 att28 -att131 att5 -att131 att121 -att131 att214 -att131 att116 -att131 att182 -att131 att60 -att131 att126 -att131 att16 -att131 att27 -att131 att20 -att131 att143 -att131 att155 -att95 att122 -att95 att17 -att95 att28 -att95 att5 -att95 att121 -att95 att214 -att95 att197 -att95 att116 -att95 att60 -att95 att168 -att95 att178 -att95 att143 -att95 att155 -att95 att23 -att95 att71 -att95 att167 -att122 att28 -att122 att182 -att122 att170 -att17 att5 -att17 att197 -att17 att89 -att17 att77 -att17 att209 -att17 att137 -att17 att161 -att17 att41 -att28 att206 -att28 att16 -att28 att76 -att28 att40 -att28 att210 -att28 att160 -att28 att172 -att28 att124 -att28 att64 -att5 att197 -att5 att89 -att5 att77 -att5 att209 -att5 att101 -att121 att73 -att121 att61 -att214 att116 -att214 att178 -att214 att206 -att214 att58 -att214 att142 -att214 att46 -att197 att89 -att197 att209 -att197 att101 -att116 att182 -att116 att60 -att116 att168 -att116 att178 -att116 att206 -att116 att73 -att116 att126 -att116 att16 -att116 att74 -att116 att27 -att116 att20 -att116 att211 -att116 att164 -att116 att128 -att116 att92 -att116 att176 -att116 att68 -att182 att27 -att182 att14 -att60 att168 -att60 att156 -att60 att96 -att168 att126 -att168 att156 -att168 att96 -att168 att216 -att178 att20 -att178 att211 -att178 att58 -att178 att142 -att178 att130 -att178 att166 -att206 att74 -att206 att170 -att206 att158 -att89 att77 -att89 att137 -att89 att149 -att89 att173 -att77 att137 -att77 att161 -att77 att149 -att209 att101 -att209 att41 -att73 att61 -att73 att85 -att73 att13 -att73 att157 -att126 att162 -att126 att138 -att126 att18 -att126 att150 -att16 att74 -att16 att76 -att16 att40 -att16 att4 -att16 att196 -att16 att136 -att74 att14 -att74 att62 -att27 att171 -att27 att63 -att61 att85 -att61 att169 -att20 att76 -att20 att211 -att20 att210 -att20 att170 -att20 att164 -att20 att128 -att20 att176 -att20 att80 -att101 att41 -att85 att169 -att85 att13 -att76 att14 -att76 att40 -att76 att160 -att76 att4 -att76 att52 -att137 att161 -att137 att149 -att137 att173 -att137 att125 -att211 att210 -att211 att162 -att211 att164 -att211 att62 -att211 att42 -att211 att171 -att211 att163 -att211 att175 -att211 att79 -att211 att151 -att211 att43 -att143 att155 -att143 att23 -att143 att203 -att143 att71 -att143 att83 -att143 att11 -att14 att98 -att40 att160 -att40 att4 -att40 att196 -att40 att88 -att40 att52 -att210 att162 -att210 att42 -att210 att114 -att155 att23 -att155 att203 -att155 att107 -att155 att11 -att170 att158 -att160 att52 -att160 att124 -att23 att203 -att23 att107 -att23 att71 -att23 att11 -att162 att138 -att162 att18 -att162 att150 -att162 att90 -att162 att102 -att162 att174 -att162 att66 -att203 att107 -att203 att49 -att203 att59 -att203 att47 -att203 att191 -att203 att119 -att164 att62 -att164 att42 -att164 att128 -att164 att171 -att164 att92 -att164 att163 -att164 att158 -att164 att176 -att164 att145 -att164 att172 -att164 att58 -att164 att68 -att164 att80 -att164 att32 -att164 att98 -att164 att156 -att164 att110 -att164 att205 -att164 att21 -att164 att134 -att164 att213 -att164 att112 -att164 att38 -att164 att189 -att164 att56 -att164 att44 -att164 att152 -att164 att8 -att107 att83 -att107 att49 -att107 att59 -att107 att47 -att107 att191 -att42 att138 -att42 att54 -att42 att114 -att71 att83 -att71 att167 -att71 att35 -att71 att179 -att128 att92 -att128 att112 -att138 att18 -att138 att150 -att83 att167 -att83 att35 -att171 att87 -att171 att159 -att171 att63 -att171 att51 -att171 att39 -att171 att75 -att92 att163 -att92 att145 -att92 att56 -att163 att49 -att163 att175 -att163 att87 -att163 att79 -att163 att151 -att163 att139 -att163 att187 -att163 att127 -att163 att103 -att163 att91 -att49 att37 -att161 att173 -att161 att113 -att176 att145 -att176 att172 -att176 att68 -att176 att80 -att176 att32 -att176 att175 -att176 att98 -att176 att110 -att176 att205 -att176 att21 -att176 att134 -att176 att213 -att176 att56 -att4 att196 -att4 att88 -att4 att136 -att4 att100 -att4 att148 -att4 att208 -att172 att112 -att172 att184 -att196 att88 -att196 att136 -att196 att100 -att196 att148 -att196 att208 -att58 att142 -att58 att46 -att58 att34 -att68 att32 -att80 att38 -att32 att110 -att32 att21 -att32 att44 -att32 att200 -att175 att87 -att175 att159 -att175 att79 -att175 att187 -att175 att115 -att87 att159 -att87 att63 -att87 att51 -att87 att75 -att87 att15 -att87 att99 -att159 att75 -att159 att15 -att159 att195 -att18 att90 -att18 att102 -att18 att78 -att18 att198 -att52 att124 -att52 att64 -att98 att86 -att136 att100 -att136 att208 -att150 att90 -att150 att174 -att150 att66 -att156 att205 -att156 att96 -att156 att216 -att156 att204 -att156 att24 -att156 att84 -att156 att36 -att156 att12 -att156 att108 -att100 att148 -att63 att51 -att63 att39 -att63 att3 -att63 att183 -att63 att147 -att90 att102 -att90 att78 -att167 att35 -att167 att179 -att35 att179 -att51 att39 -att51 att3 -att51 att183 -att21 att134 -att21 att213 -att21 att38 -att21 att189 -att21 att129 -att21 att81 -att21 att153 -att21 att117 -att21 att9 -att142 att46 -att142 att130 -att142 att118 -att142 att70 -att142 att10 -att142 att202 -att142 att190 -att142 att106 -att46 att130 -att46 att118 -att46 att70 -att46 att34 -att46 att166 -att46 att82 -att134 att2 -att39 att3 -att102 att78 -att102 att174 -att102 att54 -att102 att198 -att130 att118 -att130 att10 -att130 att202 -att130 att190 -att130 att106 -att149 att125 -att96 att216 -att96 att204 -att96 att24 -att75 att15 -att75 att99 -att118 att70 -att118 att10 -att118 att202 -att78 att198 -att213 att189 -att213 att129 -att213 att69 -att213 att81 -att38 att50 -att38 att26 -att174 att54 -att174 att66 -att174 att30 -att189 att86 -att189 att129 -att189 att69 -att189 att81 -att189 att153 -att189 att117 -att189 att9 -att189 att45 -att189 att141 -att189 att105 -att70 att34 -att70 att154 -att179 att59 -att59 att47 -att59 att191 -att59 att119 -att79 att86 -att79 att151 -att79 att139 -att79 att187 -att79 att127 -att79 att103 -att79 att43 -att79 att193 -att79 att91 -att79 att19 -att124 att64 -att54 att114 -att54 att30 -att54 att6 -att191 att119 -att86 att194 -att56 att44 -att56 att152 -att56 att50 -att56 att188 -att56 att26 -att56 att200 -att56 att104 -att56 att140 -att56 att146 -att56 att194 -att56 att8 -att56 att2 -att56 att133 -att56 att1 -att151 att139 -att66 att30 -att173 att125 -att173 att113 -att173 att185 -att44 att152 -att44 att50 -att44 att188 -att44 att200 -att44 att104 -att44 att140 -att44 att194 -att44 att212 -att44 att1 -att139 att26 -att139 att99 -att139 att103 -att139 att43 -att139 att91 -att139 att31 -att139 att199 -att139 att7 -att216 att204 -att216 att24 -att216 att84 -att216 att36 -att216 att12 -att216 att180 -att216 att108 -att129 att69 -att152 att188 -att152 att140 -att69 att153 -att69 att9 -att69 att177 -att81 att45 -att81 att105 -att153 att117 -att153 att141 -att41 att53 -att204 att12 -att204 att180 -att188 att146 -att188 att212 -att13 att157 -att114 att6 -att114 att186 -att10 att190 -att64 att184 -att200 att104 -att9 att45 -att9 att146 -att9 att141 -att9 att177 -att9 att37 -att9 att133 -att9 att109 -att9 att181 -att3 att183 -att3 att147 -att3 att123 -att3 att135 -att3 att111 -att45 att105 -att45 att177 -att45 att93 -att45 att201 -att45 att165 -att45 att193 -att45 att33 -att45 att37 -att45 att133 -att45 att97 -att140 att8 -att30 att6 -att30 att186 -att183 att147 -att183 att123 -att183 att135 -att146 att2 -att202 att166 -att202 att106 -att202 att82 -att24 att84 -att24 att36 -att24 att132 -att147 att123 -att147 att135 -att147 att111 -att147 att207 -att8 att212 -att166 att82 -att166 att22 -att166 att94 -att187 att127 -att187 att115 -att127 att115 -att105 att184 -att105 att93 -att105 att201 -att106 att154 -att82 att154 -att82 att22 -att135 att111 -att135 att207 -att154 att22 -att154 att94 -att111 att207 -att99 att195 -att22 att94 -att84 att48 -att177 att93 -att177 att165 -att177 att181 -att103 att195 -att103 att97 -att103 att109 -att93 att201 -att93 att165 -att93 att193 -att93 att33 -att93 att57 -att201 att33 -att201 att57 -att43 att31 -att36 att180 -att36 att48 -att36 att72 -att36 att132 -att36 att144 -att125 att113 -att125 att185 -att125 att65 -att125 att29 -att180 att48 -att180 att72 -att180 att192 -att180 att108 -att48 att72 -att6 att186 -att113 att185 -att113 att53 -att113 att65 -att193 att97 -att91 att31 -att91 att199 -att91 att19 -att72 att132 -att72 att144 -att72 att192 -att72 att120 -att31 att199 -att31 att7 -att31 att67 -att31 att55 -att31 att1 -att132 att144 -att132 att120 -att33 att57 -att144 att192 -att144 att120 -att185 att53 -att185 att65 -att185 att29 -att199 att19 -att199 att7 -att199 att67 -att199 att55 -att199 att109 -att65 att29 -att7 att67 -att67 att55 -att109 att181 - diff --git a/data/mfeat-factors.net b/data/mfeat-factors.net deleted file mode 100644 index d5b6d19..0000000 --- a/data/mfeat-factors.net +++ /dev/null @@ -1,859 +0,0 @@ -class att215 -class att25 -class att131 -class att95 -class att122 -class att17 -class att28 -class att5 -class att121 -class att214 -class att197 -class att116 -class att182 -class att60 -class att168 -class att178 -class att206 -class att89 -class att77 -class att209 -class att73 -class att126 -class att16 -class att74 -class att27 -class att61 -class att20 -class att101 -class att85 -class att76 -class att137 -class att211 -class att143 -class att14 -class att40 -class att210 -class att155 -class att170 -class att160 -class att23 -class att162 -class att203 -class att164 -class att107 -class att62 -class att42 -class att71 -class att128 -class att138 -class att83 -class att171 -class att92 -class att163 -class att49 -class att161 -class att158 -class att176 -class att11 -class att145 -class att4 -class att172 -class att196 -class att58 -class att68 -class att169 -class att80 -class att32 -class att175 -class att87 -class att88 -class att159 -class att18 -class att52 -class att98 -class att136 -class att150 -class att156 -class att110 -class att100 -class att63 -class att148 -class att90 -class att167 -class att35 -class att205 -class att51 -class att21 -class att142 -class att46 -class att134 -class att39 -class att102 -class att208 -class att130 -class att149 -class att96 -class att75 -class att118 -class att78 -class att213 -class att112 -class att38 -class att174 -class att189 -class att70 -class att179 -class att59 -class att79 -class att15 -class att47 -class att124 -class att34 -class att54 -class att191 -class att86 -class att56 -class att151 -class att66 -class att173 -class att44 -class att198 -class att139 -class att216 -class att129 -class att152 -class att69 -class att81 -class att50 -class att153 -class att41 -class att204 -class att188 -class att26 -class att13 -class att117 -class att114 -class att10 -class att64 -class att200 -class att9 -class att3 -class att119 -class att45 -class att104 -class att140 -class att30 -class att183 -class att146 -class att141 -class att202 -class att194 -class att24 -class att147 -class att8 -class att212 -class att123 -class att166 -class att187 -class att127 -class att190 -class att105 -class att106 -class att184 -class att82 -class att2 -class att135 -class att154 -class att111 -class att115 -class att99 -class att22 -class att84 -class att207 -class att94 -class att177 -class att103 -class att93 -class att201 -class att43 -class att36 -class att12 -class att125 -class att165 -class att180 -class att195 -class att157 -class att48 -class att6 -class att113 -class att193 -class att91 -class att72 -class att31 -class att132 -class att33 -class att57 -class att144 -class att192 -class att185 -class att37 -class att53 -class att120 -class att186 -class att199 -class att65 -class att108 -class att133 -class att29 -class att19 -class att7 -class att97 -class att67 -class att55 -class att1 -class att109 -class att181 -att215 att25 -att215 att131 -att215 att95 -att215 att17 -att215 att214 -att215 att143 -att25 att131 -att25 att95 -att25 att122 -att25 att121 -att25 att73 -att25 att61 -att25 att85 -att25 att169 -att25 att13 -att25 att157 -att131 att95 -att131 att122 -att131 att17 -att131 att28 -att131 att5 -att131 att121 -att131 att214 -att131 att116 -att131 att182 -att131 att60 -att131 att126 -att131 att16 -att131 att27 -att131 att20 -att131 att143 -att131 att155 -att95 att122 -att95 att17 -att95 att28 -att95 att5 -att95 att121 -att95 att214 -att95 att197 -att95 att116 -att95 att60 -att95 att168 -att95 att178 -att95 att143 -att95 att155 -att95 att23 -att95 att71 -att95 att167 -att122 att28 -att122 att182 -att122 att170 -att17 att5 -att17 att197 -att17 att89 -att17 att77 -att17 att209 -att17 att137 -att17 att161 -att17 att41 -att28 att206 -att28 att16 -att28 att76 -att28 att40 -att28 att210 -att28 att160 -att28 att172 -att28 att124 -att28 att64 -att5 att197 -att5 att89 -att5 att77 -att5 att209 -att5 att101 -att121 att73 -att121 att61 -att214 att116 -att214 att178 -att214 att206 -att214 att58 -att214 att142 -att214 att46 -att197 att89 -att197 att209 -att197 att101 -att116 att182 -att116 att60 -att116 att168 -att116 att178 -att116 att206 -att116 att73 -att116 att126 -att116 att16 -att116 att74 -att116 att27 -att116 att20 -att116 att211 -att116 att164 -att116 att128 -att116 att92 -att116 att176 -att116 att68 -att182 att27 -att182 att14 -att60 att168 -att60 att156 -att60 att96 -att168 att126 -att168 att156 -att168 att96 -att168 att216 -att178 att20 -att178 att211 -att178 att58 -att178 att142 -att178 att130 -att178 att166 -att206 att74 -att206 att170 -att206 att158 -att89 att77 -att89 att137 -att89 att149 -att89 att173 -att77 att137 -att77 att161 -att77 att149 -att209 att101 -att209 att41 -att73 att61 -att73 att85 -att73 att13 -att73 att157 -att126 att162 -att126 att138 -att126 att18 -att126 att150 -att16 att74 -att16 att76 -att16 att40 -att16 att4 -att16 att196 -att16 att136 -att74 att14 -att74 att62 -att27 att171 -att27 att63 -att61 att85 -att61 att169 -att20 att76 -att20 att211 -att20 att210 -att20 att170 -att20 att164 -att20 att128 -att20 att176 -att20 att80 -att101 att41 -att85 att169 -att85 att13 -att76 att14 -att76 att40 -att76 att160 -att76 att4 -att76 att52 -att137 att161 -att137 att149 -att137 att173 -att137 att125 -att211 att210 -att211 att162 -att211 att164 -att211 att62 -att211 att42 -att211 att171 -att211 att163 -att211 att175 -att211 att79 -att211 att151 -att211 att43 -att143 att155 -att143 att23 -att143 att203 -att143 att71 -att143 att83 -att143 att11 -att14 att98 -att40 att160 -att40 att4 -att40 att196 -att40 att88 -att40 att52 -att210 att162 -att210 att42 -att210 att114 -att155 att23 -att155 att203 -att155 att107 -att155 att11 -att170 att158 -att160 att52 -att160 att124 -att23 att203 -att23 att107 -att23 att71 -att23 att11 -att162 att138 -att162 att18 -att162 att150 -att162 att90 -att162 att102 -att162 att174 -att162 att66 -att203 att107 -att203 att49 -att203 att59 -att203 att47 -att203 att191 -att203 att119 -att164 att62 -att164 att42 -att164 att128 -att164 att171 -att164 att92 -att164 att163 -att164 att158 -att164 att176 -att164 att145 -att164 att172 -att164 att58 -att164 att68 -att164 att80 -att164 att32 -att164 att98 -att164 att156 -att164 att110 -att164 att205 -att164 att21 -att164 att134 -att164 att213 -att164 att112 -att164 att38 -att164 att189 -att164 att56 -att164 att44 -att164 att152 -att164 att8 -att107 att83 -att107 att49 -att107 att59 -att107 att47 -att107 att191 -att42 att138 -att42 att54 -att42 att114 -att71 att83 -att71 att167 -att71 att35 -att71 att179 -att128 att92 -att128 att112 -att138 att18 -att138 att150 -att83 att167 -att83 att35 -att171 att87 -att171 att159 -att171 att63 -att171 att51 -att171 att39 -att171 att75 -att92 att163 -att92 att145 -att92 att56 -att163 att49 -att163 att175 -att163 att87 -att163 att79 -att163 att151 -att163 att139 -att163 att187 -att163 att127 -att163 att103 -att163 att91 -att49 att37 -att161 att173 -att161 att113 -att176 att145 -att176 att172 -att176 att68 -att176 att80 -att176 att32 -att176 att175 -att176 att98 -att176 att110 -att176 att205 -att176 att21 -att176 att134 -att176 att213 -att176 att56 -att4 att196 -att4 att88 -att4 att136 -att4 att100 -att4 att148 -att4 att208 -att172 att112 -att172 att184 -att196 att88 -att196 att136 -att196 att100 -att196 att148 -att196 att208 -att58 att142 -att58 att46 -att58 att34 -att68 att32 -att80 att38 -att32 att110 -att32 att21 -att32 att44 -att32 att200 -att175 att87 -att175 att159 -att175 att79 -att175 att187 -att175 att115 -att87 att159 -att87 att63 -att87 att51 -att87 att75 -att87 att15 -att87 att99 -att159 att75 -att159 att15 -att159 att195 -att18 att90 -att18 att102 -att18 att78 -att18 att198 -att52 att124 -att52 att64 -att98 att86 -att136 att100 -att136 att208 -att150 att90 -att150 att174 -att150 att66 -att156 att205 -att156 att96 -att156 att216 -att156 att204 -att156 att24 -att156 att84 -att156 att36 -att156 att12 -att156 att108 -att100 att148 -att63 att51 -att63 att39 -att63 att3 -att63 att183 -att63 att147 -att90 att102 -att90 att78 -att167 att35 -att167 att179 -att35 att179 -att51 att39 -att51 att3 -att51 att183 -att21 att134 -att21 att213 -att21 att38 -att21 att189 -att21 att129 -att21 att81 -att21 att153 -att21 att117 -att21 att9 -att142 att46 -att142 att130 -att142 att118 -att142 att70 -att142 att10 -att142 att202 -att142 att190 -att142 att106 -att46 att130 -att46 att118 -att46 att70 -att46 att34 -att46 att166 -att46 att82 -att134 att2 -att39 att3 -att102 att78 -att102 att174 -att102 att54 -att102 att198 -att130 att118 -att130 att10 -att130 att202 -att130 att190 -att130 att106 -att149 att125 -att96 att216 -att96 att204 -att96 att24 -att75 att15 -att75 att99 -att118 att70 -att118 att10 -att118 att202 -att78 att198 -att213 att189 -att213 att129 -att213 att69 -att213 att81 -att38 att50 -att38 att26 -att174 att54 -att174 att66 -att174 att30 -att189 att86 -att189 att129 -att189 att69 -att189 att81 -att189 att153 -att189 att117 -att189 att9 -att189 att45 -att189 att141 -att189 att105 -att70 att34 -att70 att154 -att179 att59 -att59 att47 -att59 att191 -att59 att119 -att79 att86 -att79 att151 -att79 att139 -att79 att187 -att79 att127 -att79 att103 -att79 att43 -att79 att193 -att79 att91 -att79 att19 -att124 att64 -att54 att114 -att54 att30 -att54 att6 -att191 att119 -att86 att194 -att56 att44 -att56 att152 -att56 att50 -att56 att188 -att56 att26 -att56 att200 -att56 att104 -att56 att140 -att56 att146 -att56 att194 -att56 att8 -att56 att2 -att56 att133 -att56 att1 -att151 att139 -att66 att30 -att173 att125 -att173 att113 -att173 att185 -att44 att152 -att44 att50 -att44 att188 -att44 att200 -att44 att104 -att44 att140 -att44 att194 -att44 att212 -att44 att1 -att139 att26 -att139 att99 -att139 att103 -att139 att43 -att139 att91 -att139 att31 -att139 att199 -att139 att7 -att216 att204 -att216 att24 -att216 att84 -att216 att36 -att216 att12 -att216 att180 -att216 att108 -att129 att69 -att152 att188 -att152 att140 -att69 att153 -att69 att9 -att69 att177 -att81 att45 -att81 att105 -att153 att117 -att153 att141 -att41 att53 -att204 att12 -att204 att180 -att188 att146 -att188 att212 -att13 att157 -att114 att6 -att114 att186 -att10 att190 -att64 att184 -att200 att104 -att9 att45 -att9 att146 -att9 att141 -att9 att177 -att9 att37 -att9 att133 -att9 att109 -att9 att181 -att3 att183 -att3 att147 -att3 att123 -att3 att135 -att3 att111 -att45 att105 -att45 att177 -att45 att93 -att45 att201 -att45 att165 -att45 att193 -att45 att33 -att45 att37 -att45 att133 -att45 att97 -att140 att8 -att30 att6 -att30 att186 -att183 att147 -att183 att123 -att183 att135 -att146 att2 -att202 att166 -att202 att106 -att202 att82 -att24 att84 -att24 att36 -att24 att132 -att147 att123 -att147 att135 -att147 att111 -att147 att207 -att8 att212 -att166 att82 -att166 att22 -att166 att94 -att187 att127 -att187 att115 -att127 att115 -att105 att184 -att105 att93 -att105 att201 -att106 att154 -att82 att154 -att82 att22 -att135 att111 -att135 att207 -att154 att22 -att154 att94 -att111 att207 -att99 att195 -att22 att94 -att84 att48 -att177 att93 -att177 att165 -att177 att181 -att103 att195 -att103 att97 -att103 att109 -att93 att201 -att93 att165 -att93 att193 -att93 att33 -att93 att57 -att201 att33 -att201 att57 -att43 att31 -att36 att180 -att36 att48 -att36 att72 -att36 att132 -att36 att144 -att125 att113 -att125 att185 -att125 att65 -att125 att29 -att180 att48 -att180 att72 -att180 att192 -att180 att108 -att48 att72 -att6 att186 -att113 att185 -att113 att53 -att113 att65 -att193 att97 -att91 att31 -att91 att199 -att91 att19 -att72 att132 -att72 att144 -att72 att192 -att72 att120 -att31 att199 -att31 att7 -att31 att67 -att31 att55 -att31 att1 -att132 att144 -att132 att120 -att33 att57 -att144 att192 -att144 att120 -att185 att53 -att185 att65 -att185 att29 -att199 att19 -att199 att7 -att199 att67 -att199 att55 -att199 att109 -att65 att29 -att7 att67 -att67 att55 -att109 att181 - diff --git a/gcovr.cfg b/gcovr.cfg index c1518ad..89e0877 100644 --- a/gcovr.cfg +++ b/gcovr.cfg @@ -1,4 +1,4 @@ filter = src/ -exclude-directories = build/lib/ +exclude-directories = build_debug/lib/ print-summary = yes sort-percentage = yes diff --git a/grid_stree.json b/grid_stree.json deleted file mode 100644 index 9e6a712..0000000 --- a/grid_stree.json +++ /dev/null @@ -1,162 +0,0 @@ -{ - "balance-scale": { - "C": 10000.0, - "gamma": 0.1, - "kernel": "rbf", - "max_iter": 10000 - }, - "balloons": { - "C": 7, - "gamma": 0.1, - "kernel": "rbf", - "max_iter": 10000 - }, - "breast-cancer-wisc-diag": { - "C": 0.2, - "max_iter": 10000 - }, - "breast-cancer-wisc-prog": { - "C": 0.2, - "max_iter": 10000 - }, - "breast-cancer-wisc": {}, - "breast-cancer": {}, - "cardiotocography-10clases": {}, - "cardiotocography-3clases": {}, - "conn-bench-sonar-mines-rocks": {}, - "cylinder-bands": {}, - "dermatology": { - "C": 55, - "max_iter": 10000 - }, - "echocardiogram": { - "C": 7, - "gamma": 0.1, - "kernel": "poly", - "max_features": "auto", - "max_iter": 10000 - }, - "fertility": { - "C": 0.05, - "max_features": "auto", - "max_iter": 10000 - }, - "haberman-survival": {}, - "heart-hungarian": { - "C": 0.05, - "max_iter": 10000 - }, - "hepatitis": { - "C": 7, - "gamma": 0.1, - "kernel": "rbf", - "max_iter": 10000 - }, - "ilpd-indian-liver": {}, - "ionosphere": { - "C": 7, - "gamma": 0.1, - "kernel": "rbf", - "max_iter": 10000 - }, - "iris": {}, - "led-display": {}, - "libras": { - "C": 0.08, - "max_iter": 10000 - }, - "low-res-spect": { - "C": 0.05, - "max_iter": 10000 - }, - "lymphography": { - "C": 0.05, - "max_iter": 10000 - }, - "mammographic": {}, - "molec-biol-promoter": { - "C": 0.05, - "gamma": 0.1, - "kernel": "poly", - "max_iter": 10000 - }, - "musk-1": { - "C": 0.05, - "gamma": 0.1, - "kernel": "poly", - "max_iter": 10000 - }, - "oocytes_merluccius_nucleus_4d": { - "C": 8.25, - "gamma": 0.1, - "kernel": "poly" - }, - "oocytes_merluccius_states_2f": {}, - "oocytes_trisopterus_nucleus_2f": {}, - "oocytes_trisopterus_states_5b": { - "C": 0.11, - "max_iter": 10000 - }, - "parkinsons": {}, - "pima": {}, - "pittsburg-bridges-MATERIAL": { - "C": 7, - "gamma": 0.1, - "kernel": "rbf", - "max_iter": 10000 - }, - "pittsburg-bridges-REL-L": {}, - "pittsburg-bridges-SPAN": { - "C": 0.05, - "max_iter": 10000 - }, - "pittsburg-bridges-T-OR-D": {}, - "planning": { - "C": 7, - "gamma": 10.0, - "kernel": "rbf", - "max_iter": 10000 - }, - "post-operative": { - "C": 55, - "degree": 5, - "gamma": 0.1, - "kernel": "poly", - "max_iter": 10000 - }, - "seeds": { - "C": 10000.0, - "max_iter": 10000 - }, - "statlog-australian-credit": { - "C": 0.05, - "max_features": "auto", - "max_iter": 10000 - }, - "statlog-german-credit": {}, - "statlog-heart": {}, - "statlog-image": { - "C": 7, - "max_iter": 10000 - }, - "statlog-vehicle": {}, - "synthetic-control": { - "C": 0.55, - "max_iter": 10000 - }, - "tic-tac-toe": { - "C": 0.2, - "gamma": 0.1, - "kernel": "poly", - "max_iter": 10000 - }, - "vertebral-column-2clases": {}, - "wine": { - "C": 0.55, - "max_iter": 10000 - }, - "zoo": { - "C": 0.1, - "max_iter": 10000 - } -} \ No newline at end of file diff --git a/lib/folding b/lib/folding new file mode 160000 index 0000000..a3a2977 --- /dev/null +++ b/lib/folding @@ -0,0 +1 @@ +Subproject commit a3a2977996223b709c0f9149772c01a5f771e391 diff --git a/lib/libxlsxwriter b/lib/libxlsxwriter deleted file mode 160000 index 29355a0..0000000 --- a/lib/libxlsxwriter +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 29355a0887475488c7cc470ad43cc867fcfa92e2 diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt deleted file mode 100644 index 38b9e3d..0000000 --- a/sample/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -include_directories(${BayesNet_SOURCE_DIR}/src/Platform) -include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) -include_directories(${BayesNet_SOURCE_DIR}/src/PyClassifiers) -include_directories(${Python3_INCLUDE_DIRS}) -include_directories(${BayesNet_SOURCE_DIR}/lib/Files) -include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) -include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) -include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) -add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) -target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap) \ No newline at end of file diff --git a/sample/sample.cc b/sample/sample.cc deleted file mode 100644 index 9a19e79..0000000 --- a/sample/sample.cc +++ /dev/null @@ -1,235 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "ArffFiles.h" -#include "BayesMetrics.h" -#include "CPPFImdlp.h" -#include "Folding.h" -#include "Models.h" -#include "modelRegister.h" -#include - -const std::string PATH = "../../data/"; - -pair, map> discretize(std::vector& X, mdlp::labels_t& y, std::vector features) -{ - std::vectorXd; - map maxes; - - auto fimdlp = mdlp::CPPFImdlp(); - for (int i = 0; i < X.size(); i++) { - fimdlp.fit(X[i], y); - mdlp::labels_t& xd = fimdlp.transform(X[i]); - maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1; - Xd.push_back(xd); - } - return { Xd, maxes }; -} - -bool file_exists(const std::string& name) -{ - if (FILE* file = fopen(name.c_str(), "r")) { - fclose(file); - return true; - } else { - return false; - } -} -pair>, std::vector> extract_indices(std::vector indices, std::vector> X, std::vector y) -{ - std::vector> Xr; // nxm - std::vector yr; - for (int col = 0; col < X.size(); ++col) { - Xr.push_back(std::vector()); - } - for (auto index : indices) { - for (int col = 0; col < X.size(); ++col) { - Xr[col].push_back(X[col][index]); - } - yr.push_back(y[index]); - } - return { Xr, yr }; -} - -int main(int argc, char** argv) -{ - map datasets = { - {"diabetes", true}, - {"ecoli", true}, - {"glass", true}, - {"iris", true}, - {"kdd_JapaneseVowels", false}, - {"letter", true}, - {"liver-disorders", true}, - {"mfeat-factors", true}, - }; - auto valid_datasets = std::vector(); - transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets), - [](const pair& pair) { return pair.first; }); - argparse::ArgumentParser program("BayesNetSample"); - program.add_argument("-d", "--dataset") - .help("Dataset file name") - .action([valid_datasets](const std::string& value) { - if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { - return value; - } - throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}"); - } - ); - program.add_argument("-p", "--path") - .help(" folder where the data files are located, default") - .default_value(std::string{ PATH } - ); - program.add_argument("-m", "--model") - .help("Model to use " + platform::Models::instance()->tostring()) - .action([](const std::string& value) { - static const std::vector choices = platform::Models::instance()->getNames(); - if (find(choices.begin(), choices.end(), value) != choices.end()) { - return value; - } - throw runtime_error("Model must be one of " + platform::Models::instance()->tostring()); - } - ); - program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); - program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true); - program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); - program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true); - program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const std::string& value) { - try { - auto k = stoi(value); - if (k < 2) { - throw runtime_error("Number of folds must be greater than 1"); - } - return k; - } - catch (const runtime_error& err) { - throw runtime_error(err.what()); - } - catch (...) { - throw runtime_error("Number of folds must be an integer"); - }}); - program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); - bool class_last, stratified, tensors, dump_cpt; - std::string model_name, file_name, path, complete_file_name; - int nFolds, seed; - try { - program.parse_args(argc, argv); - file_name = program.get("dataset"); - path = program.get("path"); - model_name = program.get("model"); - complete_file_name = path + file_name + ".arff"; - stratified = program.get("stratified"); - tensors = program.get("tensors"); - nFolds = program.get("folds"); - seed = program.get("seed"); - dump_cpt = program.get("dumpcpt"); - class_last = datasets[file_name]; - if (!file_exists(complete_file_name)) { - throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); - } - } - catch (const exception& err) { - cerr << err.what() << std::endl; - cerr << program; - exit(1); - } - - /* - * Begin Processing - */ - auto handler = ArffFiles(); - handler.load(complete_file_name, class_last); - // Get Dataset X, y - std::vector& X = handler.getX(); - mdlp::labels_t& y = handler.getY(); - // Get className & Features - auto className = handler.getClassName(); - std::vector features; - auto attributes = handler.getAttributes(); - transform(attributes.begin(), attributes.end(), back_inserter(features), - [](const pair& item) { return item.first; }); - // Discretize Dataset - auto [Xd, maxes] = discretize(X, y, features); - maxes[className] = *max_element(y.begin(), y.end()) + 1; - map> states; - for (auto feature : features) { - states[feature] = std::vector(maxes[feature]); - } - states[className] = std::vector(maxes[className]); - auto clf = platform::Models::instance()->create(model_name); - clf->fit(Xd, y, features, className, states); - if (dump_cpt) { - std::cout << "--- CPT Tables ---" << std::endl; - clf->dump_cpt(); - } - auto lines = clf->show(); - for (auto line : lines) { - std::cout << line << std::endl; - } - std::cout << "--- Topological Order ---" << std::endl; - auto order = clf->topological_order(); - for (auto name : order) { - std::cout << name << ", "; - } - std::cout << "end." << std::endl; - auto score = clf->score(Xd, y); - std::cout << "Score: " << score << std::endl; - auto graph = clf->graph(); - auto dot_file = model_name + "_" + file_name; - ofstream file(dot_file + ".dot"); - file << graph; - file.close(); - std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl; - std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl; - std::string stratified_string = stratified ? " Stratified" : ""; - std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl; - std::cout << "==========================================" << std::endl; - torch::Tensor Xt = torch::zeros({ static_cast(Xd.size()), static_cast(Xd[0].size()) }, torch::kInt32); - torch::Tensor yt = torch::tensor(y, torch::kInt32); - for (int i = 0; i < features.size(); ++i) { - Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32)); - } - float total_score = 0, total_score_train = 0, score_train, score_test; - platform::Fold* fold; - if (stratified) - fold = new platform::StratifiedKFold(nFolds, y, seed); - else - fold = new platform::KFold(nFolds, y.size(), seed); - for (auto i = 0; i < nFolds; ++i) { - auto [train, test] = fold->getFold(i); - std::cout << "Fold: " << i + 1 << std::endl; - if (tensors) { - auto ttrain = torch::tensor(train, torch::kInt64); - auto ttest = torch::tensor(test, torch::kInt64); - torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain); - torch::Tensor ytraint = yt.index({ ttrain }); - torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest); - torch::Tensor ytestt = yt.index({ ttest }); - clf->fit(Xtraint, ytraint, features, className, states); - auto temp = clf->predict(Xtraint); - score_train = clf->score(Xtraint, ytraint); - score_test = clf->score(Xtestt, ytestt); - } else { - auto [Xtrain, ytrain] = extract_indices(train, Xd, y); - auto [Xtest, ytest] = extract_indices(test, Xd, y); - clf->fit(Xtrain, ytrain, features, className, states); - score_train = clf->score(Xtrain, ytrain); - score_test = clf->score(Xtest, ytest); - } - if (dump_cpt) { - std::cout << "--- CPT Tables ---" << std::endl; - clf->dump_cpt(); - } - total_score_train += score_train; - total_score += score_test; - std::cout << "Score Train: " << score_train << std::endl; - std::cout << "Score Test : " << score_test << std::endl; - std::cout << "-------------------------------------------------------------------------------" << std::endl; - } - std::cout << "**********************************************************************************" << std::endl; - std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl; - std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0; -} \ No newline at end of file diff --git a/src/BayesNet/BoostAODE.cc b/src/BayesNet/BoostAODE.cc index c67424c..8178280 100644 --- a/src/BayesNet/BoostAODE.cc +++ b/src/BayesNet/BoostAODE.cc @@ -2,12 +2,10 @@ #include #include #include "BoostAODE.h" -#include "Colors.h" -#include "Folding.h" -#include "Paths.h" #include "CFS.h" #include "FCBF.h" #include "IWSS.h" +#include "folding.hpp" namespace bayesnet { BoostAODE::BoostAODE() : Ensemble() @@ -24,7 +22,7 @@ namespace bayesnet { auto y_ = dataset.index({ -1, "..." }); if (convergence) { // Prepare train & validation sets from train data - auto fold = platform::StratifiedKFold(5, y_, 271); + auto fold = folding::StratifiedKFold(5, y_, 271); dataset_ = torch::clone(dataset); // save input dataset auto [train, test] = fold.getFold(0); diff --git a/src/BayesNet/CMakeLists.txt b/src/BayesNet/CMakeLists.txt index f4ef172..d02c671 100644 --- a/src/BayesNet/CMakeLists.txt +++ b/src/BayesNet/CMakeLists.txt @@ -1,14 +1,13 @@ include_directories( ${BayesNet_SOURCE_DIR}/lib/mdlp ${BayesNet_SOURCE_DIR}/lib/Files + ${BayesNet_SOURCE_DIR}/lib/folding ${BayesNet_SOURCE_DIR}/lib/json/include ${BayesNet_SOURCE_DIR}/src/BayesNet - ${BayesNet_SOURCE_DIR}/src/Platform - ${BayesNet_SOURCE_DIR}/src/PyClassifiers - ${Python3_INCLUDE_DIRS} + ${CMAKE_BINARY_DIR}/configured_files/include ) add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc - Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc) + Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ) target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/BayesNet/Network.h b/src/BayesNet/Network.h index 34b06c2..2a3795e 100644 --- a/src/BayesNet/Network.h +++ b/src/BayesNet/Network.h @@ -3,6 +3,7 @@ #include "Node.h" #include #include +#include "config.h" namespace bayesnet { class Network { @@ -56,7 +57,7 @@ namespace bayesnet { std::vector graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format void initialize(); void dump_cpt() const; - inline std::string version() { return "0.2.0"; } + inline std::string version() { return { project_version.begin(), project_version.end() }; } }; } #endif \ No newline at end of file diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc deleted file mode 100644 index ba0c082..0000000 --- a/src/Platform/BestResults.cc +++ /dev/null @@ -1,343 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "BestResults.h" -#include "Result.h" -#include "Colors.h" -#include "Statistics.h" -#include "BestResultsExcel.h" -#include "CLocale.h" - - -namespace fs = std::filesystem; -// function ftime_to_std::string, Code taken from -// https://stackoverflow.com/a/58237530/1389271 -template -std::string ftime_to_string(TP tp) -{ - auto sctp = std::chrono::time_point_cast(tp - TP::clock::now() - + std::chrono::system_clock::now()); - auto tt = std::chrono::system_clock::to_time_t(sctp); - std::tm* gmt = std::gmtime(&tt); - std::stringstream buffer; - buffer << std::put_time(gmt, "%Y-%m-%d %H:%M"); - return buffer.str(); -} -namespace platform { - std::string BestResults::build() - { - auto files = loadResultFiles(); - if (files.size() == 0) { - std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl; - exit(1); - } - json bests; - for (const auto& file : files) { - auto result = Result(path, file); - auto data = result.load(); - for (auto const& item : data.at("results")) { - bool update = false; - // Check if results file contains only one dataset - auto datasetName = item.at("dataset").get(); - if (bests.contains(datasetName)) { - if (item.at("score").get() > bests[datasetName].at(0).get()) { - update = true; - } - } else { - update = true; - } - if (update) { - bests[datasetName] = { item.at("score").get(), item.at("hyperparameters"), file }; - } - } - } - std::string bestFileName = path + bestResultFile(); - if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { - fclose(fileTest); - std::cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << std::endl; - } - std::ofstream file(bestFileName); - file << bests; - file.close(); - return bestFileName; - } - std::string BestResults::bestResultFile() - { - return "best_results_" + score + "_" + model + ".json"; - } - std::pair getModelScore(std::string name) - { - // results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json - int i = 0; - auto pos = name.find("_"); - auto pos2 = name.find("_", pos + 1); - std::string score = name.substr(pos + 1, pos2 - pos - 1); - pos = name.find("_", pos2 + 1); - std::string model = name.substr(pos2 + 1, pos - pos2 - 1); - return { model, score }; - } - std::vector BestResults::loadResultFiles() - { - std::vector files; - using std::filesystem::directory_iterator; - std::string fileModel, fileScore; - for (const auto& file : directory_iterator(path)) { - auto fileName = file.path().filename().string(); - if (fileName.find(".json") != std::string::npos && fileName.find("results_") == 0) { - tie(fileModel, fileScore) = getModelScore(fileName); - if (score == fileScore && (model == fileModel || model == "any")) { - files.push_back(fileName); - } - } - } - return files; - } - json BestResults::loadFile(const std::string& fileName) - { - std::ifstream resultData(fileName); - if (resultData.is_open()) { - json data = json::parse(resultData); - return data; - } - throw std::invalid_argument("Unable to open result file. [" + fileName + "]"); - } - std::vector BestResults::getModels() - { - std::set models; - std::vector result; - auto files = loadResultFiles(); - if (files.size() == 0) { - std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl; - exit(1); - } - std::string fileModel, fileScore; - for (const auto& file : files) { - // extract the model from the file name - tie(fileModel, fileScore) = getModelScore(file); - // add the model to the std::vector of models - models.insert(fileModel); - } - result = std::vector(models.begin(), models.end()); - return result; - } - std::vector BestResults::getDatasets(json table) - { - std::vector datasets; - for (const auto& dataset : table.items()) { - datasets.push_back(dataset.key()); - } - return datasets; - } - void BestResults::buildAll() - { - auto models = getModels(); - for (const auto& model : models) { - std::cout << "Building best results for model: " << model << std::endl; - this->model = model; - build(); - } - model = "any"; - } - void BestResults::listFile() - { - std::string bestFileName = path + bestResultFile(); - if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { - fclose(fileTest); - } else { - std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl; - exit(1); - } - auto temp = ConfigLocale(); - auto date = ftime_to_string(std::filesystem::last_write_time(bestFileName)); - auto data = loadFile(bestFileName); - auto datasets = getDatasets(data); - int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); - int maxFileName = 0; - int maxHyper = 15; - for (auto const& item : data.items()) { - maxHyper = std::max(maxHyper, (int)item.value().at(1).dump().size()); - maxFileName = std::max(maxFileName, (int)item.value().at(2).get().size()); - } - std::stringstream oss; - oss << Colors::GREEN() << "Best results for " << model << " as of " << date << std::endl; - std::cout << oss.str(); - std::cout << std::string(oss.str().size() - 8, '-') << std::endl; - std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << "Dataset" << "Score " << std::setw(maxFileName) << "File" << " Hyperparameters" << std::endl; - std::cout << "=== " << std::string(maxDatasetName, '=') << " =========== " << std::string(maxFileName, '=') << " " << std::string(maxHyper, '=') << std::endl; - auto i = 0; - bool odd = true; - double total = 0; - for (auto const& item : data.items()) { - auto color = odd ? Colors::BLUE() : Colors::CYAN(); - double value = item.value().at(0).get(); - std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " "; - std::cout << std::setw(maxDatasetName) << std::left << item.key() << " "; - std::cout << std::setw(11) << std::setprecision(9) << std::fixed << value << " "; - std::cout << std::setw(maxFileName) << item.value().at(2).get() << " "; - std::cout << item.value().at(1) << " "; - std::cout << std::endl; - total += value; - odd = !odd; - } - std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ===========" << std::endl; - std::cout << std::setw(5 + maxDatasetName) << "Total.................. " << std::setw(11) << std::setprecision(8) << std::fixed << total << std::endl; - } - json BestResults::buildTableResults(std::vector models) - { - json table; - auto maxDate = std::filesystem::file_time_type::max(); - for (const auto& model : models) { - this->model = model; - std::string bestFileName = path + bestResultFile(); - if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { - fclose(fileTest); - } else { - std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl; - exit(1); - } - auto dateWrite = std::filesystem::last_write_time(bestFileName); - if (dateWrite < maxDate) { - maxDate = dateWrite; - } - auto data = loadFile(bestFileName); - table[model] = data; - } - table["dateTable"] = ftime_to_string(maxDate); - return table; - } - void BestResults::printTableResults(std::vector models, json table) - { - std::stringstream oss; - oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get() << std::endl; - std::cout << oss.str(); - std::cout << std::string(oss.str().size() - 8, '-') << std::endl; - std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset"); - for (const auto& model : models) { - std::cout << std::setw(maxModelName) << std::left << model << " "; - } - std::cout << std::endl; - std::cout << "=== " << std::string(maxDatasetName, '=') << " "; - for (const auto& model : models) { - std::cout << std::string(maxModelName, '=') << " "; - } - std::cout << std::endl; - auto i = 0; - bool odd = true; - std::map totals; - int nDatasets = table.begin().value().size(); - for (const auto& model : models) { - totals[model] = 0.0; - } - auto datasets = getDatasets(table.begin().value()); - for (auto const& dataset : datasets) { - auto color = odd ? Colors::BLUE() : Colors::CYAN(); - std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " "; - std::cout << std::setw(maxDatasetName) << std::left << dataset << " "; - double maxValue = 0; - // Find out the max value for this dataset - for (const auto& model : models) { - double value = table[model].at(dataset).at(0).get(); - if (value > maxValue) { - maxValue = value; - } - } - // Print the row with red colors on max values - for (const auto& model : models) { - std::string efectiveColor = color; - double value = table[model].at(dataset).at(0).get(); - if (value == maxValue) { - efectiveColor = Colors::RED(); - } - totals[model] += value; - std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " "; - } - std::cout << std::endl; - odd = !odd; - } - std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " "; - for (const auto& model : models) { - std::cout << std::string(maxModelName, '=') << " "; - } - std::cout << std::endl; - std::cout << Colors::GREEN() << std::setw(5 + maxDatasetName) << " Totals..................."; - double max = 0.0; - for (const auto& total : totals) { - if (total.second > max) { - max = total.second; - } - } - for (const auto& model : models) { - std::string efectiveColor = Colors::GREEN(); - if (totals[model] == max) { - efectiveColor = Colors::RED(); - } - std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << totals[model] << " "; - } - std::cout << std::endl; - } - void BestResults::reportSingle(bool excel) - { - listFile(); - if (excel) { - auto models = getModels(); - // Build the table of results - json table = buildTableResults(models); - std::vector datasets = getDatasets(table.begin().value()); - BestResultsExcel excel(score, datasets); - excel.reportSingle(model, path + bestResultFile()); - messageExcelFile(excel.getFileName()); - } - } - void BestResults::reportAll(bool excel) - { - auto models = getModels(); - // Build the table of results - json table = buildTableResults(models); - std::vector datasets = getDatasets(table.begin().value()); - maxModelName = (*max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); - maxModelName = std::max(12, maxModelName); - maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); - maxDatasetName = std::max(25, maxDatasetName); - // Print the table of results - printTableResults(models, table); - // Compute the Friedman test - std::map> ranksModels; - if (friedman) { - Statistics stats(models, datasets, table, significance); - auto result = stats.friedmanTest(); - stats.postHocHolmTest(result); - ranksModels = stats.getRanks(); - } - if (excel) { - BestResultsExcel excel(score, datasets); - excel.reportAll(models, table, ranksModels, friedman, significance); - if (friedman) { - int idx = -1; - double min = 2000; - // Find out the control model - auto totals = std::vector(models.size(), 0.0); - for (const auto& dataset : datasets) { - for (int i = 0; i < models.size(); ++i) { - totals[i] += ranksModels[dataset][models[i]]; - } - } - for (int i = 0; i < models.size(); ++i) { - if (totals[i] < min) { - min = totals[i]; - idx = i; - } - } - model = models.at(idx); - excel.reportSingle(model, path + bestResultFile()); - } - messageExcelFile(excel.getFileName()); - } - } - void BestResults::messageExcelFile(const std::string& fileName) - { - std::cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << std::endl; - } -} \ No newline at end of file diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h deleted file mode 100644 index 7d576b0..0000000 --- a/src/Platform/BestResults.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef BESTRESULTS_H -#define BESTRESULTS_H -#include -#include -using json = nlohmann::json; -namespace platform { - class BestResults { - public: - explicit BestResults(const std::string& path, const std::string& score, const std::string& model, bool friedman, double significance = 0.05) - : path(path), score(score), model(model), friedman(friedman), significance(significance) - { - } - std::string build(); - void reportSingle(bool excel); - void reportAll(bool excel); - void buildAll(); - private: - std::vector getModels(); - std::vector getDatasets(json table); - std::vector loadResultFiles(); - void messageExcelFile(const std::string& fileName); - json buildTableResults(std::vector models); - void printTableResults(std::vector models, json table); - std::string bestResultFile(); - json loadFile(const std::string& fileName); - void listFile(); - std::string path; - std::string score; - std::string model; - bool friedman; - double significance; - int maxModelName = 0; - int maxDatasetName = 0; - }; -} -#endif //BESTRESULTS_H \ No newline at end of file diff --git a/src/Platform/BestResultsExcel.cc b/src/Platform/BestResultsExcel.cc deleted file mode 100644 index d524665..0000000 --- a/src/Platform/BestResultsExcel.cc +++ /dev/null @@ -1,300 +0,0 @@ -#include -#include "BestResultsExcel.h" -#include "Paths.h" -#include -#include -#include "Statistics.h" -#include "ReportExcel.h" - -namespace platform { - json loadResultData(const std::string& fileName) - { - json data; - std::ifstream resultData(fileName); - if (resultData.is_open()) { - data = json::parse(resultData); - } else { - throw std::invalid_argument("Unable to open result file. [" + fileName + "]"); - } - return data; - } - std::string getColumnName(int colNum) - { - std::string columnName = ""; - if (colNum == 0) - return "A"; - while (colNum > 0) { - int modulo = colNum % 26; - columnName = char(65 + modulo) + columnName; - colNum = (int)((colNum - modulo) / 26); - } - return columnName; - } - BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector& datasets) : score(score), datasets(datasets) - { - workbook = workbook_new((Paths::excel() + fileName).c_str()); - setProperties("Best Results"); - int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); - datasetNameSize = std::max(datasetNameSize, maxDatasetName); - createFormats(); - } - void BestResultsExcel::reportAll(const std::vector& models, const json& table, const std::map>& ranks, bool friedman, double significance) - { - this->table = table; - this->models = models; - ranksModels = ranks; - this->friedman = friedman; - this->significance = significance; - worksheet = workbook_add_worksheet(workbook, "Best Results"); - int maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); - modelNameSize = std::max(modelNameSize, maxModelName); - formatColumns(); - build(); - } - void BestResultsExcel::reportSingle(const std::string& model, const std::string& fileName) - { - worksheet = workbook_add_worksheet(workbook, "Report"); - if (FILE* fileTest = fopen(fileName.c_str(), "r")) { - fclose(fileTest); - } else { - std::cerr << "File " << fileName << " doesn't exist." << std::endl; - exit(1); - } - json data = loadResultData(fileName); - - std::string title = "Best results for " + model; - worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]); - // Body header - row = 3; - int col = 1; - writeString(row, 0, "Nº", "bodyHeader"); - writeString(row, 1, "Dataset", "bodyHeader"); - writeString(row, 2, "Score", "bodyHeader"); - writeString(row, 3, "File", "bodyHeader"); - writeString(row, 4, "Hyperparameters", "bodyHeader"); - auto i = 0; - std::string hyperparameters; - int hypSize = 22; - std::map files; // map of files imported and their tabs - for (auto const& item : data.items()) { - row++; - writeInt(row, 0, i++, "ints"); - writeString(row, 1, item.key().c_str(), "text"); - writeDouble(row, 2, item.value().at(0).get(), "result"); - auto fileName = item.value().at(2).get(); - std::string hyperlink = ""; - try { - hyperlink = files.at(fileName); - } - catch (const std::out_of_range& oor) { - auto tabName = "table_" + std::to_string(i); - auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str()); - json data = loadResultData(Paths::results() + fileName); - auto report = ReportExcel(data, false, workbook, worksheetNew); - report.show(); - hyperlink = "#table_" + std::to_string(i); - files[fileName] = hyperlink; - } - hyperlink += "!H" + std::to_string(i + 6); - std::string fileNameText = "=HYPERLINK(\"" + hyperlink + "\",\"" + fileName + "\")"; - worksheet_write_formula(worksheet, row, 3, fileNameText.c_str(), efectiveStyle("text")); - hyperparameters = item.value().at(1).dump(); - if (hyperparameters.size() > hypSize) { - hypSize = hyperparameters.size(); - } - writeString(row, 4, hyperparameters, "text"); - } - row++; - // Set Totals - writeString(row, 1, "Total", "bodyHeader"); - std::stringstream oss; - auto colName = getColumnName(2); - oss << "=sum(" << colName << "5:" << colName << row << ")"; - worksheet_write_formula(worksheet, row, 2, oss.str().c_str(), styles["bodyHeader_odd"]); - // Set format - worksheet_freeze_panes(worksheet, 4, 2); - std::vector columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 }; - for (int i = 0; i < columns_sizes.size(); ++i) { - worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); - } - } - BestResultsExcel::~BestResultsExcel() - { - workbook_close(workbook); - } - void BestResultsExcel::formatColumns() - { - worksheet_freeze_panes(worksheet, 4, 2); - std::vector columns_sizes = { 5, datasetNameSize }; - for (int i = 0; i < models.size(); ++i) { - columns_sizes.push_back(modelNameSize); - } - for (int i = 0; i < columns_sizes.size(); ++i) { - worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); - } - } - void BestResultsExcel::addConditionalFormat(std::string formula) - { - // Add conditional format for max/min values in scores/ranks sheets - lxw_format* custom_format = workbook_add_format(workbook); - format_set_bg_color(custom_format, 0xFFC7CE); - format_set_font_color(custom_format, 0x9C0006); - // Create a conditional format object. A static object would also work. - lxw_conditional_format* conditional_format = (lxw_conditional_format*)calloc(1, sizeof(lxw_conditional_format)); - conditional_format->type = LXW_CONDITIONAL_TYPE_FORMULA; - std::string col = getColumnName(models.size() + 1); - std::stringstream oss; - oss << "=C5=" << formula << "($C5:$" << col << "5)"; - auto formulaValue = oss.str(); - conditional_format->value_string = formulaValue.c_str(); - conditional_format->format = custom_format; - worksheet_conditional_format_range(worksheet, 4, 2, datasets.size() + 3, models.size() + 1, conditional_format); - } - void BestResultsExcel::build() - { - // Create Sheet with scores - header(false); - body(false); - // Add conditional format for max values - addConditionalFormat("max"); - footer(false); - if (friedman) { - // Create Sheet with ranks - worksheet = workbook_add_worksheet(workbook, "Ranks"); - formatColumns(); - header(true); - body(true); - addConditionalFormat("min"); - footer(true); - // Create Sheet with Friedman Test - doFriedman(); - } - } - std::string BestResultsExcel::getFileName() - { - return Paths::excel() + fileName; - } - void BestResultsExcel::header(bool ranks) - { - row = 0; - std::string message = ranks ? "Ranks for score " + score : "Best results for " + score; - worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), message.c_str(), styles["headerFirst"]); - // Body header - row = 3; - int col = 1; - writeString(row, 0, "Nº", "bodyHeader"); - writeString(row, 1, "Dataset", "bodyHeader"); - for (const auto& model : models) { - writeString(row, ++col, model.c_str(), "bodyHeader"); - } - } - void BestResultsExcel::body(bool ranks) - { - row = 4; - int i = 0; - json origin = table.begin().value(); - for (auto const& item : origin.items()) { - writeInt(row, 0, i++, "ints"); - writeString(row, 1, item.key().c_str(), "text"); - int col = 1; - for (const auto& model : models) { - double value = ranks ? ranksModels[item.key()][model] : table[model].at(item.key()).at(0).get(); - writeDouble(row, ++col, value, "result"); - } - ++row; - } - } - void BestResultsExcel::footer(bool ranks) - { - // Set Totals - writeString(row, 1, "Total", "bodyHeader"); - int col = 1; - for (const auto& model : models) { - std::stringstream oss; - auto colName = getColumnName(col + 1); - oss << "=SUM(" << colName << "5:" << colName << row << ")"; - worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]); - } - if (ranks) { - row++; - writeString(row, 1, "Average ranks", "bodyHeader"); - int col = 1; - for (const auto& model : models) { - auto colName = getColumnName(col + 1); - std::stringstream oss; - oss << "=SUM(" << colName << "5:" << colName << row - 1 << ")/" << datasets.size(); - worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]); - } - } - } - void BestResultsExcel::doFriedman() - { - worksheet = workbook_add_worksheet(workbook, "Friedman"); - std::vector columns_sizes = { 5, datasetNameSize }; - for (int i = 0; i < models.size(); ++i) { - columns_sizes.push_back(modelNameSize); - } - for (int i = 0; i < columns_sizes.size(); ++i) { - worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); - } - worksheet_merge_range(worksheet, 0, 0, 0, 1 + models.size(), "Friedman Test", styles["headerFirst"]); - row = 2; - Statistics stats(models, datasets, table, significance, false); - auto result = stats.friedmanTest(); - stats.postHocHolmTest(result); - auto friedmanResult = stats.getFriedmanResult(); - auto holmResult = stats.getHolmResult(); - worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between all the classifiers.'", styles["headerSmall"]); - row += 2; - writeString(row, 1, "Friedman Q", "bodyHeader"); - writeDouble(row, 2, friedmanResult.statistic, "bodyHeader"); - row++; - writeString(row, 1, "Critical χ2 value", "bodyHeader"); - writeDouble(row, 2, friedmanResult.criticalValue, "bodyHeader"); - row++; - writeString(row, 1, "p-value", "bodyHeader"); - writeDouble(row, 2, friedmanResult.pvalue, "bodyHeader"); - writeString(row, 3, friedmanResult.reject ? "<" : ">", "bodyHeader"); - writeDouble(row, 4, significance, "bodyHeader"); - writeString(row, 5, friedmanResult.reject ? "Reject H0" : "Accept H0", "bodyHeader"); - row += 3; - worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Holm Test", styles["headerFirst"]); - row += 2; - worksheet_merge_range(worksheet, row, 0, row, 1 + models.size(), "Null hypothesis: H0 'There is no significant differences between the control model and the other models.'", styles["headerSmall"]); - row += 2; - std::string controlModel = "Control Model: " + holmResult.model; - worksheet_merge_range(worksheet, row, 1, row, 7, controlModel.c_str(), styles["bodyHeader_odd"]); - row++; - writeString(row, 1, "Model", "bodyHeader"); - writeString(row, 2, "p-value", "bodyHeader"); - writeString(row, 3, "Rank", "bodyHeader"); - writeString(row, 4, "Win", "bodyHeader"); - writeString(row, 5, "Tie", "bodyHeader"); - writeString(row, 6, "Loss", "bodyHeader"); - writeString(row, 7, "Reject H0", "bodyHeader"); - row++; - bool first = true; - for (const auto& item : holmResult.holmLines) { - writeString(row, 1, item.model, "text"); - if (first) { - // Control model info - first = false; - writeString(row, 2, "", "text"); - writeDouble(row, 3, item.rank, "result"); - writeString(row, 4, "", "text"); - writeString(row, 5, "", "text"); - writeString(row, 6, "", "text"); - writeString(row, 7, "", "textCentered"); - } else { - // Rest of the models info - writeDouble(row, 2, item.pvalue, "result"); - writeDouble(row, 3, item.rank, "result"); - writeInt(row, 4, item.wtl.win, "ints"); - writeInt(row, 5, item.wtl.tie, "ints"); - writeInt(row, 6, item.wtl.loss, "ints"); - writeString(row, 7, item.reject ? "Yes" : "No", "textCentered"); - } - row++; - } - } -} \ No newline at end of file diff --git a/src/Platform/BestResultsExcel.h b/src/Platform/BestResultsExcel.h deleted file mode 100644 index 1ab75d0..0000000 --- a/src/Platform/BestResultsExcel.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef BESTRESULTS_EXCEL_H -#define BESTRESULTS_EXCEL_H -#include "ExcelFile.h" -#include -#include -#include - -using json = nlohmann::json; - -namespace platform { - - class BestResultsExcel : ExcelFile { - public: - BestResultsExcel(const std::string& score, const std::vector& datasets); - ~BestResultsExcel(); - void reportAll(const std::vector& models, const json& table, const std::map>& ranks, bool friedman, double significance); - void reportSingle(const std::string& model, const std::string& fileName); - std::string getFileName(); - private: - void build(); - void header(bool ranks); - void body(bool ranks); - void footer(bool ranks); - void formatColumns(); - void doFriedman(); - void addConditionalFormat(std::string formula); - const std::string fileName = "BestResults.xlsx"; - std::string score; - std::vector models; - std::vector datasets; - json table; - std::map> ranksModels; - bool friedman; - double significance; - int modelNameSize = 12; // Min size of the column - int datasetNameSize = 25; // Min size of the column - }; -} -#endif //BESTRESULTS_EXCEL_H \ No newline at end of file diff --git a/src/Platform/BestScore.h b/src/Platform/BestScore.h deleted file mode 100644 index f1b552f..0000000 --- a/src/Platform/BestScore.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef BESTSCORE_H -#define BESTSCORE_H -#include -#include -#include -#include "DotEnv.h" -namespace platform { - class BestScore { - public: - static std::pair getScore(const std::string& metric) - { - static std::map, std::pair> data = { - {{"discretiz", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}}, - {{"odte", "accuracy"}, {"STree_default (linear-ovo)", 22.109799}}, - }; - auto env = platform::DotEnv(); - std::string experiment = env.get("experiment"); - try { - return data[{experiment, metric}]; - } - catch (...) { - return { "", 0.0 }; - } - } - }; -} - -#endif \ No newline at end of file diff --git a/src/Platform/CLocale.h b/src/Platform/CLocale.h deleted file mode 100644 index 4403562..0000000 --- a/src/Platform/CLocale.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef LOCALE_H -#define LOCALE_H -#include -#include -#include -namespace platform { - struct separation : std::numpunct { - char do_decimal_point() const { return ','; } - char do_thousands_sep() const { return '.'; } - std::string do_grouping() const { return "\03"; } - }; - class ConfigLocale { - public: - explicit ConfigLocale() - { - std::locale mylocale(std::cout.getloc(), new separation); - std::locale::global(mylocale); - std::cout.imbue(mylocale); - } - }; -} -#endif \ No newline at end of file diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt deleted file mode 100644 index 52336f1..0000000 --- a/src/Platform/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) -add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc HyperParameters.cc Folding.cc Datasets.cc Dataset.cc) -add_executable(b_list b_list.cc Datasets.cc Dataset.cc) -add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc) -add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) - -include_directories( - ${BayesNet_SOURCE_DIR}/src/BayesNet - ${BayesNet_SOURCE_DIR}/src/Platform - ${BayesNet_SOURCE_DIR}/src/PyClassifiers - ${BayesNet_SOURCE_DIR}/lib/Files - ${BayesNet_SOURCE_DIR}/lib/mdlp - ${BayesNet_SOURCE_DIR}/lib/argparse/include - ${BayesNet_SOURCE_DIR}/lib/json/include - ${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include - ${Python3_INCLUDE_DIRS} - ${MPI_CXX_INCLUDE_DIRS} - ${CMAKE_BINARY_DIR}/configured_files/include -) - -target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp) -target_link_libraries(b_grid BayesNet PyWrap ${MPI_CXX_LIBRARIES}) -target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}") -target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap) -target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) \ No newline at end of file diff --git a/src/Platform/Colors.h b/src/Platform/Colors.h deleted file mode 100644 index c6c8ef3..0000000 --- a/src/Platform/Colors.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef COLORS_H -#define COLORS_H -class Colors { -public: - static std::string MAGENTA() { return "\033[1;35m"; } - static std::string BLUE() { return "\033[1;34m"; } - static std::string CYAN() { return "\033[1;36m"; } - static std::string GREEN() { return "\033[1;32m"; } - static std::string YELLOW() { return "\033[1;33m"; } - static std::string RED() { return "\033[1;31m"; } - static std::string WHITE() { return "\033[1;37m"; } - static std::string IBLUE() { return "\033[0;94m"; } - static std::string RESET() { return "\033[0m"; } -}; -#endif // COLORS_H \ No newline at end of file diff --git a/src/Platform/CommandParser.cc b/src/Platform/CommandParser.cc deleted file mode 100644 index 5c58f3e..0000000 --- a/src/Platform/CommandParser.cc +++ /dev/null @@ -1,87 +0,0 @@ -#include "CommandParser.h" -#include -#include -#include -#include "Colors.h" -#include "Utils.h" - -namespace platform { - void CommandParser::messageError(const std::string& message) - { - std::cout << Colors::RED() << message << Colors::RESET() << std::endl; - } - std::pair CommandParser::parse(const std::string& color, const std::vector>& options, const char defaultCommand, const int maxIndex) - { - bool finished = false; - while (!finished) { - std::stringstream oss; - std::string line; - oss << color << "Choose option ("; - bool first = true; - for (auto& option : options) { - if (first) { - first = false; - } else { - oss << ", "; - } - oss << std::get(option) << "=" << std::get(option); - } - oss << "): "; - std::cout << oss.str(); - getline(std::cin, line); - std::cout << Colors::RESET(); - line = trim(line); - if (line.size() == 0) - continue; - if (all_of(line.begin(), line.end(), ::isdigit)) { - command = defaultCommand; - index = stoi(line); - if (index > maxIndex || index < 0) { - messageError("Index out of range"); - continue; - } - finished = true; - break; - } - bool found = false; - for (auto& option : options) { - if (line[0] == std::get(option)) { - found = true; - // it's a match - line.erase(line.begin()); - line = trim(line); - if (std::get(option)) { - // The option requires a value - if (line.size() == 0) { - messageError("Option " + std::get(option) + " requires a value"); - break; - } - try { - index = stoi(line); - if (index > maxIndex || index < 0) { - messageError("Index out of range"); - break; - } - } - catch (const std::invalid_argument& ia) { - messageError("Invalid value: " + line); - break; - } - } else { - if (line.size() > 0) { - messageError("option " + std::get(option) + " doesn't accept values"); - break; - } - } - command = std::get(option); - finished = true; - break; - } - } - if (!found) { - messageError("I don't know " + line); - } - } - return { command, index }; - } -} /* namespace platform */ \ No newline at end of file diff --git a/src/Platform/CommandParser.h b/src/Platform/CommandParser.h deleted file mode 100644 index c34554b..0000000 --- a/src/Platform/CommandParser.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef COMMAND_PARSER_H -#define COMMAND_PARSER_H -#include -#include -#include - -namespace platform { - class CommandParser { - public: - CommandParser() = default; - std::pair parse(const std::string& color, const std::vector>& options, const char defaultCommand, const int maxIndex); - char getCommand() const { return command; }; - int getIndex() const { return index; }; - private: - void messageError(const std::string& message); - char command; - int index; - }; -} /* namespace platform */ -#endif /* COMMAND_PARSER_H */ \ No newline at end of file diff --git a/src/Platform/Dataset.cc b/src/Platform/Dataset.cc deleted file mode 100644 index 0322249..0000000 --- a/src/Platform/Dataset.cc +++ /dev/null @@ -1,215 +0,0 @@ -#include "Dataset.h" -#include "ArffFiles.h" -#include -namespace platform { - Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType) - { - } - std::string Dataset::getName() const - { - return name; - } - std::string Dataset::getClassName() const - { - return className; - } - std::vector Dataset::getFeatures() const - { - if (loaded) { - return features; - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - int Dataset::getNFeatures() const - { - if (loaded) { - return n_features; - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - int Dataset::getNSamples() const - { - if (loaded) { - return n_samples; - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - std::map> Dataset::getStates() const - { - if (loaded) { - return states; - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - pair>&, std::vector&> Dataset::getVectors() - { - if (loaded) { - return { Xv, yv }; - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - pair>&, std::vector&> Dataset::getVectorsDiscretized() - { - if (loaded) { - return { Xd, yv }; - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - pair Dataset::getTensors() - { - if (loaded) { - buildTensors(); - return { X, y }; - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - void Dataset::load_csv() - { - ifstream file(path + "/" + name + ".csv"); - if (file.is_open()) { - std::string line; - getline(file, line); - std::vector tokens = split(line, ','); - features = std::vector(tokens.begin(), tokens.end() - 1); - if (className == "-1") { - className = tokens.back(); - } - for (auto i = 0; i < features.size(); ++i) { - Xv.push_back(std::vector()); - } - while (getline(file, line)) { - tokens = split(line, ','); - for (auto i = 0; i < features.size(); ++i) { - Xv[i].push_back(stof(tokens[i])); - } - yv.push_back(stoi(tokens.back())); - } - file.close(); - } else { - throw std::invalid_argument("Unable to open dataset file."); - } - } - void Dataset::computeStates() - { - for (int i = 0; i < features.size(); ++i) { - states[features[i]] = std::vector(*max_element(Xd[i].begin(), Xd[i].end()) + 1); - auto item = states.at(features[i]); - iota(begin(item), end(item), 0); - } - states[className] = std::vector(*max_element(yv.begin(), yv.end()) + 1); - iota(begin(states.at(className)), end(states.at(className)), 0); - } - void Dataset::load_arff() - { - auto arff = ArffFiles(); - arff.load(path + "/" + name + ".arff", className); - // Get Dataset X, y - Xv = arff.getX(); - yv = arff.getY(); - // Get className & Features - className = arff.getClassName(); - auto attributes = arff.getAttributes(); - transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; }); - } - std::vector tokenize(std::string line) - { - std::vector tokens; - for (auto i = 0; i < line.size(); ++i) { - if (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') { - std::string token = line.substr(0, i); - tokens.push_back(token); - line.erase(line.begin(), line.begin() + i + 1); - i = 0; - while (line[i] == ' ' || line[i] == '\t' || line[i] == '\n') - line.erase(line.begin(), line.begin() + i + 1); - } - } - if (line.size() > 0) { - tokens.push_back(line); - } - return tokens; - } - void Dataset::load_rdata() - { - ifstream file(path + "/" + name + "_R.dat"); - if (file.is_open()) { - std::string line; - getline(file, line); - line = ArffFiles::trim(line); - std::vector tokens = tokenize(line); - transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); }); - if (className == "-1") { - className = ArffFiles::trim(tokens.back()); - } - for (auto i = 0; i < features.size(); ++i) { - Xv.push_back(std::vector()); - } - while (getline(file, line)) { - tokens = tokenize(line); - // We have to skip the first token, which is the instance number. - for (auto i = 1; i < features.size() + 1; ++i) { - const float value = stof(tokens[i]); - Xv[i - 1].push_back(value); - } - yv.push_back(stoi(tokens.back())); - } - file.close(); - } else { - throw std::invalid_argument("Unable to open dataset file."); - } - } - void Dataset::load() - { - if (loaded) { - return; - } - if (fileType == CSV) { - load_csv(); - } else if (fileType == ARFF) { - load_arff(); - } else if (fileType == RDATA) { - load_rdata(); - } - if (discretize) { - Xd = discretizeDataset(Xv, yv); - computeStates(); - } - n_samples = Xv[0].size(); - n_features = Xv.size(); - loaded = true; - } - void Dataset::buildTensors() - { - if (discretize) { - X = torch::zeros({ static_cast(n_features), static_cast(n_samples) }, torch::kInt32); - } else { - X = torch::zeros({ static_cast(n_features), static_cast(n_samples) }, torch::kFloat32); - } - for (int i = 0; i < features.size(); ++i) { - if (discretize) { - X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32)); - } else { - X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32)); - } - } - y = torch::tensor(yv, torch::kInt32); - } - std::vector Dataset::discretizeDataset(std::vector& X, mdlp::labels_t& y) - { - std::vector Xd; - auto fimdlp = mdlp::CPPFImdlp(); - for (int i = 0; i < X.size(); i++) { - fimdlp.fit(X[i], y); - mdlp::labels_t& xd = fimdlp.transform(X[i]); - Xd.push_back(xd); - } - return Xd; - } -} \ No newline at end of file diff --git a/src/Platform/Dataset.h b/src/Platform/Dataset.h deleted file mode 100644 index dd75351..0000000 --- a/src/Platform/Dataset.h +++ /dev/null @@ -1,78 +0,0 @@ -#ifndef DATASET_H -#define DATASET_H -#include -#include -#include -#include -#include "CPPFImdlp.h" -#include "Utils.h" -namespace platform { - enum fileType_t { CSV, ARFF, RDATA }; - class SourceData { - public: - SourceData(std::string source) - { - if (source == "Surcov") { - path = "datasets/"; - fileType = CSV; - } else if (source == "Arff") { - path = "datasets/"; - fileType = ARFF; - } else if (source == "Tanveer") { - path = "data/"; - fileType = RDATA; - } else { - throw std::invalid_argument("Unknown source."); - } - } - std::string getPath() - { - return path; - } - fileType_t getFileType() - { - return fileType; - } - private: - std::string path; - fileType_t fileType; - }; - class Dataset { - private: - std::string path; - std::string name; - fileType_t fileType; - std::string className; - int n_samples{ 0 }, n_features{ 0 }; - std::vector features; - std::map> states; - bool loaded; - bool discretize; - torch::Tensor X, y; - std::vector> Xv; - std::vector> Xd; - std::vector yv; - void buildTensors(); - void load_csv(); - void load_arff(); - void load_rdata(); - void computeStates(); - std::vector discretizeDataset(std::vector& X, mdlp::labels_t& y); - public: - Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; - explicit Dataset(const Dataset&); - std::string getName() const; - std::string getClassName() const; - std::vector getFeatures() const; - std::map> getStates() const; - std::pair>&, std::vector&> getVectors(); - std::pair>&, std::vector&> getVectorsDiscretized(); - std::pair getTensors(); - int getNFeatures() const; - int getNSamples() const; - void load(); - const bool inline isLoaded() const { return loaded; }; - }; -}; - -#endif \ No newline at end of file diff --git a/src/Platform/Datasets.cc b/src/Platform/Datasets.cc deleted file mode 100644 index af5457d..0000000 --- a/src/Platform/Datasets.cc +++ /dev/null @@ -1,129 +0,0 @@ -#include "Datasets.h" -#include -namespace platform { - void Datasets::load() - { - auto sd = SourceData(sfileType); - fileType = sd.getFileType(); - path = sd.getPath(); - ifstream catalog(path + "all.txt"); - if (catalog.is_open()) { - std::string line; - while (getline(catalog, line)) { - if (line.empty() || line[0] == '#') { - continue; - } - std::vector tokens = split(line, ','); - std::string name = tokens[0]; - std::string className; - if (tokens.size() == 1) { - className = "-1"; - } else { - className = tokens[1]; - } - datasets[name] = make_unique(path, name, className, discretize, fileType); - } - catalog.close(); - } else { - throw std::invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]"); - } - } - std::vector Datasets::getNames() - { - std::vector result; - transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; }); - return result; - } - std::vector Datasets::getFeatures(const std::string& name) const - { - if (datasets.at(name)->isLoaded()) { - return datasets.at(name)->getFeatures(); - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - map> Datasets::getStates(const std::string& name) const - { - if (datasets.at(name)->isLoaded()) { - return datasets.at(name)->getStates(); - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - void Datasets::loadDataset(const std::string& name) const - { - if (datasets.at(name)->isLoaded()) { - return; - } else { - datasets.at(name)->load(); - } - } - std::string Datasets::getClassName(const std::string& name) const - { - if (datasets.at(name)->isLoaded()) { - return datasets.at(name)->getClassName(); - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - int Datasets::getNSamples(const std::string& name) const - { - if (datasets.at(name)->isLoaded()) { - return datasets.at(name)->getNSamples(); - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - int Datasets::getNClasses(const std::string& name) - { - if (datasets.at(name)->isLoaded()) { - auto className = datasets.at(name)->getClassName(); - if (discretize) { - auto states = getStates(name); - return states.at(className).size(); - } - auto [Xv, yv] = getVectors(name); - return *std::max_element(yv.begin(), yv.end()) + 1; - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - std::vector Datasets::getClassesCounts(const std::string& name) const - { - if (datasets.at(name)->isLoaded()) { - auto [Xv, yv] = datasets.at(name)->getVectors(); - std::vector counts(*std::max_element(yv.begin(), yv.end()) + 1); - for (auto y : yv) { - counts[y]++; - } - return counts; - } else { - throw std::invalid_argument("Dataset not loaded."); - } - } - pair>&, std::vector&> Datasets::getVectors(const std::string& name) - { - if (!datasets[name]->isLoaded()) { - datasets[name]->load(); - } - return datasets[name]->getVectors(); - } - pair>&, std::vector&> Datasets::getVectorsDiscretized(const std::string& name) - { - if (!datasets[name]->isLoaded()) { - datasets[name]->load(); - } - return datasets[name]->getVectorsDiscretized(); - } - pair Datasets::getTensors(const std::string& name) - { - if (!datasets[name]->isLoaded()) { - datasets[name]->load(); - } - return datasets[name]->getTensors(); - } - bool Datasets::isDataset(const std::string& name) const - { - return datasets.find(name) != datasets.end(); - } -} \ No newline at end of file diff --git a/src/Platform/Datasets.h b/src/Platform/Datasets.h deleted file mode 100644 index 4ead616..0000000 --- a/src/Platform/Datasets.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef DATASETS_H -#define DATASETS_H -#include "Dataset.h" -namespace platform { - class Datasets { - private: - std::string path; - fileType_t fileType; - std::string sfileType; - std::map> datasets; - bool discretize; - void load(); // Loads the list of datasets - public: - explicit Datasets(bool discretize, std::string sfileType) : discretize(discretize), sfileType(sfileType) { load(); }; - std::vector getNames(); - std::vector getFeatures(const std::string& name) const; - int getNSamples(const std::string& name) const; - std::string getClassName(const std::string& name) const; - int getNClasses(const std::string& name); - std::vector getClassesCounts(const std::string& name) const; - std::map> getStates(const std::string& name) const; - std::pair>&, std::vector&> getVectors(const std::string& name); - std::pair>&, std::vector&> getVectorsDiscretized(const std::string& name); - std::pair getTensors(const std::string& name); - bool isDataset(const std::string& name) const; - void loadDataset(const std::string& name) const; - }; -}; - -#endif \ No newline at end of file diff --git a/src/Platform/DotEnv.h b/src/Platform/DotEnv.h deleted file mode 100644 index 8b7a0cf..0000000 --- a/src/Platform/DotEnv.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef DOTENV_H -#define DOTENV_H -#include -#include -#include -#include -#include -#include -#include "Utils.h" - -//#include "Dataset.h" -namespace platform { - class DotEnv { - private: - std::map env; - public: - DotEnv() - { - std::ifstream file(".env"); - if (!file.is_open()) { - std::cerr << "File .env not found" << std::endl; - exit(1); - } - std::string line; - while (std::getline(file, line)) { - line = trim(line); - if (line.empty() || line[0] == '#') { - continue; - } - std::istringstream iss(line); - std::string key, value; - if (std::getline(iss, key, '=') && std::getline(iss, value)) { - env[key] = value; - } - } - } - std::string get(const std::string& key) - { - return env.at(key); - } - std::vector getSeeds() - { - auto seeds = std::vector(); - auto seeds_str = env["seeds"]; - seeds_str = trim(seeds_str); - seeds_str = seeds_str.substr(1, seeds_str.size() - 2); - auto seeds_str_split = split(seeds_str, ','); - transform(seeds_str_split.begin(), seeds_str_split.end(), back_inserter(seeds), [](const std::string& str) { - return stoi(str); - }); - return seeds; - } - }; -} -#endif \ No newline at end of file diff --git a/src/Platform/ExcelFile.cc b/src/Platform/ExcelFile.cc deleted file mode 100644 index 3149480..0000000 --- a/src/Platform/ExcelFile.cc +++ /dev/null @@ -1,168 +0,0 @@ -#include "ExcelFile.h" - -namespace platform { - ExcelFile::ExcelFile() - { - setDefault(); - } - ExcelFile::ExcelFile(lxw_workbook* workbook) : workbook(workbook) - { - setDefault(); - } - ExcelFile::ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet) : workbook(workbook), worksheet(worksheet) - { - setDefault(); - } - void ExcelFile::setDefault() - { - normalSize = 14; //font size for report body - row = 0; - colorTitle = 0xB1A0C7; - colorOdd = 0xDCE6F1; - colorEven = 0xFDE9D9; - } - - lxw_workbook* ExcelFile::getWorkbook() - { - return workbook; - } - void ExcelFile::setProperties(std::string title) - { - char line[title.size() + 1]; - strcpy(line, title.c_str()); - lxw_doc_properties properties = { - .title = line, - .subject = (char*)"Machine learning results", - .author = (char*)"Ricardo Montañana Gómez", - .manager = (char*)"Dr. J. A. Gámez, Dr. J. M. Puerta", - .company = (char*)"UCLM", - .comments = (char*)"Created with libxlsxwriter and c++", - }; - workbook_set_properties(workbook, &properties); - } - lxw_format* ExcelFile::efectiveStyle(const std::string& style) - { - lxw_format* efectiveStyle = NULL; - if (style != "") { - std::string suffix = row % 2 ? "_odd" : "_even"; - try { - efectiveStyle = styles.at(style + suffix); - } - catch (const std::out_of_range& oor) { - try { - efectiveStyle = styles.at(style); - } - catch (const std::out_of_range& oor) { - throw std::invalid_argument("Style " + style + " not found"); - } - } - } - return efectiveStyle; - } - void ExcelFile::writeString(int row, int col, const std::string& text, const std::string& style) - { - worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style)); - } - void ExcelFile::writeInt(int row, int col, const int number, const std::string& style) - { - worksheet_write_number(worksheet, row, col, number, efectiveStyle(style)); - } - void ExcelFile::writeDouble(int row, int col, const double number, const std::string& style) - { - worksheet_write_number(worksheet, row, col, number, efectiveStyle(style)); - } - void ExcelFile::addColor(lxw_format* style, bool odd) - { - uint32_t efectiveColor = odd ? colorEven : colorOdd; - format_set_bg_color(style, lxw_color_t(efectiveColor)); - } - void ExcelFile::createStyle(const std::string& name, lxw_format* style, bool odd) - { - addColor(style, odd); - if (name == "textCentered") { - format_set_align(style, LXW_ALIGN_CENTER); - format_set_font_size(style, normalSize); - format_set_border(style, LXW_BORDER_THIN); - } else if (name == "text") { - format_set_font_size(style, normalSize); - format_set_border(style, LXW_BORDER_THIN); - } else if (name == "bodyHeader") { - format_set_bold(style); - format_set_font_size(style, normalSize); - format_set_align(style, LXW_ALIGN_CENTER); - format_set_align(style, LXW_ALIGN_VERTICAL_CENTER); - format_set_border(style, LXW_BORDER_THIN); - format_set_bg_color(style, lxw_color_t(colorTitle)); - } else if (name == "result") { - format_set_font_size(style, normalSize); - format_set_border(style, LXW_BORDER_THIN); - format_set_num_format(style, "0.0000000"); - } else if (name == "time") { - format_set_font_size(style, normalSize); - format_set_border(style, LXW_BORDER_THIN); - format_set_num_format(style, "#,##0.000000"); - } else if (name == "ints") { - format_set_font_size(style, normalSize); - format_set_num_format(style, "###,##0"); - format_set_border(style, LXW_BORDER_THIN); - } else if (name == "floats") { - format_set_border(style, LXW_BORDER_THIN); - format_set_font_size(style, normalSize); - format_set_num_format(style, "#,##0.00"); - } - } - - void ExcelFile::createFormats() - { - auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" }; - lxw_format* style; - for (std::string name : styleNames) { - lxw_format* style = workbook_add_format(workbook); - style = workbook_add_format(workbook); - createStyle(name, style, true); - styles[name + "_odd"] = style; - style = workbook_add_format(workbook); - createStyle(name, style, false); - styles[name + "_even"] = style; - } - - // Header 1st line - lxw_format* headerFirst = workbook_add_format(workbook); - format_set_bold(headerFirst); - format_set_font_size(headerFirst, 18); - format_set_align(headerFirst, LXW_ALIGN_CENTER); - format_set_align(headerFirst, LXW_ALIGN_VERTICAL_CENTER); - format_set_border(headerFirst, LXW_BORDER_THIN); - format_set_bg_color(headerFirst, lxw_color_t(colorTitle)); - - // Header rest - lxw_format* headerRest = workbook_add_format(workbook); - format_set_bold(headerRest); - format_set_align(headerRest, LXW_ALIGN_CENTER); - format_set_font_size(headerRest, 16); - format_set_align(headerRest, LXW_ALIGN_VERTICAL_CENTER); - format_set_border(headerRest, LXW_BORDER_THIN); - format_set_bg_color(headerRest, lxw_color_t(colorOdd)); - - // Header small - lxw_format* headerSmall = workbook_add_format(workbook); - format_set_bold(headerSmall); - format_set_align(headerSmall, LXW_ALIGN_LEFT); - format_set_font_size(headerSmall, 12); - format_set_border(headerSmall, LXW_BORDER_THIN); - format_set_align(headerSmall, LXW_ALIGN_VERTICAL_CENTER); - format_set_bg_color(headerSmall, lxw_color_t(colorOdd)); - - // Summary style - lxw_format* summaryStyle = workbook_add_format(workbook); - format_set_bold(summaryStyle); - format_set_font_size(summaryStyle, 16); - format_set_border(summaryStyle, LXW_BORDER_THIN); - format_set_align(summaryStyle, LXW_ALIGN_VERTICAL_CENTER); - - styles["headerFirst"] = headerFirst; - styles["headerRest"] = headerRest; - styles["headerSmall"] = headerSmall; - styles["summaryStyle"] = summaryStyle; - } -} \ No newline at end of file diff --git a/src/Platform/ExcelFile.h b/src/Platform/ExcelFile.h deleted file mode 100644 index efc115f..0000000 --- a/src/Platform/ExcelFile.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef EXCELFILE_H -#define EXCELFILE_H -#include -#include -#include -#include "xlsxwriter.h" - -namespace platform { - struct separated : std::numpunct { - char do_decimal_point() const { return ','; } - - char do_thousands_sep() const { return '.'; } - - std::string do_grouping() const { return "\03"; } - }; - class ExcelFile { - public: - ExcelFile(); - ExcelFile(lxw_workbook* workbook); - ExcelFile(lxw_workbook* workbook, lxw_worksheet* worksheet); - lxw_workbook* getWorkbook(); - protected: - void setProperties(std::string title); - void writeString(int row, int col, const std::string& text, const std::string& style = ""); - void writeInt(int row, int col, const int number, const std::string& style = ""); - void writeDouble(int row, int col, const double number, const std::string& style = ""); - void createFormats(); - void createStyle(const std::string& name, lxw_format* style, bool odd); - void addColor(lxw_format* style, bool odd); - lxw_format* efectiveStyle(const std::string& name); - lxw_workbook* workbook; - lxw_worksheet* worksheet; - std::map styles; - int row; - int normalSize; //font size for report body - uint32_t colorTitle; - uint32_t colorOdd; - uint32_t colorEven; - private: - void setDefault(); - }; -} -#endif // !EXCELFILE_H \ No newline at end of file diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc deleted file mode 100644 index 1574f73..0000000 --- a/src/Platform/Experiment.cc +++ /dev/null @@ -1,226 +0,0 @@ -#include -#include "Experiment.h" -#include "Datasets.h" -#include "Models.h" -#include "ReportConsole.h" -#include "Paths.h" -namespace platform { - using json = nlohmann::json; - std::string get_date() - { - time_t rawtime; - tm* timeinfo; - time(&rawtime); - timeinfo = std::localtime(&rawtime); - std::ostringstream oss; - oss << std::put_time(timeinfo, "%Y-%m-%d"); - return oss.str(); - } - std::string get_time() - { - time_t rawtime; - tm* timeinfo; - time(&rawtime); - timeinfo = std::localtime(&rawtime); - std::ostringstream oss; - oss << std::put_time(timeinfo, "%H:%M:%S"); - return oss.str(); - } - std::string Experiment::get_file_name() - { - std::string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json"; - return result; - } - - json Experiment::build_json() - { - json result; - result["title"] = title; - result["date"] = get_date(); - result["time"] = get_time(); - result["model"] = model; - result["version"] = model_version; - result["platform"] = platform; - result["score_name"] = score_name; - result["language"] = language; - result["language_version"] = language_version; - result["discretized"] = discretized; - result["stratified"] = stratified; - result["folds"] = nfolds; - result["seeds"] = randomSeeds; - result["duration"] = duration; - result["results"] = json::array(); - for (const auto& r : results) { - json j; - j["dataset"] = r.getDataset(); - j["hyperparameters"] = r.getHyperparameters(); - j["samples"] = r.getSamples(); - j["features"] = r.getFeatures(); - j["classes"] = r.getClasses(); - j["score_train"] = r.getScoreTrain(); - j["score_test"] = r.getScoreTest(); - j["score"] = r.getScoreTest(); - j["score_std"] = r.getScoreTestStd(); - j["score_train_std"] = r.getScoreTrainStd(); - j["score_test_std"] = r.getScoreTestStd(); - j["train_time"] = r.getTrainTime(); - j["train_time_std"] = r.getTrainTimeStd(); - j["test_time"] = r.getTestTime(); - j["test_time_std"] = r.getTestTimeStd(); - j["time"] = r.getTestTime() + r.getTrainTime(); - j["time_std"] = r.getTestTimeStd() + r.getTrainTimeStd(); - j["scores_train"] = r.getScoresTrain(); - j["scores_test"] = r.getScoresTest(); - j["times_train"] = r.getTimesTrain(); - j["times_test"] = r.getTimesTest(); - j["nodes"] = r.getNodes(); - j["leaves"] = r.getLeaves(); - j["depth"] = r.getDepth(); - result["results"].push_back(j); - } - return result; - } - void Experiment::save(const std::string& path) - { - json data = build_json(); - ofstream file(path + "/" + get_file_name()); - file << data; - file.close(); - } - - void Experiment::report() - { - json data = build_json(); - ReportConsole report(data); - report.show(); - } - - void Experiment::show() - { - json data = build_json(); - std::cout << data.dump(4) << std::endl; - } - - void Experiment::go(std::vector filesToProcess, bool quiet) - { - std::cout << "*** Starting experiment: " << title << " ***" << std::endl; - for (auto fileName : filesToProcess) { - std::cout << "- " << setw(20) << left << fileName << " " << right << flush; - cross_validation(fileName, quiet); - std::cout << std::endl; - } - } - - std::string getColor(bayesnet::status_t status) - { - switch (status) { - case bayesnet::NORMAL: - return Colors::GREEN(); - case bayesnet::WARNING: - return Colors::YELLOW(); - case bayesnet::ERROR: - return Colors::RED(); - default: - return Colors::RESET(); - } - } - - void showProgress(int fold, const std::string& color, const std::string& phase) - { - std::string prefix = phase == "a" ? "" : "\b\b\b\b"; - std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush; - - } - void Experiment::cross_validation(const std::string& fileName, bool quiet) - { - auto datasets = Datasets(discretized, Paths::datasets()); - // Get dataset - auto [X, y] = datasets.getTensors(fileName); - auto states = datasets.getStates(fileName); - auto features = datasets.getFeatures(fileName); - auto samples = datasets.getNSamples(fileName); - auto className = datasets.getClassName(fileName); - if (!quiet) { - std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush; - } - // Prepare Result - auto result = Result(); - auto [values, counts] = at::_unique(y); - result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0)); - result.setHyperparameters(hyperparameters.get(fileName)); - // Initialize results std::vectors - int nResults = nfolds * static_cast(randomSeeds.size()); - auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64); - auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64); - auto train_time = torch::zeros({ nResults }, torch::kFloat64); - auto test_time = torch::zeros({ nResults }, torch::kFloat64); - auto nodes = torch::zeros({ nResults }, torch::kFloat64); - auto edges = torch::zeros({ nResults }, torch::kFloat64); - auto num_states = torch::zeros({ nResults }, torch::kFloat64); - Timer train_timer, test_timer; - int item = 0; - for (auto seed : randomSeeds) { - if (!quiet) - std::cout << "(" << seed << ") doing Fold: " << flush; - Fold* fold; - if (stratified) - fold = new StratifiedKFold(nfolds, y, seed); - else - fold = new KFold(nfolds, y.size(0), seed); - for (int nfold = 0; nfold < nfolds; nfold++) { - auto clf = Models::instance()->create(model); - setModelVersion(clf->getVersion()); - auto valid = clf->getValidHyperparameters(); - hyperparameters.check(valid, fileName); - clf->setHyperparameters(hyperparameters.get(fileName)); - // Split train - test dataset - train_timer.start(); - auto [train, test] = fold->getFold(nfold); - auto train_t = torch::tensor(train); - auto test_t = torch::tensor(test); - auto X_train = X.index({ "...", train_t }); - auto y_train = y.index({ train_t }); - auto X_test = X.index({ "...", test_t }); - auto y_test = y.index({ test_t }); - if (!quiet) - showProgress(nfold + 1, getColor(clf->getStatus()), "a"); - // Train model - clf->fit(X_train, y_train, features, className, states); - if (!quiet) - showProgress(nfold + 1, getColor(clf->getStatus()), "b"); - nodes[item] = clf->getNumberOfNodes(); - edges[item] = clf->getNumberOfEdges(); - num_states[item] = clf->getNumberOfStates(); - train_time[item] = train_timer.getDuration(); - // Score train - auto accuracy_train_value = clf->score(X_train, y_train); - // Test model - if (!quiet) - showProgress(nfold + 1, getColor(clf->getStatus()), "c"); - test_timer.start(); - auto accuracy_test_value = clf->score(X_test, y_test); - test_time[item] = test_timer.getDuration(); - accuracy_train[item] = accuracy_train_value; - accuracy_test[item] = accuracy_test_value; - if (!quiet) - std::cout << "\b\b\b, " << flush; - // Store results and times in std::vector - result.addScoreTrain(accuracy_train_value); - result.addScoreTest(accuracy_test_value); - result.addTimeTrain(train_time[item].item()); - result.addTimeTest(test_time[item].item()); - item++; - } - if (!quiet) - std::cout << "end. " << flush; - delete fold; - } - result.setScoreTest(torch::mean(accuracy_test).item()).setScoreTrain(torch::mean(accuracy_train).item()); - result.setScoreTestStd(torch::std(accuracy_test).item()).setScoreTrainStd(torch::std(accuracy_train).item()); - result.setTrainTime(torch::mean(train_time).item()).setTestTime(torch::mean(test_time).item()); - result.setTestTimeStd(torch::std(test_time).item()).setTrainTimeStd(torch::std(train_time).item()); - result.setNodes(torch::mean(nodes).item()).setLeaves(torch::mean(edges).item()).setDepth(torch::mean(num_states).item()); - result.setDataset(fileName); - addResult(result); - } -} \ No newline at end of file diff --git a/src/Platform/Experiment.h b/src/Platform/Experiment.h deleted file mode 100644 index b7aeda6..0000000 --- a/src/Platform/Experiment.h +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef EXPERIMENT_H -#define EXPERIMENT_H -#include -#include -#include -#include "Folding.h" -#include "BaseClassifier.h" -#include "HyperParameters.h" -#include "TAN.h" -#include "KDB.h" -#include "AODE.h" -#include "Timer.h" - -namespace platform { - using json = nlohmann::json; - class Result { - private: - std::string dataset, model_version; - json hyperparameters; - int samples{ 0 }, features{ 0 }, classes{ 0 }; - double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 }; - float nodes{ 0 }, leaves{ 0 }, depth{ 0 }; - std::vector scores_train, scores_test, times_train, times_test; - public: - Result() = default; - Result& setDataset(const std::string& dataset) { this->dataset = dataset; return *this; } - Result& setHyperparameters(const json& hyperparameters) { this->hyperparameters = hyperparameters; return *this; } - Result& setSamples(int samples) { this->samples = samples; return *this; } - Result& setFeatures(int features) { this->features = features; return *this; } - Result& setClasses(int classes) { this->classes = classes; return *this; } - Result& setScoreTrain(double score) { this->score_train = score; return *this; } - Result& setScoreTest(double score) { this->score_test = score; return *this; } - Result& setScoreTrainStd(double score_std) { this->score_train_std = score_std; return *this; } - Result& setScoreTestStd(double score_std) { this->score_test_std = score_std; return *this; } - Result& setTrainTime(double train_time) { this->train_time = train_time; return *this; } - Result& setTrainTimeStd(double train_time_std) { this->train_time_std = train_time_std; return *this; } - Result& setTestTime(double test_time) { this->test_time = test_time; return *this; } - Result& setTestTimeStd(double test_time_std) { this->test_time_std = test_time_std; return *this; } - Result& setNodes(float nodes) { this->nodes = nodes; return *this; } - Result& setLeaves(float leaves) { this->leaves = leaves; return *this; } - Result& setDepth(float depth) { this->depth = depth; return *this; } - Result& addScoreTrain(double score) { scores_train.push_back(score); return *this; } - Result& addScoreTest(double score) { scores_test.push_back(score); return *this; } - Result& addTimeTrain(double time) { times_train.push_back(time); return *this; } - Result& addTimeTest(double time) { times_test.push_back(time); return *this; } - const float get_score_train() const { return score_train; } - float get_score_test() { return score_test; } - const std::string& getDataset() const { return dataset; } - const json& getHyperparameters() const { return hyperparameters; } - const int getSamples() const { return samples; } - const int getFeatures() const { return features; } - const int getClasses() const { return classes; } - const double getScoreTrain() const { return score_train; } - const double getScoreTest() const { return score_test; } - const double getScoreTrainStd() const { return score_train_std; } - const double getScoreTestStd() const { return score_test_std; } - const double getTrainTime() const { return train_time; } - const double getTrainTimeStd() const { return train_time_std; } - const double getTestTime() const { return test_time; } - const double getTestTimeStd() const { return test_time_std; } - const float getNodes() const { return nodes; } - const float getLeaves() const { return leaves; } - const float getDepth() const { return depth; } - const std::vector& getScoresTrain() const { return scores_train; } - const std::vector& getScoresTest() const { return scores_test; } - const std::vector& getTimesTrain() const { return times_train; } - const std::vector& getTimesTest() const { return times_test; } - }; - class Experiment { - public: - Experiment() = default; - Experiment& setTitle(const std::string& title) { this->title = title; return *this; } - Experiment& setModel(const std::string& model) { this->model = model; return *this; } - Experiment& setPlatform(const std::string& platform) { this->platform = platform; return *this; } - Experiment& setScoreName(const std::string& score_name) { this->score_name = score_name; return *this; } - Experiment& setModelVersion(const std::string& model_version) { this->model_version = model_version; return *this; } - Experiment& setLanguage(const std::string& language) { this->language = language; return *this; } - Experiment& setLanguageVersion(const std::string& language_version) { this->language_version = language_version; return *this; } - Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; } - Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; } - Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; } - Experiment& addResult(Result result) { results.push_back(result); return *this; } - Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; } - Experiment& setDuration(float duration) { this->duration = duration; return *this; } - Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; } - std::string get_file_name(); - void save(const std::string& path); - void cross_validation(const std::string& fileName, bool quiet); - void go(std::vector filesToProcess, bool quiet); - void show(); - void report(); - private: - std::string title, model, platform, score_name, model_version, language_version, language; - bool discretized{ false }, stratified{ false }; - std::vector results; - std::vector randomSeeds; - HyperParameters hyperparameters; - int nfolds{ 0 }; - float duration{ 0 }; - json build_json(); - }; -} -#endif \ No newline at end of file diff --git a/src/Platform/Folding.cc b/src/Platform/Folding.cc deleted file mode 100644 index 05f5923..0000000 --- a/src/Platform/Folding.cc +++ /dev/null @@ -1,104 +0,0 @@ -#include "Folding.h" -#include -#include -namespace platform { - Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed) - { - std::random_device rd; - random_seed = std::default_random_engine(seed == -1 ? rd() : seed); - std::srand(seed == -1 ? time(0) : seed); - } - KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(std::vector(n)) - { - std::iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1 - shuffle(indices.begin(), indices.end(), random_seed); - } - std::pair, std::vector> KFold::getFold(int nFold) - { - if (nFold >= k || nFold < 0) { - throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")"); - } - int nTest = n / k; - auto train = std::vector(); - auto test = std::vector(); - for (int i = 0; i < n; i++) { - if (i >= nTest * nFold && i < nTest * (nFold + 1)) { - test.push_back(indices[i]); - } else { - train.push_back(indices[i]); - } - } - return { train, test }; - } - StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed) - { - n = y.numel(); - this->y = std::vector(y.data_ptr(), y.data_ptr() + n); - build(); - } - StratifiedKFold::StratifiedKFold(int k, const std::vector& y, int seed) - : Fold(k, y.size(), seed) - { - this->y = y; - n = y.size(); - build(); - } - void StratifiedKFold::build() - { - stratified_indices = std::vector>(k); - int fold_size = n / k; - - // Compute class counts and indices - auto class_indices = std::map>(); - std::vector class_counts(*max_element(y.begin(), y.end()) + 1, 0); - for (auto i = 0; i < n; ++i) { - class_counts[y[i]]++; - class_indices[y[i]].push_back(i); - } - // Shuffle class indices - for (auto& [cls, indices] : class_indices) { - shuffle(indices.begin(), indices.end(), random_seed); - } - // Assign indices to folds - for (auto label = 0; label < class_counts.size(); ++label) { - auto num_samples_to_take = class_counts.at(label) / k; - if (num_samples_to_take == 0) { - std::cerr << "Warning! The number of samples in class " << label << " (" << class_counts.at(label) - << ") is less than the number of folds (" << k << ")." << std::endl; - faulty = true; - continue; - } - auto remainder_samples_to_take = class_counts[label] % k; - for (auto fold = 0; fold < k; ++fold) { - auto it = next(class_indices[label].begin(), num_samples_to_take); - move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ## - class_indices[label].erase(class_indices[label].begin(), it); - } - auto chosen = std::vector(k, false); - while (remainder_samples_to_take > 0) { - int fold = (rand() % static_cast(k)); - if (chosen.at(fold)) { - continue; - } - chosen[fold] = true; - auto it = next(class_indices[label].begin(), 1); - stratified_indices[fold].push_back(*class_indices[label].begin()); - class_indices[label].erase(class_indices[label].begin(), it); - remainder_samples_to_take--; - } - } - } - std::pair, std::vector> StratifiedKFold::getFold(int nFold) - { - if (nFold >= k || nFold < 0) { - throw std::out_of_range("nFold (" + std::to_string(nFold) + ") must be less than k (" + std::to_string(k) + ")"); - } - std::vector test_indices = stratified_indices[nFold]; - std::vector train_indices; - for (int i = 0; i < k; ++i) { - if (i == nFold) continue; - train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end()); - } - return { train_indices, test_indices }; - } -} \ No newline at end of file diff --git a/src/Platform/Folding.h b/src/Platform/Folding.h deleted file mode 100644 index de3bd3a..0000000 --- a/src/Platform/Folding.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef FOLDING_H -#define FOLDING_H -#include -#include -#include -namespace platform { - class Fold { - protected: - int k; - int n; - int seed; - std::default_random_engine random_seed; - public: - Fold(int k, int n, int seed = -1); - virtual std::pair, std::vector> getFold(int nFold) = 0; - virtual ~Fold() = default; - int getNumberOfFolds() { return k; } - }; - class KFold : public Fold { - private: - std::vector indices; - public: - KFold(int k, int n, int seed = -1); - std::pair, std::vector> getFold(int nFold) override; - }; - class StratifiedKFold : public Fold { - private: - std::vector y; - std::vector> stratified_indices; - void build(); - bool faulty = false; // Only true if the number of samples of any class is less than the number of folds. - public: - StratifiedKFold(int k, const std::vector& y, int seed = -1); - StratifiedKFold(int k, torch::Tensor& y, int seed = -1); - std::pair, std::vector> getFold(int nFold) override; - bool isFaulty() { return faulty; } - }; -} -#endif \ No newline at end of file diff --git a/src/Platform/GridData.cc b/src/Platform/GridData.cc deleted file mode 100644 index e93ee17..0000000 --- a/src/Platform/GridData.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include "GridData.h" -#include - -namespace platform { - GridData::GridData(const std::string& fileName) - { - json grid_file; - std::ifstream resultData(fileName); - if (resultData.is_open()) { - grid_file = json::parse(resultData); - } else { - throw std::invalid_argument("Unable to open input file. [" + fileName + "]"); - } - for (const auto& item : grid_file.items()) { - auto key = item.key(); - auto value = item.value(); - grid[key] = value; - } - - } - int GridData::computeNumCombinations(const json& line) - { - int numCombinations = 1; - for (const auto& item : line.items()) { - numCombinations *= item.value().size(); - } - return numCombinations; - } - int GridData::getNumCombinations(const std::string& dataset) - { - int numCombinations = 0; - auto selected = decide_dataset(dataset); - for (const auto& line : grid.at(selected)) { - numCombinations += computeNumCombinations(line); - } - return numCombinations; - } - json GridData::generateCombinations(json::iterator index, const json::iterator last, std::vector& output, json currentCombination) - { - if (index == last) { - // If we reached the end of input, store the current combination - output.push_back(currentCombination); - return currentCombination; - } - const auto& key = index.key(); - const auto& values = index.value(); - for (const auto& value : values) { - auto combination = currentCombination; - combination[key] = value; - json::iterator nextIndex = index; - generateCombinations(++nextIndex, last, output, combination); - } - return currentCombination; - } - std::vector GridData::getGrid(const std::string& dataset) - { - auto selected = decide_dataset(dataset); - auto result = std::vector(); - for (json line : grid.at(selected)) { - generateCombinations(line.begin(), line.end(), result, json({})); - } - return result; - } - json& GridData::getInputGrid(const std::string& dataset) - { - auto selected = decide_dataset(dataset); - return grid.at(selected); - } - std::string GridData::decide_dataset(const std::string& dataset) - { - if (grid.find(dataset) != grid.end()) - return dataset; - return ALL_DATASETS; - } -} /* namespace platform */ \ No newline at end of file diff --git a/src/Platform/GridData.h b/src/Platform/GridData.h deleted file mode 100644 index 0156453..0000000 --- a/src/Platform/GridData.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef GRIDDATA_H -#define GRIDDATA_H -#include -#include -#include -#include - -namespace platform { - using json = nlohmann::json; - const std::string ALL_DATASETS = "all"; - class GridData { - public: - explicit GridData(const std::string& fileName); - ~GridData() = default; - std::vector getGrid(const std::string& dataset = ALL_DATASETS); - int getNumCombinations(const std::string& dataset = ALL_DATASETS); - json& getInputGrid(const std::string& dataset = ALL_DATASETS); - std::map& getGridFile() { return grid; } - private: - std::string decide_dataset(const std::string& dataset); - json generateCombinations(json::iterator index, const json::iterator last, std::vector& output, json currentCombination); - int computeNumCombinations(const json& line); - std::map grid; - }; -} /* namespace platform */ -#endif /* GRIDDATA_H */ \ No newline at end of file diff --git a/src/Platform/GridSearch.cc b/src/Platform/GridSearch.cc deleted file mode 100644 index 3e9ae3d..0000000 --- a/src/Platform/GridSearch.cc +++ /dev/null @@ -1,441 +0,0 @@ -#include -#include -#include -#include "GridSearch.h" -#include "Models.h" -#include "Paths.h" -#include "Folding.h" -#include "Colors.h" - -namespace platform { - std::string get_date() - { - time_t rawtime; - tm* timeinfo; - time(&rawtime); - timeinfo = std::localtime(&rawtime); - std::ostringstream oss; - oss << std::put_time(timeinfo, "%Y-%m-%d"); - return oss.str(); - } - std::string get_time() - { - time_t rawtime; - tm* timeinfo; - time(&rawtime); - timeinfo = std::localtime(&rawtime); - std::ostringstream oss; - oss << std::put_time(timeinfo, "%H:%M:%S"); - return oss.str(); - } - std::string get_color_rank(int rank) - { - auto colors = { Colors::WHITE(), Colors::RED(), Colors::GREEN(), Colors::BLUE(), Colors::MAGENTA(), Colors::CYAN() }; - return *(colors.begin() + rank % colors.size()); - } - GridSearch::GridSearch(struct ConfigGrid& config) : config(config) - { - } - json GridSearch::loadResults() - { - std::ifstream file(Paths::grid_output(config.model)); - if (file.is_open()) { - return json::parse(file); - } - return json(); - } - std::vector GridSearch::filterDatasets(Datasets& datasets) const - { - // Load datasets - auto datasets_names = datasets.getNames(); - if (config.continue_from != NO_CONTINUE()) { - // Continue previous execution: - if (std::find(datasets_names.begin(), datasets_names.end(), config.continue_from) == datasets_names.end()) { - throw std::invalid_argument("Dataset " + config.continue_from + " not found"); - } - // Remove datasets already processed - std::vector::iterator it = datasets_names.begin(); - while (it != datasets_names.end()) { - if (*it != config.continue_from) { - it = datasets_names.erase(it); - } else { - if (config.only) - ++it; - else - break; - } - } - } - // Exclude datasets - for (const auto& name : config.excluded) { - auto dataset = name.get(); - auto it = std::find(datasets_names.begin(), datasets_names.end(), dataset); - if (it == datasets_names.end()) { - throw std::invalid_argument("Dataset " + dataset + " already excluded or doesn't exist!"); - } - datasets_names.erase(it); - } - return datasets_names; - } - json GridSearch::build_tasks_mpi(int rank) - { - auto tasks = json::array(); - auto grid = GridData(Paths::grid_input(config.model)); - auto datasets = Datasets(false, Paths::datasets()); - auto all_datasets = datasets.getNames(); - auto datasets_names = filterDatasets(datasets); - for (int idx_dataset = 0; idx_dataset < datasets_names.size(); ++idx_dataset) { - auto dataset = datasets_names[idx_dataset]; - for (const auto& seed : config.seeds) { - auto combinations = grid.getGrid(dataset); - for (int n_fold = 0; n_fold < config.n_folds; n_fold++) { - json task = { - { "dataset", dataset }, - { "idx_dataset", idx_dataset}, - { "seed", seed }, - { "fold", n_fold}, - }; - tasks.push_back(task); - } - } - } - // Shuffle the array so heavy datasets are spread across the workers - std::mt19937 g{ 271 }; // Use fixed seed to obtain the same shuffle - std::shuffle(tasks.begin(), tasks.end(), g); - std::cout << get_color_rank(rank) << "* Number of tasks: " << tasks.size() << std::endl; - std::cout << "|"; - for (int i = 0; i < tasks.size(); ++i) { - std::cout << (i + 1) % 10; - } - std::cout << "|" << std::endl << "|" << std::flush; - return tasks; - } - void process_task_mpi_consumer(struct ConfigGrid& config, struct ConfigMPI& config_mpi, json& tasks, int n_task, Datasets& datasets, Task_Result* result) - { - // initialize - Timer timer; - timer.start(); - json task = tasks[n_task]; - auto model = config.model; - auto grid = GridData(Paths::grid_input(model)); - auto dataset = task["dataset"].get(); - auto idx_dataset = task["idx_dataset"].get(); - auto seed = task["seed"].get(); - auto n_fold = task["fold"].get(); - bool stratified = config.stratified; - // Generate the hyperparamters combinations - auto combinations = grid.getGrid(dataset); - auto [X, y] = datasets.getTensors(dataset); - auto states = datasets.getStates(dataset); - auto features = datasets.getFeatures(dataset); - auto className = datasets.getClassName(dataset); - // - // Start working on task - // - Fold* fold; - if (stratified) - fold = new StratifiedKFold(config.n_folds, y, seed); - else - fold = new KFold(config.n_folds, y.size(0), seed); - auto [train, test] = fold->getFold(n_fold); - auto train_t = torch::tensor(train); - auto test_t = torch::tensor(test); - auto X_train = X.index({ "...", train_t }); - auto y_train = y.index({ train_t }); - auto X_test = X.index({ "...", test_t }); - auto y_test = y.index({ test_t }); - double best_fold_score = 0.0; - int best_idx_combination = -1; - json best_fold_hyper; - for (int idx_combination = 0; idx_combination < combinations.size(); ++idx_combination) { - auto hyperparam_line = combinations[idx_combination]; - auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line); - Fold* nested_fold; - if (config.stratified) - nested_fold = new StratifiedKFold(config.nested, y_train, seed); - else - nested_fold = new KFold(config.nested, y_train.size(0), seed); - double score = 0.0; - for (int n_nested_fold = 0; n_nested_fold < config.nested; n_nested_fold++) { - // Nested level fold - auto [train_nested, test_nested] = nested_fold->getFold(n_nested_fold); - auto train_nested_t = torch::tensor(train_nested); - auto test_nested_t = torch::tensor(test_nested); - auto X_nested_train = X_train.index({ "...", train_nested_t }); - auto y_nested_train = y_train.index({ train_nested_t }); - auto X_nested_test = X_train.index({ "...", test_nested_t }); - auto y_nested_test = y_train.index({ test_nested_t }); - // Build Classifier with selected hyperparameters - auto clf = Models::instance()->create(config.model); - auto valid = clf->getValidHyperparameters(); - hyperparameters.check(valid, dataset); - clf->setHyperparameters(hyperparameters.get(dataset)); - // Train model - clf->fit(X_nested_train, y_nested_train, features, className, states); - // Test model - score += clf->score(X_nested_test, y_nested_test); - } - delete nested_fold; - score /= config.nested; - if (score > best_fold_score) { - best_fold_score = score; - best_idx_combination = idx_combination; - best_fold_hyper = hyperparam_line; - } - } - delete fold; - // Build Classifier with the best hyperparameters to obtain the best score - auto hyperparameters = platform::HyperParameters(datasets.getNames(), best_fold_hyper); - auto clf = Models::instance()->create(config.model); - auto valid = clf->getValidHyperparameters(); - hyperparameters.check(valid, dataset); - clf->setHyperparameters(best_fold_hyper); - clf->fit(X_train, y_train, features, className, states); - best_fold_score = clf->score(X_test, y_test); - // Return the result - result->idx_dataset = task["idx_dataset"].get(); - result->idx_combination = best_idx_combination; - result->score = best_fold_score; - result->n_fold = n_fold; - result->time = timer.getDuration(); - // Update progress bar - std::cout << get_color_rank(config_mpi.rank) << "*" << std::flush; - } - json store_result(std::vector& names, Task_Result& result, json& results) - { - json json_result = { - { "score", result.score }, - { "combination", result.idx_combination }, - { "fold", result.n_fold }, - { "time", result.time }, - { "dataset", result.idx_dataset } - }; - auto name = names[result.idx_dataset]; - if (!results.contains(name)) { - results[name] = json::array(); - } - results[name].push_back(json_result); - return results; - } - json producer(std::vector& names, json& tasks, struct ConfigMPI& config_mpi, MPI_Datatype& MPI_Result) - { - Task_Result result; - json results; - int num_tasks = tasks.size(); - - // - // 2a.1 Producer will loop to send all the tasks to the consumers and receive the results - // - for (int i = 0; i < num_tasks; ++i) { - MPI_Status status; - MPI_Recv(&result, 1, MPI_Result, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); - if (status.MPI_TAG == TAG_RESULT) { - //Store result - store_result(names, result, results); - } - MPI_Send(&i, 1, MPI_INT, status.MPI_SOURCE, TAG_TASK, MPI_COMM_WORLD); - } - // - // 2a.2 Producer will send the end message to all the consumers - // - for (int i = 0; i < config_mpi.n_procs - 1; ++i) { - MPI_Status status; - MPI_Recv(&result, 1, MPI_Result, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); - if (status.MPI_TAG == TAG_RESULT) { - //Store result - store_result(names, result, results); - } - MPI_Send(&i, 1, MPI_INT, status.MPI_SOURCE, TAG_END, MPI_COMM_WORLD); - } - return results; - } - void select_best_results_folds(json& results, json& all_results, std::string& model) - { - Timer timer; - auto grid = GridData(Paths::grid_input(model)); - // - // Select the best result of the computed outer folds - // - for (const auto& result : all_results.items()) { - // each result has the results of all the outer folds as each one were a different task - double best_score = 0.0; - json best; - for (const auto& result_fold : result.value()) { - double score = result_fold["score"].get(); - if (score > best_score) { - best_score = score; - best = result_fold; - } - } - auto dataset = result.key(); - auto combinations = grid.getGrid(dataset); - json json_best = { - { "score", best_score }, - { "hyperparameters", combinations[best["combination"].get()] }, - { "date", get_date() + " " + get_time() }, - { "grid", grid.getInputGrid(dataset) }, - { "duration", timer.translate2String(best["time"].get()) } - }; - results[dataset] = json_best; - } - } - void consumer(Datasets& datasets, json& tasks, struct ConfigGrid& config, struct ConfigMPI& config_mpi, MPI_Datatype& MPI_Result) - { - Task_Result result; - // - // 2b.1 Consumers announce to the producer that they are ready to receive a task - // - MPI_Send(&result, 1, MPI_Result, config_mpi.manager, TAG_QUERY, MPI_COMM_WORLD); - int task; - while (true) { - MPI_Status status; - // - // 2b.2 Consumers receive the task from the producer and process it - // - MPI_Recv(&task, 1, MPI_INT, config_mpi.manager, MPI_ANY_TAG, MPI_COMM_WORLD, &status); - if (status.MPI_TAG == TAG_END) { - break; - } - process_task_mpi_consumer(config, config_mpi, tasks, task, datasets, &result); - // - // 2b.3 Consumers send the result to the producer - // - MPI_Send(&result, 1, MPI_Result, config_mpi.manager, TAG_RESULT, MPI_COMM_WORLD); - } - } - void GridSearch::go(struct ConfigMPI& config_mpi) - { - /* - * Each task is a json object with the following structure: - * { - * "dataset": "dataset_name", - * "idx_dataset": idx_dataset, // used to identify the dataset in the results - * // this index is relative to the used datasets in the actual run not to the whole datasets - * "seed": # of seed to use, - * "Fold": # of fold to process - * } - * - * The overall process consists in these steps: - * 0. Create the MPI result type & tasks - * 0.1 Create the MPI result type - * 0.2 Manager creates the tasks - * 1. Manager will broadcast the tasks to all the processes - * 1.1 Broadcast the number of tasks - * 1.2 Broadcast the length of the following string - * 1.2 Broadcast the tasks as a char* string - * 2a. Producer delivers the tasks to the consumers - * 2a.1 Producer will loop to send all the tasks to the consumers and receive the results - * 2a.2 Producer will send the end message to all the consumers - * 2b. Consumers process the tasks and send the results to the producer - * 2b.1 Consumers announce to the producer that they are ready to receive a task - * 2b.2 Consumers receive the task from the producer and process it - * 2b.3 Consumers send the result to the producer - * 3. Manager select the bests sccores for each dataset - * 3.1 Loop thru all the results obtained from each outer fold (task) and select the best - * 3.2 Save the results - */ - // - // 0.1 Create the MPI result type - // - Task_Result result; - int tasks_size; - MPI_Datatype MPI_Result; - MPI_Datatype type[5] = { MPI_UNSIGNED, MPI_UNSIGNED, MPI_INT, MPI_DOUBLE, MPI_DOUBLE }; - int blocklen[5] = { 1, 1, 1, 1, 1 }; - MPI_Aint disp[5]; - disp[0] = offsetof(Task_Result, idx_dataset); - disp[1] = offsetof(Task_Result, idx_combination); - disp[2] = offsetof(Task_Result, n_fold); - disp[3] = offsetof(Task_Result, score); - disp[4] = offsetof(Task_Result, time); - MPI_Type_create_struct(5, blocklen, disp, type, &MPI_Result); - MPI_Type_commit(&MPI_Result); - // - // 0.2 Manager creates the tasks - // - char* msg; - json tasks; - if (config_mpi.rank == config_mpi.manager) { - timer.start(); - tasks = build_tasks_mpi(config_mpi.rank); - auto tasks_str = tasks.dump(); - tasks_size = tasks_str.size(); - msg = new char[tasks_size + 1]; - strcpy(msg, tasks_str.c_str()); - } - // - // 1. Manager will broadcast the tasks to all the processes - // - MPI_Bcast(&tasks_size, 1, MPI_INT, config_mpi.manager, MPI_COMM_WORLD); - if (config_mpi.rank != config_mpi.manager) { - msg = new char[tasks_size + 1]; - } - MPI_Bcast(msg, tasks_size + 1, MPI_CHAR, config_mpi.manager, MPI_COMM_WORLD); - tasks = json::parse(msg); - delete[] msg; - auto datasets = Datasets(config.discretize, Paths::datasets()); - if (config_mpi.rank == config_mpi.manager) { - // - // 2a. Producer delivers the tasks to the consumers - // - auto datasets_names = filterDatasets(datasets); - json all_results = producer(datasets_names, tasks, config_mpi, MPI_Result); - std::cout << get_color_rank(config_mpi.rank) << "|" << std::endl; - // - // 3. Manager select the bests sccores for each dataset - // - auto results = initializeResults(); - select_best_results_folds(results, all_results, config.model); - // - // 3.2 Save the results - // - save(results); - } else { - // - // 2b. Consumers process the tasks and send the results to the producer - // - consumer(datasets, tasks, config, config_mpi, MPI_Result); - } - } - json GridSearch::initializeResults() - { - // Load previous results if continue is set - json results; - if (config.continue_from != NO_CONTINUE()) { - if (!config.quiet) - std::cout << "* Loading previous results" << std::endl; - try { - std::ifstream file(Paths::grid_output(config.model)); - if (file.is_open()) { - results = json::parse(file); - results = results["results"]; - } - } - catch (const std::exception& e) { - std::cerr << "* There were no previous results" << std::endl; - std::cerr << "* Initizalizing new results" << std::endl; - results = json(); - } - } - return results; - } - void GridSearch::save(json& results) - { - std::ofstream file(Paths::grid_output(config.model)); - json output = { - { "model", config.model }, - { "score", config.score }, - { "discretize", config.discretize }, - { "stratified", config.stratified }, - { "n_folds", config.n_folds }, - { "seeds", config.seeds }, - { "date", get_date() + " " + get_time()}, - { "nested", config.nested}, - { "platform", config.platform }, - { "duration", timer.getDurationString(true)}, - { "results", results } - - }; - file << output.dump(4); - } -} /* namespace platform */ \ No newline at end of file diff --git a/src/Platform/GridSearch.h b/src/Platform/GridSearch.h deleted file mode 100644 index ec1b3cb..0000000 --- a/src/Platform/GridSearch.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef GRIDSEARCH_H -#define GRIDSEARCH_H -#include -#include -#include -#include -#include "Datasets.h" -#include "HyperParameters.h" -#include "GridData.h" -#include "Timer.h" - -namespace platform { - using json = nlohmann::json; - struct ConfigGrid { - std::string model; - std::string score; - std::string continue_from; - std::string platform; - bool quiet; - bool only; // used with continue_from to only compute that dataset - bool discretize; - bool stratified; - int nested; - int n_folds; - json excluded; - std::vector seeds; - }; - struct ConfigMPI { - int rank; - int n_procs; - int manager; - }; - typedef struct { - uint idx_dataset; - uint idx_combination; - int n_fold; - double score; - double time; - } Task_Result; - const int TAG_QUERY = 1; - const int TAG_RESULT = 2; - const int TAG_TASK = 3; - const int TAG_END = 4; - class GridSearch { - public: - explicit GridSearch(struct ConfigGrid& config); - void go(struct ConfigMPI& config_mpi); - ~GridSearch() = default; - json loadResults(); - static inline std::string NO_CONTINUE() { return "NO_CONTINUE"; } - private: - void save(json& results); - json initializeResults(); - std::vector filterDatasets(Datasets& datasets) const; - struct ConfigGrid config; - json build_tasks_mpi(int rank); - Timer timer; // used to measure the time of the whole process - }; -} /* namespace platform */ -#endif /* GRIDSEARCH_H */ \ No newline at end of file diff --git a/src/Platform/HyperParameters.cc b/src/Platform/HyperParameters.cc deleted file mode 100644 index 92d56d7..0000000 --- a/src/Platform/HyperParameters.cc +++ /dev/null @@ -1,55 +0,0 @@ -#include "HyperParameters.h" -#include -#include -#include - -namespace platform { - HyperParameters::HyperParameters(const std::vector& datasets, const json& hyperparameters_) - { - // Initialize all datasets with the given hyperparameters - for (const auto& item : datasets) { - hyperparameters[item] = hyperparameters_; - } - } - // https://www.techiedelight.com/implode-a-vector-of-strings-into-a-comma-separated-string-in-cpp/ - std::string join(std::vector const& strings, std::string delim) - { - std::stringstream ss; - std::copy(strings.begin(), strings.end(), - std::ostream_iterator(ss, delim.c_str())); - return ss.str(); - } - HyperParameters::HyperParameters(const std::vector& datasets, const std::string& hyperparameters_file) - { - // Check if file exists - std::ifstream file(hyperparameters_file); - if (!file.is_open()) { - throw std::runtime_error("File " + hyperparameters_file + " not found"); - } - // Check if file is a json - json input_hyperparameters = json::parse(file); - // Check if hyperparameters are valid - for (const auto& dataset : datasets) { - if (!input_hyperparameters.contains(dataset)) { - std::cerr << "*Warning: Dataset " << dataset << " not found in hyperparameters file" << " assuming default hyperparameters" << std::endl; - hyperparameters[dataset] = json({}); - continue; - } - hyperparameters[dataset] = input_hyperparameters[dataset]["hyperparameters"].get(); - } - } - void HyperParameters::check(const std::vector& valid, const std::string& fileName) - { - json result = hyperparameters.at(fileName); - for (const auto& item : result.items()) { - if (find(valid.begin(), valid.end(), item.key()) == valid.end()) { - throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid. Passed Hyperparameters are: " - + result.dump(4) + "\n Valid hyperparameters are: {" + join(valid, ",") + "}"); - } - } - } - json HyperParameters::get(const std::string& fileName) - { - return hyperparameters.at(fileName); - } -} /* namespace platform */ \ No newline at end of file diff --git a/src/Platform/HyperParameters.h b/src/Platform/HyperParameters.h deleted file mode 100644 index 3628fb8..0000000 --- a/src/Platform/HyperParameters.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef HYPERPARAMETERS_H -#define HYPERPARAMETERS_H -#include -#include -#include -#include - -namespace platform { - using json = nlohmann::json; - class HyperParameters { - public: - HyperParameters() = default; - explicit HyperParameters(const std::vector& datasets, const json& hyperparameters_); - explicit HyperParameters(const std::vector& datasets, const std::string& hyperparameters_file); - ~HyperParameters() = default; - bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); } - void check(const std::vector& valid, const std::string& fileName); - json get(const std::string& fileName); - private: - std::map hyperparameters; - }; -} /* namespace platform */ -#endif /* HYPERPARAMETERS_H */ \ No newline at end of file diff --git a/src/Platform/ManageResults.cc b/src/Platform/ManageResults.cc deleted file mode 100644 index dc03979..0000000 --- a/src/Platform/ManageResults.cc +++ /dev/null @@ -1,213 +0,0 @@ -#include "ManageResults.h" -#include "CommandParser.h" -#include -#include -#include "Colors.h" -#include "CLocale.h" -#include "Paths.h" -#include "ReportConsole.h" -#include "ReportExcel.h" - -namespace platform { - - ManageResults::ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare) : - numFiles{ numFiles }, complete{ complete }, partial{ partial }, compare{ compare }, results(Results(Paths::results(), model, score, complete, partial)) - { - indexList = true; - openExcel = false; - workbook = NULL; - if (numFiles == 0) { - this->numFiles = results.size(); - } - } - void ManageResults::doMenu() - { - if (results.empty()) { - std::cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl; - return; - } - results.sortDate(); - list(); - menu(); - if (openExcel) { - workbook_close(workbook); - } - std::cout << Colors::RESET() << "Done!" << std::endl; - } - void ManageResults::list() - { - auto temp = ConfigLocale(); - std::string suffix = numFiles != results.size() ? " of " + std::to_string(results.size()) : ""; - std::stringstream oss; - oss << "Results on screen: " << numFiles << suffix; - std::cout << Colors::GREEN() << oss.str() << std::endl; - std::cout << std::string(oss.str().size(), '-') << std::endl; - if (complete) { - std::cout << Colors::MAGENTA() << "Only listing complete results" << std::endl; - } - if (partial) { - std::cout << Colors::MAGENTA() << "Only listing partial results" << std::endl; - } - auto i = 0; - int maxModel = results.maxModelSize(); - std::cout << Colors::GREEN() << " # Date " << std::setw(maxModel) << std::left << "Model" << " Score Name Score C/P Duration Title" << std::endl; - std::cout << "=== ========== " << std::string(maxModel, '=') << " =========== =========== === ========= =============================================================" << std::endl; - bool odd = true; - for (auto& result : results) { - auto color = odd ? Colors::BLUE() : Colors::CYAN(); - std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " "; - std::cout << result.to_string(maxModel) << std::endl; - if (i == numFiles) { - break; - } - odd = !odd; - } - } - bool ManageResults::confirmAction(const std::string& intent, const std::string& fileName) const - { - std::string color; - if (intent == "delete") { - color = Colors::RED(); - } else { - color = Colors::YELLOW(); - } - std::string line; - bool finished = false; - while (!finished) { - std::cout << color << "Really want to " << intent << " " << fileName << "? (y/n): "; - getline(std::cin, line); - finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0] == 'n')); - } - if (tolower(line[0]) == 'y') { - return true; - } - std::cout << "Not done!" << std::endl; - return false; - } - void ManageResults::report(const int index, const bool excelReport) - { - std::cout << Colors::YELLOW() << "Reporting " << results.at(index).getFilename() << std::endl; - auto data = results.at(index).load(); - if (excelReport) { - ReportExcel reporter(data, compare, workbook); - reporter.show(); - openExcel = true; - workbook = reporter.getWorkbook(); - std::cout << "Adding sheet to " << Paths::excel() + Paths::excelResults() << std::endl; - } else { - ReportConsole reporter(data, compare); - reporter.show(); - } - } - void ManageResults::showIndex(const int index, const int idx) - { - // Show a dataset result inside a report - auto data = results.at(index).load(); - std::cout << Colors::YELLOW() << "Showing " << results.at(index).getFilename() << std::endl; - ReportConsole reporter(data, compare, idx); - reporter.show(); - } - void ManageResults::sortList() - { - std::cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', duration='u', model='m'): "; - std::string line; - char option; - getline(std::cin, line); - if (line.size() == 0) - return; - if (line.size() > 1) { - std::cout << "Invalid option" << std::endl; - return; - } - option = line[0]; - switch (option) { - case 'd': - results.sortDate(); - break; - case 's': - results.sortScore(); - break; - case 'u': - results.sortDuration(); - break; - case 'm': - results.sortModel(); - break; - default: - std::cout << "Invalid option" << std::endl; - } - } - void ManageResults::menu() - { - char option; - int index, subIndex; - bool finished = false; - std::string filename; - // tuple - std::vector> mainOptions = { - {"quit", 'q', false}, - {"list", 'l', false}, - {"delete", 'd', true}, - {"hide", 'h', true}, - {"sort", 's', false}, - {"report", 'r', true}, - {"excel", 'e', true} - }; - std::vector> listOptions = { - {"report", 'r', true}, - {"list", 'l', false}, - {"quit", 'q', false} - }; - auto parser = CommandParser(); - while (!finished) { - if (indexList) { - std::tie(option, index) = parser.parse(Colors::GREEN(), mainOptions, 'r', numFiles - 1); - } else { - std::tie(option, subIndex) = parser.parse(Colors::MAGENTA(), listOptions, 'r', results.at(index).load()["results"].size() - 1); - } - switch (option) { - case 'q': - finished = true; - break; - case 'l': - list(); - indexList = true; - break; - case 'd': - filename = results.at(index).getFilename(); - if (!confirmAction("delete", filename)) - break; - std::cout << "Deleting " << filename << std::endl; - results.deleteResult(index); - std::cout << "File: " + filename + " deleted!" << std::endl; - list(); - break; - case 'h': - filename = results.at(index).getFilename(); - if (!confirmAction("hide", filename)) - break; - filename = results.at(index).getFilename(); - std::cout << "Hiding " << filename << std::endl; - results.hideResult(index, Paths::hiddenResults()); - std::cout << "File: " + filename + " hidden! (moved to " << Paths::hiddenResults() << ")" << std::endl; - list(); - break; - case 's': - sortList(); - list(); - break; - case 'r': - if (indexList) { - report(index, false); - indexList = false; - } else { - showIndex(index, subIndex); - } - break; - case 'e': - report(index, true); - break; - } - } - } -} /* namespace platform */ diff --git a/src/Platform/ManageResults.h b/src/Platform/ManageResults.h deleted file mode 100644 index 6bd3704..0000000 --- a/src/Platform/ManageResults.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef MANAGE_RESULTS_H -#define MANAGE_RESULTS_H -#include "Results.h" -#include "xlsxwriter.h" - -namespace platform { - class ManageResults { - public: - ManageResults(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare); - ~ManageResults() = default; - void doMenu(); - private: - void list(); - bool confirmAction(const std::string& intent, const std::string& fileName) const; - void report(const int index, const bool excelReport); - void showIndex(const int index, const int idx); - void sortList(); - void menu(); - int numFiles; - bool indexList; - bool openExcel; - bool complete; - bool partial; - bool compare; - Results results; - lxw_workbook* workbook; - }; - -} - -#endif /* MANAGE_RESULTS_H */ \ No newline at end of file diff --git a/src/Platform/Models.cc b/src/Platform/Models.cc deleted file mode 100644 index 2791f1a..0000000 --- a/src/Platform/Models.cc +++ /dev/null @@ -1,52 +0,0 @@ -#include "Models.h" -namespace platform { - // Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory - Models* Models::factory = nullptr;; - Models* Models::instance() - { - //manages singleton - if (factory == nullptr) - factory = new Models(); - return factory; - } - void Models::registerFactoryFunction(const std::string& name, - function classFactoryFunction) - { - // register the class factory function - functionRegistry[name] = classFactoryFunction; - } - shared_ptr Models::create(const std::string& name) - { - bayesnet::BaseClassifier* instance = nullptr; - - // find name in the registry and call factory method. - auto it = functionRegistry.find(name); - if (it != functionRegistry.end()) - instance = it->second(); - // wrap instance in a shared ptr and return - if (instance != nullptr) - return unique_ptr(instance); - else - return nullptr; - } - std::vector Models::getNames() - { - std::vector names; - transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names), - [](const pair>& pair) { return pair.first; }); - return names; - } - std::string Models::tostring() - { - std::string result = ""; - for (const auto& pair : functionRegistry) { - result += pair.first + ", "; - } - return "{" + result.substr(0, result.size() - 2) + "}"; - } - Registrar::Registrar(const std::string& name, function classFactoryFunction) - { - // register the class factory function - Models::instance()->registerFactoryFunction(name, classFactoryFunction); - } -} \ No newline at end of file diff --git a/src/Platform/Models.h b/src/Platform/Models.h deleted file mode 100644 index a985c32..0000000 --- a/src/Platform/Models.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef MODELS_H -#define MODELS_H -#include -#include "BaseClassifier.h" -#include "AODE.h" -#include "TAN.h" -#include "KDB.h" -#include "SPODE.h" -#include "TANLd.h" -#include "KDBLd.h" -#include "SPODELd.h" -#include "AODELd.h" -#include "BoostAODE.h" -#include "STree.h" -#include "ODTE.h" -#include "SVC.h" -#include "RandomForest.h" -namespace platform { - class Models { - private: - map> functionRegistry; - static Models* factory; //singleton - Models() {}; - public: - Models(Models&) = delete; - void operator=(const Models&) = delete; - // Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory - static Models* instance(); - shared_ptr create(const std::string& name); - void registerFactoryFunction(const std::string& name, - function classFactoryFunction); - std::vector getNames(); - std::string tostring(); - - }; - class Registrar { - public: - Registrar(const std::string& className, function classFactoryFunction); - }; -} -#endif \ No newline at end of file diff --git a/src/Platform/Paths.h b/src/Platform/Paths.h deleted file mode 100644 index 6fd61cf..0000000 --- a/src/Platform/Paths.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef PATHS_H -#define PATHS_H -#include -#include -#include "DotEnv.h" -namespace platform { - class Paths { - public: - static std::string results() { return "results/"; } - static std::string hiddenResults() { return "hidden_results/"; } - static std::string excel() { return "excel/"; } - static std::string grid() { return "grid/"; } - static std::string datasets() - { - auto env = platform::DotEnv(); - return env.get("source_data"); - } - static void createPath(const std::string& path) - { - // Create directory if it does not exist - try { - std::filesystem::create_directory(path); - } - catch (std::exception& e) { - throw std::runtime_error("Could not create directory " + path); - } - } - static std::string excelResults() { return "some_results.xlsx"; } - static std::string grid_input(const std::string& model) - { - return grid() + "grid_" + model + "_input.json"; - } - static std::string grid_output(const std::string& model) - { - return grid() + "grid_" + model + "_output.json"; - } - }; -} -#endif \ No newline at end of file diff --git a/src/Platform/ReportBase.cc b/src/Platform/ReportBase.cc deleted file mode 100644 index 49e6617..0000000 --- a/src/Platform/ReportBase.cc +++ /dev/null @@ -1,113 +0,0 @@ -#include -#include -#include "Datasets.h" -#include "ReportBase.h" -#include "DotEnv.h" - -namespace platform { - ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1) - { - std::stringstream oss; - oss << "Better than ZeroR + " << std::setprecision(1) << fixed << margin * 100 << "%"; - meaning = { - {Symbols::equal_best, "Equal to best"}, - {Symbols::better_best, "Better than best"}, - {Symbols::cross, "Less than or equal to ZeroR"}, - {Symbols::upward_arrow, oss.str()} - }; - } - std::string ReportBase::fromVector(const std::string& key) - { - std::stringstream oss; - std::string sep = ""; - oss << "["; - for (auto& item : data[key]) { - oss << sep << item.get(); - sep = ", "; - } - oss << "]"; - return oss.str(); - } - std::string ReportBase::fVector(const std::string& title, const json& data, const int width, const int precision) - { - std::stringstream oss; - std::string sep = ""; - oss << title << "["; - for (const auto& item : data) { - oss << sep << fixed << setw(width) << std::setprecision(precision) << item.get(); - sep = ", "; - } - oss << "]"; - return oss.str(); - } - void ReportBase::show() - { - header(); - body(); - } - std::string ReportBase::compareResult(const std::string& dataset, double result) - { - std::string status = " "; - if (compare) { - double best = bestResult(dataset, data["model"].get()); - if (result == best) { - status = Symbols::equal_best; - } else if (result > best) { - status = Symbols::better_best; - } - } else { - if (data["score_name"].get() == "accuracy") { - auto dt = Datasets(false, Paths::datasets()); - dt.loadDataset(dataset); - auto numClasses = dt.getNClasses(dataset); - if (numClasses == 2) { - std::vector distribution = dt.getClassesCounts(dataset); - double nSamples = dt.getNSamples(dataset); - std::vector::iterator maxValue = max_element(distribution.begin(), distribution.end()); - double mark = *maxValue / nSamples * (1 + margin); - if (mark > 1) { - mark = 0.9995; - } - status = result < mark ? Symbols::cross : result > mark ? Symbols::upward_arrow : "="; - } - } - } - if (status != " ") { - auto item = summary.find(status); - if (item != summary.end()) { - summary[status]++; - } else { - summary[status] = 1; - } - } - return status; - } - double ReportBase::bestResult(const std::string& dataset, const std::string& model) - { - double value = 0.0; - if (bestResults.size() == 0) { - // try to load the best results - std::string score = data["score_name"]; - replace(score.begin(), score.end(), '_', '-'); - std::string fileName = "best_results_" + score + "_" + model + ".json"; - ifstream resultData(Paths::results() + "/" + fileName); - if (resultData.is_open()) { - bestResults = json::parse(resultData); - } else { - existBestFile = false; - } - } - try { - value = bestResults.at(dataset).at(0); - } - catch (exception) { - value = 1.0; - - } - return value; - } - bool ReportBase::getExistBestFile() - { - return existBestFile; - } -} \ No newline at end of file diff --git a/src/Platform/ReportBase.h b/src/Platform/ReportBase.h deleted file mode 100644 index 35cde6b..0000000 --- a/src/Platform/ReportBase.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef REPORTBASE_H -#define REPORTBASE_H -#include -#include -#include "Paths.h" -#include "Symbols.h" -#include - -using json = nlohmann::json; -namespace platform { - - class ReportBase { - public: - explicit ReportBase(json data_, bool compare); - virtual ~ReportBase() = default; - void show(); - protected: - json data; - std::string fromVector(const std::string& key); - std::string fVector(const std::string& title, const json& data, const int width, const int precision); - bool getExistBestFile(); - virtual void header() = 0; - virtual void body() = 0; - virtual void showSummary() = 0; - std::string compareResult(const std::string& dataset, double result); - std::map summary; - double margin; - std::map meaning; - bool compare; - private: - double bestResult(const std::string& dataset, const std::string& model); - json bestResults; - bool existBestFile = true; - }; -}; -#endif \ No newline at end of file diff --git a/src/Platform/ReportConsole.cc b/src/Platform/ReportConsole.cc deleted file mode 100644 index 9a1ce0d..0000000 --- a/src/Platform/ReportConsole.cc +++ /dev/null @@ -1,114 +0,0 @@ -#include -#include -#include -#include "ReportConsole.h" -#include "BestScore.h" -#include "CLocale.h" - -namespace platform { - std::string ReportConsole::headerLine(const std::string& text, int utf = 0) - { - int n = MAXL - text.length() - 3; - n = n < 0 ? 0 : n; - return "* " + text + std::string(n + utf, ' ') + "*\n"; - } - - void ReportConsole::header() - { - std::stringstream oss; - std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl; - std::cout << headerLine( - "Report " + data["model"].get() + " ver. " + data["version"].get() - + " with " + std::to_string(data["folds"].get()) + " Folds cross validation and " + std::to_string(data["seeds"].size()) - + " random seeds. " + data["date"].get() + " " + data["time"].get() - ); - std::cout << headerLine(data["title"].get()); - std::cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get() ? "True" : "False")); - oss << "Execution took " << std::setprecision(2) << std::fixed << data["duration"].get() - << " seconds, " << data["duration"].get() / 3600 << " hours, on " << data["platform"].get(); - std::cout << headerLine(oss.str()); - std::cout << headerLine("Score is " + data["score_name"].get()); - std::cout << std::string(MAXL, '*') << std::endl; - std::cout << std::endl; - } - void ReportConsole::body() - { - auto tmp = ConfigLocale(); - int maxHyper = 15; - int maxDataset = 7; - for (const auto& r : data["results"]) { - maxHyper = std::max(maxHyper, (int)r["hyperparameters"].dump().size()); - maxDataset = std::max(maxDataset, (int)r["dataset"].get().size()); - - } - std::cout << Colors::GREEN() << " # " << std::setw(maxDataset) << std::left << "Dataset" << " Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << std::endl; - std::cout << "=== " << std::string(maxDataset, '=') << " ====== ===== === ========= ========= ========= =============== =================== " << std::string(maxHyper, '=') << std::endl; - json lastResult; - double totalScore = 0.0; - bool odd = true; - int index = 0; - for (const auto& r : data["results"]) { - if (selectedIndex != -1 && index != selectedIndex) { - index++; - continue; - } - auto color = odd ? Colors::CYAN() : Colors::BLUE(); - std::cout << color; - std::cout << std::setw(3) << std::right << index++ << " "; - std::cout << std::setw(maxDataset) << std::left << r["dataset"].get() << " "; - std::cout << std::setw(6) << std::right << r["samples"].get() << " "; - std::cout << std::setw(5) << std::right << r["features"].get() << " "; - std::cout << std::setw(3) << std::right << r["classes"].get() << " "; - std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["nodes"].get() << " "; - std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["leaves"].get() << " "; - std::cout << std::setw(9) << std::setprecision(2) << std::fixed << r["depth"].get() << " "; - std::cout << std::setw(8) << std::right << std::setprecision(6) << std::fixed << r["score"].get() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["score_std"].get(); - const std::string status = compareResult(r["dataset"].get(), r["score"].get()); - std::cout << status; - std::cout << std::setw(12) << std::right << std::setprecision(6) << std::fixed << r["time"].get() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["time_std"].get() << " "; - std::cout << r["hyperparameters"].dump(); - std::cout << std::endl; - std::cout << std::flush; - lastResult = r; - totalScore += r["score"].get(); - odd = !odd; - } - if (data["results"].size() == 1 || selectedIndex != -1) { - std::cout << std::string(MAXL, '*') << std::endl; - std::cout << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12)); - std::cout << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12)); - std::cout << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3)); - std::cout << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3)); - std::cout << std::string(MAXL, '*') << std::endl; - } else { - footer(totalScore); - } - } - void ReportConsole::showSummary() - { - for (const auto& item : summary) { - std::stringstream oss; - oss << std::setw(3) << std::left << item.first; - oss << std::setw(3) << std::right << item.second << " "; - oss << std::left << meaning.at(item.first); - std::cout << headerLine(oss.str(), 2); - } - } - - void ReportConsole::footer(double totalScore) - { - std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl; - showSummary(); - auto score = data["score_name"].get(); - auto best = BestScore::getScore(score); - if (best.first != "") { - std::stringstream oss; - oss << score << " compared to " << best.first << " .: " << totalScore / best.second; - std::cout << headerLine(oss.str()); - } - if (!getExistBestFile() && compare) { - std::cout << headerLine("*** Best Results File not found. Couldn't compare any result!"); - } - std::cout << std::string(MAXL, '*') << std::endl << Colors::RESET(); - } -} \ No newline at end of file diff --git a/src/Platform/ReportConsole.h b/src/Platform/ReportConsole.h deleted file mode 100644 index 8ba3ffe..0000000 --- a/src/Platform/ReportConsole.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef REPORTCONSOLE_H -#define REPORTCONSOLE_H -#include -#include "ReportBase.h" -#include "Colors.h" - -namespace platform { - const int MAXL = 133; - class ReportConsole : public ReportBase { - public: - explicit ReportConsole(json data_, bool compare = false, int index = -1) : ReportBase(data_, compare), selectedIndex(index) {}; - virtual ~ReportConsole() = default; - private: - int selectedIndex; - std::string headerLine(const std::string& text, int utf); - void header() override; - void body() override; - void footer(double totalScore); - void showSummary() override; - }; -}; -#endif \ No newline at end of file diff --git a/src/Platform/ReportExcel.cc b/src/Platform/ReportExcel.cc deleted file mode 100644 index addbf4c..0000000 --- a/src/Platform/ReportExcel.cc +++ /dev/null @@ -1,180 +0,0 @@ -#include -#include -#include "ReportExcel.h" -#include "BestScore.h" - - -namespace platform { - - ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet) : ReportBase(data_, compare), ExcelFile(workbook, worksheet) - { - createFile(); - } - - void ReportExcel::formatColumns() - { - worksheet_freeze_panes(worksheet, 6, 1); - std::vector columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 }; - for (int i = 0; i < columns_sizes.size(); ++i) { - worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); - } - } - void ReportExcel::createWorksheet() - { - const std::string name = data["model"].get(); - std::string suffix = ""; - std::string efectiveName; - int num = 1; - // Create a sheet with the name of the model - while (true) { - efectiveName = name + suffix; - if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) { - suffix = std::to_string(++num); - } else { - worksheet = workbook_add_worksheet(workbook, efectiveName.c_str()); - break; - } - if (num > 100) { - throw std::invalid_argument("Couldn't create sheet " + efectiveName); - } - } - } - - void ReportExcel::createFile() - { - if (workbook == NULL) { - workbook = workbook_new((Paths::excel() + Paths::excelResults()).c_str()); - } - if (worksheet == NULL) { - createWorksheet(); - } - setProperties(data["title"].get()); - createFormats(); - formatColumns(); - } - - void ReportExcel::closeFile() - { - workbook_close(workbook); - } - - void ReportExcel::header() - { - std::locale mylocale(std::cout.getloc(), new separated); - std::locale::global(mylocale); - std::cout.imbue(mylocale); - std::stringstream oss; - std::string message = data["model"].get() + " ver. " + data["version"].get() + " " + - data["language"].get() + " ver. " + data["language_version"].get() + - " with " + std::to_string(data["folds"].get()) + " Folds cross validation and " + std::to_string(data["seeds"].size()) + - " random seeds. " + data["date"].get() + " " + data["time"].get(); - worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]); - worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get().c_str(), styles["headerRest"]); - worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get()).c_str(), styles["headerRest"]); - worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]); - oss << std::setprecision(2) << std::fixed << data["duration"].get() << " s"; - worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]); - oss.str(""); - oss.clear(); - oss << std::setprecision(2) << std::fixed << data["duration"].get() / 3600 << " h"; - worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]); - worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]); - worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get().c_str(), styles["headerRest"]); - worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]); - oss.str(""); - oss.clear(); - oss << "Stratified: " << (data["stratified"].get() ? "True" : "False"); - worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]); - oss.str(""); - oss.clear(); - oss << "Discretized: " << (data["discretized"].get() ? "True" : "False"); - worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]); - } - - void ReportExcel::body() - { - auto head = std::vector( - { "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time", - "Time Std.", "Hyperparameters" }); - int col = 0; - for (const auto& item : head) { - writeString(5, col++, item, "bodyHeader"); - } - row = 6; - col = 0; - int hypSize = 22; - json lastResult; - double totalScore = 0.0; - std::string hyperparameters; - for (const auto& r : data["results"]) { - writeString(row, col, r["dataset"].get(), "text"); - writeInt(row, col + 1, r["samples"].get(), "ints"); - writeInt(row, col + 2, r["features"].get(), "ints"); - writeInt(row, col + 3, r["classes"].get(), "ints"); - writeDouble(row, col + 4, r["nodes"].get(), "floats"); - writeDouble(row, col + 5, r["leaves"].get(), "floats"); - writeDouble(row, col + 6, r["depth"].get(), "floats"); - writeDouble(row, col + 7, r["score"].get(), "result"); - writeDouble(row, col + 8, r["score_std"].get(), "result"); - const std::string status = compareResult(r["dataset"].get(), r["score"].get()); - writeString(row, col + 9, status, "textCentered"); - writeDouble(row, col + 10, r["time"].get(), "time"); - writeDouble(row, col + 11, r["time_std"].get(), "time"); - hyperparameters = r["hyperparameters"].dump(); - if (hyperparameters.size() > hypSize) { - hypSize = hyperparameters.size(); - } - writeString(row, col + 12, hyperparameters, "text"); - lastResult = r; - totalScore += r["score"].get(); - row++; - } - // Set the right column width of hyperparameters with the maximum length - worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL); - // Show totals if only one dataset is present in the result - if (data["results"].size() == 1) { - for (const std::string& group : { "scores_train", "scores_test", "times_train", "times_test" }) { - row++; - col = 1; - writeString(row, col, group, "text"); - for (double item : lastResult[group]) { - std::string style = group.find("scores") != std::string::npos ? "result" : "time"; - writeDouble(row, ++col, item, style); - } - } - // Set with of columns to show those totals completely - worksheet_set_column(worksheet, 1, 1, 12, NULL); - for (int i = 2; i < 7; ++i) { - // doesn't work with from col to col, so... - worksheet_set_column(worksheet, i, i, 15, NULL); - } - } else { - footer(totalScore, row); - } - } - - void ReportExcel::showSummary() - { - for (const auto& item : summary) { - worksheet_write_string(worksheet, row + 2, 1, item.first.c_str(), styles["summaryStyle"]); - worksheet_write_number(worksheet, row + 2, 2, item.second, styles["summaryStyle"]); - worksheet_merge_range(worksheet, row + 2, 3, row + 2, 5, meaning.at(item.first).c_str(), styles["summaryStyle"]); - row += 1; - } - } - - void ReportExcel::footer(double totalScore, int row) - { - showSummary(); - row += 4 + summary.size(); - auto score = data["score_name"].get(); - auto best = BestScore::getScore(score); - if (best.first != "") { - worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + best.first + " .:").c_str(), efectiveStyle("text")); - writeDouble(row, 6, totalScore / best.second, "result"); - } - if (!getExistBestFile() && compare) { - worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]); - } - } -} \ No newline at end of file diff --git a/src/Platform/ReportExcel.h b/src/Platform/ReportExcel.h deleted file mode 100644 index b7fda10..0000000 --- a/src/Platform/ReportExcel.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef REPORTEXCEL_H -#define REPORTEXCEL_H -#include -#include "xlsxwriter.h" -#include "ReportBase.h" -#include "ExcelFile.h" -#include "Colors.h" -namespace platform { - class ReportExcel : public ReportBase, public ExcelFile { - public: - explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL); - private: - void formatColumns(); - void createFile(); - void createWorksheet(); - void closeFile(); - void header() override; - void body() override; - void showSummary() override; - void footer(double totalScore, int row); - - }; -}; -#endif // !REPORTEXCEL_H \ No newline at end of file diff --git a/src/Platform/Result.cc b/src/Platform/Result.cc deleted file mode 100644 index 43c33d1..0000000 --- a/src/Platform/Result.cc +++ /dev/null @@ -1,58 +0,0 @@ -#include "Result.h" -#include "BestScore.h" -#include -#include -#include -#include "Colors.h" -#include "DotEnv.h" -#include "CLocale.h" - -namespace platform { - Result::Result(const std::string& path, const std::string& filename) - : path(path) - , filename(filename) - { - auto data = load(); - date = data["date"]; - score = 0; - for (const auto& result : data["results"]) { - score += result["score"].get(); - } - scoreName = data["score_name"]; - auto best = BestScore::getScore(scoreName); - if (best.first != "") { - score /= best.second; - } - title = data["title"]; - duration = data["duration"]; - model = data["model"]; - complete = data["results"].size() > 1; - } - - json Result::load() const - { - std::ifstream resultData(path + "/" + filename); - if (resultData.is_open()) { - json data = json::parse(resultData); - return data; - } - throw std::invalid_argument("Unable to open result file. [" + path + "/" + filename + "]"); - } - - std::string Result::to_string(int maxModel) const - { - auto tmp = ConfigLocale(); - std::stringstream oss; - double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration; - std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s"; - oss << date << " "; - oss << std::setw(maxModel) << std::left << model << " "; - oss << std::setw(11) << std::left << scoreName << " "; - oss << std::right << std::setw(11) << std::setprecision(7) << std::fixed << score << " "; - auto completeString = isComplete() ? "C" : "P"; - oss << std::setw(1) << " " << completeString << " "; - oss << std::setw(7) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " "; - oss << std::setw(50) << std::left << title << " "; - return oss.str(); - } -} \ No newline at end of file diff --git a/src/Platform/Result.h b/src/Platform/Result.h deleted file mode 100644 index 10459b7..0000000 --- a/src/Platform/Result.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef RESULT_H -#define RESULT_H -#include -#include -#include -#include -namespace platform { - using json = nlohmann::json; - - class Result { - public: - Result(const std::string& path, const std::string& filename); - json load() const; - std::string to_string(int maxModel) const; - std::string getFilename() const { return filename; }; - std::string getDate() const { return date; }; - double getScore() const { return score; }; - std::string getTitle() const { return title; }; - double getDuration() const { return duration; }; - std::string getModel() const { return model; }; - std::string getScoreName() const { return scoreName; }; - bool isComplete() const { return complete; }; - private: - std::string path; - std::string filename; - std::string date; - double score; - std::string title; - double duration; - std::string model; - std::string scoreName; - bool complete; - }; -}; -#endif \ No newline at end of file diff --git a/src/Platform/Results.cc b/src/Platform/Results.cc deleted file mode 100644 index 4f6184f..0000000 --- a/src/Platform/Results.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include "Results.h" -#include - -namespace platform { - Results::Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial) : - path(path), model(model), scoreName(score), complete(complete), partial(partial) - { - load(); - if (!files.empty()) { - maxModel = (*max_element(files.begin(), files.end(), [](const Result& a, const Result& b) { return a.getModel().size() < b.getModel().size(); })).getModel().size(); - } else { - maxModel = 0; - } - }; - void Results::load() - { - using std::filesystem::directory_iterator; - for (const auto& file : directory_iterator(path)) { - auto filename = file.path().filename().string(); - if (filename.find(".json") != std::string::npos && filename.find("results_") == 0) { - auto result = Result(path, filename); - bool addResult = true; - if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName() || complete && !result.isComplete() || partial && result.isComplete()) - addResult = false; - if (addResult) - files.push_back(result); - } - } - } - void Results::hideResult(int index, const std::string& pathHidden) - { - auto filename = files.at(index).getFilename(); - rename((path + "/" + filename).c_str(), (pathHidden + "/" + filename).c_str()); - files.erase(files.begin() + index); - } - void Results::deleteResult(int index) - { - auto filename = files.at(index).getFilename(); - remove((path + "/" + filename).c_str()); - files.erase(files.begin() + index); - } - int Results::size() const - { - return files.size(); - } - void Results::sortDate() - { - sort(files.begin(), files.end(), [](const Result& a, const Result& b) { - return a.getDate() > b.getDate(); - }); - } - void Results::sortModel() - { - sort(files.begin(), files.end(), [](const Result& a, const Result& b) { - return a.getModel() > b.getModel(); - }); - } - void Results::sortDuration() - { - sort(files.begin(), files.end(), [](const Result& a, const Result& b) { - return a.getDuration() > b.getDuration(); - }); - } - void Results::sortScore() - { - sort(files.begin(), files.end(), [](const Result& a, const Result& b) { - return a.getScore() > b.getScore(); - }); - } - bool Results::empty() const - { - return files.empty(); - } -} \ No newline at end of file diff --git a/src/Platform/Results.h b/src/Platform/Results.h deleted file mode 100644 index 9f9023f..0000000 --- a/src/Platform/Results.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef RESULTS_H -#define RESULTS_H -#include -#include -#include -#include -#include "Result.h" -namespace platform { - using json = nlohmann::json; - class Results { - public: - Results(const std::string& path, const std::string& model, const std::string& score, bool complete, bool partial); - void sortDate(); - void sortScore(); - void sortModel(); - void sortDuration(); - int maxModelSize() const { return maxModel; }; - void hideResult(int index, const std::string& pathHidden); - void deleteResult(int index); - int size() const; - bool empty() const; - std::vector::iterator begin() { return files.begin(); }; - std::vector::iterator end() { return files.end(); }; - Result& at(int index) { return files.at(index); }; - private: - std::string path; - std::string model; - std::string scoreName; - bool complete; - bool partial; - int maxModel; - std::vector files; - void load(); // Loads the list of results - }; -}; -#endif \ No newline at end of file diff --git a/src/Platform/Statistics.cc b/src/Platform/Statistics.cc deleted file mode 100644 index 2d58c36..0000000 --- a/src/Platform/Statistics.cc +++ /dev/null @@ -1,252 +0,0 @@ -#include -#include "Statistics.h" -#include "Colors.h" -#include "Symbols.h" -#include -#include -#include "CLocale.h" - - -namespace platform { - - Statistics::Statistics(const std::vector& models, const std::vector& datasets, const json& data, double significance, bool output) : - models(models), datasets(datasets), data(data), significance(significance), output(output) - { - nModels = models.size(); - nDatasets = datasets.size(); - auto temp = ConfigLocale(); - }; - - void Statistics::fit() - { - if (nModels < 3 || nDatasets < 3) { - std::cerr << "nModels: " << nModels << std::endl; - std::cerr << "nDatasets: " << nDatasets << std::endl; - throw std::runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets."); - } - ranksModels.clear(); - computeRanks(); - // Set the control model as the one with the lowest average rank - controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); - computeWTL(); - maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); - maxDatasetName = (*std::max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); - fitted = true; - } - std::map assignRanks(std::vector>& ranksOrder) - { - // sort the ranksOrder std::vector by value - std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair& a, const std::pair& b) { - return a.second > b.second; - }); - //Assign ranks to values and if they are the same they share the same averaged rank - std::map ranks; - for (int i = 0; i < ranksOrder.size(); i++) { - ranks[ranksOrder[i].first] = i + 1.0; - } - int i = 0; - while (i < static_cast(ranksOrder.size())) { - int j = i + 1; - int sumRanks = ranks[ranksOrder[i].first]; - while (j < static_cast(ranksOrder.size()) && ranksOrder[i].second == ranksOrder[j].second) { - sumRanks += ranks[ranksOrder[j++].first]; - } - if (j > i + 1) { - float averageRank = (float)sumRanks / (j - i); - for (int k = i; k < j; k++) { - ranks[ranksOrder[k].first] = averageRank; - } - } - i = j; - } - return ranks; - } - void Statistics::computeRanks() - { - std::map ranksLine; - for (const auto& dataset : datasets) { - std::vector> ranksOrder; - for (const auto& model : models) { - double value = data[model].at(dataset).at(0).get(); - ranksOrder.push_back({ model, value }); - } - // Assign the ranks - ranksLine = assignRanks(ranksOrder); - // Store the ranks of the dataset - ranksModels[dataset] = ranksLine; - if (ranks.size() == 0) { - ranks = ranksLine; - } else { - for (const auto& rank : ranksLine) { - ranks[rank.first] += rank.second; - } - } - } - // Average the ranks - for (const auto& rank : ranks) { - ranks[rank.first] /= nDatasets; - } - } - void Statistics::computeWTL() - { - // Compute the WTL matrix - for (int i = 0; i < nModels; ++i) { - wtl[i] = { 0, 0, 0 }; - } - json origin = data.begin().value(); - for (auto const& item : origin.items()) { - auto controlModel = models.at(controlIdx); - double controlValue = data[controlModel].at(item.key()).at(0).get(); - for (int i = 0; i < nModels; ++i) { - if (i == controlIdx) { - continue; - } - double value = data[models[i]].at(item.key()).at(0).get(); - if (value < controlValue) { - wtl[i].win++; - } else if (value == controlValue) { - wtl[i].tie++; - } else { - wtl[i].loss++; - } - } - } - } - - void Statistics::postHocHolmTest(bool friedmanResult) - { - if (!fitted) { - fit(); - } - std::stringstream oss; - // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8 - // Post-hoc Holm test - // Calculate the p-value for the models paired with the control model - std::map stats; // p-value of each model paired with the control model - boost::math::normal dist(0.0, 1.0); - double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets)); - for (int i = 0; i < nModels; i++) { - if (i == controlIdx) { - stats[i] = 0.0; - continue; - } - double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff; - double p_value = (long double)2 * (1 - cdf(dist, z)); - stats[i] = p_value; - } - // Sort the models by p-value - std::vector> statsOrder; - for (const auto& stat : stats) { - statsOrder.push_back({ stat.first, stat.second }); - } - std::sort(statsOrder.begin(), statsOrder.end(), [](const std::pair& a, const std::pair& b) { - return a.second < b.second; - }); - - // Holm adjustment - for (int i = 0; i < statsOrder.size(); ++i) { - auto item = statsOrder.at(i); - double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second; - double p_value = std::min((double)1.0, item.second * (nModels - i)); - p_value = std::max(before, p_value); - statsOrder[i] = { item.first, p_value }; - } - holmResult.model = models.at(controlIdx); - auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW(); - oss << color; - oss << " *************************************************************************************************************" << std::endl; - oss << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl; - oss << " Control model: " << models.at(controlIdx) << std::endl; - oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value rank win tie loss Status" << std::endl; - oss << " " << std::string(maxModelName, '=') << " ============ ========= === === ==== =============" << std::endl; - // sort ranks from lowest to highest - std::vector> ranksOrder; - for (const auto& rank : ranks) { - ranksOrder.push_back({ rank.first, rank.second }); - } - std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair& a, const std::pair& b) { - return a.second < b.second; - }); - // Show the control model info. - oss << " " << Colors::BLUE() << std::left << std::setw(maxModelName) << ranksOrder.at(0).first << " "; - oss << std::setw(12) << " " << std::setprecision(7) << std::fixed << " " << ranksOrder.at(0).second << std::endl; - for (const auto& item : ranksOrder) { - auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first)); - double pvalue = 0.0; - for (const auto& stat : statsOrder) { - if (stat.first == idx) { - pvalue = stat.second; - } - } - holmResult.holmLines.push_back({ item.first, pvalue, item.second, wtl.at(idx), pvalue < significance }); - if (item.first == models.at(controlIdx)) { - continue; - } - auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA(); - auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross; - auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0"; - oss << " " << colorStatus << std::left << std::setw(maxModelName) << item.first << " "; - oss << std::setprecision(6) << std::scientific << pvalue << std::setprecision(7) << std::fixed << " " << item.second; - oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss; - oss << " " << status << textStatus << std::endl; - } - oss << color << " *************************************************************************************************************" << std::endl; - oss << Colors::RESET(); - if (output) { - std::cout << oss.str(); - } - } - bool Statistics::friedmanTest() - { - if (!fitted) { - fit(); - } - std::stringstream oss; - // Friedman test - // Calculate the Friedman statistic - oss << Colors::BLUE() << std::endl; - oss << "***************************************************************************************************************" << std::endl; - oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << std::endl; - double degreesOfFreedom = nModels - 1.0; - double sumSquared = 0; - for (const auto& rank : ranks) { - sumSquared += pow(rank.second, 2); - } - // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8 - double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4); - // Calculate the critical value - boost::math::chi_squared chiSquared(degreesOfFreedom); - long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ); - double criticalValue = quantile(chiSquared, 1 - significance); - oss << "Friedman statistic: " << friedmanQ << std::endl; - oss << "Critical χ2 Value for df=" << std::fixed << (int)degreesOfFreedom - << " and alpha=" << std::setprecision(2) << std::fixed << significance << ": " << std::setprecision(7) << std::scientific << criticalValue << std::endl; - oss << "p-value: " << std::scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << std::setprecision(2) << std::fixed << significance << std::endl; - bool result; - if (p_value < significance) { - oss << Colors::GREEN() << "The null hypothesis H0 is rejected." << std::endl; - result = true; - } else { - oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << std::endl; - result = false; - } - oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << std::endl; - if (output) { - std::cout << oss.str(); - } - friedmanResult = { friedmanQ, criticalValue, p_value, result }; - return result; - } - FriedmanResult& Statistics::getFriedmanResult() - { - return friedmanResult; - } - HolmResult& Statistics::getHolmResult() - { - return holmResult; - } - std::map>& Statistics::getRanks() - { - return ranksModels; - } -} // namespace platform diff --git a/src/Platform/Statistics.h b/src/Platform/Statistics.h deleted file mode 100644 index aee7409..0000000 --- a/src/Platform/Statistics.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef STATISTICS_H -#define STATISTICS_H -#include -#include -#include -#include - -using json = nlohmann::json; - -namespace platform { - struct WTL { - int win; - int tie; - int loss; - }; - struct FriedmanResult { - double statistic; - double criticalValue; - long double pvalue; - bool reject; - }; - struct HolmLine { - std::string model; - long double pvalue; - double rank; - WTL wtl; - bool reject; - }; - struct HolmResult { - std::string model; - std::vector holmLines; - }; - class Statistics { - public: - Statistics(const std::vector& models, const std::vector& datasets, const json& data, double significance = 0.05, bool output = true); - bool friedmanTest(); - void postHocHolmTest(bool friedmanResult); - FriedmanResult& getFriedmanResult(); - HolmResult& getHolmResult(); - std::map>& getRanks(); - private: - void fit(); - void computeRanks(); - void computeWTL(); - const std::vector& models; - const std::vector& datasets; - const json& data; - double significance; - bool output; - bool fitted = false; - int nModels = 0; - int nDatasets = 0; - int controlIdx = 0; - std::map wtl; - std::map ranks; - int maxModelName = 0; - int maxDatasetName = 0; - FriedmanResult friedmanResult; - HolmResult holmResult; - std::map> ranksModels; - }; -} -#endif // !STATISTICS_H \ No newline at end of file diff --git a/src/Platform/Symbols.h b/src/Platform/Symbols.h deleted file mode 100644 index 5a8c9be..0000000 --- a/src/Platform/Symbols.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef SYMBOLS_H -#define SYMBOLS_H -#include -namespace platform { - class Symbols { - public: - inline static const std::string check_mark{ "\u2714" }; - inline static const std::string exclamation{ "\u2757" }; - inline static const std::string black_star{ "\u2605" }; - inline static const std::string cross{ "\u2717" }; - inline static const std::string upward_arrow{ "\u27B6" }; - inline static const std::string down_arrow{ "\u27B4" }; - inline static const std::string equal_best{ check_mark }; - inline static const std::string better_best{ black_star }; - }; -} -#endif // !SYMBOLS_H \ No newline at end of file diff --git a/src/Platform/Timer.h b/src/Platform/Timer.h deleted file mode 100644 index dd10d94..0000000 --- a/src/Platform/Timer.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef TIMER_H -#define TIMER_H -#include -#include -#include - -namespace platform { - class Timer { - private: - std::chrono::high_resolution_clock::time_point begin; - std::chrono::high_resolution_clock::time_point end; - public: - Timer() = default; - ~Timer() = default; - void start() { begin = std::chrono::high_resolution_clock::now(); } - void stop() { end = std::chrono::high_resolution_clock::now(); } - double getDuration() - { - stop(); - std::chrono::duration time_span = std::chrono::duration_cast> (end - begin); - return time_span.count(); - } - double getLapse() - { - std::chrono::duration time_span = std::chrono::duration_cast> (std::chrono::high_resolution_clock::now() - begin); - return time_span.count(); - } - std::string getDurationString(bool lapse = false) - { - double duration = lapse ? getLapse() : getDuration(); - return translate2String(duration); - } - std::string translate2String(double duration) - { - double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration; - std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s"; - std::stringstream ss; - ss << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit; - return ss.str(); - } - }; -} /* namespace platform */ -#endif /* TIMER_H */ \ No newline at end of file diff --git a/src/Platform/Utils.h b/src/Platform/Utils.h deleted file mode 100644 index 1a08ac5..0000000 --- a/src/Platform/Utils.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef UTILS_H -#define UTILS_H -#include -#include -#include -namespace platform { - //static std::vector split(const std::string& text, char delimiter); - static std::vector split(const std::string& text, char delimiter) - { - std::vector result; - std::stringstream ss(text); - std::string token; - while (std::getline(ss, token, delimiter)) { - result.push_back(token); - } - return result; - } - static std::string trim(const std::string& str) - { - std::string result = str; - result.erase(result.begin(), std::find_if(result.begin(), result.end(), [](int ch) { - return !std::isspace(ch); - })); - result.erase(std::find_if(result.rbegin(), result.rend(), [](int ch) { - return !std::isspace(ch); - }).base(), result.end()); - return result; - } -} -#endif \ No newline at end of file diff --git a/src/Platform/b_best.cc b/src/Platform/b_best.cc deleted file mode 100644 index f305ae5..0000000 --- a/src/Platform/b_best.cc +++ /dev/null @@ -1,85 +0,0 @@ -#include -#include -#include "Paths.h" -#include "BestResults.h" -#include "Colors.h" -#include "config.h" - -void manageArguments(argparse::ArgumentParser& program, int argc, char** argv) -{ - program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model) (any for all models)"); - program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied"); - program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true); - program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true); - program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true); - program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true); - program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const std::string& value) { - try { - auto k = std::stod(value); - if (k < 0.01 || k > 0.15) { - throw std::runtime_error("Significance level hast to be a number in [0.01, 0.15]"); - } - return k; - } - catch (const std::runtime_error& err) { - throw std::runtime_error(err.what()); - } - catch (...) { - throw std::runtime_error("Number of folds must be an decimal number"); - }}); -} - -int main(int argc, char** argv) -{ - argparse::ArgumentParser program("b_best", { project_version.begin(), project_version.end() }); - manageArguments(program, argc, argv); - std::string model, score; - bool build, report, friedman, excel; - double level; - try { - program.parse_args(argc, argv); - model = program.get("model"); - score = program.get("score"); - build = program.get("build"); - report = program.get("report"); - friedman = program.get("friedman"); - excel = program.get("excel"); - level = program.get("level"); - if (model == "" || score == "") { - throw std::runtime_error("Model and score name must be supplied"); - } - if (friedman && model != "any") { - std::cerr << "Friedman test can only be used with all models" << std::endl; - std::cerr << program; - exit(1); - } - if (!report && !build) { - std::cerr << "Either build, report or both, have to be selected to do anything!" << std::endl; - std::cerr << program; - exit(1); - } - } - catch (const std::exception& err) { - std::cerr << err.what() << std::endl; - std::cerr << program; - exit(1); - } - // Generate report - auto results = platform::BestResults(platform::Paths::results(), score, model, friedman, level); - if (build) { - if (model == "any") { - results.buildAll(); - } else { - std::string fileName = results.build(); - std::cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << std::endl; - } - } - if (report) { - if (model == "any") { - results.reportAll(excel); - } else { - results.reportSingle(excel); - } - } - return 0; -} diff --git a/src/Platform/b_grid.cc b/src/Platform/b_grid.cc deleted file mode 100644 index 7a750f8..0000000 --- a/src/Platform/b_grid.cc +++ /dev/null @@ -1,232 +0,0 @@ -#include -#include -#include -#include -#include -#include "DotEnv.h" -#include "Models.h" -#include "modelRegister.h" -#include "GridSearch.h" -#include "Paths.h" -#include "Timer.h" -#include "Colors.h" -#include "config.h" - -using json = nlohmann::json; -const int MAXL = 133; - -void manageArguments(argparse::ArgumentParser& program) -{ - auto env = platform::DotEnv(); - auto& group = program.add_mutually_exclusive_group(true); - program.add_argument("-m", "--model") - .help("Model to use " + platform::Models::instance()->tostring()) - .action([](const std::string& value) { - static const std::vector choices = platform::Models::instance()->getNames(); - if (find(choices.begin(), choices.end(), value) != choices.end()) { - return value; - } - throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring()); - } - ); - group.add_argument("--dump").help("Show the grid combinations").default_value(false).implicit_value(true); - group.add_argument("--report").help("Report the computed hyperparameters").default_value(false).implicit_value(true); - group.add_argument("--compute").help("Perform computation of the grid output hyperparameters").default_value(false).implicit_value(true); - program.add_argument("--discretize").help("Discretize input datasets").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); - program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); - program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true); - program.add_argument("--continue").help("Continue computing from that dataset").default_value(platform::GridSearch::NO_CONTINUE()); - program.add_argument("--only").help("Used with continue to compute that dataset only").default_value(false).implicit_value(true); - program.add_argument("--exclude").default_value("[]").help("Datasets to exclude in json format, e.g. [\"dataset1\", \"dataset2\"]"); - program.add_argument("--nested").help("Set the double/nested cross validation number of folds").default_value(5).scan<'i', int>().action([](const std::string& value) { - try { - auto k = stoi(value); - if (k < 2) { - throw std::runtime_error("Number of nested folds must be greater than 1"); - } - return k; - } - catch (const runtime_error& err) { - throw std::runtime_error(err.what()); - } - catch (...) { - throw std::runtime_error("Number of nested folds must be an integer"); - }}); - program.add_argument("--score").help("Score used in gridsearch").default_value("accuracy"); - program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) { - try { - auto k = stoi(value); - if (k < 2) { - throw std::runtime_error("Number of folds must be greater than 1"); - } - return k; - } - catch (const runtime_error& err) { - throw std::runtime_error(err.what()); - } - catch (...) { - throw std::runtime_error("Number of folds must be an integer"); - }}); - auto seed_values = env.getSeeds(); - program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); -} - -void list_dump(std::string& model) -{ - auto data = platform::GridData(platform::Paths::grid_input(model)); - std::cout << Colors::MAGENTA() << "Listing configuration input file (Grid)" << std::endl << std::endl; - int index = 0; - int max_hyper = 15; - int max_dataset = 7; - auto combinations = data.getGridFile(); - for (auto const& item : combinations) { - if (item.first.size() > max_dataset) { - max_dataset = item.first.size(); - } - if (item.second.dump().size() > max_hyper) { - max_hyper = item.second.dump().size(); - } - } - std::cout << Colors::GREEN() << left << " # " << left << setw(max_dataset) << "Dataset" << " #Com. " - << setw(max_hyper) << "Hyperparameters" << std::endl; - std::cout << "=== " << string(max_dataset, '=') << " ===== " << string(max_hyper, '=') << std::endl; - bool odd = true; - for (auto const& item : combinations) { - auto color = odd ? Colors::CYAN() : Colors::BLUE(); - std::cout << color; - auto num_combinations = data.getNumCombinations(item.first); - std::cout << setw(3) << fixed << right << ++index << left << " " << setw(max_dataset) << item.first - << " " << setw(5) << right << num_combinations << " " << setw(max_hyper) << item.second.dump() << std::endl; - odd = !odd; - } - std::cout << Colors::RESET() << std::endl; -} -std::string headerLine(const std::string& text, int utf = 0) -{ - int n = MAXL - text.length() - 3; - n = n < 0 ? 0 : n; - return "* " + text + std::string(n + utf, ' ') + "*\n"; -} -void list_results(json& results, std::string& model) -{ - std::cout << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl; - std::cout << headerLine("Listing computed hyperparameters for model " + model); - std::cout << headerLine("Date & time: " + results["date"].get() + " Duration: " + results["duration"].get()); - std::cout << headerLine("Score: " + results["score"].get()); - std::cout << headerLine( - "Random seeds: " + results["seeds"].dump() - + " Discretized: " + (results["discretize"].get() ? "True" : "False") - + " Stratified: " + (results["stratified"].get() ? "True" : "False") - + " #Folds: " + std::to_string(results["n_folds"].get()) - + " Nested: " + (results["nested"].get() == 0 ? "False" : to_string(results["nested"].get())) - ); - std::cout << std::string(MAXL, '*') << std::endl; - int spaces = 7; - int hyperparameters_spaces = 15; - for (const auto& item : results["results"].items()) { - auto key = item.key(); - auto value = item.value(); - if (key.size() > spaces) { - spaces = key.size(); - } - if (value["hyperparameters"].dump().size() > hyperparameters_spaces) { - hyperparameters_spaces = value["hyperparameters"].dump().size(); - } - } - std::cout << Colors::GREEN() << " # " << left << setw(spaces) << "Dataset" << " " << setw(19) << "Date" << " " - << "Duration " << setw(8) << "Score" << " " << "Hyperparameters" << std::endl; - std::cout << "=== " << string(spaces, '=') << " " << string(19, '=') << " " << string(8, '=') << " " - << string(8, '=') << " " << string(hyperparameters_spaces, '=') << std::endl; - bool odd = true; - int index = 0; - for (const auto& item : results["results"].items()) { - auto color = odd ? Colors::CYAN() : Colors::BLUE(); - auto value = item.value(); - std::cout << color; - std::cout << std::setw(3) << std::right << index++ << " "; - std::cout << left << setw(spaces) << item.key() << " " << value["date"].get() - << " " << setw(8) << right << value["duration"].get() << " " << setw(8) << setprecision(6) - << fixed << right << value["score"].get() << " " << value["hyperparameters"].dump() << std::endl; - odd = !odd; - } - std::cout << Colors::RESET() << std::endl; -} - -/* - * Main - */ -int main(int argc, char** argv) -{ - argparse::ArgumentParser program("b_grid", { project_version.begin(), project_version.end() }); - manageArguments(program); - struct platform::ConfigGrid config; - bool dump, compute; - try { - program.parse_args(argc, argv); - config.model = program.get("model"); - config.score = program.get("score"); - config.discretize = program.get("discretize"); - config.stratified = program.get("stratified"); - config.n_folds = program.get("folds"); - config.quiet = program.get("quiet"); - config.only = program.get("only"); - config.seeds = program.get>("seeds"); - config.nested = program.get("nested"); - config.continue_from = program.get("continue"); - if (config.continue_from == platform::GridSearch::NO_CONTINUE() && config.only) { - throw std::runtime_error("Cannot use --only without --continue"); - } - dump = program.get("dump"); - compute = program.get("compute"); - if (dump && (config.continue_from != platform::GridSearch::NO_CONTINUE() || config.only)) { - throw std::runtime_error("Cannot use --dump with --continue or --only"); - } - auto excluded = program.get("exclude"); - config.excluded = json::parse(excluded); - } - catch (const exception& err) { - cerr << err.what() << std::endl; - cerr << program; - exit(1); - } - /* - * Begin Processing - */ - auto env = platform::DotEnv(); - config.platform = env.get("platform"); - platform::Paths::createPath(platform::Paths::grid()); - auto grid_search = platform::GridSearch(config); - platform::Timer timer; - timer.start(); - if (dump) { - list_dump(config.model); - } else { - if (compute) { - struct platform::ConfigMPI mpi_config; - mpi_config.manager = 0; // which process is the manager - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_config.rank); - MPI_Comm_size(MPI_COMM_WORLD, &mpi_config.n_procs); - if (mpi_config.n_procs < 2) { - throw std::runtime_error("Cannot use --compute with less than 2 mpi processes, try mpirun -np 2 ..."); - } - grid_search.go(mpi_config); - if (mpi_config.rank == mpi_config.manager) { - auto results = grid_search.loadResults(); - list_results(results, config.model); - std::cout << "Process took " << timer.getDurationString() << std::endl; - } - MPI_Finalize(); - } else { - // List results - auto results = grid_search.loadResults(); - if (results.empty()) { - std::cout << "** No results found" << std::endl; - } else { - list_results(results, config.model); - } - } - } - std::cout << "Done!" << std::endl; - return 0; -} diff --git a/src/Platform/b_list.cc b/src/Platform/b_list.cc deleted file mode 100644 index 00907aa..0000000 --- a/src/Platform/b_list.cc +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include "Paths.h" -#include "Colors.h" -#include "Datasets.h" - -const int BALANCE_LENGTH = 75; - -struct separated : numpunct { - char do_decimal_point() const { return ','; } - char do_thousands_sep() const { return '.'; } - std::string do_grouping() const { return "\03"; } -}; - -void outputBalance(const std::string& balance) -{ - auto temp = std::string(balance); - while (temp.size() > BALANCE_LENGTH - 1) { - auto part = temp.substr(0, BALANCE_LENGTH); - std::cout << part << std::endl; - std::cout << setw(48) << " "; - temp = temp.substr(BALANCE_LENGTH); - } - std::cout << temp << std::endl; -} - -int main(int argc, char** argv) -{ - auto data = platform::Datasets(false, platform::Paths::datasets()); - locale mylocale(std::cout.getloc(), new separated); - locale::global(mylocale); - std::cout.imbue(mylocale); - std::cout << Colors::GREEN() << "Dataset Sampl. Feat. Cls. Balance" << std::endl; - std::string balanceBars = std::string(BALANCE_LENGTH, '='); - std::cout << "============================== ====== ===== === " << balanceBars << std::endl; - bool odd = true; - for (const auto& dataset : data.getNames()) { - auto color = odd ? Colors::CYAN() : Colors::BLUE(); - std::cout << color << setw(30) << left << dataset << " "; - data.loadDataset(dataset); - auto nSamples = data.getNSamples(dataset); - std::cout << setw(6) << right << nSamples << " "; - std::cout << setw(5) << right << data.getFeatures(dataset).size() << " "; - std::cout << setw(3) << right << data.getNClasses(dataset) << " "; - std::stringstream oss; - std::string sep = ""; - for (auto number : data.getClassesCounts(dataset)) { - oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; - sep = " / "; - } - outputBalance(oss.str()); - odd = !odd; - } - std::cout << Colors::RESET() << std::endl; - return 0; -} diff --git a/src/Platform/b_main.cc b/src/Platform/b_main.cc deleted file mode 100644 index 7872239..0000000 --- a/src/Platform/b_main.cc +++ /dev/null @@ -1,135 +0,0 @@ -#include -#include -#include -#include "Experiment.h" -#include "Datasets.h" -#include "DotEnv.h" -#include "Models.h" -#include "modelRegister.h" -#include "Paths.h" -#include "config.h" - - -using json = nlohmann::json; - -void manageArguments(argparse::ArgumentParser& program) -{ - auto env = platform::DotEnv(); - program.add_argument("-d", "--dataset").default_value("").help("Dataset file name"); - program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment"); - program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \ - "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format."); - program.add_argument("-m", "--model") - .help("Model to use " + platform::Models::instance()->tostring()) - .action([](const std::string& value) { - static const std::vector choices = platform::Models::instance()->getNames(); - if (find(choices.begin(), choices.end(), value) != choices.end()) { - return value; - } - throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring()); - } - ); - program.add_argument("--title").default_value("").help("Experiment title"); - program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); - program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true); - program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true); - program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); - program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) { - try { - auto k = stoi(value); - if (k < 2) { - throw std::runtime_error("Number of folds must be greater than 1"); - } - return k; - } - catch (const runtime_error& err) { - throw std::runtime_error(err.what()); - } - catch (...) { - throw std::runtime_error("Number of folds must be an integer"); - }}); - auto seed_values = env.getSeeds(); - program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); -} - -int main(int argc, char** argv) -{ - argparse::ArgumentParser program("b_main", { project_version.begin(), project_version.end() }); - manageArguments(program); - std::string file_name, model_name, title, hyperparameters_file; - json hyperparameters_json; - bool discretize_dataset, stratified, saveResults, quiet; - std::vector seeds; - std::vector filesToTest; - int n_folds; - try { - program.parse_args(argc, argv); - file_name = program.get("dataset"); - model_name = program.get("model"); - discretize_dataset = program.get("discretize"); - stratified = program.get("stratified"); - quiet = program.get("quiet"); - n_folds = program.get("folds"); - seeds = program.get>("seeds"); - auto hyperparameters = program.get("hyperparameters"); - hyperparameters_json = json::parse(hyperparameters); - hyperparameters_file = program.get("hyper-file"); - if (hyperparameters_file != "" && hyperparameters != "{}") { - throw runtime_error("hyperparameters and hyper_file are mutually exclusive"); - } - title = program.get("title"); - if (title == "" && file_name == "") { - throw runtime_error("title is mandatory if dataset is not provided"); - } - saveResults = program.get("save"); - } - catch (const exception& err) { - cerr << err.what() << std::endl; - cerr << program; - exit(1); - } - auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets()); - if (file_name != "") { - if (!datasets.isDataset(file_name)) { - cerr << "Dataset " << file_name << " not found" << std::endl; - exit(1); - } - if (title == "") { - title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds"; - } - filesToTest.push_back(file_name); - } else { - filesToTest = datasets.getNames(); - saveResults = true; - } - platform::HyperParameters test_hyperparams; - if (hyperparameters_file != "") { - test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file); - } else { - test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json); - } - - /* - * Begin Processing - */ - auto env = platform::DotEnv(); - auto experiment = platform::Experiment(); - experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3"); - experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); - experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy"); - experiment.setHyperparameters(test_hyperparams); - for (auto seed : seeds) { - experiment.addRandomSeed(seed); - } - platform::Timer timer; - timer.start(); - experiment.go(filesToTest, quiet); - experiment.setDuration(timer.getDuration()); - if (saveResults) { - experiment.save(platform::Paths::results()); - } - if (!quiet) - experiment.report(); - std::cout << "Done!" << std::endl; - return 0; -} diff --git a/src/Platform/b_manage.cc b/src/Platform/b_manage.cc deleted file mode 100644 index 3b499c6..0000000 --- a/src/Platform/b_manage.cc +++ /dev/null @@ -1,49 +0,0 @@ -#include -#include -#include "ManageResults.h" -#include "config.h" - - -void manageArguments(argparse::ArgumentParser& program, int argc, char** argv) -{ - program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>(); - program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)"); - program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied"); - program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true); - program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true); - program.add_argument("--compare").help("Compare with best results").default_value(false).implicit_value(true); - try { - program.parse_args(argc, argv); - auto number = program.get("number"); - if (number < 0) { - throw std::runtime_error("Number of results must be greater than or equal to 0"); - } - auto model = program.get("model"); - auto score = program.get("score"); - auto complete = program.get("complete"); - auto partial = program.get("partial"); - auto compare = program.get("compare"); - } - catch (const std::exception& err) { - std::cerr << err.what() << std::endl; - std::cerr << program; - exit(1); - } -} - -int main(int argc, char** argv) -{ - auto program = argparse::ArgumentParser("b_manage", { project_version.begin(), project_version.end() }); - manageArguments(program, argc, argv); - int number = program.get("number"); - std::string model = program.get("model"); - std::string score = program.get("score"); - auto complete = program.get("complete"); - auto partial = program.get("partial"); - auto compare = program.get("compare"); - if (complete) - partial = false; - auto manager = platform::ManageResults(number, model, score, complete, partial, compare); - manager.doMenu(); - return 0; -} diff --git a/src/Platform/modelRegister.h b/src/Platform/modelRegister.h deleted file mode 100644 index 7f58401..0000000 --- a/src/Platform/modelRegister.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef MODEL_REGISTER_H -#define MODEL_REGISTER_H -static platform::Registrar registrarT("TAN", - [](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();}); -static platform::Registrar registrarTLD("TANLd", - [](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANLd();}); -static platform::Registrar registrarS("SPODE", - [](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);}); -static platform::Registrar registrarSLD("SPODELd", - [](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2);}); -static platform::Registrar registrarK("KDB", - [](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);}); -static platform::Registrar registrarKLD("KDBLd", - [](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2);}); -static platform::Registrar registrarA("AODE", - [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();}); -static platform::Registrar registrarALD("AODELd", - [](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();}); -static platform::Registrar registrarBA("BoostAODE", - [](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();}); -static platform::Registrar registrarSt("STree", - [](void) -> bayesnet::BaseClassifier* { return new pywrap::STree();}); -static platform::Registrar registrarOdte("Odte", - [](void) -> bayesnet::BaseClassifier* { return new pywrap::ODTE();}); -static platform::Registrar registrarSvc("SVC", - [](void) -> bayesnet::BaseClassifier* { return new pywrap::SVC();}); -static platform::Registrar registrarRaF("RandomForest", - [](void) -> bayesnet::BaseClassifier* { return new pywrap::RandomForest();}); -#endif \ No newline at end of file diff --git a/src/PyClassifiers/CMakeLists.txt b/src/PyClassifiers/CMakeLists.txt deleted file mode 100644 index 63714da..0000000 --- a/src/PyClassifiers/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -include_directories( - ${BayesNet_SOURCE_DIR}/lib/Files - ${BayesNet_SOURCE_DIR}/lib/json/include - ${BayesNet_SOURCE_DIR}/src/BayesNet - ${Python3_INCLUDE_DIRS} - ${TORCH_INCLUDE_DIRS} -) -add_library(PyWrap SHARED PyWrap.cc STree.cc ODTE.cc SVC.cc RandomForest.cc PyClassifier.cc) -#target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy xgboost::xgboost ArffFiles) -target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles) \ No newline at end of file diff --git a/src/PyClassifiers/ODTE.cc b/src/PyClassifiers/ODTE.cc deleted file mode 100644 index 11b1433..0000000 --- a/src/PyClassifiers/ODTE.cc +++ /dev/null @@ -1,24 +0,0 @@ -#include "ODTE.h" - -namespace pywrap { - ODTE::ODTE() : PyClassifier("odte", "Odte") - { - validHyperparameters = { "n_jobs", "n_estimators", "random_state" }; - } - int ODTE::getNumberOfNodes() const - { - return callMethodInt("get_nodes"); - } - int ODTE::getNumberOfEdges() const - { - return callMethodInt("get_leaves"); - } - int ODTE::getNumberOfStates() const - { - return callMethodInt("get_depth"); - } - std::string ODTE::graph() - { - return callMethodString("graph"); - } -} /* namespace pywrap */ \ No newline at end of file diff --git a/src/PyClassifiers/ODTE.h b/src/PyClassifiers/ODTE.h deleted file mode 100644 index 0f968f3..0000000 --- a/src/PyClassifiers/ODTE.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef ODTE_H -#define ODTE_H -#include "nlohmann/json.hpp" -#include "PyClassifier.h" - -namespace pywrap { - class ODTE : public PyClassifier { - public: - ODTE(); - ~ODTE() = default; - int getNumberOfNodes() const override; - int getNumberOfEdges() const override; - int getNumberOfStates() const override; - std::string graph(); - }; -} /* namespace pywrap */ -#endif /* ODTE_H */ \ No newline at end of file diff --git a/src/PyClassifiers/PyClassifier.cc b/src/PyClassifiers/PyClassifier.cc deleted file mode 100644 index aeb3798..0000000 --- a/src/PyClassifiers/PyClassifier.cc +++ /dev/null @@ -1,96 +0,0 @@ -#include "PyClassifier.h" -namespace pywrap { - namespace bp = boost::python; - namespace np = boost::python::numpy; - PyClassifier::PyClassifier(const std::string& module, const std::string& className, bool sklearn) : module(module), className(className), sklearn(sklearn), fitted(false) - { - // This id allows to have more than one instance of the same module/class - id = reinterpret_cast(this); - pyWrap = PyWrap::GetInstance(); - pyWrap->importClass(id, module, className); - } - PyClassifier::~PyClassifier() - { - pyWrap->clean(id); - } - np::ndarray tensor2numpy(torch::Tensor& X) - { - int m = X.size(0); - int n = X.size(1); - auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin(), bp::make_tuple(m, n), bp::make_tuple(sizeof(X.dtype()) * 2 * n, sizeof(X.dtype()) * 2), bp::object()); - Xn = Xn.transpose(); - return Xn; - } - std::pair tensors2numpy(torch::Tensor& X, torch::Tensor& y) - { - int n = X.size(1); - auto yn = np::from_data(y.data_ptr(), np::dtype::get_builtin(), bp::make_tuple(n), bp::make_tuple(sizeof(y.dtype()) * 2), bp::object()); - return { tensor2numpy(X), yn }; - } - std::string PyClassifier::version() - { - if (sklearn) { - return pyWrap->sklearnVersion(); - } - return pyWrap->version(id); - } - std::string PyClassifier::callMethodString(const std::string& method) - { - return pyWrap->callMethodString(id, method); - } - int PyClassifier::callMethodSumOfItems(const std::string& method) const - { - return pyWrap->callMethodSumOfItems(id, method); - } - int PyClassifier::callMethodInt(const std::string& method) const - { - return pyWrap->callMethodInt(id, method); - } - PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y) - { - if (!fitted && hyperparameters.size() > 0) { - pyWrap->setHyperparameters(id, hyperparameters); - } - auto [Xn, yn] = tensors2numpy(X, y); - CPyObject Xp = bp::incref(bp::object(Xn).ptr()); - CPyObject yp = bp::incref(bp::object(yn).ptr()); - pyWrap->fit(id, Xp, yp); - fitted = true; - return *this; - } - PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) - { - return fit(X, y); - } - torch::Tensor PyClassifier::predict(torch::Tensor& X) - { - int dimension = X.size(1); - auto Xn = tensor2numpy(X); - CPyObject Xp = bp::incref(bp::object(Xn).ptr()); - PyObject* incoming = pyWrap->predict(id, Xp); - bp::handle<> handle(incoming); - bp::object object(handle); - np::ndarray prediction = np::from_object(object); - if (PyErr_Occurred()) { - PyErr_Print(); - throw std::runtime_error("Error creating object for predict in " + module + " and class " + className); - } - int* data = reinterpret_cast(prediction.get_data()); - std::vector vPrediction(data, data + prediction.shape(0)); - auto resultTensor = torch::tensor(vPrediction, torch::kInt32); - Py_XDECREF(incoming); - return resultTensor; - } - float PyClassifier::score(torch::Tensor& X, torch::Tensor& y) - { - auto [Xn, yn] = tensors2numpy(X, y); - CPyObject Xp = bp::incref(bp::object(Xn).ptr()); - CPyObject yp = bp::incref(bp::object(yn).ptr()); - float result = pyWrap->score(id, Xp, yp); - return result; - } - void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters) - { - this->hyperparameters = hyperparameters; - } -} /* namespace pywrap */ \ No newline at end of file diff --git a/src/PyClassifiers/PyClassifier.h b/src/PyClassifiers/PyClassifier.h deleted file mode 100644 index 7260d2e..0000000 --- a/src/PyClassifiers/PyClassifier.h +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef PYCLASSIFIER_H -#define PYCLASSIFIER_H -#include "boost/python/detail/wrap_python.hpp" -#include -#include -#include -#include -#include -#include -#include -#include "PyWrap.h" -#include "Classifier.h" -#include "TypeId.h" - -namespace pywrap { - class PyClassifier : public bayesnet::BaseClassifier { - public: - PyClassifier(const std::string& module, const std::string& className, const bool sklearn = false); - virtual ~PyClassifier(); - PyClassifier& fit(std::vector>& X, std::vector& y, const std::vector& features, const std::string& className, std::map>& states) override { return *this; }; - // X is nxm tensor, y is nx1 tensor - PyClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector& features, const std::string& className, std::map>& states) override; - PyClassifier& fit(torch::Tensor& X, torch::Tensor& y); - PyClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states) override { return *this; }; - PyClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights) override { return *this; }; - torch::Tensor predict(torch::Tensor& X) override; - std::vector predict(std::vector>& X) override { return std::vector(); }; - float score(std::vector>& X, std::vector& y) override { return 0.0; }; - float score(torch::Tensor& X, torch::Tensor& y) override; - std::string version(); - std::string callMethodString(const std::string& method); - int callMethodSumOfItems(const std::string& method) const; - int callMethodInt(const std::string& method) const; - std::string getVersion() override { return this->version(); }; - int getNumberOfNodes() const override { return 0; }; - int getNumberOfEdges() const override { return 0; }; - int getNumberOfStates() const override { return 0; }; - std::vector show() const override { return std::vector(); } - std::vector graph(const std::string& title = "") const override { return std::vector(); } - bayesnet::status_t getStatus() const override { return bayesnet::NORMAL; }; - std::vector topological_order() override { return std::vector(); } - void dump_cpt() const override {}; - void setHyperparameters(const nlohmann::json& hyperparameters) override; - protected: - nlohmann::json hyperparameters; - void trainModel(const torch::Tensor& weights) override {}; - private: - PyWrap* pyWrap; - std::string module; - std::string className; - bool sklearn; - clfId_t id; - bool fitted; - }; -} /* namespace pywrap */ -#endif /* PYCLASSIFIER_H */ \ No newline at end of file diff --git a/src/PyClassifiers/PyClf.h b/src/PyClassifiers/PyClf.h deleted file mode 100644 index 5a49dd8..0000000 --- a/src/PyClassifiers/PyClf.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef PYCLF_H -#define PYCLF_H -#include -#include "DotEnv.h" -namespace PyClassifiers { - class PyClf { - public: - PyClf(const std::string& name); - virtual ~PyClf(); - private: - std::string name; - - }; -} /* namespace PyClassifiers */ -#endif /* PYCLF_H */ \ No newline at end of file diff --git a/src/PyClassifiers/PyHelper.hpp b/src/PyClassifiers/PyHelper.hpp deleted file mode 100644 index fba31e7..0000000 --- a/src/PyClassifiers/PyHelper.hpp +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef PYHELPER_HPP -#define PYHELPER_HPP -#pragma once -// Code taken and adapted from -// https ://www.codeproject.com/Articles/820116/Embedding-Python-program-in-a-C-Cplusplus-code -#include "boost/python/detail/wrap_python.hpp" -#include -#include - -namespace pywrap { - namespace p = boost::python; - namespace np = boost::python::numpy; - class CPyInstance { - public: - CPyInstance() - { - Py_Initialize(); - np::initialize(); - } - - ~CPyInstance() - { - Py_Finalize(); - } - }; - class CPyObject { - private: - PyObject* p; - public: - CPyObject() : p(NULL) - { - } - - CPyObject(PyObject* _p) : p(_p) - { - } - ~CPyObject() - { - Release(); - } - PyObject* getObject() - { - return p; - } - PyObject* setObject(PyObject* _p) - { - return (p = _p); - } - PyObject* AddRef() - { - if (p) { - Py_INCREF(p); - } - return p; - } - void Release() - { - if (p) { - Py_XDECREF(p); - } - - p = NULL; - } - PyObject* operator ->() - { - return p; - } - bool is() - { - return p ? true : false; - } - operator PyObject* () - { - return p; - } - PyObject* operator = (PyObject* pp) - { - p = pp; - return p; - } - operator bool() - { - return p ? true : false; - } - }; -} /* namespace pywrap */ -#endif \ No newline at end of file diff --git a/src/PyClassifiers/PyWrap.cc b/src/PyClassifiers/PyWrap.cc deleted file mode 100644 index 88a5a9c..0000000 --- a/src/PyClassifiers/PyWrap.cc +++ /dev/null @@ -1,255 +0,0 @@ -#define PY_SSIZE_T_CLEAN -#include -#include "PyWrap.h" -#include -#include -#include -#include -#include - -namespace pywrap { - namespace np = boost::python::numpy; - PyWrap* PyWrap::wrapper = nullptr; - std::mutex PyWrap::mutex; - CPyInstance* PyWrap::pyInstance = nullptr; - auto moduleClassMap = std::map, std::tuple>(); - - PyWrap* PyWrap::GetInstance() - { - std::lock_guard lock(mutex); - if (wrapper == nullptr) { - wrapper = new PyWrap(); - pyInstance = new CPyInstance(); - PyRun_SimpleString("import warnings;warnings.filterwarnings('ignore')"); - } - return wrapper; - } - void PyWrap::RemoveInstance() - { - if (wrapper != nullptr) { - if (pyInstance != nullptr) { - delete pyInstance; - } - pyInstance = nullptr; - if (wrapper != nullptr) { - delete wrapper; - } - wrapper = nullptr; - } - } - void PyWrap::importClass(const clfId_t id, const std::string& moduleName, const std::string& className) - { - std::lock_guard lock(mutex); - auto result = moduleClassMap.find(id); - if (result != moduleClassMap.end()) { - return; - } - PyObject* module = PyImport_ImportModule(moduleName.c_str()); - if (PyErr_Occurred()) { - errorAbort("Couldn't import module " + moduleName); - } - PyObject* classObject = PyObject_GetAttrString(module, className.c_str()); - if (PyErr_Occurred()) { - errorAbort("Couldn't find class " + className); - } - PyObject* instance = PyObject_CallObject(classObject, NULL); - if (PyErr_Occurred()) { - errorAbort("Couldn't create instance of class " + className); - } - moduleClassMap.insert({ id, { module, classObject, instance } }); - } - void PyWrap::clean(const clfId_t id) - { - // Remove Python interpreter if no more modules imported left - std::lock_guard lock(mutex); - auto result = moduleClassMap.find(id); - if (result == moduleClassMap.end()) { - return; - } - Py_DECREF(std::get<0>(result->second)); - Py_DECREF(std::get<1>(result->second)); - Py_DECREF(std::get<2>(result->second)); - moduleClassMap.erase(result); - if (PyErr_Occurred()) { - PyErr_Print(); - errorAbort("Error cleaning module "); - } - // With boost you can't remove the interpreter - // https://www.boost.org/doc/libs/1_83_0/libs/python/doc/html/tutorial/tutorial/embedding.html#tutorial.embedding.getting_started - // if (moduleClassMap.empty()) { - // RemoveInstance(); - // } - } - void PyWrap::errorAbort(const std::string& message) - { - std::cerr << message << std::endl; - PyErr_Print(); - RemoveInstance(); - exit(1); - } - PyObject* PyWrap::getClass(const clfId_t id) - { - auto item = moduleClassMap.find(id); - if (item == moduleClassMap.end()) { - errorAbort("Module not found"); - } - return std::get<2>(item->second); - } - std::string PyWrap::callMethodString(const clfId_t id, const std::string& method) - { - PyObject* instance = getClass(id); - PyObject* result; - try { - if (!(result = PyObject_CallMethod(instance, method.c_str(), NULL))) - errorAbort("Couldn't call method " + method); - } - catch (const std::exception& e) { - errorAbort(e.what()); - } - std::string value = PyUnicode_AsUTF8(result); - Py_XDECREF(result); - return value; - } - int PyWrap::callMethodInt(const clfId_t id, const std::string& method) - { - PyObject* instance = getClass(id); - PyObject* result; - try { - if (!(result = PyObject_CallMethod(instance, method.c_str(), NULL))) - errorAbort("Couldn't call method " + method); - } - catch (const std::exception& e) { - errorAbort(e.what()); - } - int value = PyLong_AsLong(result); - Py_XDECREF(result); - return value; - } - std::string PyWrap::sklearnVersion() - { - PyObject* sklearnModule = PyImport_ImportModule("sklearn"); - if (sklearnModule == nullptr) { - errorAbort("Couldn't import sklearn"); - } - PyObject* versionAttr = PyObject_GetAttrString(sklearnModule, "__version__"); - if (versionAttr == nullptr || !PyUnicode_Check(versionAttr)) { - Py_XDECREF(sklearnModule); - errorAbort("Couldn't get sklearn version"); - } - std::string result = PyUnicode_AsUTF8(versionAttr); - Py_XDECREF(versionAttr); - Py_XDECREF(sklearnModule); - return result; - } - std::string PyWrap::version(const clfId_t id) - { - return callMethodString(id, "version"); - } - int PyWrap::callMethodSumOfItems(const clfId_t id, const std::string& method) - { - // Call method on each estimator and sum the results (made for RandomForest) - PyObject* instance = getClass(id); - PyObject* estimators = PyObject_GetAttrString(instance, "estimators_"); - if (estimators == nullptr) { - errorAbort("Failed to get attribute: " + method); - } - int sumOfItems = 0; - Py_ssize_t len = PyList_Size(estimators); - for (Py_ssize_t i = 0; i < len; i++) { - PyObject* estimator = PyList_GetItem(estimators, i); - PyObject* result; - if (method == "node_count") { - PyObject* owner = PyObject_GetAttrString(estimator, "tree_"); - if (owner == nullptr) { - Py_XDECREF(estimators); - errorAbort("Failed to get attribute tree_ for: " + method); - } - result = PyObject_GetAttrString(owner, method.c_str()); - if (result == nullptr) { - Py_XDECREF(estimators); - Py_XDECREF(owner); - errorAbort("Failed to get attribute node_count: " + method); - } - Py_DECREF(owner); - } else { - result = PyObject_CallMethod(estimator, method.c_str(), nullptr); - if (result == nullptr) { - Py_XDECREF(estimators); - errorAbort("Failed to call method: " + method); - } - } - sumOfItems += PyLong_AsLong(result); - Py_DECREF(result); - } - Py_DECREF(estimators); - return sumOfItems; - } - void PyWrap::setHyperparameters(const clfId_t id, const json& hyperparameters) - { - // Set hyperparameters as attributes of the class - PyObject* pValue; - PyObject* instance = getClass(id); - for (const auto& [key, value] : hyperparameters.items()) { - std::stringstream oss; - oss << value.type_name(); - if (oss.str() == "string") { - pValue = Py_BuildValue("s", value.get().c_str()); - } else { - if (value.is_number_integer()) { - pValue = Py_BuildValue("i", value.get()); - } else { - pValue = Py_BuildValue("f", value.get()); - } - } - int res = PyObject_SetAttrString(instance, key.c_str(), pValue); - if (res == -1 && PyErr_Occurred()) { - Py_XDECREF(pValue); - errorAbort("Couldn't set attribute " + key + "=" + value.dump()); - } - Py_XDECREF(pValue); - } - } - void PyWrap::fit(const clfId_t id, CPyObject& X, CPyObject& y) - { - PyObject* instance = getClass(id); - CPyObject result; - CPyObject method = PyUnicode_FromString("fit"); - try { - if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL))) - errorAbort("Couldn't call method fit"); - } - catch (const std::exception& e) { - errorAbort(e.what()); - } - } - PyObject* PyWrap::predict(const clfId_t id, CPyObject& X) - { - PyObject* instance = getClass(id); - PyObject* result; - CPyObject method = PyUnicode_FromString("predict"); - try { - if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), NULL))) - errorAbort("Couldn't call method predict"); - } - catch (const std::exception& e) { - errorAbort(e.what()); - } - Py_INCREF(result); - return result; // Caller must free this object - } - double PyWrap::score(const clfId_t id, CPyObject& X, CPyObject& y) - { - PyObject* instance = getClass(id); - CPyObject result; - CPyObject method = PyUnicode_FromString("score"); - try { - if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL))) - errorAbort("Couldn't call method score"); - } - catch (const std::exception& e) { - errorAbort(e.what()); - } - double resultValue = PyFloat_AsDouble(result); - return resultValue; - } -} \ No newline at end of file diff --git a/src/PyClassifiers/PyWrap.h b/src/PyClassifiers/PyWrap.h deleted file mode 100644 index d23b746..0000000 --- a/src/PyClassifiers/PyWrap.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef PYWRAP_H -#define PYWRAP_H -#include "boost/python/detail/wrap_python.hpp" -#include -#include -#include -#include -#include -#include "PyHelper.hpp" -#include "TypeId.h" -#pragma once - - -namespace pywrap { - /* - Singleton class to handle Python/numpy interpreter. - */ - using json = nlohmann::json; - class PyWrap { - public: - PyWrap() = default; - PyWrap(PyWrap& other) = delete; - static PyWrap* GetInstance(); - void operator=(const PyWrap&) = delete; - ~PyWrap() = default; - std::string callMethodString(const clfId_t id, const std::string& method); - int callMethodInt(const clfId_t id, const std::string& method); - std::string sklearnVersion(); - std::string version(const clfId_t id); - int callMethodSumOfItems(const clfId_t id, const std::string& method); - void setHyperparameters(const clfId_t id, const json& hyperparameters); - void fit(const clfId_t id, CPyObject& X, CPyObject& y); - PyObject* predict(const clfId_t id, CPyObject& X); - double score(const clfId_t id, CPyObject& X, CPyObject& y); - void clean(const clfId_t id); - void importClass(const clfId_t id, const std::string& moduleName, const std::string& className); - PyObject* getClass(const clfId_t id); - private: - // Only call RemoveInstance from clean method - static void RemoveInstance(); - void errorAbort(const std::string& message); - // No need to use static map here, since this class is a singleton - std::map> moduleClassMap; - static CPyInstance* pyInstance; - static PyWrap* wrapper; - static std::mutex mutex; - }; -} /* namespace pywrap */ -#endif /* PYWRAP_H */ \ No newline at end of file diff --git a/src/PyClassifiers/Pyclf.cc b/src/PyClassifiers/Pyclf.cc deleted file mode 100644 index d3cd7f5..0000000 --- a/src/PyClassifiers/Pyclf.cc +++ /dev/null @@ -1,18 +0,0 @@ -#include "PyClf.h" - -namespace PyClassifiers { - - PyClf::PyClf(const std::std::string& name) : name(name) - { - env = platform::DotEnv(); - - - } - - - PyClf::~PyClf() - { - - } - -} /* namespace PyClassifiers */ \ No newline at end of file diff --git a/src/PyClassifiers/RandomForest.cc b/src/PyClassifiers/RandomForest.cc deleted file mode 100644 index dfdb8ba..0000000 --- a/src/PyClassifiers/RandomForest.cc +++ /dev/null @@ -1,20 +0,0 @@ -#include "RandomForest.h" - -namespace pywrap { - RandomForest::RandomForest() : PyClassifier("sklearn.ensemble", "RandomForestClassifier", true) - { - validHyperparameters = { "n_estimators", "n_jobs", "random_state" }; - } - int RandomForest::getNumberOfEdges() const - { - return callMethodSumOfItems("get_n_leaves"); - } - int RandomForest::getNumberOfStates() const - { - return callMethodSumOfItems("get_depth"); - } - int RandomForest::getNumberOfNodes() const - { - return callMethodSumOfItems("node_count"); - } -} /* namespace pywrap */ \ No newline at end of file diff --git a/src/PyClassifiers/RandomForest.h b/src/PyClassifiers/RandomForest.h deleted file mode 100644 index 2e1d3d0..0000000 --- a/src/PyClassifiers/RandomForest.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef RANDOMFOREST_H -#define RANDOMFOREST_H -#include "PyClassifier.h" - -namespace pywrap { - class RandomForest : public PyClassifier { - public: - RandomForest(); - ~RandomForest() = default; - int getNumberOfEdges() const override; - int getNumberOfStates() const override; - int getNumberOfNodes() const override; - }; -} /* namespace pywrap */ -#endif /* RANDOMFOREST_H */ \ No newline at end of file diff --git a/src/PyClassifiers/STree.cc b/src/PyClassifiers/STree.cc deleted file mode 100644 index faff2ce..0000000 --- a/src/PyClassifiers/STree.cc +++ /dev/null @@ -1,24 +0,0 @@ -#include "STree.h" - -namespace pywrap { - STree::STree() : PyClassifier("stree", "Stree") - { - validHyperparameters = { "C", "kernel", "max_iter", "max_depth", "random_state", "multiclass_strategy", "gamma", "max_features", "degree" }; - }; - int STree::getNumberOfNodes() const - { - return callMethodInt("get_nodes"); - } - int STree::getNumberOfEdges() const - { - return callMethodInt("get_leaves"); - } - int STree::getNumberOfStates() const - { - return callMethodInt("get_depth"); - } - std::string STree::graph() - { - return callMethodString("graph"); - } -} /* namespace pywrap */ \ No newline at end of file diff --git a/src/PyClassifiers/STree.h b/src/PyClassifiers/STree.h deleted file mode 100644 index 7862d3b..0000000 --- a/src/PyClassifiers/STree.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef STREE_H -#define STREE_H -#include "nlohmann/json.hpp" -#include "PyClassifier.h" - -namespace pywrap { - class STree : public PyClassifier { - public: - STree(); - ~STree() = default; - int getNumberOfNodes() const override; - int getNumberOfEdges() const override; - int getNumberOfStates() const override; - std::string graph(); - }; -} /* namespace pywrap */ -#endif /* STREE_H */ \ No newline at end of file diff --git a/src/PyClassifiers/SVC.cc b/src/PyClassifiers/SVC.cc deleted file mode 100644 index cce7650..0000000 --- a/src/PyClassifiers/SVC.cc +++ /dev/null @@ -1,8 +0,0 @@ -#include "SVC.h" - -namespace pywrap { - SVC::SVC() : PyClassifier("sklearn.svm", "SVC", true) - { - validHyperparameters = { "C", "gamma", "kernel", "random_state" }; - } -} /* namespace pywrap */ \ No newline at end of file diff --git a/src/PyClassifiers/SVC.h b/src/PyClassifiers/SVC.h deleted file mode 100644 index 77b2624..0000000 --- a/src/PyClassifiers/SVC.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef SVC_H -#define SVC_H -#include "PyClassifier.h" - -namespace pywrap { - class SVC : public PyClassifier { - public: - SVC(); - ~SVC() = default; - }; - -} /* namespace pywrap */ -#endif /* SVC_H */ \ No newline at end of file diff --git a/src/PyClassifiers/TypeId.h b/src/PyClassifiers/TypeId.h deleted file mode 100644 index d519367..0000000 --- a/src/PyClassifiers/TypeId.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef TYPEDEF_H -#define TYPEDEF_H -namespace pywrap { - typedef uint64_t clfId_t; -} -#endif /* TYPEDEF_H */ \ No newline at end of file diff --git a/src/PyClassifiers/XGBoost.cc b/src/PyClassifiers/XGBoost.cc deleted file mode 100644 index 5afd628..0000000 --- a/src/PyClassifiers/XGBoost.cc +++ /dev/null @@ -1,18 +0,0 @@ -#include "XGBoost.h" - - - - -See https ://stackoverflow.com/questions/36071672/using-xgboost-in-c - - - - - - -namespace pywrap { - std::string XGBoost::version() - { - return callMethodString("1.0"); - } -} /* namespace pywrap */ \ No newline at end of file diff --git a/src/PyClassifiers/XGBoost.h b/src/PyClassifiers/XGBoost.h deleted file mode 100644 index aaf996f..0000000 --- a/src/PyClassifiers/XGBoost.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef XGBOOST_H -#define XGBOOST_H -#include "PyClassifier.h" - -namespace pywrap { - class XGBoost : public PyClassifier { - public: - XGBoost() : PyClassifier("xgboost", "XGBClassifier") {}; - ~XGBoost() = default; - std::string version(); - }; -} /* namespace pywrap */ -#endif /* XGBOOST_H */ \ No newline at end of file diff --git a/stree_results.json b/stree_results.json deleted file mode 100644 index c1ef8cb..0000000 --- a/stree_results.json +++ /dev/null @@ -1,835 +0,0 @@ -[ - { - "date": "2021-04-11", - "time": "18:46:29", - "type": "crossval", - "classifier": "stree", - "dataset": "balance-scale", - "accuracy": "0.97056", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 10000.0, \"gamma\": 0.1, \"kernel\": \"rbf\", \"max_iter\": 10000.0}", - "time_spent": "0.0135214", - "time_spent_std": "0.00111213", - "accuracy_std": "0.0150468", - "nodes": "7.0", - "leaves": "4.0", - "depth": "3.0" - }, - { - "date": "2021-04-11", - "time": "18:46:29", - "type": "crossval", - "classifier": "stree", - "dataset": "balloons", - "accuracy": "0.86", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 7, \"gamma\": 0.1, \"kernel\": \"rbf\", \"max_iter\": 10000.0}", - "time_spent": "0.000804768", - "time_spent_std": "7.74797e-05", - "accuracy_std": "0.285015", - "nodes": "3.0", - "leaves": "2.0", - "depth": "2.0" - }, - { - "date": "2021-04-11", - "time": "18:46:29", - "type": "crossval", - "classifier": "stree", - "dataset": "breast-cancer-wisc-diag", - "accuracy": "0.972764", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.2, \"max_iter\": 10000.0}", - "time_spent": "0.00380772", - "time_spent_std": "0.000638676", - "accuracy_std": "0.0173132", - "nodes": "3.24", - "leaves": "2.12", - "depth": "2.12" - }, - { - "date": "2021-04-11", - "time": "18:46:30", - "type": "crossval", - "classifier": "stree", - "dataset": "breast-cancer-wisc-prog", - "accuracy": "0.811128", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.2, \"max_iter\": 10000.0}", - "time_spent": "0.00767535", - "time_spent_std": "0.00148114", - "accuracy_std": "0.0584601", - "nodes": "5.84", - "leaves": "3.42", - "depth": "3.24" - }, - { - "date": "2021-04-11", - "time": "18:46:31", - "type": "crossval", - "classifier": "stree", - "dataset": "breast-cancer-wisc", - "accuracy": "0.966661", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.00652217", - "time_spent_std": "0.000726579", - "accuracy_std": "0.0139421", - "nodes": "8.88", - "leaves": "4.94", - "depth": "4.08" - }, - { - "date": "2021-04-11", - "time": "18:46:32", - "type": "crossval", - "classifier": "stree", - "dataset": "breast-cancer", - "accuracy": "0.734211", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.023475", - "time_spent_std": "0.00584447", - "accuracy_std": "0.0479774", - "nodes": "21.72", - "leaves": "11.36", - "depth": "5.86" - }, - { - "date": "2021-04-11", - "time": "18:49:08", - "type": "crossval", - "classifier": "stree", - "dataset": "cardiotocography-10clases", - "accuracy": "0.791487", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "3.10582", - "time_spent_std": "0.339218", - "accuracy_std": "0.0192082", - "nodes": "160.76", - "leaves": "80.88", - "depth": "22.86" - }, - { - "date": "2021-04-11", - "time": "18:50:01", - "type": "crossval", - "classifier": "stree", - "dataset": "cardiotocography-3clases", - "accuracy": "0.900613", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "1.05228", - "time_spent_std": "0.138768", - "accuracy_std": "0.0154004", - "nodes": "47.68", - "leaves": "24.34", - "depth": "8.84" - }, - { - "date": "2021-04-11", - "time": "18:50:01", - "type": "crossval", - "classifier": "stree", - "dataset": "conn-bench-sonar-mines-rocks", - "accuracy": "0.755528", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.011577", - "time_spent_std": "0.00341148", - "accuracy_std": "0.0678424", - "nodes": "6.08", - "leaves": "3.54", - "depth": "2.86" - }, - { - "date": "2021-04-11", - "time": "18:50:17", - "type": "crossval", - "classifier": "stree", - "dataset": "cylinder-bands", - "accuracy": "0.715049", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.301143", - "time_spent_std": "0.109773", - "accuracy_std": "0.0367646", - "nodes": "26.2", - "leaves": "13.6", - "depth": "6.82" - }, - { - "date": "2021-04-11", - "time": "18:50:19", - "type": "crossval", - "classifier": "stree", - "dataset": "dermatology", - "accuracy": "0.971833", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 55, \"max_iter\": 10000.0}", - "time_spent": "0.0377538", - "time_spent_std": "0.010726", - "accuracy_std": "0.0206883", - "nodes": "11.0", - "leaves": "6.0", - "depth": "6.0" - }, - { - "date": "2021-04-11", - "time": "18:50:19", - "type": "crossval", - "classifier": "stree", - "dataset": "echocardiogram", - "accuracy": "0.814758", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 7, \"gamma\": 0.1, \"kernel\": \"poly\", \"max_features\": \"auto\", \"max_iter\": 10000.0}", - "time_spent": "0.00333449", - "time_spent_std": "0.000964686", - "accuracy_std": "0.0998078", - "nodes": "7.0", - "leaves": "4.0", - "depth": "3.54" - }, - { - "date": "2021-04-11", - "time": "18:50:20", - "type": "crossval", - "classifier": "stree", - "dataset": "fertility", - "accuracy": "0.88", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.05, \"max_features\": \"auto\", \"max_iter\": 10000.0}", - "time_spent": "0.00090271", - "time_spent_std": "8.96446e-05", - "accuracy_std": "0.0547723", - "nodes": "1.0", - "leaves": "1.0", - "depth": "1.0" - }, - { - "date": "2021-04-11", - "time": "18:50:21", - "type": "crossval", - "classifier": "stree", - "dataset": "haberman-survival", - "accuracy": "0.735637", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.0171611", - "time_spent_std": "0.00334945", - "accuracy_std": "0.0434614", - "nodes": "23.4", - "leaves": "12.2", - "depth": "5.98" - }, - { - "date": "2021-04-11", - "time": "18:50:21", - "type": "crossval", - "classifier": "stree", - "dataset": "heart-hungarian", - "accuracy": "0.827522", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.05, \"max_iter\": 10000.0}", - "time_spent": "0.00493946", - "time_spent_std": "0.000738198", - "accuracy_std": "0.0505283", - "nodes": "10.16", - "leaves": "5.58", - "depth": "4.0" - }, - { - "date": "2021-04-11", - "time": "18:50:21", - "type": "crossval", - "classifier": "stree", - "dataset": "hepatitis", - "accuracy": "0.824516", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 7, \"gamma\": 0.1, \"kernel\": \"rbf\", \"max_iter\": 10000.0}", - "time_spent": "0.0021534", - "time_spent_std": "0.000133715", - "accuracy_std": "0.0738872", - "nodes": "3.0", - "leaves": "2.0", - "depth": "2.0" - }, - { - "date": "2021-04-11", - "time": "18:50:23", - "type": "crossval", - "classifier": "stree", - "dataset": "ilpd-indian-liver", - "accuracy": "0.723498", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.0345243", - "time_spent_std": "0.015789", - "accuracy_std": "0.0384886", - "nodes": "16.04", - "leaves": "8.52", - "depth": "5.28" - }, - { - "date": "2021-04-11", - "time": "18:50:24", - "type": "crossval", - "classifier": "stree", - "dataset": "ionosphere", - "accuracy": "0.953276", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 7, \"gamma\": 0.1, \"kernel\": \"rbf\", \"max_iter\": 10000.0}", - "time_spent": "0.00881722", - "time_spent_std": "0.000843108", - "accuracy_std": "0.0238537", - "nodes": "3.16", - "leaves": "2.08", - "depth": "2.08" - }, - { - "date": "2021-04-11", - "time": "18:50:24", - "type": "crossval", - "classifier": "stree", - "dataset": "iris", - "accuracy": "0.965333", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.00357342", - "time_spent_std": "0.000400509", - "accuracy_std": "0.0319444", - "nodes": "5.0", - "leaves": "3.0", - "depth": "3.0" - }, - { - "date": "2021-04-11", - "time": "18:50:36", - "type": "crossval", - "classifier": "stree", - "dataset": "led-display", - "accuracy": "0.703", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.222106", - "time_spent_std": "0.0116922", - "accuracy_std": "0.0291204", - "nodes": "47.16", - "leaves": "24.08", - "depth": "17.76" - }, - { - "date": "2021-04-11", - "time": "18:51:18", - "type": "crossval", - "classifier": "stree", - "dataset": "libras", - "accuracy": "0.788611", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.08, \"max_iter\": 10000.0}", - "time_spent": "0.841714", - "time_spent_std": "0.0830966", - "accuracy_std": "0.0516913", - "nodes": "82.28", - "leaves": "41.64", - "depth": "28.84" - }, - { - "date": "2021-04-11", - "time": "18:51:41", - "type": "crossval", - "classifier": "stree", - "dataset": "low-res-spect", - "accuracy": "0.883782", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.05, \"max_iter\": 10000.0}", - "time_spent": "0.446301", - "time_spent_std": "0.0411822", - "accuracy_std": "0.0324593", - "nodes": "27.4", - "leaves": "14.2", - "depth": "10.74" - }, - { - "date": "2021-04-11", - "time": "18:51:41", - "type": "crossval", - "classifier": "stree", - "dataset": "lymphography", - "accuracy": "0.835034", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.05, \"max_iter\": 10000.0}", - "time_spent": "0.00539465", - "time_spent_std": "0.000754365", - "accuracy_std": "0.0590649", - "nodes": "9.04", - "leaves": "5.02", - "depth": "4.48" - }, - { - "date": "2021-04-11", - "time": "18:51:43", - "type": "crossval", - "classifier": "stree", - "dataset": "mammographic", - "accuracy": "0.81915", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.0227931", - "time_spent_std": "0.00328533", - "accuracy_std": "0.0222517", - "nodes": "7.4", - "leaves": "4.2", - "depth": "4.0" - }, - { - "date": "2021-04-11", - "time": "18:51:43", - "type": "crossval", - "classifier": "stree", - "dataset": "molec-biol-promoter", - "accuracy": "0.767056", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.05, \"gamma\": 0.1, \"kernel\": \"poly\", \"max_iter\": 10000.0}", - "time_spent": "0.00130273", - "time_spent_std": "0.000105772", - "accuracy_std": "0.0910923", - "nodes": "3.0", - "leaves": "2.0", - "depth": "2.0" - }, - { - "date": "2021-04-11", - "time": "18:51:44", - "type": "crossval", - "classifier": "stree", - "dataset": "musk-1", - "accuracy": "0.916388", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.05, \"gamma\": 0.1, \"kernel\": \"poly\", \"max_iter\": 10000.0}", - "time_spent": "0.0116367", - "time_spent_std": "0.000331845", - "accuracy_std": "0.0275208", - "nodes": "3.0", - "leaves": "2.0", - "depth": "2.0" - }, - { - "date": "2021-04-11", - "time": "18:51:55", - "type": "crossval", - "classifier": "stree", - "dataset": "oocytes_merluccius_nucleus_4d", - "accuracy": "0.835125", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 8.25, \"gamma\": 0.1, \"kernel\": \"poly\"}", - "time_spent": "0.208895", - "time_spent_std": "0.0270573", - "accuracy_std": "0.0220961", - "nodes": "10.52", - "leaves": "5.76", - "depth": "4.42" - }, - { - "date": "2021-04-11", - "time": "18:52:04", - "type": "crossval", - "classifier": "stree", - "dataset": "oocytes_merluccius_states_2f", - "accuracy": "0.915365", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.182198", - "time_spent_std": "0.0294267", - "accuracy_std": "0.020396", - "nodes": "18.04", - "leaves": "9.52", - "depth": "5.3" - }, - { - "date": "2021-04-11", - "time": "18:52:41", - "type": "crossval", - "classifier": "stree", - "dataset": "oocytes_trisopterus_nucleus_2f", - "accuracy": "0.800986", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.717113", - "time_spent_std": "0.209608", - "accuracy_std": "0.0218449", - "nodes": "29.88", - "leaves": "15.44", - "depth": "7.38" - }, - { - "date": "2021-04-11", - "time": "18:52:44", - "type": "crossval", - "classifier": "stree", - "dataset": "oocytes_trisopterus_states_5b", - "accuracy": "0.922249", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.11, \"max_iter\": 10000.0}", - "time_spent": "0.0545047", - "time_spent_std": "0.00853014", - "accuracy_std": "0.0179203", - "nodes": "7.44", - "leaves": "4.22", - "depth": "3.6" - }, - { - "date": "2021-04-11", - "time": "18:52:44", - "type": "crossval", - "classifier": "stree", - "dataset": "parkinsons", - "accuracy": "0.882051", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.00795048", - "time_spent_std": "0.00176761", - "accuracy_std": "0.0478327", - "nodes": "8.48", - "leaves": "4.74", - "depth": "3.76" - }, - { - "date": "2021-04-11", - "time": "18:52:48", - "type": "crossval", - "classifier": "stree", - "dataset": "pima", - "accuracy": "0.766651", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.0750048", - "time_spent_std": "0.0213995", - "accuracy_std": "0.0297203", - "nodes": "17.4", - "leaves": "9.2", - "depth": "5.66" - }, - { - "date": "2021-04-11", - "time": "18:52:48", - "type": "crossval", - "classifier": "stree", - "dataset": "pittsburg-bridges-MATERIAL", - "accuracy": "0.867749", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 7, \"gamma\": 0.1, \"kernel\": \"rbf\", \"max_iter\": 10000.0}", - "time_spent": "0.00293318", - "time_spent_std": "0.000331469", - "accuracy_std": "0.0712226", - "nodes": "5.16", - "leaves": "3.08", - "depth": "3.02" - }, - { - "date": "2021-04-11", - "time": "18:52:49", - "type": "crossval", - "classifier": "stree", - "dataset": "pittsburg-bridges-REL-L", - "accuracy": "0.632238", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.0136311", - "time_spent_std": "0.00322964", - "accuracy_std": "0.101211", - "nodes": "16.32", - "leaves": "8.66", - "depth": "5.96" - }, - { - "date": "2021-04-11", - "time": "18:52:50", - "type": "crossval", - "classifier": "stree", - "dataset": "pittsburg-bridges-SPAN", - "accuracy": "0.659766", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.05, \"max_iter\": 10000.0}", - "time_spent": "0.00524256", - "time_spent_std": "0.00158822", - "accuracy_std": "0.1165", - "nodes": "9.84", - "leaves": "5.42", - "depth": "4.58" - }, - { - "date": "2021-04-11", - "time": "18:52:50", - "type": "crossval", - "classifier": "stree", - "dataset": "pittsburg-bridges-T-OR-D", - "accuracy": "0.861619", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.00295627", - "time_spent_std": "0.000578594", - "accuracy_std": "0.0693747", - "nodes": "4.56", - "leaves": "2.78", - "depth": "2.68" - }, - { - "date": "2021-04-11", - "time": "18:52:50", - "type": "crossval", - "classifier": "stree", - "dataset": "planning", - "accuracy": "0.73527", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 7, \"gamma\": 10.0, \"kernel\": \"rbf\", \"max_iter\": 10000.0}", - "time_spent": "0.0030475", - "time_spent_std": "0.000172266", - "accuracy_std": "0.0669776", - "nodes": "3.0", - "leaves": "2.0", - "depth": "2.0" - }, - { - "date": "2021-04-11", - "time": "18:52:51", - "type": "crossval", - "classifier": "stree", - "dataset": "post-operative", - "accuracy": "0.711111", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 55, \"degree\": 5, \"gamma\": 0.1, \"kernel\": \"poly\", \"max_iter\": 10000.0}", - "time_spent": "0.0018727", - "time_spent_std": "0.000481977", - "accuracy_std": "0.0753592", - "nodes": "2.64", - "leaves": "1.82", - "depth": "1.82" - }, - { - "date": "2021-04-11", - "time": "18:52:52", - "type": "crossval", - "classifier": "stree", - "dataset": "seeds", - "accuracy": "0.952857", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 10000.0, \"max_iter\": 10000.0}", - "time_spent": "0.0203492", - "time_spent_std": "0.00518065", - "accuracy_std": "0.0279658", - "nodes": "9.88", - "leaves": "5.44", - "depth": "4.44" - }, - { - "date": "2021-04-11", - "time": "18:52:52", - "type": "crossval", - "classifier": "stree", - "dataset": "statlog-australian-credit", - "accuracy": "0.678261", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.05, \"max_features\": \"auto\", \"max_iter\": 10000.0}", - "time_spent": "0.00205337", - "time_spent_std": "0.00083162", - "accuracy_std": "0.0390498", - "nodes": "1.32", - "leaves": "1.16", - "depth": "1.16" - }, - { - "date": "2021-04-11", - "time": "18:53:07", - "type": "crossval", - "classifier": "stree", - "dataset": "statlog-german-credit", - "accuracy": "0.7625", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.290754", - "time_spent_std": "0.0653152", - "accuracy_std": "0.0271892", - "nodes": "21.24", - "leaves": "11.12", - "depth": "6.18" - }, - { - "date": "2021-04-11", - "time": "18:53:09", - "type": "crossval", - "classifier": "stree", - "dataset": "statlog-heart", - "accuracy": "0.822963", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.0138923", - "time_spent_std": "0.00323664", - "accuracy_std": "0.044004", - "nodes": "14.56", - "leaves": "7.78", - "depth": "5.0" - }, - { - "date": "2021-04-11", - "time": "18:56:43", - "type": "crossval", - "classifier": "stree", - "dataset": "statlog-image", - "accuracy": "0.955931", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 7, \"max_iter\": 10000.0}", - "time_spent": "4.27584", - "time_spent_std": "0.200362", - "accuracy_std": "0.00956073", - "nodes": "36.92", - "leaves": "18.96", - "depth": "10.8" - }, - { - "date": "2021-04-11", - "time": "18:56:57", - "type": "crossval", - "classifier": "stree", - "dataset": "statlog-vehicle", - "accuracy": "0.793028", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.278833", - "time_spent_std": "0.0392173", - "accuracy_std": "0.030104", - "nodes": "23.88", - "leaves": "12.44", - "depth": "7.06" - }, - { - "date": "2021-04-11", - "time": "18:57:07", - "type": "crossval", - "classifier": "stree", - "dataset": "synthetic-control", - "accuracy": "0.95", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.55, \"max_iter\": 10000.0}", - "time_spent": "0.205184", - "time_spent_std": "0.040793", - "accuracy_std": "0.0253859", - "nodes": "12.48", - "leaves": "6.74", - "depth": "6.5" - }, - { - "date": "2021-04-11", - "time": "18:57:08", - "type": "crossval", - "classifier": "stree", - "dataset": "tic-tac-toe", - "accuracy": "0.984444", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.2, \"gamma\": 0.1, \"kernel\": \"poly\", \"max_iter\": 10000.0}", - "time_spent": "0.0123015", - "time_spent_std": "0.000423728", - "accuracy_std": "0.00838747", - "nodes": "3.0", - "leaves": "2.0", - "depth": "2.0" - }, - { - "date": "2021-04-11", - "time": "18:57:09", - "type": "crossval", - "classifier": "stree", - "dataset": "vertebral-column-2clases", - "accuracy": "0.852903", - "norm": 1, - "stand": 0, - "parameters": "{}", - "time_spent": "0.00576833", - "time_spent_std": "0.000910332", - "accuracy_std": "0.0408851", - "nodes": "6.04", - "leaves": "3.52", - "depth": "3.34" - }, - { - "date": "2021-04-11", - "time": "18:57:09", - "type": "crossval", - "classifier": "stree", - "dataset": "wine", - "accuracy": "0.979159", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.55, \"max_iter\": 10000.0}", - "time_spent": "0.0019741", - "time_spent_std": "0.000137745", - "accuracy_std": "0.022427", - "nodes": "5.0", - "leaves": "3.0", - "depth": "3.0" - }, - { - "date": "2021-04-11", - "time": "18:57:10", - "type": "crossval", - "classifier": "stree", - "dataset": "zoo", - "accuracy": "0.957524", - "norm": 1, - "stand": 0, - "parameters": "{\"C\": 0.1, \"max_iter\": 10000.0}", - "time_spent": "0.00556221", - "time_spent_std": "0.000230106", - "accuracy_std": "0.0454615", - "nodes": "13.04", - "leaves": "7.02", - "depth": "7.02" - } -] \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7ab1740..bcfdc4e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,18 +1,17 @@ if(ENABLE_TESTING) set(TEST_BAYESNET "unit_tests_bayesnet") - set(TEST_PLATFORM "unit_tests_platform") - include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet) - include_directories(${BayesNet_SOURCE_DIR}/src/Platform) - include_directories(${BayesNet_SOURCE_DIR}/lib/Files) - include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) - include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) - include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) - set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCES}) - set(TEST_SOURCES_PLATFORM TestFolding.cc TestUtils.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc) + include_directories( + ${BayesNet_SOURCE_DIR}/src/BayesNet + ${BayesNet_SOURCE_DIR}/src/Platform + ${BayesNet_SOURCE_DIR}/lib/Files + ${BayesNet_SOURCE_DIR}/lib/mdlp + ${BayesNet_SOURCE_DIR}/lib/folding + ${BayesNet_SOURCE_DIR}/lib/json/include + ${BayesNet_SOURCE_DIR}/lib/argparse/include + ${CMAKE_BINARY_DIR}/configured_files/include + ) + set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCES}) add_executable(${TEST_BAYESNET} ${TEST_SOURCES_BAYESNET}) - add_executable(${TEST_PLATFORM} ${TEST_SOURCES_PLATFORM}) target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain) - target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain) add_test(NAME ${TEST_BAYESNET} COMMAND ${TEST_BAYESNET}) - add_test(NAME ${TEST_PLATFORM} COMMAND ${TEST_PLATFORM}) endif(ENABLE_TESTING) diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 671e961..49ed457 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -2,9 +2,9 @@ #include #include #include -#include +#include #include -#include +#include #include "KDB.h" #include "TAN.h" #include "SPODE.h" @@ -126,7 +126,7 @@ TEST_CASE("Models features", "[BayesNet]") auto raw = RawDatasets("iris", true); auto clf = bayesnet::TAN(); clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - REQUIRE(clf.getNumberOfNodes() == 6); + REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 7); REQUIRE(clf.show() == std::vector{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); REQUIRE(clf.graph("Test") == graph); @@ -136,6 +136,6 @@ TEST_CASE("Get num features & num edges", "[BayesNet]") auto raw = RawDatasets("iris", true); auto clf = bayesnet::KDB(2); clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - REQUIRE(clf.getNumberOfNodes() == 6); + REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfEdges() == 8); } \ No newline at end of file diff --git a/tests/TestBayesNetwork.cc b/tests/TestBayesNetwork.cc index f572505..b232508 100644 --- a/tests/TestBayesNetwork.cc +++ b/tests/TestBayesNetwork.cc @@ -1,11 +1,11 @@ #include #include #include -#include +#include #include "TestUtils.h" #include "Network.h" -void buildModel(bayesnet::Network& net, const std::vector& features, const std::std::string& className) +void buildModel(bayesnet::Network& net, const std::vector& features, const std::string& className) { std::vector> network = { {0, 1}, {0, 2}, {1, 3} }; for (const auto& feature : features) { diff --git a/tests/TestFolding.cc b/tests/TestFolding.cc deleted file mode 100644 index a7b3359..0000000 --- a/tests/TestFolding.cc +++ /dev/null @@ -1,95 +0,0 @@ -#include -#include -#include -#include "TestUtils.h" -#include "Folding.h" - -TEST_CASE("KFold Test", "[Platform][KFold]") -{ - // Initialize a KFold object with k=5 and a seed of 19. - std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); - auto raw = RawDatasets(file_name, true); - int nFolds = 5; - platform::KFold kfold(nFolds, raw.nSamples, 19); - int number = raw.nSamples * (kfold.getNumberOfFolds() - 1) / kfold.getNumberOfFolds(); - - SECTION("Number of Folds") - { - REQUIRE(kfold.getNumberOfFolds() == nFolds); - } - SECTION("Fold Test") - { - // Test each fold's size and contents. - for (int i = 0; i < nFolds; ++i) { - auto [train_indices, test_indices] = kfold.getFold(i); - bool result = train_indices.size() == number || train_indices.size() == number + 1; - REQUIRE(result); - REQUIRE(train_indices.size() + test_indices.size() == raw.nSamples); - } - } -} - -map counts(std::vector y, std::vector indices) -{ - map result; - for (auto i = 0; i < indices.size(); ++i) { - result[y[indices[i]]]++; - } - return result; -} - -TEST_CASE("StratifiedKFold Test", "[Platform][StratifiedKFold]") -{ - // Initialize a StratifiedKFold object with k=3, using the y std::vector, and a seed of 17. - std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); - int nFolds = GENERATE(3, 5, 10); - auto raw = RawDatasets(file_name, true); - platform::StratifiedKFold stratified_kfoldt(nFolds, raw.yt, 17); - platform::StratifiedKFold stratified_kfoldv(nFolds, raw.yv, 17); - int number = raw.nSamples * (stratified_kfoldt.getNumberOfFolds() - 1) / stratified_kfoldt.getNumberOfFolds(); - - SECTION("Stratified Number of Folds") - { - REQUIRE(stratified_kfoldt.getNumberOfFolds() == nFolds); - } - SECTION("Stratified Fold Test") - { - // Test each fold's size and contents. - auto counts = map>(); - // Initialize the counts per Fold - for (int i = 0; i < nFolds; ++i) { - counts[i] = std::vector(raw.classNumStates, 0); - } - // Check fold and compute counts of each fold - for (int fold = 0; fold < nFolds; ++fold) { - auto [train_indicest, test_indicest] = stratified_kfoldt.getFold(fold); - auto [train_indicesv, test_indicesv] = stratified_kfoldv.getFold(fold); - REQUIRE(train_indicest == train_indicesv); - REQUIRE(test_indicest == test_indicesv); - // In the worst case scenario, the number of samples in the training set is number + raw.classNumStates - // because in that fold can come one remainder sample from each class. - REQUIRE(train_indicest.size() <= number + raw.classNumStates); - // If the number of samples in any class is less than the number of folds, then the fold is faulty. - // and the number of samples in the training set + test set will be less than nSamples - if (!stratified_kfoldt.isFaulty()) { - REQUIRE(train_indicest.size() + test_indicest.size() == raw.nSamples); - } else { - REQUIRE(train_indicest.size() + test_indicest.size() <= raw.nSamples); - } - auto train_t = torch::tensor(train_indicest); - auto ytrain = raw.yt.index({ train_t }); - // Check that the class labels have been equally assign to each fold - for (const auto& idx : train_indicest) { - counts[fold][raw.yt[idx].item()]++; - } - } - // Test the fold counting of every class - for (int fold = 0; fold < nFolds; ++fold) { - for (int j = 1; j < nFolds - 1; ++j) { - for (int k = 0; k < raw.classNumStates; ++k) { - REQUIRE(abs(counts.at(fold).at(k) - counts.at(j).at(k)) <= 1); - } - } - } - } -} diff --git a/tests/TestUtils.cc b/tests/TestUtils.cc index 1a63675..5a0910e 100644 --- a/tests/TestUtils.cc +++ b/tests/TestUtils.cc @@ -1,10 +1,11 @@ #include "TestUtils.h" +#include "config.h" class Paths { public: static std::string datasets() { - return "../../data/"; + return { data_path.begin(), data_path.end() }; } }; @@ -34,7 +35,7 @@ std::vector discretizeDataset(std::vector& X, m return Xd; } -bool file_exists(const std::std::string& name) +bool file_exists(const std::string& name) { if (FILE* file = fopen(name.c_str(), "r")) { fclose(file); @@ -44,7 +45,7 @@ bool file_exists(const std::std::string& name) } } -tuple, std::string, map>> loadDataset(const std::std::string& name, bool class_last, bool discretize_dataset) +tuple, std::string, map>> loadDataset(const std::string& name, bool class_last, bool discretize_dataset) { auto handler = ArffFiles(); handler.load(Paths::datasets() + static_cast(name) + ".arff", class_last); @@ -78,7 +79,7 @@ tuple, std::string, map>, std::vector, std::vector, std::string, map>> loadFile(const std::std::string& name) +tuple>, std::vector, std::vector, std::string, map>> loadFile(const std::string& name) { auto handler = ArffFiles(); handler.load(Paths::datasets() + static_cast(name) + ".arff"); diff --git a/tests/TestUtils.h b/tests/TestUtils.h index e6a713f..72954c0 100644 --- a/tests/TestUtils.h +++ b/tests/TestUtils.h @@ -4,11 +4,11 @@ #include #include #include -#include +#include #include "ArffFiles.h" #include "CPPFImdlp.h" -bool file_exists(const std::std::string& name); +bool file_exists(const std::string& name); std::pair, map> discretize(std::vector& X, mdlp::labels_t& y, std::vector features); std::vector discretizeDataset(std::vector& X, mdlp::labels_t& y); std::tuple>, std::vector, std::vector, std::string, map>> loadFile(const std::string& name); diff --git a/data/diabetes.arff b/tests/data/diabetes.arff similarity index 100% rename from data/diabetes.arff rename to tests/data/diabetes.arff diff --git a/data/ecoli.arff b/tests/data/ecoli.arff similarity index 100% rename from data/ecoli.arff rename to tests/data/ecoli.arff diff --git a/data/glass.arff b/tests/data/glass.arff similarity index 100% rename from data/glass.arff rename to tests/data/glass.arff diff --git a/data/iris.arff b/tests/data/iris.arff similarity index 100% rename from data/iris.arff rename to tests/data/iris.arff diff --git a/data/iris.net b/tests/data/iris.net similarity index 100% rename from data/iris.net rename to tests/data/iris.net diff --git a/data/kdd_JapaneseVowels.arff b/tests/data/kdd_JapaneseVowels.arff similarity index 100% rename from data/kdd_JapaneseVowels.arff rename to tests/data/kdd_JapaneseVowels.arff diff --git a/data/letter.arff b/tests/data/letter.arff similarity index 100% rename from data/letter.arff rename to tests/data/letter.arff diff --git a/data/liver-disorders.arff b/tests/data/liver-disorders.arff similarity index 100% rename from data/liver-disorders.arff rename to tests/data/liver-disorders.arff diff --git a/data/mfeat-factors.arff b/tests/data/mfeat-factors.arff similarity index 100% rename from data/mfeat-factors.arff rename to tests/data/mfeat-factors.arff