Compare commits
116 Commits
fa4f47ff35
...
alphablock
Author | SHA1 | Date | |
---|---|---|---|
ba455bb934
|
|||
a65955248a
|
|||
84930b0537
|
|||
10c65f44a0
|
|||
6d112f01e7
|
|||
401296293b
|
|||
9566ae4cf6
|
|||
55187ee521
|
|||
68ea06d129
|
|||
6c1d1d0d32
|
|||
b0853d169b
|
|||
26f8e07774
|
|||
315dfb104f
|
|||
381f226d53
|
|||
ea13835701
|
|||
d75468cf78
|
|||
c58bd9d60d
|
|||
148a3b831a
|
|||
69063badbb
|
|||
6ae2b2182a
|
|||
4dbd76df55
|
|||
4545f76667
|
|||
8372987dae
|
|||
d72943c749
|
|||
800246acd2
|
|||
0ea967dd9d
|
|||
97abec8b69
|
|||
17c9522e77
|
|||
45af550cf9
|
|||
5d5f49777e
|
|||
540a8ea06d
|
|||
1924c4392b
|
|||
f2556a30af
|
|||
2f2ed00ca1
|
|||
28f6a0d7a7
|
|||
028522f180
|
|||
84adf13a79
|
|||
26dfe6d056
|
|||
3acc34e4c6
|
|||
8f92b74260
|
|||
3d900f8c81
|
|||
e628d80f4c
|
|||
0f06f8971e
|
|||
f800772149
|
|||
b8a8ddaf8c
|
|||
90555489ff
|
|||
080f3cee34
|
|||
643633e6dd
|
|||
361c51d864
|
|||
5dd3deca1a
|
|||
2202a81782
|
|||
c4f4e332f6
|
|||
a7ec930fa0
|
|||
6858b3d89a
|
|||
5fb176d78a
|
|||
f5d5c35002
|
|||
b34af13eea
|
|||
e3a06264a9
|
|||
df82f82e88
|
|||
886dde7a06
|
|||
88468434e7
|
|||
ad5c3319bd
|
|||
594adb0534
|
|||
b9e0c92334
|
|||
25bd7a42c6
|
|||
c165a4bdda
|
|||
49a36904dc
|
|||
577351eda5
|
|||
a3c4bde460
|
|||
696c0564a7
|
|||
30a6d5e60d
|
|||
f8f3ca28dc
|
|||
5c190d7c66
|
|||
99c9c6731f
|
|||
8d20545fd2
|
|||
2b480cdcb7 | |||
ebaddf1a6c
|
|||
07a2efb298
|
|||
f88b223c46
|
|||
69b9609154
|
|||
6d4117d188
|
|||
ec0268c514
|
|||
dd94fd51f7
|
|||
009ed037b8
|
|||
6d1b78ada7
|
|||
3882ebd6e4
|
|||
423242d280
|
|||
b9381aa453
|
|||
33cfb78554
|
|||
1caa39c071
|
|||
018c94bfe6
|
|||
a54d6b8716
|
|||
6cde09d81e
|
|||
7be95d889d
|
|||
42d61c6fc4
|
|||
e5e947779f
|
|||
ad168d13ba
|
|||
78b8a8ae66
|
|||
7ed9073d15
|
|||
ee93789ca3
|
|||
375ed437ed
|
|||
5ec7fe8d00
|
|||
72ea62f783
|
|||
4b91f2bde0
|
|||
3bc51cb7b0
|
|||
cf83d1f8f4
|
|||
0dd10bcbe4
|
|||
622b36b2c7
|
|||
ea29a96ca1
|
|||
673a41fc4d
|
|||
634ea36169
|
|||
20fef5b6b3
|
|||
7cf864c3f3
|
|||
4a0fa33917
|
|||
d47da27571
|
|||
faccb09c43
|
10
.gitmodules
vendored
10
.gitmodules
vendored
@@ -10,10 +10,12 @@
|
||||
[submodule "lib/libxlsxwriter"]
|
||||
path = lib/libxlsxwriter
|
||||
url = https://github.com/jmcnamara/libxlsxwriter.git
|
||||
[submodule "lib/mdlp"]
|
||||
path = lib/mdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
||||
update = merge
|
||||
[submodule "lib/folding"]
|
||||
path = lib/folding
|
||||
url = https://github.com/rmontanana/folding
|
||||
[submodule "lib/Files"]
|
||||
path = lib/Files
|
||||
url = https://github.com/rmontanana/ArffFiles
|
||||
[submodule "lib/mdlp"]
|
||||
path = lib/mdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
||||
|
13
.vscode/c_cpp_properties.json
vendored
13
.vscode/c_cpp_properties.json
vendored
@@ -11,7 +11,18 @@
|
||||
],
|
||||
"cStandard": "c17",
|
||||
"cppStandard": "c++17",
|
||||
"compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json"
|
||||
"compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json",
|
||||
"configurationProvider": "ms-vscode.cmake-tools"
|
||||
},
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**"
|
||||
],
|
||||
"defines": [],
|
||||
"cStandard": "c17",
|
||||
"cppStandard": "c++17",
|
||||
"configurationProvider": "ms-vscode.cmake-tools"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
|
15
.vscode/launch.json
vendored
15
.vscode/launch.json
vendored
@@ -62,9 +62,9 @@
|
||||
"--stratified",
|
||||
"--discretize",
|
||||
"-d",
|
||||
"iris",
|
||||
"glass",
|
||||
"--hyperparameters",
|
||||
"{\"repeatSparent\": true, \"maxModels\": 12}"
|
||||
"{\"block_update\": true}"
|
||||
],
|
||||
"cwd": "/home/rmontanana/Code/discretizbench",
|
||||
},
|
||||
@@ -99,7 +99,9 @@
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/build_debug/src/b_list",
|
||||
"args": [
|
||||
"--excel"
|
||||
"results",
|
||||
"-d",
|
||||
"mfeat-morphological"
|
||||
],
|
||||
//"cwd": "/Users/rmontanana/Code/discretizbench",
|
||||
"cwd": "${workspaceFolder}/../discretizbench",
|
||||
@@ -108,12 +110,13 @@
|
||||
"name": "test",
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/build_debug/tests/unit_tests",
|
||||
"program": "${workspaceFolder}/build_debug/tests/unit_tests_platform",
|
||||
"args": [
|
||||
"-c=\"Metrics Test\"",
|
||||
"[Scores]",
|
||||
// "-c=\"Metrics Test\"",
|
||||
// "-s",
|
||||
],
|
||||
"cwd": "${workspaceFolder}/build/tests",
|
||||
"cwd": "${workspaceFolder}/build_debug/tests",
|
||||
},
|
||||
{
|
||||
"name": "Build & debug active file",
|
||||
|
@@ -1,16 +1,12 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
project(Platform
|
||||
VERSION 1.0.4
|
||||
VERSION 1.1.0
|
||||
DESCRIPTION "Platform to run Experiments with classifiers."
|
||||
HOMEPAGE_URL "https://github.com/rmontanana/platform"
|
||||
LANGUAGES CXX
|
||||
)
|
||||
|
||||
if (CODE_COVERAGE AND NOT ENABLE_TESTING)
|
||||
MESSAGE(FATAL_ERROR "Code coverage requires testing enabled")
|
||||
endif (CODE_COVERAGE AND NOT ENABLE_TESTING)
|
||||
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
if (POLICY CMP0135)
|
||||
@@ -25,6 +21,8 @@ set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
|
||||
|
||||
# Options
|
||||
# -------
|
||||
@@ -48,7 +46,7 @@ if(Boost_FOUND)
|
||||
endif()
|
||||
|
||||
# Python
|
||||
find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED)
|
||||
find_package(Python3 3.11 COMPONENTS Interpreter Development REQUIRED)
|
||||
message("Python3_LIBRARIES=${Python3_LIBRARIES}")
|
||||
|
||||
# CMakes modules
|
||||
@@ -60,7 +58,6 @@ if (CODE_COVERAGE)
|
||||
enable_testing()
|
||||
include(CodeCoverage)
|
||||
MESSAGE("Code coverage enabled")
|
||||
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
|
||||
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
|
||||
endif (CODE_COVERAGE)
|
||||
|
||||
@@ -76,15 +73,24 @@ add_git_submodule("lib/mdlp")
|
||||
find_library(XLSXWRITER_LIB NAMES libxlsxwriter.dylib libxlsxwriter.so PATHS ${Platform_SOURCE_DIR}/lib/libxlsxwriter/lib)
|
||||
message("XLSXWRITER_LIB=${XLSXWRITER_LIB}")
|
||||
|
||||
find_library(PyClassifiers NAMES libPyClassifiers PyClassifiers libPyClassifiers.a)
|
||||
find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a)
|
||||
find_library(PyClassifiers NAMES libPyClassifiers PyClassifiers libPyClassifiers.a PATHS ${Platform_SOURCE_DIR}/../lib/lib REQUIRED)
|
||||
find_path(PyClassifiers_INCLUDE_DIRS REQUIRED NAMES pyclassifiers PATHS ${Platform_SOURCE_DIR}/../lib/include)
|
||||
find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a PATHS ${Platform_SOURCE_DIR}/../lib/lib REQUIRED)
|
||||
find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ${Platform_SOURCE_DIR}/../lib/include)
|
||||
|
||||
message(STATUS "PyClassifiers=${PyClassifiers}")
|
||||
message(STATUS "PyClassifiers_INCLUDE_DIRS=${PyClassifiers_INCLUDE_DIRS}")
|
||||
message(STATUS "BayesNet=${BayesNet}")
|
||||
message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
|
||||
|
||||
# Subdirectories
|
||||
# --------------
|
||||
add_subdirectory(lib/Files)
|
||||
## Configure test data path
|
||||
cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
|
||||
configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h")
|
||||
add_subdirectory(config)
|
||||
add_subdirectory(src)
|
||||
add_subdirectory(sample)
|
||||
# add_subdirectory(sample)
|
||||
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${Platform_SOURCE_DIR}/src/*.cpp)
|
||||
|
||||
# Testing
|
||||
|
9
Makefile
9
Makefile
@@ -6,7 +6,6 @@ f_release = build_release
|
||||
f_debug = build_debug
|
||||
app_targets = b_best b_list b_main b_manage b_grid
|
||||
test_targets = unit_tests_platform
|
||||
n_procs = -j 16
|
||||
|
||||
define ClearTests
|
||||
@for t in $(test_targets); do \
|
||||
@@ -41,7 +40,7 @@ setup: ## Install dependencies for tests and coverage
|
||||
dest ?= ${HOME}/bin
|
||||
install: ## Copy binary files to bin folder
|
||||
@echo "Destination folder: $(dest)"
|
||||
make buildr
|
||||
@make buildr
|
||||
@echo "*******************************************"
|
||||
@echo ">>> Copying files to $(dest)"
|
||||
@echo "*******************************************"
|
||||
@@ -56,10 +55,10 @@ dependency: ## Create a dependency graph diagram of the project (build/dependenc
|
||||
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
|
||||
|
||||
buildd: ## Build the debug targets
|
||||
cmake --build $(f_debug) -t $(app_targets) PlatformSample $(n_procs)
|
||||
cmake --build $(f_debug) -t $(app_targets) PlatformSample --parallel
|
||||
|
||||
buildr: ## Build the release targets
|
||||
cmake --build $(f_release) -t $(app_targets) $(n_procs)
|
||||
cmake --build $(f_release) -t $(app_targets) --parallel
|
||||
|
||||
clean: ## Clean the tests info
|
||||
@echo ">>> Cleaning Debug Platform tests...";
|
||||
@@ -87,7 +86,7 @@ opt = ""
|
||||
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
|
||||
@echo ">>> Running Platform tests...";
|
||||
@$(MAKE) clean
|
||||
@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
|
||||
@cmake --build $(f_debug) -t $(test_targets) --parallel
|
||||
@for t in $(test_targets); do \
|
||||
if [ -f $(f_debug)/tests/$$t ]; then \
|
||||
cd $(f_debug)/tests ; \
|
||||
|
19
README.md
19
README.md
@@ -1,4 +1,4 @@
|
||||
# Platform
|
||||
# <img src="logo.png" alt="logo" width="50"/> Platform
|
||||
|
||||

|
||||
[](<https://opensource.org/licenses/MIT>)
|
||||
@@ -20,11 +20,18 @@ In Linux sometimes the library libstdc++ is mistaken from the miniconda installa
|
||||
libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by b_xxxx)
|
||||
```
|
||||
|
||||
The solution is to erase the libstdc++ library from the miniconda installation:
|
||||
The solution is to erase the libstdc++ library from the miniconda installation; no further compilation is needed.
|
||||
|
||||
### MPI
|
||||
|
||||
In Linux just install openmpi & openmpi-devel packages. Only if cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
|
||||
In Linux just install openmpi & openmpi-devel packages.
|
||||
|
||||
```bash
|
||||
source /etc/profile.d/modules.sh
|
||||
module load mpi/openmpi-x86_64
|
||||
```
|
||||
|
||||
If cmake can't find the openmpi installation (as in Oracle Linux), set the following variable:
|
||||
|
||||
```bash
|
||||
export MPI_HOME="/usr/lib64/openmpi"
|
||||
@@ -97,8 +104,6 @@ List all the datasets and its properties. The datasets are located in the _datas
|
||||
|
||||
where <real_features> can be either the word _all_ or a list of numbers separated by commas, i.e. [0,3,6,7]
|
||||
|
||||

|
||||
|
||||
### b_grid
|
||||
|
||||
Run a grid search over the parameters of the classifiers. The parameters are defined in the file _grid.txt_ located in the grid folder of the experiments. The file has to be created with the following format:
|
||||
@@ -138,14 +143,10 @@ Run the main experiment. There are several hyperparameters that can set in comma
|
||||
- -\-title <title_text>: Title of the experiment (optional if only one dataset is specified).
|
||||
- -\-quiet: Don't display detailed progress and result of the experiment.
|
||||
|
||||

|
||||
|
||||
### b_manage
|
||||
|
||||
Manage the results of the experiments.
|
||||
|
||||

|
||||
|
||||
### b_best
|
||||
|
||||
Get and optionally compare the best results of the experiments. The results can be stored in an MS Excel file.
|
||||
|
@@ -1,4 +1,4 @@
|
||||
configure_file(
|
||||
"config.h.in"
|
||||
"${CMAKE_BINARY_DIR}/configured_files/include/config.h" ESCAPE_QUOTES
|
||||
"${CMAKE_BINARY_DIR}/configured_files/include/config_platform.h" ESCAPE_QUOTES
|
||||
)
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef PLATFORM_H
|
||||
#define PLATFORM_H
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
@@ -8,3 +8,4 @@ static constexpr std::string_view platform_project_version = "@PROJECT_VERSION@"
|
||||
static constexpr std::string_view platform_project_description = "@PROJECT_DESCRIPTION@";
|
||||
static constexpr std::string_view platform_git_sha = "@GIT_SHA@";
|
||||
static constexpr std::string_view platform_data_path = "@Platform_SOURCE_DIR@/tests/data/";
|
||||
#endif
|
@@ -1,8 +1,3 @@
|
||||
[submodule "lib/mdlp"]
|
||||
path = lib/mdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
||||
main = main
|
||||
update = merge
|
||||
[submodule "lib/catch2"]
|
||||
path = lib/catch2
|
||||
main = v2.x
|
||||
|
1
lib/Files
Submodule
1
lib/Files
Submodule
Submodule lib/Files added at a4329f5f9d
@@ -1,168 +0,0 @@
|
||||
#include "ArffFiles.h"
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
||||
ArffFiles::ArffFiles() = default;
|
||||
|
||||
std::vector<std::string> ArffFiles::getLines() const
|
||||
{
|
||||
return lines;
|
||||
}
|
||||
|
||||
unsigned long int ArffFiles::getSize() const
|
||||
{
|
||||
return lines.size();
|
||||
}
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
|
||||
{
|
||||
return attributes;
|
||||
}
|
||||
|
||||
std::string ArffFiles::getClassName() const
|
||||
{
|
||||
return className;
|
||||
}
|
||||
|
||||
std::string ArffFiles::getClassType() const
|
||||
{
|
||||
return classType;
|
||||
}
|
||||
|
||||
std::vector<std::vector<float>>& ArffFiles::getX()
|
||||
{
|
||||
return X;
|
||||
}
|
||||
|
||||
std::vector<int>& ArffFiles::getY()
|
||||
{
|
||||
return y;
|
||||
}
|
||||
|
||||
void ArffFiles::loadCommon(std::string fileName)
|
||||
{
|
||||
std::ifstream file(fileName);
|
||||
if (!file.is_open()) {
|
||||
throw std::invalid_argument("Unable to open file");
|
||||
}
|
||||
std::string line;
|
||||
std::string keyword;
|
||||
std::string attribute;
|
||||
std::string type;
|
||||
std::string type_w;
|
||||
while (getline(file, line)) {
|
||||
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
|
||||
continue;
|
||||
}
|
||||
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
|
||||
std::stringstream ss(line);
|
||||
ss >> keyword >> attribute;
|
||||
type = "";
|
||||
while (ss >> type_w)
|
||||
type += type_w + " ";
|
||||
attributes.emplace_back(trim(attribute), trim(type));
|
||||
continue;
|
||||
}
|
||||
if (line[0] == '@') {
|
||||
continue;
|
||||
}
|
||||
lines.push_back(line);
|
||||
}
|
||||
file.close();
|
||||
if (attributes.empty())
|
||||
throw std::invalid_argument("No attributes found");
|
||||
}
|
||||
|
||||
void ArffFiles::load(const std::string& fileName, bool classLast)
|
||||
{
|
||||
int labelIndex;
|
||||
loadCommon(fileName);
|
||||
if (classLast) {
|
||||
className = std::get<0>(attributes.back());
|
||||
classType = std::get<1>(attributes.back());
|
||||
attributes.pop_back();
|
||||
labelIndex = static_cast<int>(attributes.size());
|
||||
} else {
|
||||
className = std::get<0>(attributes.front());
|
||||
classType = std::get<1>(attributes.front());
|
||||
attributes.erase(attributes.begin());
|
||||
labelIndex = 0;
|
||||
}
|
||||
generateDataset(labelIndex);
|
||||
}
|
||||
void ArffFiles::load(const std::string& fileName, const std::string& name)
|
||||
{
|
||||
int labelIndex;
|
||||
loadCommon(fileName);
|
||||
bool found = false;
|
||||
for (int i = 0; i < attributes.size(); ++i) {
|
||||
if (attributes[i].first == name) {
|
||||
className = std::get<0>(attributes[i]);
|
||||
classType = std::get<1>(attributes[i]);
|
||||
attributes.erase(attributes.begin() + i);
|
||||
labelIndex = i;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
throw std::invalid_argument("Class name not found");
|
||||
}
|
||||
generateDataset(labelIndex);
|
||||
}
|
||||
|
||||
void ArffFiles::generateDataset(int labelIndex)
|
||||
{
|
||||
X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
|
||||
auto yy = std::vector<std::string>(lines.size(), "");
|
||||
auto removeLines = std::vector<int>(); // Lines with missing values
|
||||
for (size_t i = 0; i < lines.size(); i++) {
|
||||
std::stringstream ss(lines[i]);
|
||||
std::string value;
|
||||
int pos = 0;
|
||||
int xIndex = 0;
|
||||
while (getline(ss, value, ',')) {
|
||||
if (pos++ == labelIndex) {
|
||||
yy[i] = value;
|
||||
} else {
|
||||
if (value == "?") {
|
||||
X[xIndex++][i] = -1;
|
||||
removeLines.push_back(i);
|
||||
} else
|
||||
X[xIndex++][i] = stof(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto i : removeLines) {
|
||||
yy.erase(yy.begin() + i);
|
||||
for (auto& x : X) {
|
||||
x.erase(x.begin() + i);
|
||||
}
|
||||
}
|
||||
y = factorize(yy);
|
||||
}
|
||||
|
||||
// Returns `source` without the leading and trailing characters found in the set
// { space, single quote, '\n', '\r', '\t' }. A string made only of those characters
// yields an empty string.
std::string ArffFiles::trim(const std::string& source)
{
    const char* const strip = " '\n\r\t";
    const auto first = source.find_first_not_of(strip);
    if (first == std::string::npos) {
        return std::string();
    }
    const auto last = source.find_last_not_of(strip);
    return source.substr(first, last - first + 1);
}
|
||||
|
||||
// Encodes each label as an integer: the first distinct label seen gets 0, the next new
// one 1, and so on. The result has one code per input label, in input order.
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{
    std::map<std::string, int> codes;
    std::vector<int> encoded;
    encoded.reserve(labels_t.size());
    for (const auto& label : labels_t) {
        // A new label receives the next free code (= number of labels seen so far);
        // an already known label keeps the code stored in the map.
        const auto result = codes.try_emplace(label, static_cast<int>(codes.size()));
        encoded.push_back(result.first->second);
    }
    return encoded;
}
|
@@ -1,32 +0,0 @@
|
||||
#ifndef ARFFFILES_H
|
||||
#define ARFFFILES_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Reader for ARFF files: keeps the attribute declarations, the raw data rows and the
// numeric dataset (X, y) generated from them.
class ArffFiles {
public:
    ArffFiles();
    // Loads a file taking the class from the last (true, default) or first (false) attribute.
    void load(const std::string& fileName, bool classLast = true);
    // Loads a file taking the class from the attribute called `name`.
    void load(const std::string& fileName, const std::string& name);
    std::vector<std::string> getLines() const;
    unsigned long int getSize() const;
    std::string getClassName() const;
    std::string getClassType() const;
    // Strips leading/trailing blanks, quotes and line terminators.
    static std::string trim(const std::string& source);
    std::vector<std::vector<float>>& getX();
    std::vector<int>& getY();
    std::vector<std::pair<std::string, std::string>> getAttributes() const;
    // Maps each distinct label to an integer code in order of first appearance.
    static std::vector<int> factorize(const std::vector<std::string>& labels_t);

private:
    std::vector<std::string> lines;                              // raw data rows
    std::vector<std::pair<std::string, std::string>> attributes; // (name, type) pairs
    std::string className;
    std::string classType;
    std::vector<std::vector<float>> X;                           // one inner vector per attribute
    std::vector<int> y;                                          // encoded class labels
    void generateDataset(int labelIndex);
    void loadCommon(std::string fileName);
};
|
||||
|
||||
#endif
|
@@ -1 +0,0 @@
|
||||
add_library(ArffFiles ArffFiles.cc)
|
Submodule lib/argparse updated: c69d8e1960...cbd9fd8ed6
Submodule lib/catch2 updated: 8ac8190e49...0321d2fce3
Submodule lib/folding updated: 37316a54e0...2ac43e32ac
2
lib/json
2
lib/json
Submodule lib/json updated: 0457de21cf...620034ecec
Submodule lib/libxlsxwriter updated: f6d73b0ae1...8206bda64a
2
lib/mdlp
2
lib/mdlp
Submodule lib/mdlp updated: 5708dc3de9...cfb993f5ec
@@ -3,12 +3,13 @@ include_directories(
|
||||
${Platform_SOURCE_DIR}/src/main
|
||||
${Python3_INCLUDE_DIRS}
|
||||
${Platform_SOURCE_DIR}/lib/Files
|
||||
${Platform_SOURCE_DIR}/lib/mdlp
|
||||
${Platform_SOURCE_DIR}/lib/mdlp/src
|
||||
${Platform_SOURCE_DIR}/lib/argparse/include
|
||||
${Platform_SOURCE_DIR}/lib/folding
|
||||
${Platform_SOURCE_DIR}/lib/json/include
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
/usr/local/include
|
||||
${PyClassifiers_INCLUDE_DIRS}
|
||||
${Bayesnet_INCLUDE_DIRS}
|
||||
)
|
||||
add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp)
|
||||
target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||
target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
@@ -5,13 +5,13 @@
|
||||
#include <torch/torch.h>
|
||||
#include <argparse/argparse.hpp>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <ArffFiles.h>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <ArffFiles.hpp>
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
#include <folding.hpp>
|
||||
#include <bayesnet/utils/BayesMetrics.h>
|
||||
#include "Models.h"
|
||||
#include "modelRegister.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
const std::string PATH = { platform_data_path.begin(), platform_data_path.end() };
|
||||
|
||||
@@ -161,7 +161,8 @@ int main(int argc, char** argv)
|
||||
}
|
||||
states[className] = std::vector<int>(maxes[className]);
|
||||
auto clf = platform::Models::instance()->create(model_name);
|
||||
clf->fit(Xd, y, features, className, states);
|
||||
bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::ORIGINAL;
|
||||
clf->fit(Xd, y, features, className, states, smoothing);
|
||||
if (dump_cpt) {
|
||||
std::cout << "--- CPT Tables ---" << std::endl;
|
||||
clf->dump_cpt();
|
||||
@@ -210,14 +211,14 @@ int main(int argc, char** argv)
|
||||
torch::Tensor ytraint = yt.index({ ttrain });
|
||||
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
|
||||
torch::Tensor ytestt = yt.index({ ttest });
|
||||
clf->fit(Xtraint, ytraint, features, className, states);
|
||||
clf->fit(Xtraint, ytraint, features, className, states, smoothing);
|
||||
auto temp = clf->predict(Xtraint);
|
||||
score_train = clf->score(Xtraint, ytraint);
|
||||
score_test = clf->score(Xtestt, ytestt);
|
||||
} else {
|
||||
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
|
||||
auto [Xtest, ytest] = extract_indices(test, Xd, y);
|
||||
clf->fit(Xtrain, ytrain, features, className, states);
|
||||
clf->fit(Xtrain, ytrain, features, className, states, smoothing);
|
||||
std::cout << "Nodes: " << clf->getNumberOfNodes() << std::endl;
|
||||
nodes += clf->getNumberOfNodes();
|
||||
score_train = clf->score(Xtrain, ytrain);
|
||||
|
@@ -2,7 +2,7 @@ include_directories(
|
||||
## Libs
|
||||
${Platform_SOURCE_DIR}/lib/Files
|
||||
${Platform_SOURCE_DIR}/lib/folding
|
||||
${Platform_SOURCE_DIR}/lib/mdlp
|
||||
${Platform_SOURCE_DIR}/lib/mdlp/src
|
||||
${Platform_SOURCE_DIR}/lib/argparse/include
|
||||
${Platform_SOURCE_DIR}/lib/json/include
|
||||
${Platform_SOURCE_DIR}/lib/libxlsxwriter/include
|
||||
@@ -10,7 +10,8 @@ include_directories(
|
||||
${MPI_CXX_INCLUDE_DIRS}
|
||||
${TORCH_INCLUDE_DIRS}
|
||||
${CMAKE_BINARY_DIR}/configured_files/include
|
||||
/usr/local/include
|
||||
${PyClassifiers_INCLUDE_DIRS}
|
||||
${Bayesnet_INCLUDE_DIRS}
|
||||
## Platform
|
||||
${Platform_SOURCE_DIR}/src
|
||||
${Platform_SOURCE_DIR}/results
|
||||
@@ -19,49 +20,50 @@ include_directories(
|
||||
# b_best
|
||||
add_executable(
|
||||
b_best commands/b_best.cpp best/Statistics.cpp
|
||||
best/BestResultsExcel.cpp best/BestResults.cpp
|
||||
common/Datasets.cpp common/Dataset.cpp
|
||||
main/Models.cpp
|
||||
best/BestResultsExcel.cpp best/BestResultsTex.cpp best/BestResultsMd.cpp best/BestResults.cpp
|
||||
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
|
||||
main/Models.cpp main/Scores.cpp
|
||||
reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp
|
||||
results/Result.cpp
|
||||
)
|
||||
target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||
target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||
|
||||
# b_grid
|
||||
set(grid_sources GridSearch.cpp GridData.cpp)
|
||||
list(TRANSFORM grid_sources PREPEND grid/)
|
||||
add_executable(b_grid commands/b_grid.cpp ${grid_sources}
|
||||
common/Datasets.cpp common/Dataset.cpp
|
||||
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
|
||||
main/HyperParameters.cpp main/Models.cpp
|
||||
)
|
||||
target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||
target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||
|
||||
# b_list
|
||||
add_executable(b_list commands/b_list.cpp ${list_sources}
|
||||
common/Datasets.cpp common/Dataset.cpp
|
||||
main/Models.cpp
|
||||
reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ResultsDatasetConsole.cpp reports/ReportsPaged.cpp
|
||||
results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp
|
||||
add_executable(b_list commands/b_list.cpp
|
||||
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
|
||||
main/Models.cpp main/Scores.cpp
|
||||
reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
|
||||
results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
|
||||
)
|
||||
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||
|
||||
# b_main
|
||||
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp)
|
||||
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
|
||||
list(TRANSFORM main_sources PREPEND main/)
|
||||
add_executable(b_main commands/b_main.cpp ${main_sources}
|
||||
common/Datasets.cpp common/Dataset.cpp
|
||||
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
|
||||
reports/ReportConsole.cpp reports/ReportBase.cpp
|
||||
results/Result.cpp
|
||||
)
|
||||
target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||
target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||
|
||||
# b_manage
|
||||
set(manage_sources ManageScreen.cpp CommandParser.cpp ResultsManager.cpp)
|
||||
set(manage_sources ManageScreen.cpp OptionsMenu.cpp ResultsManager.cpp)
|
||||
list(TRANSFORM manage_sources PREPEND manage/)
|
||||
add_executable(
|
||||
b_manage commands/b_manage.cpp ${manage_sources}
|
||||
common/Datasets.cpp common/Dataset.cpp
|
||||
reports/ReportConsole.cpp reports/ReportExcel.cpp reports/ReportExcelCompared.cpp reports/ReportBase.cpp reports/ExcelFile.cpp reports/DatasetsConsole.cpp reports/ResultsDatasetConsole.cpp reports/ReportsPaged.cpp
|
||||
results/Result.cpp results/ResultsDataset.cpp
|
||||
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
|
||||
reports/ReportConsole.cpp reports/ReportExcel.cpp reports/ReportExcelCompared.cpp reports/ReportBase.cpp reports/ExcelFile.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
|
||||
results/Result.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
|
||||
main/Scores.cpp
|
||||
)
|
||||
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
|
||||
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" fimdlp "${BayesNet}")
|
||||
|
@@ -6,8 +6,12 @@
|
||||
#include <algorithm>
|
||||
#include "common/Colors.h"
|
||||
#include "common/CLocale.h"
|
||||
#include "common/Paths.h"
|
||||
#include "common/Utils.h" // compute_std
|
||||
#include "results/Result.h"
|
||||
#include "BestResultsExcel.h"
|
||||
#include "BestResultsTex.h"
|
||||
#include "BestResultsMd.h"
|
||||
#include "best/Statistics.h"
|
||||
#include "BestResults.h"
|
||||
|
||||
@@ -51,20 +55,20 @@ namespace platform {
|
||||
}
|
||||
}
|
||||
if (update) {
|
||||
bests[datasetName] = { item.at("score").get<double>(), item.at("hyperparameters"), file };
|
||||
bests[datasetName] = { item.at("score").get<double>(), item.at("hyperparameters"), file, item.at("score_std").get<double>() };
|
||||
}
|
||||
}
|
||||
}
|
||||
std::string bestFileName = path + bestResultFile();
|
||||
if (bests.empty()) {
|
||||
std::cerr << Colors::MAGENTA() << "No results found for model " << model << " and score " << score << Colors::RESET() << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
std::string bestFileName = path + Paths::bestResultsFile(score, model);
|
||||
std::ofstream file(bestFileName);
|
||||
file << bests;
|
||||
file.close();
|
||||
return bestFileName;
|
||||
}
|
||||
std::string BestResults::bestResultFile()
|
||||
{
|
||||
return "best_results_" + score + "_" + model + ".json";
|
||||
}
|
||||
std::pair<std::string, std::string> getModelScore(std::string name)
|
||||
{
|
||||
// results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
|
||||
@@ -146,7 +150,7 @@ namespace platform {
|
||||
}
|
||||
void BestResults::listFile()
|
||||
{
|
||||
std::string bestFileName = path + bestResultFile();
|
||||
std::string bestFileName = path + Paths::bestResultsFile(score, model);
|
||||
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
|
||||
fclose(fileTest);
|
||||
} else {
|
||||
@@ -192,7 +196,7 @@ namespace platform {
|
||||
auto maxDate = std::filesystem::file_time_type::max();
|
||||
for (const auto& model : models) {
|
||||
this->model = model;
|
||||
std::string bestFileName = path + bestResultFile();
|
||||
std::string bestFileName = path + Paths::bestResultsFile(score, model);
|
||||
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
|
||||
fclose(fileTest);
|
||||
} else {
|
||||
@@ -209,13 +213,20 @@ namespace platform {
|
||||
table["dateTable"] = ftime_to_string(maxDate);
|
||||
return table;
|
||||
}
|
||||
void BestResults::printTableResults(std::vector<std::string> models, json table)
|
||||
|
||||
void BestResults::printTableResults(std::vector<std::string> models, json table, bool tex)
|
||||
{
|
||||
std::stringstream oss;
|
||||
oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<std::string>() << std::endl;
|
||||
std::cout << oss.str();
|
||||
std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
|
||||
std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset");
|
||||
auto bestResultsTex = BestResultsTex();
|
||||
auto bestResultsMd = BestResultsMd();
|
||||
if (tex) {
|
||||
bestResultsTex.results_header(models, table.at("dateTable").get<std::string>());
|
||||
bestResultsMd.results_header(models, table.at("dateTable").get<std::string>());
|
||||
}
|
||||
for (const auto& model : models) {
|
||||
std::cout << std::setw(maxModelName) << std::left << model << " ";
|
||||
}
|
||||
@@ -226,12 +237,13 @@ namespace platform {
|
||||
}
|
||||
std::cout << std::endl;
|
||||
auto i = 0;
|
||||
std::map<std::string, double> totals;
|
||||
std::map<std::string, std::vector<double>> totals;
|
||||
int nDatasets = table.begin().value().size();
|
||||
for (const auto& model : models) {
|
||||
totals[model] = 0.0;
|
||||
}
|
||||
auto datasets = getDatasets(table.begin().value());
|
||||
if (tex) {
|
||||
bestResultsTex.results_body(datasets, table);
|
||||
bestResultsMd.results_body(datasets, table);
|
||||
}
|
||||
for (auto const& dataset_ : datasets) {
|
||||
auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN();
|
||||
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
|
||||
@@ -266,7 +278,7 @@ namespace platform {
|
||||
if (value == -1) {
|
||||
std::cout << Colors::YELLOW() << std::setw(maxModelName) << std::right << "N/A" << " ";
|
||||
} else {
|
||||
totals[model] += value;
|
||||
totals[model].push_back(value);
|
||||
std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " ";
|
||||
}
|
||||
}
|
||||
@@ -277,19 +289,26 @@ namespace platform {
|
||||
std::cout << std::string(maxModelName, '=') << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
std::cout << Colors::GREEN() << " Totals" << std::string(maxDatasetName - 6, '.') << " ";
|
||||
std::cout << Colors::GREEN() << " Average" << std::string(maxDatasetName - 7, '.') << " ";
|
||||
double max_value = 0.0;
|
||||
std::string best_model = "";
|
||||
for (const auto& total : totals) {
|
||||
if (total.second > max_value) {
|
||||
max_value = total.second;
|
||||
auto actual = std::reduce(total.second.begin(), total.second.end());
|
||||
if (actual > max_value) {
|
||||
max_value = actual;
|
||||
best_model = total.first;
|
||||
}
|
||||
}
|
||||
if (tex) {
|
||||
bestResultsTex.results_footer(totals, best_model);
|
||||
bestResultsMd.results_footer(totals, best_model);
|
||||
}
|
||||
for (const auto& model : models) {
|
||||
std::string efectiveColor = Colors::GREEN();
|
||||
if (totals[model] == max_value) {
|
||||
efectiveColor = Colors::RED();
|
||||
}
|
||||
std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << totals[model] << " ";
|
||||
std::string efectiveColor = model == best_model ? Colors::RED() : Colors::GREEN();
|
||||
double value = std::reduce(totals[model].begin(), totals[model].end()) / nDatasets;
|
||||
double std_value = compute_std(totals[model], value);
|
||||
std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << value << " ";
|
||||
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
@@ -302,26 +321,34 @@ namespace platform {
|
||||
json table = buildTableResults(models);
|
||||
std::vector<std::string> datasets = getDatasets(table.begin().value());
|
||||
BestResultsExcel excel_report(score, datasets);
|
||||
excel_report.reportSingle(model, path + bestResultFile());
|
||||
messageExcelFile(excel_report.getFileName());
|
||||
excel_report.reportSingle(model, path + Paths::bestResultsFile(score, model));
|
||||
messageOutputFile("Excel", excel_report.getFileName());
|
||||
}
|
||||
}
|
||||
void BestResults::reportAll(bool excel)
|
||||
void BestResults::reportAll(bool excel, bool tex)
|
||||
{
|
||||
auto models = getModels();
|
||||
// Build the table of results
|
||||
json table = buildTableResults(models);
|
||||
std::vector<std::string> datasets = getDatasets(table.begin().value());
|
||||
// Print the table of results
|
||||
printTableResults(models, table);
|
||||
printTableResults(models, table, tex);
|
||||
// Compute the Friedman test
|
||||
std::map<std::string, std::map<std::string, float>> ranksModels;
|
||||
if (friedman) {
|
||||
Statistics stats(models, datasets, table, significance);
|
||||
auto result = stats.friedmanTest();
|
||||
stats.postHocHolmTest(result);
|
||||
stats.postHocHolmTest(result, tex);
|
||||
ranksModels = stats.getRanks();
|
||||
}
|
||||
if (tex) {
|
||||
messageOutputFile("TeX", Paths::tex() + Paths::tex_output());
|
||||
messageOutputFile("MarkDown", Paths::tex() + Paths::md_output());
|
||||
if (friedman) {
|
||||
messageOutputFile("TeX", Paths::tex() + Paths::tex_post_hoc());
|
||||
messageOutputFile("MarkDown", Paths::tex() + Paths::md_post_hoc());
|
||||
}
|
||||
}
|
||||
if (excel) {
|
||||
BestResultsExcel excel(score, datasets);
|
||||
excel.reportAll(models, table, ranksModels, friedman, significance);
|
||||
@@ -342,13 +369,14 @@ namespace platform {
|
||||
}
|
||||
}
|
||||
model = models.at(idx);
|
||||
excel.reportSingle(model, path + bestResultFile());
|
||||
excel.reportSingle(model, path + Paths::bestResultsFile(score, model));
|
||||
}
|
||||
messageExcelFile(excel.getFileName());
|
||||
messageOutputFile("Excel", excel.getFileName());
|
||||
}
|
||||
}
|
||||
void BestResults::messageExcelFile(const std::string& fileName)
|
||||
void BestResults::messageOutputFile(const std::string& title, const std::string& fileName)
|
||||
{
|
||||
std::cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << std::endl;
|
||||
std::cout << Colors::YELLOW() << "** " << std::setw(5) << std::left << title
|
||||
<< " file generated: " << fileName << Colors::RESET() << std::endl;
|
||||
}
|
||||
}
|
@@ -1,9 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef BESTRESULTS_H
|
||||
#define BESTRESULTS_H
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
using json = nlohmann::json;
|
||||
namespace platform {
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
class BestResults {
|
||||
public:
|
||||
explicit BestResults(const std::string& path, const std::string& score, const std::string& model, const std::string& dataset, bool friedman, double significance = 0.05)
|
||||
@@ -12,16 +13,15 @@ namespace platform {
|
||||
}
|
||||
std::string build();
|
||||
void reportSingle(bool excel);
|
||||
void reportAll(bool excel);
|
||||
void reportAll(bool excel, bool tex);
|
||||
void buildAll();
|
||||
private:
|
||||
std::vector<std::string> getModels();
|
||||
std::vector<std::string> getDatasets(json table);
|
||||
std::vector<std::string> loadResultFiles();
|
||||
void messageExcelFile(const std::string& fileName);
|
||||
void messageOutputFile(const std::string& title, const std::string& fileName);
|
||||
json buildTableResults(std::vector<std::string> models);
|
||||
void printTableResults(std::vector<std::string> models, json table);
|
||||
std::string bestResultFile();
|
||||
void printTableResults(std::vector<std::string> models, json table, bool tex);
|
||||
json loadFile(const std::string& fileName);
|
||||
void listFile();
|
||||
std::string path;
|
||||
@@ -34,3 +34,4 @@ namespace platform {
|
||||
int maxDatasetName = 0;
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -32,7 +32,7 @@ namespace platform {
|
||||
}
|
||||
BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets) : score(score), datasets(datasets)
|
||||
{
|
||||
file_name = "BestResults.xlsx";
|
||||
file_name = Paths::bestResultsExcel(score);
|
||||
workbook = workbook_new(getFileName().c_str());
|
||||
setProperties("Best Results");
|
||||
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
|
||||
@@ -64,19 +64,21 @@ namespace platform {
|
||||
json data = loadResultData(fileName);
|
||||
|
||||
std::string title = "Best results for " + model;
|
||||
worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]);
|
||||
worksheet_merge_range(worksheet, 0, 0, 0, 5, title.c_str(), styles["headerFirst"]);
|
||||
// Body header
|
||||
row = 3;
|
||||
int col = 1;
|
||||
writeString(row, 0, "Nº", "bodyHeader");
|
||||
writeString(row, 0, "#", "bodyHeader");
|
||||
writeString(row, 1, "Dataset", "bodyHeader");
|
||||
writeString(row, 2, "Score", "bodyHeader");
|
||||
writeString(row, 3, "File", "bodyHeader");
|
||||
writeString(row, 4, "Hyperparameters", "bodyHeader");
|
||||
writeString(row, 5, "F", "bodyHeader");
|
||||
auto i = 0;
|
||||
std::string hyperparameters;
|
||||
int hypSize = 22;
|
||||
std::map<std::string, std::string> files; // map of files imported and their tabs
|
||||
int numLines = data.size();
|
||||
for (auto const& item : data.items()) {
|
||||
row++;
|
||||
writeInt(row, 0, i++, "ints");
|
||||
@@ -104,6 +106,8 @@ namespace platform {
|
||||
hypSize = hyperparameters.size();
|
||||
}
|
||||
writeString(row, 4, hyperparameters, "text");
|
||||
std::string countHyperparameters = "=COUNTIF(e5:e" + std::to_string(numLines + 4) + ", e" + std::to_string(row + 1) + ")";
|
||||
worksheet_write_formula(worksheet, row, 5, countHyperparameters.c_str(), efectiveStyle("ints"));
|
||||
}
|
||||
row++;
|
||||
// Set Totals
|
||||
@@ -180,7 +184,7 @@ namespace platform {
|
||||
// Body header
|
||||
row = 3;
|
||||
int col = 1;
|
||||
writeString(row, 0, "Nº", "bodyHeader");
|
||||
writeString(row, 0, "#", "bodyHeader");
|
||||
writeString(row, 1, "Dataset", "bodyHeader");
|
||||
for (const auto& model : models) {
|
||||
writeString(row, ++col, model.c_str(), "bodyHeader");
|
||||
|
@@ -1,14 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef BESTRESULTSEXCEL_H
|
||||
#define BESTRESULTSEXCEL_H
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "reports/ExcelFile.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace platform {
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
class BestResultsExcel : public ExcelFile {
|
||||
public:
|
||||
BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets);
|
||||
@@ -34,3 +33,4 @@ namespace platform {
|
||||
int datasetNameSize = 25; // Min size of the column
|
||||
};
|
||||
}
|
||||
#endif
|
103
src/best/BestResultsMd.cpp
Normal file
103
src/best/BestResultsMd.cpp
Normal file
@@ -0,0 +1,103 @@
|
||||
#include <iostream>
|
||||
#include "BestResultsMd.h"
|
||||
#include "common/Utils.h" // compute_std
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::ordered_json;
|
||||
void BestResultsMd::openMdFile(const std::string& name)
|
||||
{
|
||||
handler.open(name);
|
||||
if (!handler.is_open()) {
|
||||
std::cerr << "Error opening file " << name << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
void BestResultsMd::results_header(const std::vector<std::string>& models, const std::string& date)
|
||||
{
|
||||
this->models = models;
|
||||
auto file_name = Paths::tex() + Paths::md_output();
|
||||
openMdFile(file_name);
|
||||
handler << "<!-- This file has been generated by the platform program" << std::endl;
|
||||
handler << " Date: " << date.c_str() << std::endl;
|
||||
handler << "" << std::endl;
|
||||
handler << " Table of results" << std::endl;
|
||||
handler << "-->" << std::endl;
|
||||
handler << "| # | Dataset |";
|
||||
for (const auto& model : models) {
|
||||
handler << " " << model.c_str() << " |";
|
||||
}
|
||||
handler << std::endl;
|
||||
handler << "|--: | :--- |";
|
||||
for (const auto& model : models) {
|
||||
handler << " :---: |";
|
||||
}
|
||||
handler << std::endl;
|
||||
}
|
||||
void BestResultsMd::results_body(const std::vector<std::string>& datasets, json& table)
|
||||
{
|
||||
int i = 0;
|
||||
for (auto const& dataset : datasets) {
|
||||
// Find out max value for this dataset
|
||||
double max_value = 0;
|
||||
// Find out the max value for this dataset
|
||||
for (const auto& model : models) {
|
||||
double value;
|
||||
try {
|
||||
value = table[model].at(dataset).at(0).get<double>();
|
||||
}
|
||||
catch (nlohmann::json_abi_v3_11_3::detail::out_of_range err) {
|
||||
value = -1.0;
|
||||
}
|
||||
if (value > max_value) {
|
||||
max_value = value;
|
||||
}
|
||||
}
|
||||
handler << "| " << ++i << " | " << dataset.c_str() << " | ";
|
||||
for (const auto& model : models) {
|
||||
double value = table[model].at(dataset).at(0).get<double>();
|
||||
double std_value = table[model].at(dataset).at(3).get<double>();
|
||||
const char* bold = value == max_value ? "**" : "";
|
||||
handler << bold << std::setprecision(4) << std::fixed << value << "±" << std::setprecision(3) << std_value << bold << " | ";
|
||||
}
|
||||
handler << std::endl;
|
||||
}
|
||||
}
|
||||
void BestResultsMd::results_footer(const std::map<std::string, std::vector<double>>& totals, const std::string& best_model)
|
||||
{
|
||||
handler << "| | **Average Score** | ";
|
||||
int nDatasets = totals.begin()->second.size();
|
||||
for (const auto& model : models) {
|
||||
double value = std::reduce(totals.at(model).begin(), totals.at(model).end()) / nDatasets;
|
||||
double std_value = compute_std(totals.at(model), value);
|
||||
const char* bold = model == best_model ? "**" : "";
|
||||
handler << bold << std::setprecision(4) << std::fixed << value << "±" << std::setprecision(3) << std::fixed << std_value << bold << " | ";
|
||||
}
|
||||
|
||||
handler.close();
|
||||
}
|
||||
void BestResultsMd::holm_test(struct HolmResult& holmResult, const std::string& date)
|
||||
{
|
||||
auto file_name = Paths::tex() + Paths::md_post_hoc();
|
||||
openMdFile(file_name);
|
||||
handler << "<!-- This file has been generated by the platform program" << std::endl;
|
||||
handler << " Date: " << date.c_str() << std::endl;
|
||||
handler << std::endl;
|
||||
handler << " Post-hoc handler test" << std::endl;
|
||||
handler << "-->" << std::endl;
|
||||
handler << "Post-hoc Holm test: H<sub>0</sub>: There is no significant differences between the control model and the other models." << std::endl << std::endl;
|
||||
handler << "| classifier | pvalue | rank | win | tie | loss | H<sub>0</sub> |" << std::endl;
|
||||
handler << "| :-- | --: | --: | --:| --: | --: | :--: |" << std::endl;
|
||||
for (auto const& line : holmResult.holmLines) {
|
||||
auto textStatus = !line.reject ? "**" : " ";
|
||||
if (line.model == holmResult.model) {
|
||||
handler << "| " << line.model << " | - | " << std::fixed << std::setprecision(2) << line.rank << " | - | - | - |" << std::endl;
|
||||
} else {
|
||||
handler << "| " << line.model << " | " << textStatus << std::scientific << std::setprecision(4) << line.pvalue << textStatus << " |";
|
||||
handler << std::fixed << std::setprecision(2) << line.rank << " | " << line.wtl.win << " | " << line.wtl.tie << " | " << line.wtl.loss << " |";
|
||||
handler << (line.reject ? "rejected" : "**accepted**") << " |" << std::endl;
|
||||
}
|
||||
}
|
||||
handler << std::endl;
|
||||
handler.close();
|
||||
}
|
||||
}
|
24
src/best/BestResultsMd.h
Normal file
24
src/best/BestResultsMd.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef BEST_RESULTS_MD_H
#define BEST_RESULTS_MD_H
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "common/Paths.h"
#include "Statistics.h"
namespace platform {
    using json = nlohmann::ordered_json;
    // Markdown writer for the best-results reports.
    //
    // The results table is produced by three calls that share one output
    // stream and must be made in this order:
    //   results_header() opens the file and writes the header rows,
    //   results_body()   writes one row per dataset,
    //   results_footer() writes the averages row and closes the file.
    // holm_test() is independent: it opens, writes and closes its own file.
    class BestResultsMd {
    public:
        BestResultsMd() = default;
        ~BestResultsMd() = default;
        // Opens the results file and writes banner + table header; stores `models`.
        void results_header(const std::vector<std::string>& models, const std::string& date);
        // Writes one table row per dataset using the models given to results_header().
        void results_body(const std::vector<std::string>& datasets, json& table);
        // Writes the averages row (best_model highlighted) and closes the file.
        void results_footer(const std::map<std::string, std::vector<double>>& totals, const std::string& best_model);
        // Writes the post-hoc Holm test table to its own file.
        void holm_test(struct HolmResult& holmResult, const std::string& date);
    private:
        // Opens `name` on `handler`; terminates the program if it cannot be opened.
        void openMdFile(const std::string& name);
        std::ofstream handler;           // output stream shared by the writer methods
        std::vector<std::string> models; // model names captured by results_header()
    };
}
#endif
|
117
src/best/BestResultsTex.cpp
Normal file
117
src/best/BestResultsTex.cpp
Normal file
@@ -0,0 +1,117 @@
|
||||
#include <iostream>
|
||||
#include "BestResultsTex.h"
|
||||
#include "common/Utils.h" // compute_std
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::ordered_json;
|
||||
void BestResultsTex::openTexFile(const std::string& name)
|
||||
{
|
||||
handler.open(name);
|
||||
if (!handler.is_open()) {
|
||||
std::cerr << "Error opening file " << name << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
void BestResultsTex::results_header(const std::vector<std::string>& models, const std::string& date)
|
||||
{
|
||||
this->models = models;
|
||||
auto file_name = Paths::tex() + Paths::tex_output();
|
||||
openTexFile(file_name);
|
||||
handler << "%% This file has been generated by the platform program" << std::endl;
|
||||
handler << "%% Date: " << date.c_str() << std::endl;
|
||||
handler << "%%" << std::endl;
|
||||
handler << "%% Table of results" << std::endl;
|
||||
handler << "%%" << std::endl;
|
||||
handler << "\\begin{table}[htbp] " << std::endl;
|
||||
handler << "\\centering " << std::endl;
|
||||
handler << "\\tiny " << std::endl;
|
||||
handler << "\\renewcommand{\\arraystretch }{1.2} " << std::endl;
|
||||
handler << "\\renewcommand{\\tabcolsep }{0.07cm} " << std::endl;
|
||||
handler << "\\caption{Accuracy results(mean $\\pm$ std) for all the algorithms and datasets} " << std::endl;
|
||||
handler << "\\label{tab:results_accuracy}" << std::endl;
|
||||
handler << "\\begin{tabular} {{r" << std::string(models.size(), 'c').c_str() << "}}" << std::endl;
|
||||
handler << "\\hline " << std::endl;
|
||||
handler << "" << std::endl;
|
||||
for (const auto& model : models) {
|
||||
handler << "& " << model.c_str();
|
||||
}
|
||||
handler << "\\\\" << std::endl;
|
||||
handler << "\\hline" << std::endl;
|
||||
}
|
||||
void BestResultsTex::results_body(const std::vector<std::string>& datasets, json& table)
|
||||
{
|
||||
int i = 0;
|
||||
for (auto const& dataset : datasets) {
|
||||
// Find out max value for this dataset
|
||||
double max_value = 0;
|
||||
// Find out the max value for this dataset
|
||||
for (const auto& model : models) {
|
||||
double value;
|
||||
try {
|
||||
value = table[model].at(dataset).at(0).get<double>();
|
||||
}
|
||||
catch (nlohmann::json_abi_v3_11_3::detail::out_of_range err) {
|
||||
value = -1.0;
|
||||
}
|
||||
if (value > max_value) {
|
||||
max_value = value;
|
||||
}
|
||||
}
|
||||
handler << ++i << " ";
|
||||
for (const auto& model : models) {
|
||||
double value = table[model].at(dataset).at(0).get<double>();
|
||||
double std_value = table[model].at(dataset).at(3).get<double>();
|
||||
const char* bold = value == max_value ? "\\bfseries" : "";
|
||||
handler << "& " << bold << std::setprecision(4) << std::fixed << value << "$\\pm$" << std::setprecision(3) << std_value;
|
||||
}
|
||||
handler << "\\\\" << std::endl;
|
||||
}
|
||||
}
|
||||
void BestResultsTex::results_footer(const std::map<std::string, std::vector<double>>& totals, const std::string& best_model)
|
||||
{
|
||||
handler << "\\hline" << std::endl;
|
||||
handler << "Average ";
|
||||
int nDatasets = totals.begin()->second.size();
|
||||
for (const auto& model : models) {
|
||||
double value = std::reduce(totals.at(model).begin(), totals.at(model).end()) / nDatasets;
|
||||
double std_value = compute_std(totals.at(model), value);
|
||||
const char* bold = model == best_model ? "\\bfseries" : "";
|
||||
handler << "& " << bold << std::setprecision(4) << std::fixed << value << "$\\pm$" << std::setprecision(3) << std::fixed << std_value;
|
||||
}
|
||||
handler << "\\\\" << std::endl;
|
||||
handler << "\\hline " << std::endl;
|
||||
handler << "\\end{tabular}" << std::endl;
|
||||
handler << "\\end{table}" << std::endl;
|
||||
handler.close();
|
||||
}
|
||||
void BestResultsTex::holm_test(struct HolmResult& holmResult, const std::string& date)
|
||||
{
|
||||
auto file_name = Paths::tex() + Paths::tex_post_hoc();
|
||||
openTexFile(file_name);
|
||||
handler << "%% This file has been generated by the platform program" << std::endl;
|
||||
handler << "%% Date: " << date.c_str() << std::endl;
|
||||
handler << "%%" << std::endl;
|
||||
handler << "%% Post-hoc handler test" << std::endl;
|
||||
handler << "%%" << std::endl;
|
||||
handler << "\\begin{table}[htbp]" << std::endl;
|
||||
handler << "\\centering" << std::endl;
|
||||
handler << "\\caption{Results of the post-hoc test for the mean accuracy of the algorithms.}\\label{tab:tests}" << std::endl;
|
||||
handler << "\\begin{tabular}{lrrrrr}" << std::endl;
|
||||
handler << "\\hline" << std::endl;
|
||||
handler << "classifier & pvalue & rank & win & tie & loss\\\\" << std::endl;
|
||||
handler << "\\hline" << std::endl;
|
||||
for (auto const& line : holmResult.holmLines) {
|
||||
auto textStatus = !line.reject ? "\\bf " : " ";
|
||||
if (line.model == holmResult.model) {
|
||||
handler << line.model << " & - & " << std::fixed << std::setprecision(2) << line.rank << " & - & - & - \\\\" << std::endl;
|
||||
} else {
|
||||
handler << line.model << " & " << textStatus << std::scientific << std::setprecision(4) << line.pvalue << " & ";
|
||||
handler << std::fixed << std::setprecision(2) << line.rank << " & " << line.wtl.win << " & " << line.wtl.tie << " & " << line.wtl.loss << "\\\\" << std::endl;
|
||||
}
|
||||
}
|
||||
handler << "\\hline " << std::endl;
|
||||
handler << "\\end{tabular}" << std::endl;
|
||||
handler << "\\end{table}" << std::endl;
|
||||
handler.close();
|
||||
}
|
||||
}
|
24
src/best/BestResultsTex.h
Normal file
24
src/best/BestResultsTex.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef BEST_RESULTS_TEX_H
#define BEST_RESULTS_TEX_H
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "common/Paths.h"
#include "Statistics.h"
namespace platform {
    using json = nlohmann::ordered_json;
    // LaTeX writer for the best-results reports.
    //
    // The results table is produced by three calls that share one output
    // stream and must be made in this order:
    //   results_header() opens the file and writes the table preamble,
    //   results_body()   writes one row per dataset,
    //   results_footer() writes the averages row, ends the table and closes the file.
    // holm_test() is independent: it opens, writes and closes its own file.
    class BestResultsTex {
    public:
        BestResultsTex() = default;
        ~BestResultsTex() = default;
        // Opens the results file and writes banner + table preamble; stores `models`.
        void results_header(const std::vector<std::string>& models, const std::string& date);
        // Writes one table row per dataset using the models given to results_header().
        void results_body(const std::vector<std::string>& datasets, json& table);
        // Writes the averages row (best_model highlighted) and closes the file.
        void results_footer(const std::map<std::string, std::vector<double>>& totals, const std::string& best_model);
        // Writes the post-hoc Holm test table to its own file.
        void holm_test(struct HolmResult& holmResult, const std::string& date);
    private:
        // Opens `name` on `handler`; terminates the program if it cannot be opened.
        void openTexFile(const std::string& name);
        std::ofstream handler;           // output stream shared by the writer methods
        std::vector<std::string> models; // model names captured by results_header()
    };
}
#endif
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef BESTSCORE_H
|
||||
#define BESTSCORE_H
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
@@ -24,3 +24,4 @@ namespace platform {
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -4,6 +4,8 @@
|
||||
#include "common/Colors.h"
|
||||
#include "common/Symbols.h"
|
||||
#include "common/CLocale.h"
|
||||
#include "BestResultsTex.h"
|
||||
#include "BestResultsMd.h"
|
||||
#include "Statistics.h"
|
||||
|
||||
|
||||
@@ -113,7 +115,7 @@ namespace platform {
|
||||
}
|
||||
}
|
||||
|
||||
void Statistics::postHocHolmTest(bool friedmanResult)
|
||||
void Statistics::postHocHolmTest(bool friedmanResult, bool tex)
|
||||
{
|
||||
if (!fitted) {
|
||||
fit();
|
||||
@@ -130,7 +132,7 @@ namespace platform {
|
||||
stats[i] = 0.0;
|
||||
continue;
|
||||
}
|
||||
double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
|
||||
double z = std::abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
|
||||
double p_value = (long double)2 * (1 - cdf(dist, z));
|
||||
stats[i] = p_value;
|
||||
}
|
||||
@@ -195,6 +197,12 @@ namespace platform {
|
||||
if (output) {
|
||||
std::cout << oss.str();
|
||||
}
|
||||
if (tex) {
|
||||
BestResultsTex bestResultsTex;
|
||||
BestResultsMd bestResultsMd;
|
||||
bestResultsTex.holm_test(holmResult, get_date() + " " + get_time());
|
||||
bestResultsMd.holm_test(holmResult, get_date() + " " + get_time());
|
||||
}
|
||||
}
|
||||
bool Statistics::friedmanTest()
|
||||
{
|
||||
|
@@ -1,13 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef STATISTICS_H
|
||||
#define STATISTICS_H
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
struct WTL {
|
||||
int win;
|
||||
int tie;
|
||||
@@ -34,7 +34,7 @@ namespace platform {
|
||||
public:
|
||||
Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance = 0.05, bool output = true);
|
||||
bool friedmanTest();
|
||||
void postHocHolmTest(bool friedmanResult);
|
||||
void postHocHolmTest(bool friedmanResult, bool tex=false);
|
||||
FriedmanResult& getFriedmanResult();
|
||||
HolmResult& getHolmResult();
|
||||
std::map<std::string, std::map<std::string, float>>& getRanks();
|
||||
@@ -60,3 +60,4 @@ namespace platform {
|
||||
std::map<std::string, std::map<std::string, float>> ranksModels;
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -5,26 +5,18 @@
|
||||
#include "common/Paths.h"
|
||||
#include "common/Colors.h"
|
||||
#include "best/BestResults.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
void manageArguments(argparse::ArgumentParser& program)
|
||||
{
|
||||
program.add_argument("-m", "--model")
|
||||
.help("Model to use: " + platform::Models::instance()->toString() + " or any")
|
||||
.action([](const std::string& value) {
|
||||
std::vector<std::string> valid(platform::Models::instance()->getNames());
|
||||
valid.push_back("any");
|
||||
static const std::vector<std::string> choices = valid;
|
||||
if (find(choices.begin(), choices.end(), value) != choices.end()) {
|
||||
return value;
|
||||
}
|
||||
throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString() + " or any");
|
||||
}
|
||||
);
|
||||
.help("Model to use or any")
|
||||
.default_value("any");
|
||||
program.add_argument("-d", "--dataset").default_value("any").help("Filter results of the selected model) (any for all datasets)");
|
||||
program.add_argument("-s", "--score").default_value("accuracy").help("Filter results of the score name supplied");
|
||||
program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
|
||||
program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true);
|
||||
program.add_argument("--tex").help("Output result table to TeX file").default_value(false).implicit_value(true);
|
||||
program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const std::string& value) {
|
||||
try {
|
||||
auto k = std::stod(value);
|
||||
@@ -46,7 +38,7 @@ int main(int argc, char** argv)
|
||||
argparse::ArgumentParser program("b_best", { platform_project_version.begin(), platform_project_version.end() });
|
||||
manageArguments(program);
|
||||
std::string model, dataset, score;
|
||||
bool build, report, friedman, excel;
|
||||
bool build, report, friedman, excel, tex;
|
||||
double level;
|
||||
try {
|
||||
program.parse_args(argc, argv);
|
||||
@@ -55,6 +47,7 @@ int main(int argc, char** argv)
|
||||
score = program.get<std::string>("score");
|
||||
friedman = program.get<bool>("friedman");
|
||||
excel = program.get<bool>("excel");
|
||||
tex = program.get<bool>("tex");
|
||||
level = program.get<double>("level");
|
||||
if (model == "" || score == "") {
|
||||
throw std::runtime_error("Model and score name must be supplied");
|
||||
@@ -74,7 +67,7 @@ int main(int argc, char** argv)
|
||||
auto results = platform::BestResults(platform::Paths::results(), score, model, dataset, friedman, level);
|
||||
if (model == "any") {
|
||||
results.buildAll();
|
||||
results.reportAll(excel);
|
||||
results.reportAll(excel, tex);
|
||||
} else {
|
||||
std::string fileName = results.build();
|
||||
std::cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << std::endl;
|
||||
|
@@ -11,9 +11,9 @@
|
||||
#include "common/Colors.h"
|
||||
#include "common/DotEnv.h"
|
||||
#include "grid/GridSearch.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
const int MAXL = 133;
|
||||
|
||||
void assignModel(argparse::ArgumentParser& parser)
|
||||
@@ -93,8 +93,10 @@ void list_dump(std::string& model)
|
||||
if (item.first.size() > max_dataset) {
|
||||
max_dataset = item.first.size();
|
||||
}
|
||||
if (item.second.dump().size() > max_hyper) {
|
||||
max_hyper = item.second.dump().size();
|
||||
for (auto const& [key, value] : item.second.items()) {
|
||||
if (value.dump().size() > max_hyper) {
|
||||
max_hyper = value.dump().size();
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << Colors::GREEN() << left << " # " << left << setw(max_dataset) << "Dataset" << " #Com. "
|
||||
@@ -106,7 +108,12 @@ void list_dump(std::string& model)
|
||||
std::cout << color;
|
||||
auto num_combinations = data.getNumCombinations(item.first);
|
||||
std::cout << setw(3) << fixed << right << ++index << left << " " << setw(max_dataset) << item.first
|
||||
<< " " << setw(5) << right << num_combinations << " " << setw(max_hyper) << left << item.second.dump() << std::endl;
|
||||
<< " " << setw(5) << right << num_combinations << " ";
|
||||
std::string prefix = "";
|
||||
for (auto const& [key, value] : item.second.items()) {
|
||||
std::cout << prefix << setw(max_hyper) << std::left << value.dump() << std::endl;
|
||||
prefix = string(11 + max_dataset, ' ');
|
||||
}
|
||||
}
|
||||
std::cout << Colors::RESET() << std::endl;
|
||||
}
|
||||
|
@@ -10,10 +10,10 @@
|
||||
#include "common/Datasets.h"
|
||||
#include "reports/DatasetsExcel.h"
|
||||
#include "reports/DatasetsConsole.h"
|
||||
#include "reports/ResultsDatasetConsole.h"
|
||||
#include "results/ResultsDatasetConsole.h"
|
||||
#include "results/ResultsDataset.h"
|
||||
#include "results/ResultsDatasetExcel.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
|
||||
void list_datasets(argparse::ArgumentParser& program)
|
||||
@@ -37,7 +37,8 @@ void list_results(argparse::ArgumentParser& program)
|
||||
auto model = program.get<string>("model");
|
||||
auto excel = program.get<bool>("excel");
|
||||
auto report = platform::ResultsDatasetsConsole();
|
||||
report.report(dataset, score, model);
|
||||
if (!report.report(dataset, score, model))
|
||||
return;
|
||||
std::cout << report.getOutput();
|
||||
if (excel) {
|
||||
auto data = report.getData();
|
||||
@@ -75,18 +76,8 @@ int main(int argc, char** argv)
|
||||
}
|
||||
);
|
||||
results_command.add_argument("-m", "--model")
|
||||
.help("Model to use: " + platform::Models::instance()->toString() + " or any")
|
||||
.default_value("any")
|
||||
.action([](const std::string& value) {
|
||||
std::vector<std::string> valid(platform::Models::instance()->getNames());
|
||||
valid.push_back("any");
|
||||
static const std::vector<std::string> choices = valid;
|
||||
if (find(choices.begin(), choices.end(), value) != choices.end()) {
|
||||
return value;
|
||||
}
|
||||
throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString() + " or any");
|
||||
}
|
||||
);
|
||||
.help("Model to use or any")
|
||||
.default_value("any");
|
||||
results_command.add_argument("--excel").help("Output in Excel format").default_value(false).implicit_value(true);
|
||||
results_command.add_argument("-s", "--score").default_value("accuracy").help("Filter results of the score name supplied");
|
||||
|
||||
@@ -106,7 +97,7 @@ int main(int argc, char** argv)
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
throw std::runtime_error("You must specify one of the following commands: datasets, results\n");
|
||||
throw std::runtime_error("You must specify one of the following commands: {datasets, results}\n");
|
||||
}
|
||||
}
|
||||
catch (const exception& err) {
|
||||
|
@@ -7,29 +7,36 @@
|
||||
#include "common/Paths.h"
|
||||
#include "main/Models.h"
|
||||
#include "main/modelRegister.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
|
||||
void manageArguments(argparse::ArgumentParser& program)
|
||||
{
|
||||
auto env = platform::DotEnv();
|
||||
auto datasets = platform::Datasets(false, platform::Paths::datasets());
|
||||
program.add_argument("-d", "--dataset")
|
||||
auto& group = program.add_mutually_exclusive_group(true);
|
||||
group.add_argument("-d", "--dataset")
|
||||
.help("Dataset file name: " + datasets.toString())
|
||||
.default_value("all")
|
||||
.action([](const std::string& value) {
|
||||
auto datasets = platform::Datasets(false, platform::Paths::datasets());
|
||||
static const std::vector<std::string> choices_datasets(datasets.getNames());
|
||||
static std::vector<std::string> choices_datasets(datasets.getNames());
|
||||
choices_datasets.push_back("all");
|
||||
if (find(choices_datasets.begin(), choices_datasets.end(), value) != choices_datasets.end()) {
|
||||
return value;
|
||||
}
|
||||
throw std::runtime_error("Dataset must be one of: " + datasets.toString());
|
||||
}
|
||||
);
|
||||
group.add_argument("--datasets").nargs(1, 50).help("Datasets file names 1..50 separated by spaces").default_value(std::vector<std::string>());
|
||||
group.add_argument("--datasets-file").default_value("").help("Datasets file name. Mutually exclusive with dataset. This file should contain a list of datasets to test.");
|
||||
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
|
||||
program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \
|
||||
"Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
|
||||
program.add_argument("--hyper-best").default_value(false).help("Use best results of the model as source of hyperparameters").implicit_value(true);
|
||||
program.add_argument("-m", "--model")
|
||||
.help("Model to use: " + platform::Models::instance()->toString())
|
||||
.action([](const std::string& value) {
|
||||
@@ -42,6 +49,23 @@ void manageArguments(argparse::ArgumentParser& program)
|
||||
);
|
||||
program.add_argument("--title").default_value("").help("Experiment title");
|
||||
program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true);
|
||||
auto valid_choices = env.valid_tokens("discretize_algo");
|
||||
auto& disc_arg = program.add_argument("--discretize-algo").help("Algorithm to use in discretization. Valid values: " + env.valid_values("discretize_algo")).default_value(env.get("discretize_algo"));
|
||||
for (auto choice : valid_choices) {
|
||||
disc_arg.choices(choice);
|
||||
}
|
||||
valid_choices = env.valid_tokens("smooth_strat");
|
||||
auto& smooth_arg = program.add_argument("--smooth-strat").help("Smooth strategy used in Bayes Network node initialization. Valid values: " + env.valid_values("smooth_strat")).default_value(env.get("smooth_strat"));
|
||||
for (auto choice : valid_choices) {
|
||||
smooth_arg.choices(choice);
|
||||
}
|
||||
auto& score_arg = program.add_argument("-s", "--score").help("Score to use. Valid values: " + env.valid_values("score")).default_value(env.get("score"));
|
||||
valid_choices = env.valid_tokens("score");
|
||||
for (auto choice : valid_choices) {
|
||||
score_arg.choices(choice);
|
||||
}
|
||||
program.add_argument("--generate-fold-files").help("generate fold information in datasets_experiment folder").default_value(false).implicit_value(true);
|
||||
program.add_argument("--graph").help("generate graphviz dot files with the model").default_value(false).implicit_value(true);
|
||||
program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true);
|
||||
program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
|
||||
program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
|
||||
@@ -61,38 +85,54 @@ void manageArguments(argparse::ArgumentParser& program)
|
||||
throw std::runtime_error("Number of folds must be an integer");
|
||||
}});
|
||||
auto seed_values = env.getSeeds();
|
||||
program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
|
||||
program.add_argument("--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
argparse::ArgumentParser program("b_main", { platform_project_version.begin(), platform_project_version.end() });
|
||||
manageArguments(program);
|
||||
std::string file_name, model_name, title, hyperparameters_file;
|
||||
std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat, score;
|
||||
json hyperparameters_json;
|
||||
bool discretize_dataset, stratified, saveResults, quiet, no_train_score;
|
||||
bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files, graph, hyper_best;
|
||||
std::vector<int> seeds;
|
||||
std::vector<std::string> file_names;
|
||||
std::vector<std::string> filesToTest;
|
||||
int n_folds;
|
||||
try {
|
||||
program.parse_args(argc, argv);
|
||||
file_name = program.get<std::string>("dataset");
|
||||
file_names = program.get<std::vector<std::string>>("datasets");
|
||||
datasets_file = program.get<std::string>("datasets-file");
|
||||
model_name = program.get<std::string>("model");
|
||||
discretize_dataset = program.get<bool>("discretize");
|
||||
discretize_algo = program.get<std::string>("discretize-algo");
|
||||
smooth_strat = program.get<std::string>("smooth-strat");
|
||||
stratified = program.get<bool>("stratified");
|
||||
quiet = program.get<bool>("quiet");
|
||||
graph = program.get<bool>("graph");
|
||||
n_folds = program.get<int>("folds");
|
||||
score = program.get<std::string>("score");
|
||||
seeds = program.get<std::vector<int>>("seeds");
|
||||
auto hyperparameters = program.get<std::string>("hyperparameters");
|
||||
hyperparameters_json = json::parse(hyperparameters);
|
||||
hyperparameters_file = program.get<std::string>("hyper-file");
|
||||
no_train_score = program.get<bool>("no-train-score");
|
||||
hyper_best = program.get<bool>("hyper-best");
|
||||
generate_fold_files = program.get<bool>("generate-fold-files");
|
||||
if (hyper_best) {
|
||||
// Build the best results file_name
|
||||
hyperparameters_file = platform::Paths::results() + platform::Paths::bestResultsFile(score, model_name);
|
||||
// ignore this parameter
|
||||
hyperparameters = "{}";
|
||||
} else {
|
||||
if (hyperparameters_file != "" && hyperparameters != "{}") {
|
||||
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
|
||||
}
|
||||
}
|
||||
title = program.get<std::string>("title");
|
||||
if (title == "" && file_name == "") {
|
||||
throw runtime_error("title is mandatory if dataset is not provided");
|
||||
if (title == "" && file_name == "all") {
|
||||
throw runtime_error("title is mandatory if all datasets are to be tested");
|
||||
}
|
||||
saveResults = program.get<bool>("save");
|
||||
}
|
||||
@@ -101,8 +141,45 @@ int main(int argc, char** argv)
|
||||
cerr << program;
|
||||
exit(1);
|
||||
}
|
||||
auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets());
|
||||
if (file_name != "") {
|
||||
auto datasets = platform::Datasets(false, platform::Paths::datasets());
|
||||
if (datasets_file != "") {
|
||||
ifstream catalog(datasets_file);
|
||||
if (catalog.is_open()) {
|
||||
std::string line;
|
||||
while (getline(catalog, line)) {
|
||||
if (line.empty() || line[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
if (!datasets.isDataset(line)) {
|
||||
cerr << "Dataset " << line << " not found" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
filesToTest.push_back(line);
|
||||
}
|
||||
catalog.close();
|
||||
saveResults = true;
|
||||
if (title == "") {
|
||||
title = "Test " + to_string(filesToTest.size()) + " datasets (" + datasets_file + ") "\
|
||||
+ model_name + " " + to_string(n_folds) + " folds";
|
||||
}
|
||||
} else {
|
||||
throw std::invalid_argument("Unable to open catalog file. [" + datasets_file + "]");
|
||||
}
|
||||
} else {
|
||||
if (file_names.size() > 0) {
|
||||
for (auto file : file_names) {
|
||||
if (!datasets.isDataset(file)) {
|
||||
cerr << "Dataset " << file << " not found" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
filesToTest = file_names;
|
||||
saveResults = true;
|
||||
if (title == "") {
|
||||
title = "Test " + to_string(file_names.size()) + " datasets " + model_name + " " + to_string(n_folds) + " folds";
|
||||
}
|
||||
} else {
|
||||
if (file_name != "all") {
|
||||
if (!datasets.isDataset(file_name)) {
|
||||
cerr << "Dataset " << file_name << " not found" << std::endl;
|
||||
exit(1);
|
||||
@@ -115,9 +192,12 @@ int main(int argc, char** argv)
|
||||
filesToTest = datasets.getNames();
|
||||
saveResults = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
platform::HyperParameters test_hyperparams;
|
||||
if (hyperparameters_file != "") {
|
||||
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file);
|
||||
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file, hyper_best);
|
||||
} else {
|
||||
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
|
||||
}
|
||||
@@ -127,22 +207,28 @@ int main(int argc, char** argv)
|
||||
*/
|
||||
auto env = platform::DotEnv();
|
||||
auto experiment = platform::Experiment();
|
||||
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("14.0.3");
|
||||
experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1");
|
||||
experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat);
|
||||
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
|
||||
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
|
||||
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName(score);
|
||||
experiment.setHyperparameters(test_hyperparams);
|
||||
for (auto seed : seeds) {
|
||||
experiment.addRandomSeed(seed);
|
||||
}
|
||||
platform::Timer timer;
|
||||
timer.start();
|
||||
experiment.go(filesToTest, quiet, no_train_score);
|
||||
experiment.go(filesToTest, quiet, no_train_score, generate_fold_files, graph);
|
||||
experiment.setDuration(timer.getDuration());
|
||||
if (!quiet) {
|
||||
// Classification report if only one dataset is tested
|
||||
experiment.report(filesToTest.size() == 1);
|
||||
}
|
||||
if (saveResults) {
|
||||
experiment.saveResult();
|
||||
}
|
||||
if (!quiet)
|
||||
experiment.report();
|
||||
if (graph) {
|
||||
experiment.saveGraph();
|
||||
}
|
||||
std::cout << "Done!" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -1,24 +1,25 @@
|
||||
#include <iostream>
|
||||
#include <sys/ioctl.h>
|
||||
#include <utility>
|
||||
#include <unistd.h>
|
||||
#include <argparse/argparse.hpp>
|
||||
#include "manage/ManageScreen.h"
|
||||
#include "config.h"
|
||||
#include <signal.h>
|
||||
#include "config_platform.h"
|
||||
|
||||
platform::ManageScreen* manager = nullptr;
|
||||
|
||||
void manageArguments(argparse::ArgumentParser& program, int argc, char** argv)
|
||||
{
|
||||
program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>();
|
||||
program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)");
|
||||
program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
|
||||
program.add_argument("--platform").default_value("any").help("Filter results of the selected platform");
|
||||
program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true);
|
||||
program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true);
|
||||
program.add_argument("--compare").help("Compare with best results").default_value(false).implicit_value(true);
|
||||
try {
|
||||
program.parse_args(argc, argv);
|
||||
auto number = program.get<int>("number");
|
||||
if (number < 0) {
|
||||
throw std::runtime_error("Number of results must be greater than or equal to 0");
|
||||
}
|
||||
auto platform = program.get<std::string>("platform");
|
||||
auto model = program.get<std::string>("model");
|
||||
auto score = program.get<std::string>("score");
|
||||
auto complete = program.get<bool>("complete");
|
||||
@@ -32,37 +33,40 @@ void manageArguments(argparse::ArgumentParser& program, int argc, char** argv)
|
||||
}
|
||||
}
|
||||
|
||||
int numRows()
|
||||
std::pair<int, int> numRowsCols()
|
||||
{
|
||||
#ifdef TIOCGSIZE
|
||||
struct ttysize ts;
|
||||
ioctl(STDIN_FILENO, TIOCGSIZE, &ts);
|
||||
// cols = ts.ts_cols;
|
||||
return ts.ts_lines;
|
||||
return { ts.ts_lines, ts.ts_cols };
|
||||
#elif defined(TIOCGWINSZ)
|
||||
struct winsize ts;
|
||||
ioctl(STDIN_FILENO, TIOCGWINSZ, &ts);
|
||||
// cols = ts.ws_col;
|
||||
return ts.ws_row;
|
||||
return { ts.ws_row, ts.ws_col };
|
||||
#endif /* TIOCGSIZE */
|
||||
}
|
||||
void handleResize(int sig)
|
||||
{
|
||||
auto [rows, cols] = numRowsCols();
|
||||
manager->updateSize(rows, cols);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
auto program = argparse::ArgumentParser("b_manage", { platform_project_version.begin(), platform_project_version.end() });
|
||||
manageArguments(program, argc, argv);
|
||||
int number = program.get<int>("number");
|
||||
std::string model = program.get<std::string>("model");
|
||||
std::string score = program.get<std::string>("score");
|
||||
auto complete = program.get<bool>("complete");
|
||||
auto partial = program.get<bool>("partial");
|
||||
auto compare = program.get<bool>("compare");
|
||||
if (number == 0) {
|
||||
number = std::max(0, numRows() - 6); // 6 is the number of lines used by the menu & header
|
||||
}
|
||||
std::string platform = program.get<std::string>("platform");
|
||||
bool complete = program.get<bool>("complete");
|
||||
bool partial = program.get<bool>("partial");
|
||||
bool compare = program.get<bool>("compare");
|
||||
if (complete)
|
||||
partial = false;
|
||||
auto manager = platform::ManageScreen(number, model, score, complete, partial, compare);
|
||||
manager.doMenu();
|
||||
signal(SIGWINCH, handleResize);
|
||||
auto [rows, cols] = numRowsCols();
|
||||
manager = new platform::ManageScreen(rows, cols, model, score, platform, complete, partial, compare);
|
||||
manager->doMenu();
|
||||
delete manager;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef CLOCALE_H
|
||||
#define CLOCALE_H
|
||||
#include <locale>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
@@ -19,3 +19,4 @@ namespace platform {
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef COLORS_H
|
||||
#define COLORS_H
|
||||
#include <string>
|
||||
class Colors {
|
||||
public:
|
||||
@@ -27,3 +27,4 @@ public:
|
||||
static std::string CONCEALED() { return "\033[8m"; }
|
||||
static std::string CLRSCR() { return "\033[2J\033[1;1H"; }
|
||||
};
|
||||
#endif
|
@@ -1,24 +1,26 @@
|
||||
#include <ArffFiles.h>
|
||||
#include <ArffFiles.hpp>
|
||||
#include <fstream>
|
||||
#include "Dataset.h"
|
||||
namespace platform {
|
||||
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
|
||||
const std::string message_dataset_not_loaded = "Dataset not loaded.";
|
||||
Dataset::Dataset(const Dataset& dataset) :
|
||||
path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples),
|
||||
n_features(dataset.n_features), numericFeatures(dataset.numericFeatures), features(dataset.features),
|
||||
states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y),
|
||||
X_train(dataset.X_train), X_test(dataset.X_test), Xv(dataset.Xv), yv(dataset.yv),
|
||||
fileType(dataset.fileType)
|
||||
{
|
||||
}
|
||||
// Returns a copy of the dataset name held in `name`; does not check `loaded`.
std::string Dataset::getName() const
|
||||
{
|
||||
return name;
|
||||
}
|
||||
// Returns a copy of `className`; does not check `loaded`.
std::string Dataset::getClassName() const
|
||||
{
|
||||
return className;
|
||||
}
|
||||
std::vector<std::string> Dataset::getFeatures() const
|
||||
{
|
||||
if (loaded) {
|
||||
return features;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
throw std::invalid_argument(message_dataset_not_loaded);
|
||||
}
|
||||
}
|
||||
int Dataset::getNFeatures() const
|
||||
@@ -26,7 +28,7 @@ namespace platform {
|
||||
if (loaded) {
|
||||
return n_features;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
throw std::invalid_argument(message_dataset_not_loaded);
|
||||
}
|
||||
}
|
||||
int Dataset::getNSamples() const
|
||||
@@ -34,7 +36,40 @@ namespace platform {
|
||||
if (loaded) {
|
||||
return n_samples;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
throw std::invalid_argument(message_dataset_not_loaded);
|
||||
}
|
||||
}
|
||||
// Returns a copy of `className`; does not check `loaded`.
std::string Dataset::getClassName() const
|
||||
{
|
||||
return className;
|
||||
}
|
||||
int Dataset::getNClasses() const
|
||||
{
|
||||
if (loaded) {
|
||||
return *std::max_element(yv.begin(), yv.end()) + 1;
|
||||
} else {
|
||||
throw std::invalid_argument(message_dataset_not_loaded);
|
||||
}
|
||||
}
|
||||
std::vector<std::string> Dataset::getLabels() const
|
||||
{
|
||||
// Return the labels factorization result
|
||||
if (loaded) {
|
||||
return labels;
|
||||
} else {
|
||||
throw std::invalid_argument(message_dataset_not_loaded);
|
||||
}
|
||||
}
|
||||
std::vector<int> Dataset::getClassesCounts() const
|
||||
{
|
||||
if (loaded) {
|
||||
std::vector<int> counts(*std::max_element(yv.begin(), yv.end()) + 1);
|
||||
for (auto y : yv) {
|
||||
counts[y]++;
|
||||
}
|
||||
return counts;
|
||||
} else {
|
||||
throw std::invalid_argument(message_dataset_not_loaded);
|
||||
}
|
||||
}
|
||||
std::map<std::string, std::vector<int>> Dataset::getStates() const
|
||||
@@ -42,7 +77,7 @@ namespace platform {
|
||||
if (loaded) {
|
||||
return states;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
throw std::invalid_argument(message_dataset_not_loaded);
|
||||
}
|
||||
}
|
||||
pair<std::vector<std::vector<float>>&, std::vector<int>&> Dataset::getVectors()
|
||||
@@ -50,30 +85,24 @@ namespace platform {
|
||||
if (loaded) {
|
||||
return { Xv, yv };
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
pair<std::vector<std::vector<int>>&, std::vector<int>&> Dataset::getVectorsDiscretized()
|
||||
{
|
||||
if (loaded) {
|
||||
return { Xd, yv };
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
throw std::invalid_argument(message_dataset_not_loaded);
|
||||
}
|
||||
}
|
||||
pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
|
||||
{
|
||||
if (loaded) {
|
||||
buildTensors();
|
||||
return { X, y };
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
throw std::invalid_argument(message_dataset_not_loaded);
|
||||
}
|
||||
}
|
||||
void Dataset::load_csv()
|
||||
{
|
||||
ifstream file(path + "/" + name + ".csv");
|
||||
if (file.is_open()) {
|
||||
if (!file.is_open()) {
|
||||
throw std::invalid_argument("Unable to open dataset file.");
|
||||
}
|
||||
labels.clear();
|
||||
std::string line;
|
||||
getline(file, line);
|
||||
std::vector<std::string> tokens = split(line, ',');
|
||||
@@ -89,21 +118,23 @@ namespace platform {
|
||||
for (auto i = 0; i < features.size(); ++i) {
|
||||
Xv[i].push_back(stof(tokens[i]));
|
||||
}
|
||||
yv.push_back(stoi(tokens.back()));
|
||||
auto label = trim(tokens.back());
|
||||
if (find(labels.begin(), labels.end(), label) == labels.end()) {
|
||||
labels.push_back(label);
|
||||
}
|
||||
yv.push_back(stoi(label));
|
||||
}
|
||||
file.close();
|
||||
} else {
|
||||
throw std::invalid_argument("Unable to open dataset file.");
|
||||
}
|
||||
}
|
||||
void Dataset::computeStates()
|
||||
{
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
states[features[i]] = std::vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
|
||||
auto item = states.at(features[i]);
|
||||
iota(begin(item), end(item), 0);
|
||||
auto [max_value, idx] = torch::max(X_train.index({ i, "..." }), 0);
|
||||
states[features[i]] = std::vector<int>(max_value.item<int>() + 1);
|
||||
iota(begin(states.at(features[i])), end(states.at(features[i])), 0);
|
||||
}
|
||||
states[className] = std::vector<int>(*max_element(yv.begin(), yv.end()) + 1);
|
||||
auto [max_value, idx] = torch::max(y_train, 0);
|
||||
states[className] = std::vector<int>(max_value.item<int>() + 1);
|
||||
iota(begin(states.at(className)), end(states.at(className)), 0);
|
||||
}
|
||||
void Dataset::load_arff()
|
||||
@@ -117,6 +148,7 @@ namespace platform {
|
||||
className = arff.getClassName();
|
||||
auto attributes = arff.getAttributes();
|
||||
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
|
||||
labels = arff.getLabels();
|
||||
}
|
||||
std::vector<std::string> tokenize(std::string line)
|
||||
{
|
||||
@@ -139,8 +171,11 @@ namespace platform {
|
||||
void Dataset::load_rdata()
|
||||
{
|
||||
ifstream file(path + "/" + name + "_R.dat");
|
||||
if (file.is_open()) {
|
||||
if (!file.is_open()) {
|
||||
throw std::invalid_argument("Unable to open dataset file.");
|
||||
}
|
||||
std::string line;
|
||||
labels.clear();
|
||||
getline(file, line);
|
||||
line = ArffFiles::trim(line);
|
||||
std::vector<std::string> tokens = tokenize(line);
|
||||
@@ -158,12 +193,13 @@ namespace platform {
|
||||
const float value = stof(tokens[i]);
|
||||
Xv[i - 1].push_back(value);
|
||||
}
|
||||
yv.push_back(stoi(tokens.back()));
|
||||
auto label = trim(tokens.back());
|
||||
if (find(labels.begin(), labels.end(), label) == labels.end()) {
|
||||
labels.push_back(label);
|
||||
}
|
||||
yv.push_back(stoi(label));
|
||||
}
|
||||
file.close();
|
||||
} else {
|
||||
throw std::invalid_argument("Unable to open dataset file.");
|
||||
}
|
||||
}
|
||||
void Dataset::load()
|
||||
{
|
||||
@@ -177,39 +213,66 @@ namespace platform {
|
||||
} else if (fileType == RDATA) {
|
||||
load_rdata();
|
||||
}
|
||||
if (discretize) {
|
||||
Xd = discretizeDataset(Xv, yv);
|
||||
computeStates();
|
||||
}
|
||||
n_samples = Xv[0].size();
|
||||
n_features = Xv.size();
|
||||
loaded = true;
|
||||
}
|
||||
void Dataset::buildTensors()
|
||||
{
|
||||
if (discretize) {
|
||||
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
|
||||
if (numericFeaturesIdx.size() == 0) {
|
||||
numericFeatures = std::vector<bool>(n_features, false);
|
||||
} else {
|
||||
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32);
|
||||
if (numericFeaturesIdx.at(0) == -1) {
|
||||
numericFeatures = std::vector<bool>(n_features, true);
|
||||
} else {
|
||||
numericFeatures = std::vector<bool>(n_features, false);
|
||||
for (auto i : numericFeaturesIdx) {
|
||||
numericFeatures[i] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Build Tensors
|
||||
X = torch::zeros({ n_features, n_samples }, torch::kFloat32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
if (discretize) {
|
||||
X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
|
||||
} else {
|
||||
X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
|
||||
}
|
||||
}
|
||||
y = torch::tensor(yv, torch::kInt32);
|
||||
loaded = true;
|
||||
}
|
||||
std::vector<mdlp::labels_t> Dataset::discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
|
||||
std::tuple<torch::Tensor&, torch::Tensor&, torch::Tensor&, torch::Tensor&> Dataset::getTrainTestTensors(std::vector<int>& train, std::vector<int>& test)
|
||||
{
|
||||
std::vector<mdlp::labels_t> Xd;
|
||||
auto fimdlp = mdlp::CPPFImdlp();
|
||||
for (int i = 0; i < X.size(); i++) {
|
||||
fimdlp.fit(X[i], y);
|
||||
mdlp::labels_t& xd = fimdlp.transform(X[i]);
|
||||
Xd.push_back(xd);
|
||||
if (!loaded) {
|
||||
throw std::invalid_argument(message_dataset_not_loaded);
|
||||
}
|
||||
return Xd;
|
||||
auto train_t = torch::tensor(train);
|
||||
int samples_train = train.size();
|
||||
int samples_test = test.size();
|
||||
auto test_t = torch::tensor(test);
|
||||
X_train = X.index({ "...", train_t });
|
||||
y_train = y.index({ train_t });
|
||||
X_test = X.index({ "...", test_t });
|
||||
y_test = y.index({ test_t });
|
||||
if (discretize) {
|
||||
auto discretizer = Discretization::instance()->create(discretizer_algorithm);
|
||||
auto X_train_d = torch::zeros({ n_features, samples_train }, torch::kInt32);
|
||||
auto X_test_d = torch::zeros({ n_features, samples_test }, torch::kInt32);
|
||||
for (auto feature = 0; feature < n_features; ++feature) {
|
||||
if (numericFeatures[feature]) {
|
||||
auto feature_train = X_train.index({ feature, "..." });
|
||||
auto feature_test = X_test.index({ feature, "..." });
|
||||
auto feature_train_disc = discretizer->fit_transform_t(feature_train, y_train);
|
||||
auto feature_test_disc = discretizer->transform_t(feature_test);
|
||||
X_train_d.index_put_({ feature, "..." }, feature_train_disc);
|
||||
X_test_d.index_put_({ feature, "..." }, feature_test_disc);
|
||||
} else {
|
||||
X_train_d.index_put_({ feature, "..." }, X_train.index({ feature, "..." }).to(torch::kInt32));
|
||||
X_test_d.index_put_({ feature, "..." }, X_test.index({ feature, "..." }).to(torch::kInt32));
|
||||
}
|
||||
}
|
||||
X_train = X_train_d;
|
||||
X_test = X_test_d;
|
||||
assert(X_train.dtype() == torch::kInt32);
|
||||
assert(X_test.dtype() == torch::kInt32);
|
||||
computeStates();
|
||||
}
|
||||
assert(y_train.dtype() == torch::kInt32);
|
||||
assert(y_test.dtype() == torch::kInt32);
|
||||
return { X_train, X_test, y_train, y_test };
|
||||
}
|
||||
}
|
@@ -1,77 +1,60 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef DATASET_H
|
||||
#define DATASET_H
|
||||
#include <torch/torch.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <tuple>
|
||||
#include <common/DiscretizationRegister.h>
|
||||
#include "Utils.h"
|
||||
#include "SourceData.h"
|
||||
namespace platform {
|
||||
enum fileType_t { CSV, ARFF, RDATA };
|
||||
class SourceData {
|
||||
public:
|
||||
SourceData(std::string source)
|
||||
{
|
||||
if (source == "Surcov") {
|
||||
path = "datasets/";
|
||||
fileType = CSV;
|
||||
} else if (source == "Arff") {
|
||||
path = "datasets/";
|
||||
fileType = ARFF;
|
||||
} else if (source == "Tanveer") {
|
||||
path = "data/";
|
||||
fileType = RDATA;
|
||||
} else {
|
||||
throw std::invalid_argument("Unknown source.");
|
||||
}
|
||||
}
|
||||
std::string getPath()
|
||||
{
|
||||
return path;
|
||||
}
|
||||
fileType_t getFileType()
|
||||
{
|
||||
return fileType;
|
||||
}
|
||||
private:
|
||||
std::string path;
|
||||
fileType_t fileType;
|
||||
};
|
||||
class Dataset {
|
||||
public:
|
||||
Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType, std::vector<int> numericFeaturesIdx, std::string discretizer_algo = "none") :
|
||||
path(path), name(name), className(className), discretize(discretize),
|
||||
loaded(false), fileType(fileType), numericFeaturesIdx(numericFeaturesIdx), discretizer_algorithm(discretizer_algo)
|
||||
{
|
||||
};
|
||||
explicit Dataset(const Dataset&);
|
||||
std::string getName() const;
|
||||
std::string getClassName() const;
|
||||
int getNClasses() const;
|
||||
std::vector<std::string> getLabels() const; // return the labels factorization result
|
||||
std::vector<int> getClassesCounts() const;
|
||||
std::vector<string> getFeatures() const;
|
||||
std::map<std::string, std::vector<int>> getStates() const;
|
||||
std::pair<vector<std::vector<float>>&, std::vector<int>&> getVectors();
|
||||
std::pair<torch::Tensor&, torch::Tensor&> getTensors();
|
||||
std::tuple<torch::Tensor&, torch::Tensor&, torch::Tensor&, torch::Tensor&> getTrainTestTensors(std::vector<int>& train, std::vector<int>& test);
|
||||
int getNFeatures() const;
|
||||
int getNSamples() const;
|
||||
std::vector<bool>& getNumericFeatures() { return numericFeatures; }
|
||||
void load();
|
||||
const bool inline isLoaded() const { return loaded; };
|
||||
private:
|
||||
std::string path;
|
||||
std::string name;
|
||||
fileType_t fileType;
|
||||
std::string className;
|
||||
int n_samples{ 0 }, n_features{ 0 };
|
||||
std::vector<int> numericFeaturesIdx;
|
||||
std::string discretizer_algorithm;
|
||||
std::vector<bool> numericFeatures; // true if feature is numeric
|
||||
std::vector<std::string> features;
|
||||
std::vector<std::string> labels;
|
||||
std::map<std::string, std::vector<int>> states;
|
||||
bool loaded;
|
||||
bool discretize;
|
||||
torch::Tensor X, y;
|
||||
torch::Tensor X_train, X_test, y_train, y_test;
|
||||
std::vector<std::vector<float>> Xv;
|
||||
std::vector<std::vector<int>> Xd;
|
||||
std::vector<int> yv;
|
||||
void buildTensors();
|
||||
void load_csv();
|
||||
void load_arff();
|
||||
void load_rdata();
|
||||
void computeStates();
|
||||
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y);
|
||||
public:
|
||||
Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
|
||||
explicit Dataset(const Dataset&);
|
||||
std::string getName() const;
|
||||
std::string getClassName() const;
|
||||
std::vector<string> getFeatures() const;
|
||||
std::map<std::string, std::vector<int>> getStates() const;
|
||||
std::pair<vector<std::vector<float>>&, std::vector<int>&> getVectors();
|
||||
std::pair<vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized();
|
||||
std::pair<torch::Tensor&, torch::Tensor&> getTensors();
|
||||
int getNFeatures() const;
|
||||
int getNSamples() const;
|
||||
void load();
|
||||
const bool inline isLoaded() const { return loaded; };
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@@ -1,32 +1,70 @@
|
||||
#include <fstream>
|
||||
#include "Datasets.h"
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::ordered_json;
|
||||
const std::string message_dataset_not_loaded = "dataset not loaded.";
|
||||
Datasets::Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm) :
|
||||
discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm)
|
||||
{
|
||||
if ((discretizer_algorithm == "none" || discretizer_algorithm == "") && discretize) {
|
||||
throw std::runtime_error("Can't discretize without discretization algorithm");
|
||||
}
|
||||
load();
|
||||
}
|
||||
void Datasets::load()
|
||||
{
|
||||
auto sd = SourceData(sfileType);
|
||||
fileType = sd.getFileType();
|
||||
path = sd.getPath();
|
||||
ifstream catalog(path + "all.txt");
|
||||
if (catalog.is_open()) {
|
||||
std::vector<int> numericFeaturesIdx;
|
||||
if (!catalog.is_open()) {
|
||||
throw std::invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]");
|
||||
}
|
||||
std::string line;
|
||||
while (getline(catalog, line)) {
|
||||
if (line.empty() || line[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
std::vector<std::string> tokens = split(line, ',');
|
||||
std::vector<std::string> tokens = split(line, ';');
|
||||
std::string name = tokens[0];
|
||||
std::string className;
|
||||
if (tokens.size() == 1) {
|
||||
numericFeaturesIdx.clear();
|
||||
int size = tokens.size();
|
||||
switch (size) {
|
||||
case 1:
|
||||
className = "-1";
|
||||
} else {
|
||||
numericFeaturesIdx.push_back(-1);
|
||||
break;
|
||||
case 2:
|
||||
className = tokens[1];
|
||||
numericFeaturesIdx.push_back(-1);
|
||||
break;
|
||||
case 3:
|
||||
{
|
||||
className = tokens[1];
|
||||
auto numericFeatures = tokens[2];
|
||||
if (numericFeatures == "all") {
|
||||
numericFeaturesIdx.push_back(-1);
|
||||
} else {
|
||||
if (numericFeatures != "none") {
|
||||
auto features = json::parse(numericFeatures);
|
||||
for (auto& f : features) {
|
||||
numericFeaturesIdx.push_back(f);
|
||||
}
|
||||
datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw std::invalid_argument("Invalid catalog file format.");
|
||||
|
||||
}
|
||||
datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType, numericFeaturesIdx, discretizer_algorithm);
|
||||
}
|
||||
catalog.close();
|
||||
} else {
|
||||
throw std::invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]");
|
||||
}
|
||||
}
|
||||
std::vector<std::string> Datasets::getNames()
|
||||
{
|
||||
@@ -34,94 +72,6 @@ namespace platform {
|
||||
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
|
||||
return result;
|
||||
}
|
||||
std::vector<std::string> Datasets::getFeatures(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
return datasets.at(name)->getFeatures();
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
map<std::string, std::vector<int>> Datasets::getStates(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
return datasets.at(name)->getStates();
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
void Datasets::loadDataset(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
return;
|
||||
} else {
|
||||
datasets.at(name)->load();
|
||||
}
|
||||
}
|
||||
std::string Datasets::getClassName(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
return datasets.at(name)->getClassName();
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
int Datasets::getNSamples(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
return datasets.at(name)->getNSamples();
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
int Datasets::getNClasses(const std::string& name)
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
auto className = datasets.at(name)->getClassName();
|
||||
if (discretize) {
|
||||
auto states = getStates(name);
|
||||
return states.at(className).size();
|
||||
}
|
||||
auto [Xv, yv] = getVectors(name);
|
||||
return *std::max_element(yv.begin(), yv.end()) + 1;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
std::vector<int> Datasets::getClassesCounts(const std::string& name) const
|
||||
{
|
||||
if (datasets.at(name)->isLoaded()) {
|
||||
auto [Xv, yv] = datasets.at(name)->getVectors();
|
||||
std::vector<int> counts(*std::max_element(yv.begin(), yv.end()) + 1);
|
||||
for (auto y : yv) {
|
||||
counts[y]++;
|
||||
}
|
||||
return counts;
|
||||
} else {
|
||||
throw std::invalid_argument("Dataset not loaded.");
|
||||
}
|
||||
}
|
||||
pair<std::vector<std::vector<float>>&, std::vector<int>&> Datasets::getVectors(const std::string& name)
|
||||
{
|
||||
if (!datasets[name]->isLoaded()) {
|
||||
datasets[name]->load();
|
||||
}
|
||||
return datasets[name]->getVectors();
|
||||
}
|
||||
pair<std::vector<std::vector<int>>&, std::vector<int>&> Datasets::getVectorsDiscretized(const std::string& name)
|
||||
{
|
||||
if (!datasets[name]->isLoaded()) {
|
||||
datasets[name]->load();
|
||||
}
|
||||
return datasets[name]->getVectorsDiscretized();
|
||||
}
|
||||
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const std::string& name)
|
||||
{
|
||||
if (!datasets[name]->isLoaded()) {
|
||||
datasets[name]->load();
|
||||
}
|
||||
return datasets[name]->getTensors();
|
||||
}
|
||||
bool Datasets::isDataset(const std::string& name) const
|
||||
{
|
||||
return datasets.find(name) != datasets.end();
|
||||
|
@@ -1,29 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef DATASETS_H
|
||||
#define DATASETS_H
|
||||
#include "Dataset.h"
|
||||
namespace platform {
|
||||
class Datasets {
|
||||
public:
|
||||
explicit Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm = "none");
|
||||
std::vector<std::string> getNames();
|
||||
bool isDataset(const std::string& name) const;
|
||||
Dataset& getDataset(const std::string& name) const { return *datasets.at(name); }
|
||||
std::string toString() const;
|
||||
private:
|
||||
std::string path;
|
||||
fileType_t fileType;
|
||||
std::string sfileType;
|
||||
std::string discretizer_algorithm;
|
||||
std::map<std::string, std::unique_ptr<Dataset>> datasets;
|
||||
bool discretize;
|
||||
void load(); // Loads the list of datasets
|
||||
public:
|
||||
explicit Datasets(bool discretize, std::string sfileType) : discretize(discretize), sfileType(sfileType) { load(); };
|
||||
std::vector<string> getNames();
|
||||
std::vector<string> getFeatures(const std::string& name) const;
|
||||
int getNSamples(const std::string& name) const;
|
||||
std::string getClassName(const std::string& name) const;
|
||||
int getNClasses(const std::string& name);
|
||||
std::vector<int> getClassesCounts(const std::string& name) const;
|
||||
std::map<std::string, std::vector<int>> getStates(const std::string& name) const;
|
||||
std::pair<std::vector<std::vector<float>>&, std::vector<int>&> getVectors(const std::string& name);
|
||||
std::pair<std::vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized(const std::string& name);
|
||||
std::pair<torch::Tensor&, torch::Tensor&> getTensors(const std::string& name);
|
||||
bool isDataset(const std::string& name) const;
|
||||
void loadDataset(const std::string& name) const;
|
||||
std::string toString() const;
|
||||
};
|
||||
};
|
||||
#endif
|
55
src/common/Discretization.cpp
Normal file
55
src/common/Discretization.cpp
Normal file
@@ -0,0 +1,55 @@
|
||||
#include "Discretization.h"
|
||||
|
||||
namespace platform {
|
||||
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
|
||||
Discretization* Discretization::factory = nullptr;
|
||||
// Returns the shared Discretization object, allocating it on the first call.
// The pointer is kept in the static member `factory`.
// NOTE(review): no synchronization is visible here - confirm the first call
// cannot happen concurrently.
Discretization* Discretization::instance()
{
    if (factory != nullptr) {
        return factory;
    }
    factory = new Discretization();
    return factory;
}
|
||||
void Discretization::registerFactoryFunction(const std::string& name,
|
||||
function<mdlp::Discretizer* (void)> classFactoryFunction)
|
||||
{
|
||||
// register the class factory function
|
||||
functionRegistry[name] = classFactoryFunction;
|
||||
}
|
||||
std::shared_ptr<mdlp::Discretizer> Discretization::create(const std::string& name)
|
||||
{
|
||||
mdlp::Discretizer* instance = nullptr;
|
||||
|
||||
// find name in the registry and call factory method.
|
||||
auto it = functionRegistry.find(name);
|
||||
if (it != functionRegistry.end())
|
||||
instance = it->second();
|
||||
// wrap instance in a shared ptr and return
|
||||
if (instance != nullptr)
|
||||
return std::unique_ptr<mdlp::Discretizer>(instance);
|
||||
else
|
||||
throw std::runtime_error("Discretizer not found: " + name);
|
||||
}
|
||||
std::vector<std::string> Discretization::getNames()
|
||||
{
|
||||
std::vector<std::string> names;
|
||||
transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names),
|
||||
[](const pair<std::string, function<mdlp::Discretizer* (void)>>& pair) { return pair.first; });
|
||||
return names;
|
||||
}
|
||||
std::string Discretization::toString()
|
||||
{
|
||||
std::string result = "";
|
||||
std::string sep = "";
|
||||
for (const auto& pair : functionRegistry) {
|
||||
result += sep + pair.first;
|
||||
sep = ", ";
|
||||
}
|
||||
return "{" + result + "}";
|
||||
}
|
||||
// Constructing a registrar forwards `name` and `classFactoryFunction` to the
// shared Discretization object, so the factory becomes available under `name`.
RegistrarDiscretization::RegistrarDiscretization(const std::string& name, function<mdlp::Discretizer* (void)> classFactoryFunction)
{
    Discretization* registry = Discretization::instance();
    registry->registerFactoryFunction(name, classFactoryFunction);
}
|
||||
}
|
33
src/common/Discretization.h
Normal file
33
src/common/Discretization.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef DISCRETIZATION_H
|
||||
#define DISCRETIZATION_H
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
#include <fimdlp/Discretizer.h>
|
||||
#include <fimdlp/BinDisc.h>
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
namespace platform {
|
||||
class Discretization {
|
||||
public:
|
||||
Discretization(Discretization&) = delete;
|
||||
void operator=(const Discretization&) = delete;
|
||||
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
|
||||
static Discretization* instance();
|
||||
std::shared_ptr<mdlp::Discretizer> create(const std::string& name);
|
||||
void registerFactoryFunction(const std::string& name,
|
||||
function<mdlp::Discretizer* (void)> classFactoryFunction);
|
||||
std::vector<string> getNames();
|
||||
std::string toString();
|
||||
private:
|
||||
map<std::string, function<mdlp::Discretizer* (void)>> functionRegistry;
|
||||
static Discretization* factory; //singleton
|
||||
Discretization() {};
|
||||
};
|
||||
class RegistrarDiscretization {
|
||||
public:
|
||||
RegistrarDiscretization(const std::string& className, function<mdlp::Discretizer* (void)> classFactoryFunction);
|
||||
};
|
||||
}
|
||||
#endif
|
38
src/common/DiscretizationRegister.h
Normal file
38
src/common/DiscretizationRegister.h
Normal file
@@ -0,0 +1,38 @@
|
||||
#ifndef DISCRETIZATIONREGISTER_H
|
||||
#define DISCRETIZATIONREGISTER_H
|
||||
#include <common/Discretization.h>
|
||||
// Built-in discretizer registrations. Each object exists only for its
// constructor call, which receives the name and a factory lambda.
// NOTE(review): these are `static` objects in a header, so every translation
// unit that includes it gets its own set - confirm that is intended.

// MDLP discretizer.
static platform::RegistrarDiscretization registrarM{ "mdlp",
    []() -> mdlp::Discretizer* { return new mdlp::CPPFImdlp(); } };

// Binning discretizers: "bin<N>u" uses strategy_t::UNIFORM and "bin<N>q" uses
// strategy_t::QUANTILE, with N passed as the first BinDisc argument.
static platform::RegistrarDiscretization registrarBU3{ "bin3u",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(3, mdlp::strategy_t::UNIFORM); } };
static platform::RegistrarDiscretization registrarBQ3{ "bin3q",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(3, mdlp::strategy_t::QUANTILE); } };

static platform::RegistrarDiscretization registrarBU4{ "bin4u",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(4, mdlp::strategy_t::UNIFORM); } };
static platform::RegistrarDiscretization registrarBQ4{ "bin4q",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(4, mdlp::strategy_t::QUANTILE); } };

static platform::RegistrarDiscretization registrarBU5{ "bin5u",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(5, mdlp::strategy_t::UNIFORM); } };
static platform::RegistrarDiscretization registrarBQ5{ "bin5q",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(5, mdlp::strategy_t::QUANTILE); } };

static platform::RegistrarDiscretization registrarBU6{ "bin6u",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(6, mdlp::strategy_t::UNIFORM); } };
static platform::RegistrarDiscretization registrarBQ6{ "bin6q",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(6, mdlp::strategy_t::QUANTILE); } };

static platform::RegistrarDiscretization registrarBU7{ "bin7u",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(7, mdlp::strategy_t::UNIFORM); } };
static platform::RegistrarDiscretization registrarBQ7{ "bin7q",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(7, mdlp::strategy_t::QUANTILE); } };

static platform::RegistrarDiscretization registrarBU8{ "bin8u",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(8, mdlp::strategy_t::UNIFORM); } };
static platform::RegistrarDiscretization registrarBQ8{ "bin8q",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(8, mdlp::strategy_t::QUANTILE); } };

static platform::RegistrarDiscretization registrarBU9{ "bin9u",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(9, mdlp::strategy_t::UNIFORM); } };
static platform::RegistrarDiscretization registrarBQ9{ "bin9q",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(9, mdlp::strategy_t::QUANTILE); } };

static platform::RegistrarDiscretization registrarBU10{ "bin10u",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(10, mdlp::strategy_t::UNIFORM); } };
static platform::RegistrarDiscretization registrarBQ10{ "bin10q",
    []() -> mdlp::Discretizer* { return new mdlp::BinDisc(10, mdlp::strategy_t::QUANTILE); } };
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef DOTENV_H
|
||||
#define DOTENV_H
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <fstream>
|
||||
@@ -13,9 +13,55 @@ namespace platform {
|
||||
class DotEnv {
|
||||
private:
|
||||
std::map<std::string, std::string> env;
|
||||
std::map<std::string, std::vector<std::string>> valid;
|
||||
public:
|
||||
DotEnv()
|
||||
DotEnv(bool create = false)
|
||||
{
|
||||
valid =
|
||||
{
|
||||
{"depth", {"any"}},
|
||||
{"discretize", {"0", "1"}},
|
||||
{"discretize_algo", {"mdlp", "bin3u", "bin3q", "bin4u", "bin4q", "bin5q", "bin5u", "bin6q", "bin6u", "bin7q", "bin7u", "bin8q", "bin8u", "bin9q", "bin9u", "bin10q", "bin10u"}},
|
||||
{"experiment", {"discretiz", "odte", "covid", "Test"}},
|
||||
{"fit_features", {"0", "1"}},
|
||||
{"framework", {"bulma", "bootstrap"}},
|
||||
{"ignore_nan", {"0", "1"}},
|
||||
{"leaves", {"any"}},
|
||||
{"margin", {"0.1", "0.2", "0.3"}},
|
||||
{"model", {"any"}},
|
||||
{"n_folds", {"5", "10"}},
|
||||
{"nodes", {"any"}},
|
||||
{"platform", {"any"}},
|
||||
{"stratified", {"0", "1"}},
|
||||
{"score", {"accuracy", "roc-auc-ovr"}},
|
||||
{"seeds", {"any"}},
|
||||
{"smooth_strat", {"ORIGINAL", "LAPLACE", "CESTNIK"}},
|
||||
{"source_data", {"Arff", "Tanveer", "Surcov", "Test"}},
|
||||
};
|
||||
if (create) {
|
||||
// For testing purposes
|
||||
std::ofstream file(".env");
|
||||
file << "experiment=Test" << std::endl;
|
||||
file << "source_data=Test" << std::endl;
|
||||
file << "margin=0.1" << std::endl;
|
||||
file << "score=accuracy" << std::endl;
|
||||
file << "platform=um790Linux" << std::endl;
|
||||
file << "n_folds=5" << std::endl;
|
||||
file << "discretize_algo=mdlp" << std::endl;
|
||||
file << "smooth_strat=ORIGINAL" << std::endl;
|
||||
file << "stratified=0" << std::endl;
|
||||
file << "model=TAN" << std::endl;
|
||||
file << "seeds=[271]" << std::endl;
|
||||
file << "discretize=0" << std::endl;
|
||||
file << "ignore_nan=0" << std::endl;
|
||||
file << "nodes=Nodes" << std::endl;
|
||||
file << "leaves=Edges" << std::endl;
|
||||
file << "depth=States" << std::endl;
|
||||
file << "fit_features=0" << std::endl;
|
||||
file << "framework=bulma" << std::endl;
|
||||
file << "margin=0.1" << std::endl;
|
||||
file.close();
|
||||
}
|
||||
std::ifstream file(".env");
|
||||
if (!file.is_open()) {
|
||||
std::cerr << "File .env not found" << std::endl;
|
||||
@@ -30,12 +76,62 @@ namespace platform {
|
||||
std::istringstream iss(line);
|
||||
std::string key, value;
|
||||
if (std::getline(iss, key, '=') && std::getline(iss, value)) {
|
||||
key = trim(key);
|
||||
value = trim(value);
|
||||
parse(key, value);
|
||||
env[key] = value;
|
||||
}
|
||||
}
|
||||
parseEnv();
|
||||
}
|
||||
void parse(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (valid.find(key) == valid.end()) {
|
||||
std::cerr << "Invalid key in .env: " << key << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
if (valid[key].front() == "any") {
|
||||
return;
|
||||
}
|
||||
if (std::find(valid[key].begin(), valid[key].end(), value) == valid[key].end()) {
|
||||
std::cerr << "Invalid value in .env: " << key << " = " << value << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
std::vector<std::string> valid_tokens(const std::string& key)
|
||||
{
|
||||
if (valid.find(key) == valid.end()) {
|
||||
return {};
|
||||
}
|
||||
return valid.at(key);
|
||||
}
|
||||
std::string valid_values(const std::string& key)
|
||||
{
|
||||
std::string valid_values = "{", sep = "";
|
||||
if (valid.find(key) == valid.end()) {
|
||||
return "{}";
|
||||
}
|
||||
for (const auto& value : valid.at(key)) {
|
||||
valid_values += sep + value;
|
||||
sep = ", ";
|
||||
}
|
||||
return valid_values + "}";
|
||||
}
|
||||
void parseEnv()
|
||||
{
|
||||
for (auto& [key, values] : valid) {
|
||||
if (env.find(key) == env.end()) {
|
||||
std::cerr << "Key not found in .env: " << key << ", valid values: " << valid_values(key) << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::string get(const std::string& key)
|
||||
{
|
||||
if (env.find(key) == env.end()) {
|
||||
std::cerr << "Key not found in .env: " << key << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
return env.at(key);
|
||||
}
|
||||
std::vector<int> getSeeds()
|
||||
@@ -52,3 +148,4 @@ namespace platform {
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,20 +1,35 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef PATHS_H
|
||||
#define PATHS_H
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include "DotEnv.h"
|
||||
namespace platform {
|
||||
class Paths {
|
||||
public:
|
||||
static std::string results() { return "results/"; }
|
||||
static std::string hiddenResults() { return "hidden_results/"; }
|
||||
static std::string excel() { return "excel/"; }
|
||||
static std::string grid() { return "grid/"; }
|
||||
// Ensures `folder` exists and returns it unchanged.
// Missing parent directories are created as well, so nested paths such as
// "a/b/c/" work; a path that already exists is left untouched.
// Errors reported by std::filesystem propagate as exceptions.
static std::string createIfNotExists(const std::string& folder)
{
    if (!std::filesystem::exists(folder)) {
        // create_directories (plural) also creates any missing ancestors,
        // whereas create_directory fails when the parent does not exist.
        std::filesystem::create_directories(folder);
    }
    return folder;
}
|
||||
static std::string results() { return createIfNotExists("results/"); }
|
||||
static std::string hiddenResults() { return createIfNotExists("hidden_results/"); }
|
||||
static std::string excel() { return createIfNotExists("excel/"); }
|
||||
static std::string grid() { return createIfNotExists("grid/"); }
|
||||
static std::string graphs() { return createIfNotExists("graphs/"); }
|
||||
static std::string tex() { return createIfNotExists("tex/"); }
|
||||
static std::string datasets()
|
||||
{
|
||||
auto env = platform::DotEnv();
|
||||
return env.get("source_data");
|
||||
}
|
||||
// Builds the relative path of an experiment file:
//   datasets_experiment/<fileName>_(disc|ndisc)_(strat|nstrat)_<seed>_<nfold>.json
static std::string experiment_file(const std::string& fileName, bool discretize, bool stratified, int seed, int nfold)
{
    std::string location = "datasets_experiment/";
    location += fileName;
    location += discretize ? "_disc_" : "_ndisc_";
    location += stratified ? "strat_" : "nstrat_";
    location += std::to_string(seed);
    location += "_";
    location += std::to_string(nfold);
    location += ".json";
    return location;
}
|
||||
static void createPath(const std::string& path)
|
||||
{
|
||||
// Create directory if it does not exist
|
||||
@@ -25,6 +40,14 @@ namespace platform {
|
||||
throw std::runtime_error("Could not create directory " + path);
|
||||
}
|
||||
}
|
||||
// File name of the best-results JSON for a score/model pair:
//   best_results_<score>_<model>.json
static std::string bestResultsFile(const std::string& score, const std::string& model)
{
    std::string fileName = "best_results_";
    fileName += score;
    fileName += "_";
    fileName += model;
    fileName += ".json";
    return fileName;
}
|
||||
// File name of the best-results spreadsheet for a score: BestResults_<score>.xlsx
static std::string bestResultsExcel(const std::string& score)
{
    std::string fileName = "BestResults_";
    fileName += score;
    fileName += ".xlsx";
    return fileName;
}
|
||||
static std::string excelResults() { return "some_results.xlsx"; }
|
||||
static std::string grid_input(const std::string& model)
|
||||
{
|
||||
@@ -34,5 +57,22 @@ namespace platform {
|
||||
{
|
||||
return grid() + "grid_" + model + "_output.json";
|
||||
}
|
||||
static std::string tex_output()
|
||||
{
|
||||
return "results.tex";
|
||||
}
|
||||
static std::string md_output()
|
||||
{
|
||||
return "results.md";
|
||||
}
|
||||
static std::string tex_post_hoc()
|
||||
{
|
||||
return "post_hoc.tex";
|
||||
}
|
||||
static std::string md_post_hoc()
|
||||
{
|
||||
return "post_hoc.md";
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
38
src/common/SourceData.h.in
Normal file
38
src/common/SourceData.h.in
Normal file
@@ -0,0 +1,38 @@
|
||||
#ifndef SOURCEDATA_H
|
||||
#define SOURCEDATA_H
|
||||
namespace platform {
|
||||
// File formats a data source can use.
enum fileType_t { CSV, ARFF, RDATA };

// Maps a data-source name to the directory that holds its files and to the
// format of those files.
class SourceData {
public:
    // Recognised sources: "Surcov", "Arff", "Tanveer" and "Test".
    // Throws std::invalid_argument for any other name.
    SourceData(const std::string& source)
    {
        if (source == "Surcov") {
            path = "datasets/";
            fileType = CSV;
        } else if (source == "Arff") {
            path = "datasets/";
            fileType = ARFF;
        } else if (source == "Tanveer") {
            path = "data/";
            fileType = RDATA;
        } else if (source == "Test") {
            // Placeholder between @...@ markers; presumably substituted when
            // this .h.in template is configured by the build - TODO confirm.
            path = "@TEST_DATA_PATH@/";
            fileType = ARFF;
        } else {
            throw std::invalid_argument("Unknown source.");
        }
    }
    // Directory (with trailing slash) selected for the source.
    std::string getPath() const
    {
        return path;
    }
    // File format selected for the source.
    fileType_t getFileType() const
    {
        return fileType;
    }
private:
    std::string path;
    fileType_t fileType;
};
|
||||
}
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef SYMBOLS_H
|
||||
#define SYMBOLS_H
|
||||
#include <string>
|
||||
namespace platform {
|
||||
class Symbols {
|
||||
@@ -9,10 +9,13 @@ namespace platform {
|
||||
inline static const std::string black_star{ "\u2605" };
|
||||
inline static const std::string cross{ "\u2717" };
|
||||
inline static const std::string upward_arrow{ "\u27B6" };
|
||||
inline static const std::string down_arrow{ "\u27B4" };
|
||||
inline static const std::string downward_arrow{ "\u2B07" };
|
||||
inline static const std::string downward_arrow{ "\u27B4" };
|
||||
inline static const std::string up_arrow{ "\u2B06" };
|
||||
inline static const std::string down_arrow{ "\u2B07" };
|
||||
inline static const std::string ellipsis{ "\u2026" };
|
||||
inline static const std::string equal_best{ check_mark };
|
||||
inline static const std::string better_best{ black_star };
|
||||
inline static const std::string notebook{ "\U0001F5C8" };
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef TIMER_H
|
||||
#define TIMER_H
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
@@ -40,3 +40,4 @@ namespace platform {
|
||||
}
|
||||
};
|
||||
} /* namespace platform */
|
||||
#endif
|
@@ -1,18 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef UTILS_H
|
||||
#define UTILS_H
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <torch/torch.h>
|
||||
namespace platform {
|
||||
//static std::vector<std::string> split(const std::string& text, char delimiter);
|
||||
static std::vector<std::string> split(const std::string& text, char delimiter)
|
||||
template <typename T>
|
||||
std::vector<T> tensorToVector(const torch::Tensor& tensor)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
std::stringstream ss(text);
|
||||
std::string token;
|
||||
while (std::getline(ss, token, delimiter)) {
|
||||
result.push_back(token);
|
||||
}
|
||||
torch::Tensor contig_tensor = tensor.contiguous();
|
||||
auto num_elements = contig_tensor.numel();
|
||||
const T* tensor_data = contig_tensor.data_ptr<T>();
|
||||
std::vector<T> result(tensor_data, tensor_data + num_elements);
|
||||
return result;
|
||||
}
|
||||
static std::string trim(const std::string& str)
|
||||
@@ -26,4 +26,45 @@ namespace platform {
|
||||
}).base(), result.end());
|
||||
return result;
|
||||
}
|
||||
static std::vector<std::string> split(const std::string& text, char delimiter)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
std::stringstream ss(text);
|
||||
std::string token;
|
||||
while (std::getline(ss, token, delimiter)) {
|
||||
result.push_back(trim(token));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
// Computes the standard deviation of `values` around the supplied `mean`,
// dividing by the number of values (population form).
// Returns 0.0 for an empty input instead of evaluating 0/0.
inline double compute_std(const std::vector<double>& values, double mean)
{
    if (values.empty()) {
        return 0.0;
    }
    double sum = 0.0;
    for (const double value : values) {
        const double deviation = value - mean;
        sum += deviation * deviation;
    }
    const double variance = sum / static_cast<double>(values.size());
    return std::sqrt(variance);
}
|
||||
// Returns the current local date formatted as "%Y-%m-%d".
inline std::string get_date()
{
    time_t now;
    time(&now);
    const tm* local = std::localtime(&now);
    std::ostringstream formatted;
    formatted << std::put_time(local, "%Y-%m-%d");
    return formatted.str();
}
|
||||
// Returns the current local time of day formatted as "%H:%M:%S".
inline std::string get_time()
{
    time_t now;
    time(&now);
    const tm* local = std::localtime(&now);
    std::ostringstream formatted;
    formatted << std::put_time(local, "%H:%M:%S");
    return formatted.str();
}
|
||||
}
|
||||
#endif
|
@@ -1,12 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef GRIDDATA_H
|
||||
#define GRIDDATA_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
const std::string ALL_DATASETS = "all";
|
||||
class GridData {
|
||||
public:
|
||||
@@ -23,3 +23,4 @@ namespace platform {
|
||||
std::map<std::string, json> grid;
|
||||
};
|
||||
} /* namespace platform */
|
||||
#endif
|
@@ -5,29 +5,11 @@
|
||||
#include "main/Models.h"
|
||||
#include "common/Paths.h"
|
||||
#include "common/Colors.h"
|
||||
#include "common/Utils.h"
|
||||
#include "GridSearch.h"
|
||||
|
||||
namespace platform {
|
||||
std::string get_date()
|
||||
{
|
||||
time_t rawtime;
|
||||
tm* timeinfo;
|
||||
time(&rawtime);
|
||||
timeinfo = std::localtime(&rawtime);
|
||||
std::ostringstream oss;
|
||||
oss << std::put_time(timeinfo, "%Y-%m-%d");
|
||||
return oss.str();
|
||||
}
|
||||
std::string get_time()
|
||||
{
|
||||
time_t rawtime;
|
||||
tm* timeinfo;
|
||||
time(&rawtime);
|
||||
timeinfo = std::localtime(&rawtime);
|
||||
std::ostringstream oss;
|
||||
oss << std::put_time(timeinfo, "%H:%M:%S");
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string get_color_rank(int rank)
|
||||
{
|
||||
auto colors = { Colors::WHITE(), Colors::RED(), Colors::GREEN(), Colors::BLUE(), Colors::MAGENTA(), Colors::CYAN() };
|
||||
@@ -103,11 +85,11 @@ namespace platform {
|
||||
std::mt19937 g{ 271 }; // Use fixed seed to obtain the same shuffle
|
||||
std::shuffle(tasks.begin(), tasks.end(), g);
|
||||
std::cout << get_color_rank(rank) << "* Number of tasks: " << tasks.size() << std::endl;
|
||||
std::cout << "|";
|
||||
std::cout << separator;
|
||||
for (int i = 0; i < tasks.size(); ++i) {
|
||||
std::cout << (i + 1) % 10;
|
||||
}
|
||||
std::cout << "|" << std::endl << "|" << std::flush;
|
||||
std::cout << separator << std::endl << separator << std::flush;
|
||||
return tasks;
|
||||
}
|
||||
void process_task_mpi_consumer(struct ConfigGrid& config, struct ConfigMPI& config_mpi, json& tasks, int n_task, Datasets& datasets, Task_Result* result)
|
||||
@@ -118,17 +100,18 @@ namespace platform {
|
||||
json task = tasks[n_task];
|
||||
auto model = config.model;
|
||||
auto grid = GridData(Paths::grid_input(model));
|
||||
auto dataset = task["dataset"].get<std::string>();
|
||||
auto dataset_name = task["dataset"].get<std::string>();
|
||||
auto idx_dataset = task["idx_dataset"].get<int>();
|
||||
auto seed = task["seed"].get<int>();
|
||||
auto n_fold = task["fold"].get<int>();
|
||||
bool stratified = config.stratified;
|
||||
// Generate the hyperparameter combinations
|
||||
auto combinations = grid.getGrid(dataset);
|
||||
auto [X, y] = datasets.getTensors(dataset);
|
||||
auto states = datasets.getStates(dataset);
|
||||
auto features = datasets.getFeatures(dataset);
|
||||
auto className = datasets.getClassName(dataset);
|
||||
auto& dataset = datasets.getDataset(dataset_name);
|
||||
auto combinations = grid.getGrid(dataset_name);
|
||||
dataset.load();
|
||||
auto [X, y] = dataset.getTensors();
|
||||
auto features = dataset.getFeatures();
|
||||
auto className = dataset.getClassName();
|
||||
//
|
||||
// Start working on task
|
||||
//
|
||||
@@ -138,14 +121,11 @@ namespace platform {
|
||||
else
|
||||
fold = new folding::KFold(config.n_folds, y.size(0), seed);
|
||||
auto [train, test] = fold->getFold(n_fold);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
auto X_train = X.index({ "...", train_t });
|
||||
auto y_train = y.index({ train_t });
|
||||
auto X_test = X.index({ "...", test_t });
|
||||
auto y_test = y.index({ test_t });
|
||||
auto [X_train, X_test, y_train, y_test] = dataset.getTrainTestTensors(train, test);
|
||||
auto states = dataset.getStates(); // Get the states of the features Once they are discretized
|
||||
double best_fold_score = 0.0;
|
||||
int best_idx_combination = -1;
|
||||
bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE;
|
||||
json best_fold_hyper;
|
||||
for (int idx_combination = 0; idx_combination < combinations.size(); ++idx_combination) {
|
||||
auto hyperparam_line = combinations[idx_combination];
|
||||
@@ -168,10 +148,10 @@ namespace platform {
|
||||
// Build Classifier with selected hyperparameters
|
||||
auto clf = Models::instance()->create(config.model);
|
||||
auto valid = clf->getValidHyperparameters();
|
||||
hyperparameters.check(valid, dataset);
|
||||
clf->setHyperparameters(hyperparameters.get(dataset));
|
||||
hyperparameters.check(valid, dataset_name);
|
||||
clf->setHyperparameters(hyperparameters.get(dataset_name));
|
||||
// Train model
|
||||
clf->fit(X_nested_train, y_nested_train, features, className, states);
|
||||
clf->fit(X_nested_train, y_nested_train, features, className, states, smoothing);
|
||||
// Test model
|
||||
score += clf->score(X_nested_test, y_nested_test);
|
||||
}
|
||||
@@ -188,9 +168,9 @@ namespace platform {
|
||||
auto hyperparameters = platform::HyperParameters(datasets.getNames(), best_fold_hyper);
|
||||
auto clf = Models::instance()->create(config.model);
|
||||
auto valid = clf->getValidHyperparameters();
|
||||
hyperparameters.check(valid, dataset);
|
||||
hyperparameters.check(valid, dataset_name);
|
||||
clf->setHyperparameters(best_fold_hyper);
|
||||
clf->fit(X_train, y_train, features, className, states);
|
||||
clf->fit(X_train, y_train, features, className, states, smoothing);
|
||||
best_fold_score = clf->score(X_test, y_test);
|
||||
// Return the result
|
||||
result->idx_dataset = task["idx_dataset"].get<int>();
|
||||
@@ -373,14 +353,16 @@ namespace platform {
|
||||
MPI_Bcast(msg, tasks_size + 1, MPI_CHAR, config_mpi.manager, MPI_COMM_WORLD);
|
||||
tasks = json::parse(msg);
|
||||
delete[] msg;
|
||||
auto datasets = Datasets(config.discretize, Paths::datasets());
|
||||
auto env = platform::DotEnv();
|
||||
auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretize_algo"));
|
||||
|
||||
if (config_mpi.rank == config_mpi.manager) {
|
||||
//
|
||||
// 2a. Producer delivers the tasks to the consumers
|
||||
//
|
||||
auto datasets_names = filterDatasets(datasets);
|
||||
json all_results = producer(datasets_names, tasks, config_mpi, MPI_Result);
|
||||
std::cout << get_color_rank(config_mpi.rank) << "|" << std::endl;
|
||||
std::cout << get_color_rank(config_mpi.rank) << separator << std::endl;
|
||||
//
|
||||
// 3. Manager selects the best scores for each dataset
|
||||
//
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef GRIDSEARCH_H
|
||||
#define GRIDSEARCH_H
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <mpi.h>
|
||||
@@ -10,7 +10,7 @@
|
||||
#include "GridData.h"
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
struct ConfigGrid {
|
||||
std::string model;
|
||||
std::string score;
|
||||
@@ -55,5 +55,7 @@ namespace platform {
|
||||
struct ConfigGrid config;
|
||||
json build_tasks_mpi(int rank);
|
||||
Timer timer; // used to measure the time of the whole process
|
||||
const std::string separator = "|";
|
||||
};
|
||||
} /* namespace platform */
|
||||
#endif
|
@@ -2,30 +2,55 @@
|
||||
#include "reports/ReportConsole.h"
|
||||
#include "common/Paths.h"
|
||||
#include "Models.h"
|
||||
#include "Scores.h"
|
||||
#include "Experiment.h"
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
void Experiment::saveResult()
|
||||
{
|
||||
result.save();
|
||||
std::cout << "Result saved in " << Paths::results() << result.getFilename() << std::endl;
|
||||
}
|
||||
void Experiment::report()
|
||||
void Experiment::report(bool classification_report)
|
||||
{
|
||||
ReportConsole report(result.getJson());
|
||||
report.show();
|
||||
if (classification_report) {
|
||||
std::cout << report.showClassificationReport(Colors::BLUE());
|
||||
}
|
||||
}
|
||||
void Experiment::show()
|
||||
{
|
||||
std::cout << result.getJson().dump(4) << std::endl;
|
||||
}
|
||||
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score)
|
||||
void Experiment::saveGraph()
|
||||
{
|
||||
std::cout << "Saving graphs..." << std::endl;
|
||||
auto data = result.getJson();
|
||||
for (const auto& item : data["results"]) {
|
||||
auto graphs = item["graph"];
|
||||
int i = 0;
|
||||
for (const auto& graph : graphs) {
|
||||
i++;
|
||||
auto fileName = Paths::graphs() + result.getFilename() + "_graph_" + item["dataset"].get<std::string>() + "_" + std::to_string(i) + ".dot";
|
||||
auto file = std::ofstream(fileName);
|
||||
file << graph.get<std::string>();
|
||||
file.close();
|
||||
std::cout << "Graph saved in " << fileName << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph)
|
||||
{
|
||||
for (auto fileName : filesToProcess) {
|
||||
if (fileName.size() > max_name)
|
||||
max_name = fileName.size();
|
||||
}
|
||||
std::cout << Colors::MAGENTA() << "*** Starting experiment: " << result.getTitle() << " ***" << Colors::RESET() << std::endl << std::endl;
|
||||
auto clf = Models::instance()->create(result.getModel());
|
||||
auto version = clf->getVersion();
|
||||
std::cout << Colors::BLUE() << " Using " << result.getModel() << " ver. " << version << std::endl << std::endl;
|
||||
if (!quiet) {
|
||||
std::cout << Colors::GREEN() << " Status Meaning" << std::endl;
|
||||
std::cout << " ------ --------------------------------" << Colors::RESET() << std::endl;
|
||||
@@ -33,14 +58,14 @@ namespace platform {
|
||||
std::cout << " ( " << Colors::GREEN() << "b" << Colors::RESET() << " ) Scoring train dataset" << std::endl;
|
||||
std::cout << " ( " << Colors::GREEN() << "c" << Colors::RESET() << " ) Scoring test dataset" << std::endl << std::endl;
|
||||
std::cout << Colors::YELLOW() << "Note: fold number in this color means fitting had issues such as not using all features in BoostAODE classifier" << std::endl << std::endl;
|
||||
std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << std::endl;
|
||||
std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << Colors::RESET() << std::endl;
|
||||
std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << string(3 * nfolds - 2, ' ') << " Time" << std::endl;
|
||||
std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << " ----------" << Colors::RESET() << std::endl;
|
||||
}
|
||||
int num = 0;
|
||||
for (auto fileName : filesToProcess) {
|
||||
if (!quiet)
|
||||
std::cout << " " << setw(3) << right << num++ << " " << setw(max_name) << left << fileName << right << flush;
|
||||
cross_validation(fileName, quiet, no_train_score);
|
||||
cross_validation(fileName, quiet, no_train_score, generate_fold_files, graph);
|
||||
if (!quiet)
|
||||
std::cout << std::endl;
|
||||
}
|
||||
@@ -60,44 +85,106 @@ namespace platform {
|
||||
return Colors::RESET();
|
||||
}
|
||||
}
|
||||
|
||||
// Maps the score name stored in the result to its score_t value.
// Recognised names are "accuracy" and "roc-auc-ovr"; any other name
// raises std::runtime_error.
score_t Experiment::parse_score() const
{
    const auto& name = result.getScoreName();
    if (name == "accuracy") {
        return score_t::ACCURACY;
    }
    if (name == "roc-auc-ovr") {
        return score_t::ROC_AUC_OVR;
    }
    throw std::runtime_error("Unknown score: " + name);
}
|
||||
void showProgress(int fold, const std::string& color, const std::string& phase)
|
||||
{
|
||||
std::string prefix = phase == "a" ? "" : "\b\b\b\b";
|
||||
std::string prefix = phase == "-" ? "" : "\b\b\b\b";
|
||||
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
|
||||
|
||||
}
|
||||
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score)
|
||||
void generate_files(const std::string& fileName, bool discretize, bool stratified, int seed, int nfold, torch::Tensor X_train, torch::Tensor y_train, torch::Tensor X_test, torch::Tensor y_test, std::vector<int>& train, std::vector<int>& test)
|
||||
{
|
||||
auto datasets = Datasets(discretized, Paths::datasets());
|
||||
// Get dataset
|
||||
auto [X, y] = datasets.getTensors(fileName);
|
||||
auto states = datasets.getStates(fileName);
|
||||
auto features = datasets.getFeatures(fileName);
|
||||
auto samples = datasets.getNSamples(fileName);
|
||||
auto className = datasets.getClassName(fileName);
|
||||
if (!quiet) {
|
||||
std::cout << " " << setw(5) << samples << " " << setw(5) << features.size() << flush;
|
||||
std::string file_name = Paths::experiment_file(fileName, discretize, stratified, seed, nfold);
|
||||
auto file = std::ofstream(file_name);
|
||||
json output;
|
||||
output["seed"] = seed;
|
||||
output["nfold"] = nfold;
|
||||
output["X_train"] = json::array();
|
||||
auto n = X_train.size(1);
|
||||
for (int i = 0; i < X_train.size(0); i++) {
|
||||
if (X_train.dtype() == torch::kFloat32) {
|
||||
auto xvf_ptr = X_train.index({ i }).data_ptr<float>();
|
||||
auto feature = std::vector<float>(xvf_ptr, xvf_ptr + n);
|
||||
output["X_train"].push_back(feature);
|
||||
} else {
|
||||
auto feature = std::vector<int>(X_train.index({ i }).data_ptr<int>(), X_train.index({ i }).data_ptr<int>() + n);
|
||||
output["X_train"].push_back(feature);
|
||||
}
|
||||
}
|
||||
output["y_train"] = std::vector<int>(y_train.data_ptr<int>(), y_train.data_ptr<int>() + n);
|
||||
output["X_test"] = json::array();
|
||||
n = X_test.size(1);
|
||||
for (int i = 0; i < X_test.size(0); i++) {
|
||||
if (X_train.dtype() == torch::kFloat32) {
|
||||
auto xvf_ptr = X_test.index({ i }).data_ptr<float>();
|
||||
auto feature = std::vector<float>(xvf_ptr, xvf_ptr + n);
|
||||
output["X_test"].push_back(feature);
|
||||
} else {
|
||||
auto feature = std::vector<int>(X_test.index({ i }).data_ptr<int>(), X_test.index({ i }).data_ptr<int>() + n);
|
||||
output["X_test"].push_back(feature);
|
||||
}
|
||||
}
|
||||
output["y_test"] = std::vector<int>(y_test.data_ptr<int>(), y_test.data_ptr<int>() + n);
|
||||
output["train"] = train;
|
||||
output["test"] = test;
|
||||
file << output.dump(4);
|
||||
file.close();
|
||||
}
|
||||
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph)
|
||||
{
|
||||
//
|
||||
// Load dataset and prepare data
|
||||
//
|
||||
auto datasets = Datasets(discretized, Paths::datasets(), discretization_algo);
|
||||
auto& dataset = datasets.getDataset(fileName);
|
||||
dataset.load();
|
||||
auto [X, y] = dataset.getTensors(); // Only need y for folding
|
||||
auto features = dataset.getFeatures();
|
||||
auto n_features = dataset.getNFeatures();
|
||||
auto n_samples = dataset.getNSamples();
|
||||
auto className = dataset.getClassName();
|
||||
auto labels = dataset.getLabels();
|
||||
int num_classes = dataset.getNClasses();
|
||||
if (!quiet) {
|
||||
std::cout << " " << setw(5) << n_samples << " " << setw(5) << n_features << flush;
|
||||
}
|
||||
//
|
||||
// Prepare Result
|
||||
//
|
||||
auto partial_result = PartialResult();
|
||||
auto [values, counts] = at::_unique(y);
|
||||
partial_result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
|
||||
partial_result.setSamples(n_samples).setFeatures(n_features).setClasses(num_classes);
|
||||
partial_result.setHyperparameters(hyperparameters.get(fileName));
|
||||
//
|
||||
// Initialize results std::vectors
|
||||
//
|
||||
int nResults = nfolds * static_cast<int>(randomSeeds.size());
|
||||
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto score_test = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto score_train = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto train_time = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto test_time = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto nodes = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto edges = torch::zeros({ nResults }, torch::kFloat64);
|
||||
auto num_states = torch::zeros({ nResults }, torch::kFloat64);
|
||||
json confusion_matrices = json::array();
|
||||
json confusion_matrices_train = json::array();
|
||||
std::vector<std::string> notes;
|
||||
Timer train_timer, test_timer;
|
||||
std::vector<std::string> graphs;
|
||||
Timer train_timer, test_timer, seed_timer;
|
||||
int item = 0;
|
||||
bool first_seed = true;
|
||||
//
|
||||
// Loop over random seeds
|
||||
//
|
||||
auto score = parse_score();
|
||||
for (auto seed : randomSeeds) {
|
||||
seed_timer.start();
|
||||
if (!quiet) {
|
||||
string prefix = " ";
|
||||
if (!first_seed) {
|
||||
@@ -110,26 +197,33 @@ namespace platform {
|
||||
if (stratified)
|
||||
fold = new folding::StratifiedKFold(nfolds, y, seed);
|
||||
else
|
||||
fold = new folding::KFold(nfolds, y.size(0), seed);
|
||||
fold = new folding::KFold(nfolds, n_samples, seed);
|
||||
//
|
||||
// Loop over folds
|
||||
//
|
||||
for (int nfold = 0; nfold < nfolds; nfold++) {
|
||||
auto clf = Models::instance()->create(result.getModel());
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "-");
|
||||
setModelVersion(clf->getVersion());
|
||||
auto valid = clf->getValidHyperparameters();
|
||||
hyperparameters.check(valid, fileName);
|
||||
clf->setHyperparameters(hyperparameters.get(fileName));
|
||||
//
|
||||
// Split train - test dataset
|
||||
//
|
||||
train_timer.start();
|
||||
auto [train, test] = fold->getFold(nfold);
|
||||
auto train_t = torch::tensor(train);
|
||||
auto test_t = torch::tensor(test);
|
||||
auto X_train = X.index({ "...", train_t });
|
||||
auto y_train = y.index({ train_t });
|
||||
auto X_test = X.index({ "...", test_t });
|
||||
auto y_test = y.index({ test_t });
|
||||
auto [X_train, X_test, y_train, y_test] = dataset.getTrainTestTensors(train, test);
|
||||
auto states = dataset.getStates(); // Get the states of the features Once they are discretized
|
||||
if (generate_fold_files)
|
||||
generate_files(fileName, discretized, stratified, seed, nfold, X_train, y_train, X_test, y_test, train, test);
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "a");
|
||||
//
|
||||
// Train model
|
||||
clf->fit(X_train, y_train, features, className, states);
|
||||
//
|
||||
clf->fit(X_train, y_train, features, className, states, smooth_type);
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "b");
|
||||
auto clf_notes = clf->getNotes();
|
||||
@@ -139,37 +233,67 @@ namespace platform {
|
||||
edges[item] = clf->getNumberOfEdges();
|
||||
num_states[item] = clf->getNumberOfStates();
|
||||
train_time[item] = train_timer.getDuration();
|
||||
double accuracy_train_value = 0.0;
|
||||
double score_train_value = 0.0;
|
||||
//
|
||||
// Score train
|
||||
if (!no_train_score)
|
||||
accuracy_train_value = clf->score(X_train, y_train);
|
||||
//
|
||||
if (!no_train_score) {
|
||||
auto y_proba_train = clf->predict_proba(X_train);
|
||||
Scores scores(y_train, y_proba_train, num_classes, labels);
|
||||
score_train_value = score == score_t::ACCURACY ? scores.accuracy() : scores.auc();
|
||||
confusion_matrices_train.push_back(scores.get_confusion_matrix_json(true));
|
||||
}
|
||||
//
|
||||
// Test model
|
||||
//
|
||||
if (!quiet)
|
||||
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
|
||||
test_timer.start();
|
||||
auto accuracy_test_value = clf->score(X_test, y_test);
|
||||
// auto y_predict = clf->predict(X_test);
|
||||
auto y_proba_test = clf->predict_proba(X_test);
|
||||
Scores scores(y_test, y_proba_test, num_classes, labels);
|
||||
auto score_test_value = score == score_t::ACCURACY ? scores.accuracy() : scores.auc();
|
||||
test_time[item] = test_timer.getDuration();
|
||||
accuracy_train[item] = accuracy_train_value;
|
||||
accuracy_test[item] = accuracy_test_value;
|
||||
score_train[item] = score_train_value;
|
||||
score_test[item] = score_test_value;
|
||||
confusion_matrices.push_back(scores.get_confusion_matrix_json(true));
|
||||
if (!quiet)
|
||||
std::cout << "\b\b\b, " << flush;
|
||||
//
|
||||
// Store results and times in std::vector
|
||||
partial_result.addScoreTrain(accuracy_train_value);
|
||||
partial_result.addScoreTest(accuracy_test_value);
|
||||
//
|
||||
partial_result.addScoreTrain(score_train_value);
|
||||
partial_result.addScoreTest(score_test_value);
|
||||
partial_result.addTimeTrain(train_time[item].item<double>());
|
||||
partial_result.addTimeTest(test_time[item].item<double>());
|
||||
item++;
|
||||
if (graph) {
|
||||
std::string result = "";
|
||||
for (const auto& line : clf->graph()) {
|
||||
result += line + "\n";
|
||||
}
|
||||
graphs.push_back(result);
|
||||
}
|
||||
}
|
||||
if (!quiet) {
|
||||
seed_timer.stop();
|
||||
std::cout << "end. [" << seed_timer.getDurationString() << "]" << std::endl;
|
||||
}
|
||||
if (!quiet)
|
||||
std::cout << "end. " << flush;
|
||||
delete fold;
|
||||
}
|
||||
partial_result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
|
||||
partial_result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
|
||||
//
|
||||
// Store result totals in Result
|
||||
//
|
||||
partial_result.setGraph(graphs);
|
||||
partial_result.setScoreTest(torch::mean(score_test).item<double>()).setScoreTrain(torch::mean(score_train).item<double>());
|
||||
partial_result.setScoreTestStd(torch::std(score_test).item<double>()).setScoreTrainStd(torch::std(score_train).item<double>());
|
||||
partial_result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
|
||||
partial_result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
|
||||
partial_result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
|
||||
partial_result.setDataset(fileName).setNotes(notes);
|
||||
partial_result.setConfusionMatrices(confusion_matrices);
|
||||
if (!no_train_score)
|
||||
partial_result.setConfusionMatricesTrain(confusion_matrices_train);
|
||||
addResult(partial_result);
|
||||
}
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef EXPERIMENT_H
|
||||
#define EXPERIMENT_H
|
||||
#include <torch/torch.h>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <string>
|
||||
@@ -7,10 +7,11 @@
|
||||
#include "bayesnet/BaseClassifier.h"
|
||||
#include "HyperParameters.h"
|
||||
#include "results/Result.h"
|
||||
#include "bayesnet/network/Network.h"
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
enum class score_t { NONE, ACCURACY, ROC_AUC_OVR };
|
||||
class Experiment {
|
||||
public:
|
||||
Experiment() = default;
|
||||
@@ -20,6 +21,25 @@ namespace platform {
|
||||
Experiment& setModelVersion(const std::string& model_version) { this->result.setModelVersion(model_version); return *this; }
|
||||
Experiment& setModel(const std::string& model) { this->result.setModel(model); return *this; }
|
||||
Experiment& setLanguage(const std::string& language) { this->result.setLanguage(language); return *this; }
|
||||
Experiment& setDiscretizationAlgorithm(const std::string& discretization_algo)
|
||||
{
|
||||
this->discretization_algo = discretization_algo; this->result.setDiscretizationAlgorithm(discretization_algo); return *this;
|
||||
}
|
||||
// Records the smoothing strategy name (locally and in the result) and
// translates it to the matching bayesnet::Smoothing_t value.
// Accepted names: "ORIGINAL", "LAPLACE", "CESTNIK". Any other name prints an
// error to stderr and terminates the process with exit code 1.
Experiment& setSmoothSrategy(const std::string& smooth_strategy)
{
    this->smooth_strategy = smooth_strategy;
    this->result.setSmoothStrategy(smooth_strategy);
    const bool is_original = smooth_strategy == "ORIGINAL";
    const bool is_laplace = smooth_strategy == "LAPLACE";
    const bool is_cestnik = smooth_strategy == "CESTNIK";
    if (!(is_original || is_laplace || is_cestnik)) {
        std::cerr << "Experiment: Unknown smoothing strategy: " << smooth_strategy << std::endl;
        exit(1);
    }
    // Exactly one of the three flags is set at this point.
    smooth_type = is_original ? bayesnet::Smoothing_t::ORIGINAL
        : is_laplace ? bayesnet::Smoothing_t::LAPLACE
        : bayesnet::Smoothing_t::CESTNIK;
    return *this;
}
|
||||
Experiment& setLanguageVersion(const std::string& language_version) { this->result.setLanguageVersion(language_version); return *this; }
|
||||
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; result.setDiscretized(discretized); return *this; }
|
||||
Experiment& setStratified(bool stratified) { this->stratified = stratified; result.setStratified(stratified); return *this; }
|
||||
@@ -28,18 +48,24 @@ namespace platform {
|
||||
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); result.addSeed(randomSeed); return *this; }
|
||||
Experiment& setDuration(float duration) { this->result.setDuration(duration); return *this; }
|
||||
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
|
||||
void cross_validation(const std::string& fileName, bool quiet, bool no_train_score);
|
||||
void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score);
|
||||
void cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph);
|
||||
void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph);
|
||||
void saveResult();
|
||||
void show();
|
||||
void report();
|
||||
void saveGraph();
|
||||
void report(bool classification_report = false);
|
||||
private:
|
||||
score_t parse_score() const;
|
||||
Result result;
|
||||
bool discretized{ false }, stratified{ false };
|
||||
std::vector<PartialResult> results;
|
||||
std::vector<int> randomSeeds;
|
||||
std::string discretization_algo;
|
||||
std::string smooth_strategy;
|
||||
bayesnet::Smoothing_t smooth_type{ bayesnet::Smoothing_t::NONE };
|
||||
HyperParameters hyperparameters;
|
||||
int nfolds{ 0 };
|
||||
int max_name{ 7 }; // max length of dataset name for formatting (default 7)
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -10,16 +10,9 @@ namespace platform {
|
||||
for (const auto& item : datasets) {
|
||||
hyperparameters[item] = hyperparameters_;
|
||||
}
|
||||
normalize_nested(datasets);
|
||||
}
|
||||
// https://www.techiedelight.com/implode-a-vector-of-strings-into-a-comma-separated-string-in-cpp/
|
||||
std::string join(std::vector<std::string> const& strings, std::string delim)
|
||||
{
|
||||
std::stringstream ss;
|
||||
std::copy(strings.begin(), strings.end(),
|
||||
std::ostream_iterator<std::string>(ss, delim.c_str()));
|
||||
return ss.str();
|
||||
}
|
||||
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file)
|
||||
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file, bool best)
|
||||
{
|
||||
// Check if file exists
|
||||
std::ifstream file(hyperparameters_file);
|
||||
@@ -28,7 +21,14 @@ namespace platform {
|
||||
}
|
||||
// Check if file is a json
|
||||
json file_hyperparameters = json::parse(file);
|
||||
auto input_hyperparameters = file_hyperparameters["results"];
|
||||
json input_hyperparameters;
|
||||
if (best) {
|
||||
for (const auto& [key, value] : file_hyperparameters.items()) {
|
||||
input_hyperparameters[key]["hyperparameters"] = value[1];
|
||||
}
|
||||
} else {
|
||||
input_hyperparameters = file_hyperparameters["results"];
|
||||
}
|
||||
// Check if hyperparameters are valid
|
||||
for (const auto& dataset : datasets) {
|
||||
if (!input_hyperparameters.contains(dataset)) {
|
||||
@@ -38,6 +38,24 @@ namespace platform {
|
||||
}
|
||||
hyperparameters[dataset] = input_hyperparameters[dataset]["hyperparameters"].get<json>();
|
||||
}
|
||||
normalize_nested(datasets);
|
||||
}
|
||||
// Currently a no-op: the whole body is commented out, so `datasets` is not
// used and the stored hyperparameters are left untouched.
// The disabled code (kept below) replaced each dataset's "be_hyperparams"
// entry with its dumped JSON string.
void HyperParameters::normalize_nested(const std::vector<std::string>& datasets)
{
    /*
    for (const auto& dataset : datasets) {
        if (hyperparameters[dataset].contains("be_hyperparams")) {
            // Odte has base estimator hyperparameters set this way
            hyperparameters[dataset]["be_hyperparams"] = hyperparameters[dataset]["be_hyperparams"].dump();
        }
    }
    */
}
|
||||
// https://www.techiedelight.com/implode-a-vector-of-strings-into-a-comma-separated-string-in-cpp/
|
||||
// Joins `strings` into a single string, placing `delim` between consecutive
// elements only (no leading or trailing delimiter).
// Returns an empty string for an empty vector and the element itself for a
// single-element vector.
std::string join(std::vector<std::string> const& strings, std::string delim)
{
    std::string joined;
    for (auto it = strings.begin(); it != strings.end(); ++it) {
        // The delimiter separates elements, so it is skipped before the first one.
        if (it != strings.begin())
            joined += delim;
        joined += *it;
    }
    return joined;
}
|
||||
void HyperParameters::check(const std::vector<std::string>& valid, const std::string& fileName)
|
||||
{
|
||||
|
@@ -1,22 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef HYPERPARAMETERS_H
|
||||
#define HYPERPARAMETERS_H
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
class HyperParameters {
|
||||
public:
|
||||
HyperParameters() = default;
|
||||
// Constructor to use command line hyperparameters
|
||||
explicit HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_);
|
||||
explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file);
|
||||
// Constructor to use hyperparameters file generated by grid or by best results
|
||||
explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file, bool best = false);
|
||||
~HyperParameters() = default;
|
||||
bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); }
|
||||
void check(const std::vector<std::string>& valid, const std::string& fileName);
|
||||
json get(const std::string& fileName);
|
||||
private:
|
||||
void normalize_nested(const std::vector<std::string>& datasets);
|
||||
std::map<std::string, json> hyperparameters;
|
||||
bool best = false; // Used to separate grid/best hyperparameters as the format of those files are different
|
||||
};
|
||||
} /* namespace platform */
|
||||
#endif
|
@@ -1,16 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef MODELS_H
|
||||
#define MODELS_H
|
||||
#include <map>
|
||||
#include <bayesnet/BaseClassifier.h>
|
||||
#include <bayesnet/ensembles/AODE.h>
|
||||
#include <bayesnet/ensembles/A2DE.h>
|
||||
#include <bayesnet/ensembles/AODELd.h>
|
||||
#include <bayesnet/ensembles/BoostAODE.h>
|
||||
#include <bayesnet/ensembles/BoostA2DE.h>
|
||||
#include <bayesnet/classifiers/TAN.h>
|
||||
#include <bayesnet/classifiers/KDB.h>
|
||||
#include <bayesnet/classifiers/SPODE.h>
|
||||
#include <bayesnet/classifiers/SPnDE.h>
|
||||
#include <bayesnet/classifiers/TANLd.h>
|
||||
#include <bayesnet/classifiers/KDBLd.h>
|
||||
#include <bayesnet/classifiers/SPODELd.h>
|
||||
#include <bayesnet/classifiers/SPODELd.h>
|
||||
#include <pyclassifiers/STree.h>
|
||||
#include <pyclassifiers/ODTE.h>
|
||||
#include <pyclassifiers/SVC.h>
|
||||
@@ -18,10 +22,6 @@
|
||||
#include <pyclassifiers/RandomForest.h>
|
||||
namespace platform {
|
||||
class Models {
|
||||
private:
|
||||
map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
|
||||
static Models* factory; //singleton
|
||||
Models() {};
|
||||
public:
|
||||
Models(Models&) = delete;
|
||||
void operator=(const Models&) = delete;
|
||||
@@ -32,10 +32,14 @@ namespace platform {
|
||||
function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
|
||||
std::vector<string> getNames();
|
||||
std::string toString();
|
||||
|
||||
private:
|
||||
map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
|
||||
static Models* factory; //singleton
|
||||
Models() {};
|
||||
};
|
||||
class Registrar {
|
||||
public:
|
||||
Registrar(const std::string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,10 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef PARTIAL_RESULT_H
|
||||
#define PARTIAL_RESULT_H
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
class PartialResult {
|
||||
|
||||
public:
|
||||
@@ -15,6 +15,7 @@ namespace platform {
|
||||
data["times_train"] = json::array();
|
||||
data["times_test"] = json::array();
|
||||
data["notes"] = json::array();
|
||||
data["graph"] = json::array();
|
||||
data["train_time"] = 0.0;
|
||||
data["train_time_std"] = 0.0;
|
||||
data["test_time"] = 0.0;
|
||||
@@ -27,6 +28,14 @@ namespace platform {
|
||||
data["notes"].insert(data["notes"].end(), notes_.begin(), notes_.end());
|
||||
return *this;
|
||||
}
|
||||
// Appends the given graph strings to the "graph" array of this partial
// result and returns *this so calls can be chained.
PartialResult& setGraph(const std::vector<std::string>& graph)
{
    const json incoming = graph; // converted to a JSON array of strings
    auto& stored = data["graph"];
    stored.insert(stored.end(), incoming.begin(), incoming.end());
    return *this;
}
|
||||
PartialResult& setConfusionMatrices(const json& confusion_matrices) { data["confusion_matrices"] = confusion_matrices; return *this; }
|
||||
PartialResult& setConfusionMatricesTrain(const json& confusion_matrices) { data["confusion_matrices_train"] = confusion_matrices; return *this; }
|
||||
PartialResult& setHyperparameters(const json& hyperparameters) { data["hyperparameters"] = hyperparameters; return *this; }
|
||||
PartialResult& setSamples(int samples) { data["samples"] = samples; return *this; }
|
||||
PartialResult& setFeatures(int features) { data["features"] = features; return *this; }
|
||||
@@ -71,3 +80,4 @@ namespace platform {
|
||||
json data;
|
||||
};
|
||||
}
|
||||
#endif
|
67
src/main/RocAuc.cpp
Normal file
67
src/main/RocAuc.cpp
Normal file
@@ -0,0 +1,67 @@
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
#include "RocAuc.h"
|
||||
namespace platform {
|
||||
|
||||
// One-vs-rest ROC AUC from a probability tensor.
//
// y_proba is indexed as y_proba[sample][class]; labels is converted with
// tensorToVector and stored in the y_test member. For every class an AUC is
// computed from that class's probability column against "label == class",
// and the per-class values are averaged. When there are exactly two
// probability columns only class 0 is evaluated, so no averaging happens.
double RocAuc::compute(const torch::Tensor& y_proba, const torch::Tensor& labels)
{
    const size_t nSamples = y_proba.size(0);
    const size_t nColumns = y_proba.size(1);
    // In a binary problem a single AUC is enough, there is nothing to average
    const size_t nClasses = nColumns == 2 ? 1 : nColumns;
    y_test = tensorToVector(labels);
    double aucTotal = 0.0;
    for (size_t classIdx = 0; classIdx < nClasses; ++classIdx) {
        // Rebuild the (score, is-positive) pairs for the class being evaluated
        scoresAndLabels.clear();
        for (size_t sample = 0; sample < nSamples; ++sample) {
            const int isPositive = y_test[sample] == classIdx ? 1 : 0;
            scoresAndLabels.emplace_back(y_proba[sample][classIdx].item<float>(), isPositive);
        }
        aucTotal += compute_common(nSamples, classIdx);
    }
    return aucTotal / nClasses;
}
|
||||
double RocAuc::compute(const std::vector<std::vector<double>>& y_proba, const std::vector<int>& labels)
|
||||
{
|
||||
y_test = labels;
|
||||
size_t nClasses = y_proba[0].size();
|
||||
// In binary classification problem there's no need to calculate the average of the AUCs
|
||||
if (nClasses == 2)
|
||||
nClasses = 1;
|
||||
size_t nSamples = y_proba.size();
|
||||
std::vector<double> aucScores(nClasses, 0.0);
|
||||
for (size_t classIdx = 0; classIdx < nClasses; ++classIdx) {
|
||||
scoresAndLabels.clear();
|
||||
for (size_t i = 0; i < nSamples; ++i) {
|
||||
scoresAndLabels.emplace_back(y_proba[i][classIdx], labels[i] == classIdx ? 1 : 0);
|
||||
}
|
||||
aucScores[classIdx] = compute_common(nSamples, classIdx);
|
||||
}
|
||||
return std::accumulate(aucScores.begin(), aucScores.end(), 0.0) / nClasses;
|
||||
}
|
||||
double RocAuc::compute_common(size_t nSamples, size_t classIdx)
|
||||
{
|
||||
std::sort(scoresAndLabels.begin(), scoresAndLabels.end(), std::greater<>());
|
||||
std::vector<double> tpr, fpr;
|
||||
double tp = 0, fp = 0;
|
||||
double totalPos = std::count(y_test.begin(), y_test.end(), classIdx);
|
||||
double totalNeg = nSamples - totalPos;
|
||||
|
||||
for (const auto& [score, label] : scoresAndLabels) {
|
||||
if (label == 1) {
|
||||
tp += 1;
|
||||
} else {
|
||||
fp += 1;
|
||||
}
|
||||
tpr.push_back(tp / totalPos);
|
||||
fpr.push_back(fp / totalNeg);
|
||||
}
|
||||
double auc = 0.0;
|
||||
for (size_t i = 1; i < tpr.size(); ++i) {
|
||||
auc += 0.5 * (fpr[i] - fpr[i - 1]) * (tpr[i] + tpr[i - 1]);
|
||||
}
|
||||
return auc;
|
||||
}
|
||||
}
|
21
src/main/RocAuc.h
Normal file
21
src/main/RocAuc.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef ROCAUC_H
|
||||
#define ROCAUC_H
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::ordered_json;
|
||||
class RocAuc {
|
||||
public:
|
||||
RocAuc() = default;
|
||||
double compute(const std::vector<std::vector<double>>& y_proba, const std::vector<int>& y_test);
|
||||
double compute(const torch::Tensor& y_proba, const torch::Tensor& y_test);
|
||||
private:
|
||||
double compute_common(size_t nSamples, size_t classIdx);
|
||||
std::vector<std::pair<double, int>> scoresAndLabels;
|
||||
std::vector<int> y_test;
|
||||
};
|
||||
}
|
||||
#endif
|
270
src/main/Scores.cpp
Normal file
270
src/main/Scores.cpp
Normal file
@@ -0,0 +1,270 @@
|
||||
#include <sstream>
|
||||
#include "Scores.h"
|
||||
#include "common/Utils.h" // tensorToVector
|
||||
#include "common/Colors.h"
|
||||
namespace platform {
|
||||
// Builds the scores from the true labels and the predicted probabilities.
// The predicted class of each sample is the argmax over dimension 1 of y_proba.
// When no labels are supplied, default "Class <n>" labels are generated.
Scores::Scores(torch::Tensor& y_test, torch::Tensor& y_proba, int num_classes, std::vector<std::string> labels) : num_classes(num_classes), labels(labels), y_test(y_test), y_proba(y_proba)
{
    if (labels.empty()) {
        init_default_labels();
    }
    total = y_test.size(0);
    auto y_pred = y_proba.argmax(1);
    const float hits = (y_pred == y_test).sum().item<float>();
    accuracy_value = hits / total;
    init_confusion_matrix();
    // Rows are the actual classes, columns the predicted ones
    for (int sample = 0; sample < total; ++sample) {
        const int row = y_test[sample].item<int>();
        const int column = y_pred[sample].item<int>();
        confusion_matrix[row][column] += 1;
    }
}
|
||||
// Builds the scores from a stored confusion matrix.
// confusion_matrix_ is either an object (label -> row of counts) or an array of
// rows. For objects the keys are used as labels; for arrays the row index is
// used to build a "Class <n>" label.
// Each row must hold at least num_classes integer counts; a shorter row makes
// at() throw a json out_of_range exception.
Scores::Scores(const json& confusion_matrix_)
{
    total = 0;
    num_classes = confusion_matrix_.size();
    init_confusion_matrix();
    // items() always yields the key as a string (the index, for arrays), so the
    // container type is what tells whether the keys are real labels or not.
    const bool keys_are_indices = confusion_matrix_.is_array();
    int i = 0;
    for (const auto& item : confusion_matrix_.items()) {
        const json& row = item.value();
        if (keys_are_indices) {
            labels.push_back("Class " + item.key());
        } else {
            labels.push_back(item.key());
        }
        for (int j = 0; j < num_classes; ++j) {
            const int value_int = row.at(j).get<int>();
            confusion_matrix[i][j] = value_int;
            total += value_int;
        }
        i++;
    }
    compute_accuracy_value();
}
|
||||
float Scores::auc()
|
||||
{
|
||||
size_t nSamples = y_test.numel();
|
||||
if (nSamples == 0) return 0;
|
||||
// In binary classification problem there's no need to calculate the average of the AUCs
|
||||
auto nClasses = num_classes;
|
||||
if (num_classes == 2)
|
||||
nClasses = 1;
|
||||
auto y_testv = tensorToVector<int>(y_test);
|
||||
std::vector<double> aucScores(nClasses, 0.0);
|
||||
std::vector<std::pair<double, int>> scoresAndLabels;
|
||||
for (size_t classIdx = 0; classIdx < nClasses; ++classIdx) {
|
||||
if (classIdx >= y_proba.size(1)) {
|
||||
std::cerr << "AUC warning - class index out of range" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
scoresAndLabels.clear();
|
||||
for (size_t i = 0; i < nSamples; ++i) {
|
||||
scoresAndLabels.emplace_back(y_proba[i][classIdx].item<float>(), y_testv[i] == classIdx ? 1 : 0);
|
||||
}
|
||||
std::sort(scoresAndLabels.begin(), scoresAndLabels.end(), std::greater<>());
|
||||
std::vector<double> tpr, fpr;
|
||||
double tp = 0, fp = 0;
|
||||
double totalPos = std::count(y_testv.begin(), y_testv.end(), classIdx);
|
||||
double totalNeg = nSamples - totalPos;
|
||||
for (const auto& [score, label] : scoresAndLabels) {
|
||||
if (label == 1) {
|
||||
tp += 1;
|
||||
} else {
|
||||
fp += 1;
|
||||
}
|
||||
tpr.push_back(tp / totalPos);
|
||||
fpr.push_back(fp / totalNeg);
|
||||
}
|
||||
double auc = 0.0;
|
||||
for (size_t i = 1; i < tpr.size(); ++i) {
|
||||
auc += 0.5 * (fpr[i] - fpr[i - 1]) * (tpr[i] + tpr[i - 1]);
|
||||
}
|
||||
aucScores[classIdx] = auc;
|
||||
}
|
||||
return std::accumulate(aucScores.begin(), aucScores.end(), 0.0) / nClasses;
|
||||
}
|
||||
// Builds a Scores object that aggregates every confusion matrix stored in
// data[key].
// Throws a json out_of_range exception if key is not present in data and
// std::invalid_argument if the list of confusion matrices is empty.
Scores Scores::create_aggregate(const json& data, const std::string key)
{
    // at() is bounds/key checked; operator[] on a const json is undefined for a missing key
    const json& matrices = data.at(key);
    if (matrices.empty()) {
        throw std::invalid_argument("No confusion matrices found for key " + key);
    }
    auto scores = Scores(matrices.at(0));
    for (size_t i = 1; i < matrices.size(); ++i) {
        scores.aggregate(Scores(matrices.at(i)));
    }
    return scores;
}
|
||||
void Scores::compute_accuracy_value()
|
||||
{
|
||||
accuracy_value = 0;
|
||||
for (int i = 0; i < num_classes; i++) {
|
||||
accuracy_value += confusion_matrix[i][i].item<int>();
|
||||
}
|
||||
accuracy_value /= total;
|
||||
accuracy_value = std::min(accuracy_value, 1.0f);
|
||||
}
|
||||
void Scores::init_confusion_matrix()
|
||||
{
|
||||
confusion_matrix = torch::zeros({ num_classes, num_classes }, torch::kInt32);
|
||||
}
|
||||
void Scores::init_default_labels()
|
||||
{
|
||||
for (int i = 0; i < num_classes; i++) {
|
||||
labels.push_back("Class " + std::to_string(i));
|
||||
}
|
||||
}
|
||||
void Scores::aggregate(const Scores& a)
|
||||
{
|
||||
if (a.num_classes != num_classes)
|
||||
throw std::invalid_argument("The number of classes must be the same");
|
||||
confusion_matrix += a.confusion_matrix;
|
||||
total += a.total;
|
||||
compute_accuracy_value();
|
||||
}
|
||||
float Scores::accuracy()
|
||||
{
|
||||
return accuracy_value;
|
||||
}
|
||||
float Scores::f1_score(int num_class)
|
||||
{
|
||||
// Compute f1_score in a one vs rest fashion
|
||||
auto precision_value = precision(num_class);
|
||||
auto recall_value = recall(num_class);
|
||||
if (precision_value + recall_value == 0) return 0; // Avoid division by zero (0/0 = 0)
|
||||
return 2 * precision_value * recall_value / (precision_value + recall_value);
|
||||
}
|
||||
float Scores::f1_weighted()
|
||||
{
|
||||
float f1_weighted = 0;
|
||||
for (int i = 0; i < num_classes; i++) {
|
||||
f1_weighted += confusion_matrix[i].sum().item<int>() * f1_score(i);
|
||||
}
|
||||
return f1_weighted / total;
|
||||
}
|
||||
float Scores::f1_macro()
|
||||
{
|
||||
float f1_macro = 0;
|
||||
for (int i = 0; i < num_classes; i++) {
|
||||
f1_macro += f1_score(i);
|
||||
}
|
||||
return f1_macro / num_classes;
|
||||
}
|
||||
float Scores::precision(int num_class)
|
||||
{
|
||||
int tp = confusion_matrix[num_class][num_class].item<int>();
|
||||
int fp = confusion_matrix.index({ "...", num_class }).sum().item<int>() - tp;
|
||||
int fn = confusion_matrix[num_class].sum().item<int>() - tp;
|
||||
if (tp + fp == 0) return 0; // Avoid division by zero (0/0 = 0
|
||||
return float(tp) / (tp + fp);
|
||||
}
|
||||
float Scores::recall(int num_class)
|
||||
{
|
||||
int tp = confusion_matrix[num_class][num_class].item<int>();
|
||||
int fp = confusion_matrix.index({ "...", num_class }).sum().item<int>() - tp;
|
||||
int fn = confusion_matrix[num_class].sum().item<int>() - tp;
|
||||
if (tp + fn == 0) return 0; // Avoid division by zero (0/0 = 0
|
||||
return float(tp) / (tp + fn);
|
||||
}
|
||||
std::string Scores::classification_report_line(std::string label, float precision, float recall, float f1_score, int support)
|
||||
{
|
||||
std::stringstream oss;
|
||||
oss << std::right << std::setw(label_len) << label << " ";
|
||||
if (precision == 0) {
|
||||
oss << std::string(dlen, ' ') << " ";
|
||||
} else {
|
||||
oss << std::setw(dlen) << std::setprecision(ndec) << std::fixed << precision << " ";
|
||||
}
|
||||
if (recall == 0) {
|
||||
oss << std::string(dlen, ' ') << " ";
|
||||
} else {
|
||||
oss << std::setw(dlen) << std::setprecision(ndec) << std::fixed << recall << " ";
|
||||
}
|
||||
oss << std::setw(dlen) << std::setprecision(ndec) << std::fixed << f1_score << " "
|
||||
<< std::setw(dlen) << std::right << support;
|
||||
return oss.str();
|
||||
}
|
||||
std::tuple<float, float, float, float> Scores::compute_averages()
|
||||
{
|
||||
float precision_avg = 0;
|
||||
float recall_avg = 0;
|
||||
float precision_wavg = 0;
|
||||
float recall_wavg = 0;
|
||||
for (int i = 0; i < num_classes; i++) {
|
||||
int support = confusion_matrix[i].sum().item<int>();
|
||||
precision_avg += precision(i);
|
||||
precision_wavg += precision(i) * support;
|
||||
recall_avg += recall(i);
|
||||
recall_wavg += recall(i) * support;
|
||||
}
|
||||
precision_wavg /= total;
|
||||
recall_wavg /= total;
|
||||
precision_avg /= num_classes;
|
||||
recall_avg /= num_classes;
|
||||
return { precision_avg, recall_avg, precision_wavg, recall_wavg };
|
||||
}
|
||||
std::vector<std::string> Scores::classification_report(std::string color, std::string title)
|
||||
{
|
||||
std::stringstream oss;
|
||||
std::vector<std::string> report;
|
||||
for (int i = 0; i < num_classes; i++) {
|
||||
label_len = std::max(label_len, (int)labels[i].size());
|
||||
}
|
||||
report.push_back("Classification Report using " + title + " dataset");
|
||||
report.push_back("=========================================");
|
||||
oss << std::string(label_len, ' ') << " precision recall f1-score support";
|
||||
report.push_back(oss.str()); oss.str("");
|
||||
oss << std::string(label_len, ' ') << " ========= ========= ========= =========";
|
||||
report.push_back(oss.str()); oss.str("");
|
||||
for (int i = 0; i < num_classes; i++) {
|
||||
report.push_back(classification_report_line(labels[i], precision(i), recall(i), f1_score(i), confusion_matrix[i].sum().item<int>()));
|
||||
}
|
||||
report.push_back(" ");
|
||||
oss << classification_report_line("accuracy", 0, 0, accuracy(), total);
|
||||
report.push_back(oss.str()); oss.str("");
|
||||
auto [precision_avg, recall_avg, precision_wavg, recall_wavg] = compute_averages();
|
||||
report.push_back(classification_report_line("macro avg", precision_avg, recall_avg, f1_macro(), total));
|
||||
report.push_back(classification_report_line("weighted avg", precision_wavg, recall_wavg, f1_weighted(), total));
|
||||
report.push_back("");
|
||||
report.push_back("Confusion Matrix");
|
||||
report.push_back("================");
|
||||
auto number = total > 1000 ? 4 : 3;
|
||||
for (int i = 0; i < num_classes; i++) {
|
||||
oss << std::right << std::setw(label_len) << labels[i] << " ";
|
||||
for (int j = 0; j < num_classes; j++) {
|
||||
if (i == j) oss << Colors::GREEN();
|
||||
oss << std::setw(number) << confusion_matrix[i][j].item<int>() << " ";
|
||||
if (i == j) oss << color;
|
||||
}
|
||||
report.push_back(oss.str()); oss.str("");
|
||||
}
|
||||
return report;
|
||||
}
|
||||
// Builds the classification report as json: title, headers, one body row per
// class, the accuracy / macro / weighted rows and the confusion matrix.
json Scores::classification_report_json(std::string title)
{
    json output;
    output["title"] = "Classification Report using " + title + " dataset";
    output["headers"] = { " ", "precision", "recall", "f1-score", "support" };
    output["body"] = {};
    for (int cls = 0; cls < num_classes; ++cls) {
        output["body"].push_back({ labels[cls], precision(cls), recall(cls), f1_score(cls), confusion_matrix[cls].sum().item<int>() });
    }
    output["accuracy"] = { "accuracy", 0, 0, accuracy(), total };
    auto [macro_precision, macro_recall, weighted_precision, weighted_recall] = compute_averages();
    output["averages"] = { "macro avg", macro_precision, macro_recall, f1_macro(), total };
    output["weighted"] = { "weighted avg", weighted_precision, weighted_recall, f1_weighted(), total };
    output["confusion_matrix"] = get_confusion_matrix_json();
    return output;
}
|
||||
// Exports the confusion matrix as json, one row of int counts per class.
// Rows are keyed by label when labels_as_keys is true, by row index otherwise.
json Scores::get_confusion_matrix_json(bool labels_as_keys)
{
    json output;
    for (int row = 0; row < num_classes; ++row) {
        auto first = confusion_matrix[row].data_ptr<int>();
        const std::vector<int> counts(first, first + num_classes);
        if (labels_as_keys) {
            output[labels[row]] = counts;
        } else {
            output[row] = counts;
        }
    }
    return output;
}
|
||||
}
|
46
src/main/Scores.h
Normal file
46
src/main/Scores.h
Normal file
@@ -0,0 +1,46 @@
|
||||
#ifndef SCORES_H
|
||||
#define SCORES_H
|
||||
#include <torch/torch.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::ordered_json;
|
||||
class Scores {
|
||||
public:
|
||||
Scores(torch::Tensor& y_test, torch::Tensor& y_proba, int num_classes, std::vector<std::string> labels = {});
|
||||
explicit Scores(const json& confusion_matrix_);
|
||||
static Scores create_aggregate(const json& data, const std::string key);
|
||||
float accuracy();
|
||||
float auc();
|
||||
float f1_score(int num_class);
|
||||
float f1_weighted();
|
||||
float f1_macro();
|
||||
float precision(int num_class);
|
||||
float recall(int num_class);
|
||||
torch::Tensor get_confusion_matrix() { return confusion_matrix; }
|
||||
std::vector<std::string> classification_report(std::string color = "", std::string title = "");
|
||||
json classification_report_json(std::string title = "");
|
||||
json get_confusion_matrix_json(bool labels_as_keys = false);
|
||||
void aggregate(const Scores& a);
|
||||
private:
|
||||
std::string classification_report_line(std::string label, float precision, float recall, float f1_score, int support);
|
||||
void init_confusion_matrix();
|
||||
void init_default_labels();
|
||||
void compute_accuracy_value();
|
||||
std::tuple<float, float, float, float> compute_averages();
|
||||
int num_classes;
|
||||
float accuracy_value;
|
||||
int total;
|
||||
std::vector<std::string> labels;
|
||||
torch::Tensor confusion_matrix; // Rows ar actual, columns are predicted
|
||||
torch::Tensor null_t; // Covenient null tensor needed when confusion_matrix constructor is used
|
||||
torch::Tensor& y_test = null_t; // for ROC AUC
|
||||
torch::Tensor& y_proba = null_t; // for ROC AUC
|
||||
int label_len = 16;
|
||||
int dlen = 9;
|
||||
int ndec = 7;
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#ifndef MODELREGISTER_H
|
||||
#define MODELREGISTER_H
|
||||
|
||||
static platform::Registrar registrarT("TAN",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();});
|
||||
@@ -6,6 +7,8 @@ static platform::Registrar registrarTLD("TANLd",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANLd();});
|
||||
static platform::Registrar registrarS("SPODE",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);});
|
||||
static platform::Registrar registrarSn("SPnDE",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPnDE({ 0, 1 });});
|
||||
static platform::Registrar registrarSLD("SPODELd",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2);});
|
||||
static platform::Registrar registrarK("KDB",
|
||||
@@ -14,10 +17,14 @@ static platform::Registrar registrarKLD("KDBLd",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2);});
|
||||
static platform::Registrar registrarA("AODE",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();});
|
||||
static platform::Registrar registrarA2("A2DE",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::A2DE();});
|
||||
static platform::Registrar registrarALD("AODELd",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
|
||||
static platform::Registrar registrarBA("BoostAODE",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostAODE();});
|
||||
static platform::Registrar registrarBA2("BoostA2DE",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::BoostA2DE();});
|
||||
static platform::Registrar registrarSt("STree",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new pywrap::STree();});
|
||||
static platform::Registrar registrarOdte("Odte",
|
||||
@@ -28,3 +35,5 @@ static platform::Registrar registrarRaF("RandomForest",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new pywrap::RandomForest();});
|
||||
static platform::Registrar registrarXGB("XGBoost",
|
||||
[](void) -> bayesnet::BaseClassifier* { return new pywrap::XGBoost();});
|
||||
|
||||
#endif
|
@@ -1,20 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
|
||||
namespace platform {
|
||||
// Parses a menu command typed by the user.
// parse() returns a <command, index, error> tuple; the getters expose the
// command, index and error message stored in the object.
class CommandParser {
public:
    CommandParser() = default;
    // color: color used for the prompt; options: <name, key, requires value>
    // tuples; defaultCommand / minIndex / maxIndex: see the implementation.
    // NOTE(review): exact semantics live in CommandParser.cpp, not visible here.
    std::tuple<char, int, bool> parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int minIndex, const int maxIndex);
    char getCommand() const { return command; }
    int getIndex() const { return index; }
    std::string getErrorMessage() const { return errorMessage; }
private:
    std::string errorMessage;
    // Initialized so the getters never read indeterminate values before parse()
    char command{ '\0' };
    int index{ -1 };
};
|
||||
} /* namespace platform */
|
@@ -1,48 +1,81 @@
|
||||
#include <filesystem>
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
#include "common/Colors.h"
|
||||
#include <algorithm>
|
||||
#include "folding.hpp"
|
||||
#include "common/CLocale.h"
|
||||
#include "common/Paths.h"
|
||||
#include "CommandParser.h"
|
||||
#include "OptionsMenu.h"
|
||||
#include "ManageScreen.h"
|
||||
#include "reports/DatasetsConsole.h"
|
||||
#include "reports/ReportConsole.h"
|
||||
#include "reports/ReportExcel.h"
|
||||
#include "reports/ReportExcelCompared.h"
|
||||
|
||||
#include <bayesnet/classifiers/TAN.h>
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
|
||||
namespace platform {
|
||||
const std::string STATUS_OK = "Ok.";
|
||||
const std::string STATUS_COLOR = Colors::GREEN();
|
||||
ManageScreen::ManageScreen(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare) :
|
||||
numFiles{ numFiles }, complete{ complete }, partial{ partial }, compare{ compare }, didExcel(false), results(ResultsManager(model, score, complete, partial))
|
||||
|
||||
ManageScreen::ManageScreen(int rows, int cols, const std::string& model, const std::string& score, const std::string& platform, bool complete, bool partial, bool compare) :
|
||||
rows{ rows }, cols{ cols }, complete{ complete }, partial{ partial }, compare{ compare }, didExcel(false), results(ResultsManager(model, score, platform, complete, partial))
|
||||
{
|
||||
results.load();
|
||||
results.sortDate();
|
||||
sort_field = "Date";
|
||||
indexList = true;
|
||||
openExcel = false;
|
||||
workbook = NULL;
|
||||
if (numFiles == 0 or numFiles > results.size()) {
|
||||
this->numFiles = results.size();
|
||||
}
|
||||
maxModel = results.maxModelSize();
|
||||
maxTitle = results.maxTitleSize();
|
||||
header_lengths = { 3, 10, maxModel, 11, 10, 12, 2, 3, 7, maxTitle };
|
||||
header_labels = { " #", "Date", "Model", "Score Name", "Score", "Platform", "SD", "C/P", "Time", "Title" };
|
||||
sort_fields = { "Date", "Model", "Score", "Time" };
|
||||
updateSize(rows, cols);
|
||||
// Initializes the paginator for each output type (experiments, datasets, result)
|
||||
for (int i = 0; i < static_cast<int>(OutputType::Count); i++) {
|
||||
paginator.push_back(Paginator(numFiles, results.size()));
|
||||
paginator.push_back(Paginator(this->rows, results.size()));
|
||||
}
|
||||
index_A = -1;
|
||||
index_B = -1;
|
||||
max_status_line = 140;
|
||||
index = -1;
|
||||
subIndex = -1;
|
||||
output_type = OutputType::EXPERIMENTS;
|
||||
}
|
||||
void ManageScreen::computeSizes()
|
||||
{
|
||||
int minTitle = 10;
|
||||
// set 10 chars as minimum for Title
|
||||
auto header_title = header_lengths[header_lengths.size() - 1];
|
||||
min_columns = std::accumulate(header_lengths.begin(), header_lengths.end(), 0) + header_lengths.size() - header_title + minTitle;
|
||||
maxTitle = minTitle + cols - min_columns;
|
||||
header_lengths[header_lengths.size() - 1] = maxTitle;
|
||||
cols = std::min(cols, min_columns + maxTitle);
|
||||
for (auto& paginator_ : paginator) {
|
||||
paginator_.setPageSize(rows);
|
||||
}
|
||||
}
|
||||
bool ManageScreen::checkWrongColumns()
|
||||
{
|
||||
if (min_columns > cols) {
|
||||
std::cerr << Colors::MAGENTA() << "Make screen bigger to fit the results! " + std::to_string(min_columns - cols) + " columns needed! " << std::endl;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
void ManageScreen::updateSize(int rows_, int cols_)
|
||||
{
|
||||
rows = std::max(6, rows_ - 6); // 6 is the number of lines used by the menu & header
|
||||
cols = cols_;
|
||||
computeSizes();
|
||||
}
|
||||
void ManageScreen::doMenu()
|
||||
{
|
||||
if (results.empty()) {
|
||||
std::cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl;
|
||||
std::cerr << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl;
|
||||
return;
|
||||
}
|
||||
results.sortDate();
|
||||
if (checkWrongColumns())
|
||||
return;
|
||||
results.sortResults(sort_field, sort_type);
|
||||
list(STATUS_OK, STATUS_COLOR);
|
||||
menu();
|
||||
if (openExcel) {
|
||||
@@ -53,6 +86,13 @@ namespace platform {
|
||||
}
|
||||
std::cout << Colors::RESET() << "Done!" << std::endl;
|
||||
}
|
||||
std::string ManageScreen::getVersions()
|
||||
{
|
||||
std::string kfold_version = folding::KFold(5, 100).version();
|
||||
std::string bayesnet_version = bayesnet::TAN().getVersion();
|
||||
std::string mdlp_version = mdlp::CPPFImdlp::version();
|
||||
return " BayesNet: " + bayesnet_version + " Folding: " + kfold_version + " MDLP: " + mdlp_version + " ";
|
||||
}
|
||||
void ManageScreen::header()
|
||||
{
|
||||
auto [index_from, index_to] = paginator[static_cast<int>(output_type)].getOffset();
|
||||
@@ -70,17 +110,18 @@ namespace platform {
|
||||
std::string header = " Lines " + std::to_string(lines) + " of "
|
||||
+ std::to_string(total) + " - Page " + std::to_string(page) + " of "
|
||||
+ std::to_string(pages) + " ";
|
||||
|
||||
std::string prefix = std::string(max_status_line - suffix.size() - header.size(), ' ');
|
||||
std::cout << Colors::CLRSCR() << Colors::REVERSE() << Colors::WHITE() << header << prefix
|
||||
<< Colors::MAGENTA() << suffix << Colors::RESET() << std::endl;
|
||||
std::string versions = getVersions();
|
||||
int filler = std::max(cols - versions.size() - suffix.size() - header.size(), size_t(0));
|
||||
std::string prefix = std::string(filler, ' ');
|
||||
std::cout << Colors::CLRSCR() << Colors::REVERSE() << Colors::WHITE() << header
|
||||
<< prefix << Colors::GREEN() << versions << Colors::MAGENTA() << suffix << Colors::RESET() << std::endl;
|
||||
}
|
||||
void ManageScreen::footer(const std::string& status, const std::string& status_color)
|
||||
{
|
||||
std::stringstream oss;
|
||||
oss << " A: " << (index_A == -1 ? "<notset>" : std::to_string(index_A)) <<
|
||||
" B: " << (index_B == -1 ? "<notset>" : std::to_string(index_B)) << " ";
|
||||
int status_length = std::max(oss.str().size(), max_status_line - oss.str().size());
|
||||
int status_length = std::max(oss.str().size(), cols - oss.str().size());
|
||||
auto status_message = status.substr(0, status_length - 1);
|
||||
std::string status_line = status_message + std::string(std::max(size_t(0), status_length - status_message.size() - 1), ' ');
|
||||
auto color = (index_A != -1 && index_B != -1) ? Colors::IGREEN() : Colors::IYELLOW();
|
||||
@@ -94,6 +135,9 @@ namespace platform {
|
||||
case static_cast<int>(OutputType::RESULT):
|
||||
list_result(status_message, status_color);
|
||||
break;
|
||||
case static_cast<int>(OutputType::DETAIL):
|
||||
list_detail(status_message, status_color);
|
||||
break;
|
||||
case static_cast<int>(OutputType::DATASETS):
|
||||
list_datasets(status_message, status_color);
|
||||
break;
|
||||
@@ -104,23 +148,57 @@ namespace platform {
|
||||
}
|
||||
void ManageScreen::list_result(const std::string& status_message, const std::string& status_color)
|
||||
{
|
||||
|
||||
//auto report = DatasetsConsole();
|
||||
//report.report();
|
||||
//paginator[static_cast<int>(output_type)].setTotal(report.getNumLines());
|
||||
auto data = results.at(index).getJson();
|
||||
ReportConsole report(data, compare);
|
||||
auto header_text = report.getHeader();
|
||||
auto body = report.getBody();
|
||||
paginator[static_cast<int>(output_type)].setTotal(body.size());
|
||||
// We need to subtract 8 from the page size to make room for the extra header in report
|
||||
auto page_size = paginator[static_cast<int>(OutputType::EXPERIMENTS)].getPageSize();
|
||||
paginator[static_cast<int>(output_type)].setPage(page_size - 8);
|
||||
|
||||
paginator[static_cast<int>(output_type)].setPageSize(page_size - 8);
|
||||
//
|
||||
// header
|
||||
//
|
||||
header();
|
||||
//
|
||||
// Results
|
||||
//
|
||||
std::cout << header_text;
|
||||
auto [index_from, index_to] = paginator[static_cast<int>(output_type)].getOffset();
|
||||
for (int i = index_from; i <= index_to; i++) {
|
||||
std::cout << body[i];
|
||||
}
|
||||
//
|
||||
// Status Area
|
||||
//
|
||||
footer(status_message, status_color);
|
||||
}
|
||||
void ManageScreen::list_detail(const std::string& status_message, const std::string& status_color)
|
||||
{
|
||||
auto data = results.at(index).getJson();
|
||||
ReportConsole report(data, compare, subIndex);
|
||||
auto header_text = report.getHeader();
|
||||
auto body = report.getBody();
|
||||
paginator[static_cast<int>(output_type)].setTotal(body.size());
|
||||
// We need to subtract 8 from the page size to make room for the extra header in report
|
||||
auto page_size = paginator[static_cast<int>(OutputType::EXPERIMENTS)].getPageSize();
|
||||
paginator[static_cast<int>(output_type)].setPageSize(page_size - 8);
|
||||
//
|
||||
// header
|
||||
//
|
||||
header();
|
||||
//
|
||||
// Results
|
||||
//
|
||||
std::cout << header_text;
|
||||
auto [index_from, index_to] = paginator[static_cast<int>(output_type)].getOffset();
|
||||
for (int i = index_from; i <= index_to; i++) {
|
||||
std::cout << body[i];
|
||||
}
|
||||
//
|
||||
// Status Area
|
||||
//
|
||||
footer(status_message, status_color);
|
||||
|
||||
}
|
||||
void ManageScreen::list_datasets(const std::string& status_message, const std::string& status_color)
|
||||
{
|
||||
@@ -134,17 +212,16 @@ namespace platform {
|
||||
//
|
||||
// Results
|
||||
//
|
||||
auto data = report.getBody();
|
||||
auto body = report.getBody();
|
||||
std::cout << report.getHeader();
|
||||
auto [index_from, index_to] = paginator[static_cast<int>(output_type)].getOffset();
|
||||
for (int i = index_from; i <= index_to; i++) {
|
||||
std::cout << data[i];
|
||||
std::cout << body[i];
|
||||
}
|
||||
//
|
||||
// Status Area
|
||||
//
|
||||
footer(status_message, status_color);
|
||||
|
||||
}
|
||||
void ManageScreen::list_experiments(const std::string& status_message, const std::string& status_color)
|
||||
{
|
||||
@@ -152,23 +229,16 @@ namespace platform {
|
||||
// header
|
||||
//
|
||||
header();
|
||||
//
|
||||
// Field names
|
||||
//
|
||||
int maxModel = results.maxModelSize();
|
||||
int maxTitle = results.maxTitleSize();
|
||||
std::vector<int> header_lengths = { 3, 10, maxModel, 10, 9, 3, 7, maxTitle };
|
||||
//
|
||||
std::cout << Colors::RESET();
|
||||
std::string arrow = Symbols::downward_arrow + " ";
|
||||
std::vector<std::string> header_labels = { " #", "Date", "Model", "Score Name", "Score", "C/P", "Time", "Title" };
|
||||
std::string arrow_dn = Symbols::down_arrow + " ";
|
||||
std::string arrow_up = Symbols::up_arrow + " ";
|
||||
for (int i = 0; i < header_labels.size(); i++) {
|
||||
std::string suffix = "", color = Colors::GREEN();
|
||||
int diff = 0;
|
||||
if (header_labels[i] == sort_field) {
|
||||
if (header_labels[i] == sort_fields[static_cast<int>(sort_field)]) {
|
||||
color = Colors::YELLOW();
|
||||
diff = 2;
|
||||
suffix = arrow;
|
||||
suffix = sort_type == SortType::ASC ? arrow_up : arrow_dn;
|
||||
}
|
||||
std::cout << color << std::setw(header_lengths[i] + diff) << std::left << std::string(header_labels[i] + suffix) << " ";
|
||||
}
|
||||
@@ -180,11 +250,15 @@ namespace platform {
|
||||
//
|
||||
// Results
|
||||
//
|
||||
if (results.empty()) {
|
||||
std::cout << "No results found!" << std::endl;
|
||||
return;
|
||||
}
|
||||
auto [index_from, index_to] = paginator[static_cast<int>(output_type)].getOffset();
|
||||
for (int i = index_from; i <= index_to; i++) {
|
||||
auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN();
|
||||
std::cout << color << std::setw(3) << std::fixed << std::right << i << " ";
|
||||
std::cout << results.at(i).to_string(maxModel) << std::endl;
|
||||
std::cout << results.at(i).to_string(maxModel, maxTitle) << std::endl;
|
||||
}
|
||||
//
|
||||
// Status Area
|
||||
@@ -204,7 +278,7 @@ namespace platform {
|
||||
while (!finished) {
|
||||
std::cout << color << "Really want to " << intent << " " << fileName << "? (y/n): ";
|
||||
getline(std::cin, line);
|
||||
finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0] == 'n'));
|
||||
finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0]) == 'n');
|
||||
}
|
||||
if (tolower(line[0]) == 'y') {
|
||||
return true;
|
||||
@@ -237,91 +311,110 @@ namespace platform {
|
||||
return "Reporting " + results.at(index).getFilename();
|
||||
}
|
||||
}
|
||||
void ManageScreen::showIndex(const int index, const int idx)
|
||||
{
|
||||
// Show a dataset result inside a report
|
||||
auto data = results.at(index).getJson();
|
||||
ReportConsole reporter(data, compare, idx);
|
||||
std::cout << Colors::CLRSCR() << reporter.fileReport();
|
||||
}
|
||||
std::pair<std::string, std::string> ManageScreen::sortList()
|
||||
{
|
||||
std::cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', time='t', model='m'): ";
|
||||
std::string line;
|
||||
std::vector<std::tuple<std::string, char, bool>> sortOptions = {
|
||||
{"date", 'd', false},
|
||||
{"score", 's', false},
|
||||
{"time", 't', false},
|
||||
{"model", 'm', false},
|
||||
{"ascending+", '+', false},
|
||||
{"descending-", '-', false}
|
||||
};
|
||||
auto sortMenu = OptionsMenu(sortOptions, Colors::YELLOW(), Colors::RED(), cols);
|
||||
std::string invalid_option = "Invalid sorting option";
|
||||
char option;
|
||||
getline(std::cin, line);
|
||||
if (line.size() == 0 || line.size() > 1) {
|
||||
return { Colors::RED(), "Invalid sorting option" };
|
||||
bool parserError = true; // force the first iteration
|
||||
while (parserError) {
|
||||
if (checkWrongColumns())
|
||||
return { Colors::RED(), "Invalid column size" };
|
||||
auto [min_index, max_index] = paginator[static_cast<int>(output_type)].getOffset();
|
||||
std::tie(option, index, parserError) = sortMenu.parse(' ', 0, 0);
|
||||
sortMenu.updateColumns(cols);
|
||||
if (parserError) {
|
||||
return { Colors::RED(), invalid_option };
|
||||
}
|
||||
}
|
||||
option = line[0];
|
||||
switch (option) {
|
||||
case 'd':
|
||||
results.sortDate();
|
||||
sort_field = "Date";
|
||||
return { Colors::GREEN(), "Sorted by date" };
|
||||
sort_field = SortField::DATE;
|
||||
break;
|
||||
case 's':
|
||||
results.sortScore();
|
||||
sort_field = "Score";
|
||||
return { Colors::GREEN(), "Sorted by score" };
|
||||
sort_field = SortField::SCORE;
|
||||
break;
|
||||
case 't':
|
||||
results.sortDuration();
|
||||
sort_field = "Time";
|
||||
return { Colors::GREEN(), "Sorted by time" };
|
||||
sort_field = SortField::DURATION;
|
||||
break;
|
||||
case 'm':
|
||||
results.sortModel();
|
||||
sort_field = "Model";
|
||||
return { Colors::GREEN(), "Sorted by model" };
|
||||
sort_field = SortField::MODEL;
|
||||
break;
|
||||
case '+':
|
||||
sort_type = SortType::ASC;
|
||||
break;
|
||||
case '-':
|
||||
sort_type = SortType::DESC;
|
||||
break;
|
||||
default:
|
||||
return { Colors::RED(), "Invalid sorting option" };
|
||||
return { Colors::RED(), invalid_option };
|
||||
}
|
||||
results.sortResults(sort_field, sort_type);
|
||||
return { Colors::GREEN(), "Sorted by " + sort_fields[static_cast<int>(sort_field)] + " " + (sort_type == SortType::ASC ? "ascending" : "descending") };
|
||||
}
|
||||
void ManageScreen::menu()
|
||||
{
|
||||
char option;
|
||||
int index, subIndex;
|
||||
bool finished = false;
|
||||
std::string filename;
|
||||
// tuple<Option, digit, requires value>
|
||||
std::vector<std::tuple<std::string, char, bool>> mainOptions = {
|
||||
{"quit", 'q', false},
|
||||
{"list", 'l', false},
|
||||
{"delete", 'D', true},
|
||||
{"Delete", 'D', true},
|
||||
{"datasets", 'd', false},
|
||||
{"hide", 'h', true},
|
||||
{"sort", 's', false},
|
||||
{"report", 'r', true},
|
||||
{"excel", 'e', true},
|
||||
{"title", 't', true},
|
||||
{"set A", 'a', true},
|
||||
{"set B", 'b', true},
|
||||
{"set A", 'A', true},
|
||||
{"set B", 'B', true},
|
||||
{"compare A~B", 'c', false},
|
||||
{"Page", 'p', true},
|
||||
{"page", 'p', true},
|
||||
{"Page+", '+', false },
|
||||
{"Page-", '-', false}
|
||||
};
|
||||
// tuple<Option, digit, requires value>
|
||||
std::vector<std::tuple<std::string, char, bool>> listOptions = {
|
||||
{"quit", 'q', false},
|
||||
{"report", 'r', true},
|
||||
{"list", 'l', false},
|
||||
{"excel", 'e', true},
|
||||
{"back", 'b', false},
|
||||
{"quit", 'q', false}
|
||||
{"page", 'p', true},
|
||||
{"Page+", '+', false},
|
||||
{"Page-", '-', false}
|
||||
};
|
||||
|
||||
auto parser = CommandParser();
|
||||
while (!finished) {
|
||||
auto main_menu = OptionsMenu(mainOptions, Colors::IGREEN(), Colors::YELLOW(), cols);
|
||||
auto list_menu = OptionsMenu(listOptions, Colors::IBLUE(), Colors::YELLOW(), cols);
|
||||
OptionsMenu& menu = output_type == OutputType::EXPERIMENTS ? main_menu : list_menu;
|
||||
bool parserError = true; // force the first iteration
|
||||
while (parserError) {
|
||||
if (indexList) {
|
||||
int index_menu;
|
||||
auto [min_index, max_index] = paginator[static_cast<int>(output_type)].getOffset();
|
||||
std::tie(option, index, parserError) = parser.parse(Colors::IGREEN(), mainOptions, 'r', min_index, max_index);
|
||||
std::tie(option, index_menu, parserError) = menu.parse('r', min_index, max_index);
|
||||
if (output_type == OutputType::EXPERIMENTS) {
|
||||
index = index_menu;
|
||||
} else {
|
||||
std::tie(option, subIndex, parserError) = parser.parse(Colors::IBLUE(), listOptions, 'r', 0, results.at(index).getJson()["results"].size() - 1);
|
||||
subIndex = index_menu;
|
||||
}
|
||||
if (min_columns > cols) {
|
||||
std::cerr << "Make screen bigger to fit the results! " + std::to_string(min_columns - cols) + " columns needed! " << std::endl;
|
||||
return;
|
||||
}
|
||||
menu.updateColumns(cols);
|
||||
if (parserError) {
|
||||
if (indexList)
|
||||
list(parser.getErrorMessage(), Colors::RED());
|
||||
else
|
||||
report(index, false);
|
||||
list(menu.getErrorMessage(), Colors::RED());
|
||||
}
|
||||
}
|
||||
switch (option) {
|
||||
@@ -330,10 +423,13 @@ namespace platform {
|
||||
list_datasets(STATUS_OK, STATUS_COLOR);
|
||||
break;
|
||||
case 'p':
|
||||
if (paginator[static_cast<int>(output_type)].setPage(index)) {
|
||||
{
|
||||
auto page = output_type == OutputType::EXPERIMENTS ? index : subIndex;
|
||||
if (paginator[static_cast<int>(output_type)].setPage(page)) {
|
||||
list(STATUS_OK, STATUS_COLOR);
|
||||
} else {
|
||||
list("Invalid page!", Colors::RED());
|
||||
list("Invalid page! (" + std::to_string(page) + ")", Colors::RED());
|
||||
}
|
||||
}
|
||||
break;
|
||||
case '+':
|
||||
@@ -353,7 +449,7 @@ namespace platform {
|
||||
case 'q':
|
||||
finished = true;
|
||||
break;
|
||||
case 'a':
|
||||
case 'A':
|
||||
if (index == index_B) {
|
||||
list("A and B cannot be the same!", Colors::RED());
|
||||
break;
|
||||
@@ -361,8 +457,8 @@ namespace platform {
|
||||
index_A = index;
|
||||
list("A set to " + std::to_string(index), Colors::GREEN());
|
||||
break;
|
||||
case 'b':
|
||||
if (indexList) {
|
||||
case 'B': // set_b or back to list
|
||||
if (output_type == OutputType::EXPERIMENTS) {
|
||||
if (index == index_A) {
|
||||
list("A and B cannot be the same!", Colors::RED());
|
||||
break;
|
||||
@@ -371,7 +467,9 @@ namespace platform {
|
||||
list("B set to " + std::to_string(index), Colors::GREEN());
|
||||
} else {
|
||||
// back to show the report
|
||||
report(index, false);
|
||||
output_type = OutputType::RESULT;
|
||||
paginator[static_cast<int>(OutputType::DETAIL)].setPage(1);
|
||||
list(STATUS_OK, STATUS_COLOR);
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
@@ -383,8 +481,10 @@ namespace platform {
|
||||
break;
|
||||
case 'l':
|
||||
output_type = OutputType::EXPERIMENTS;
|
||||
paginator[static_cast<int>(OutputType::DATASETS)].setPage(1);
|
||||
paginator[static_cast<int>(OutputType::RESULT)].setPage(1);
|
||||
paginator[static_cast<int>(OutputType::DETAIL)].setPage(1);
|
||||
list(STATUS_OK, STATUS_COLOR);
|
||||
indexList = true;
|
||||
break;
|
||||
case 'D':
|
||||
filename = results.at(index).getFilename();
|
||||
@@ -394,6 +494,7 @@ namespace platform {
|
||||
}
|
||||
std::cout << "Deleting " << filename << std::endl;
|
||||
results.deleteResult(index);
|
||||
paginator[static_cast<int>(OutputType::EXPERIMENTS)].setTotal(results.size());
|
||||
list(filename + " deleted!", Colors::RED());
|
||||
break;
|
||||
case 'h':
|
||||
@@ -408,6 +509,7 @@ namespace platform {
|
||||
std::cout << "Hiding " << filename << std::endl;
|
||||
results.hideResult(index, Paths::hiddenResults());
|
||||
status_message = filename + " hidden! (moved to " + Paths::hiddenResults() + ")";
|
||||
paginator[static_cast<int>(OutputType::EXPERIMENTS)].setTotal(results.size());
|
||||
list(status_message, Colors::YELLOW());
|
||||
}
|
||||
break;
|
||||
@@ -423,16 +525,22 @@ namespace platform {
|
||||
list(STATUS_OK, STATUS_COLOR);
|
||||
break;
|
||||
}
|
||||
if (indexList) {
|
||||
report(index, false);
|
||||
indexList = false;
|
||||
if (output_type == OutputType::EXPERIMENTS) {
|
||||
output_type = OutputType::RESULT;
|
||||
paginator[static_cast<int>(OutputType::DETAIL)].setPage(1);
|
||||
list(STATUS_OK, STATUS_COLOR);
|
||||
} else {
|
||||
showIndex(index, subIndex);
|
||||
output_type = OutputType::DETAIL;
|
||||
list(STATUS_OK, STATUS_COLOR);
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
if (output_type == OutputType::EXPERIMENTS) {
|
||||
list(report(index, true), Colors::GREEN());
|
||||
break;
|
||||
}
|
||||
list(report(subIndex, true), Colors::GREEN());
|
||||
break;
|
||||
case 't':
|
||||
{
|
||||
std::string status_message;
|
||||
|
@@ -1,7 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef MANAGE_SCREEN_H
|
||||
#define MANAGE_SCREEN_H
|
||||
#include <xlsxwriter.h>
|
||||
#include "ResultsManager.h"
|
||||
#include "common/Colors.h"
|
||||
#include "Paginator.hpp"
|
||||
|
||||
namespace platform {
|
||||
@@ -9,39 +10,53 @@ namespace platform {
|
||||
EXPERIMENTS = 0,
|
||||
DATASETS = 1,
|
||||
RESULT = 2,
|
||||
DETAIL = 3,
|
||||
Count
|
||||
};
|
||||
class ManageScreen {
|
||||
public:
|
||||
ManageScreen(int numFiles, const std::string& model, const std::string& score, bool complete, bool partial, bool compare);
|
||||
ManageScreen(int rows, int cols, const std::string& model, const std::string& score, const std::string& platform, bool complete, bool partial, bool compare);
|
||||
~ManageScreen() = default;
|
||||
void doMenu();
|
||||
void updateSize(int rows, int cols);
|
||||
private:
|
||||
void list(const std::string& status, const std::string& color);
|
||||
void list_experiments(const std::string& status, const std::string& color);
|
||||
void list_result(const std::string& status, const std::string& color);
|
||||
void list_detail(const std::string& status, const std::string& color);
|
||||
void list_datasets(const std::string& status, const std::string& color);
|
||||
bool confirmAction(const std::string& intent, const std::string& fileName) const;
|
||||
std::string report(const int index, const bool excelReport);
|
||||
std::string report_compared();
|
||||
void showIndex(const int index, const int idx);
|
||||
std::pair<std::string, std::string> sortList();
|
||||
std::string getVersions();
|
||||
void computeSizes();
|
||||
bool checkWrongColumns();
|
||||
void menu();
|
||||
void header();
|
||||
void footer(const std::string& status, const std::string& color);
|
||||
OutputType output_type;
|
||||
int numFiles;
|
||||
int rows;
|
||||
int cols;
|
||||
int min_columns;
|
||||
int index;
|
||||
int subIndex;
|
||||
int index_A, index_B; // used for comparison of experiments
|
||||
int max_status_line;
|
||||
bool indexList;
|
||||
bool openExcel;
|
||||
bool didExcel;
|
||||
bool complete;
|
||||
bool partial;
|
||||
bool compare;
|
||||
std::string sort_field;
|
||||
int maxModel, maxTitle;
|
||||
std::vector<std::string> header_labels;
|
||||
std::vector<int> header_lengths;
|
||||
std::vector<std::string> sort_fields;
|
||||
SortField sort_field = SortField::DATE;
|
||||
SortType sort_type = SortType::DESC;
|
||||
std::vector<Paginator> paginator;
|
||||
ResultsManager results;
|
||||
lxw_workbook* workbook;
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -1,30 +1,46 @@
|
||||
#include "CommandParser.h"
|
||||
#include "OptionsMenu.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "common/Colors.h"
|
||||
#include "common/Utils.h"
|
||||
|
||||
namespace platform {
|
||||
|
||||
std::tuple<char, int, bool> CommandParser::parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int minIndex, const int maxIndex)
|
||||
std::string OptionsMenu::to_string()
|
||||
{
|
||||
bool first = true;
|
||||
std::string result = color_normal + "Options: (";
|
||||
size_t size = 10; // Size of "Options: ("
|
||||
for (auto& option : options) {
|
||||
if (!first) {
|
||||
result += ", ";
|
||||
size += 2;
|
||||
}
|
||||
std::string title = std::get<0>(option);
|
||||
auto pos = title.find(std::get<1>(option));
|
||||
result += color_normal + title.substr(0, pos) + color_bold + title.substr(pos, 1) + color_normal + title.substr(pos + 1);
|
||||
size += title.size();
|
||||
first = false;
|
||||
}
|
||||
if (size + 3 > cols) { // 3 is the size of the "): " at the end
|
||||
result = "";
|
||||
first = true;
|
||||
for (auto& option : options) {
|
||||
if (!first) {
|
||||
result += color_normal + ", ";
|
||||
}
|
||||
result += color_bold + std::get<1>(option);
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
result += "): ";
|
||||
return result;
|
||||
}
|
||||
std::tuple<char, int, bool> OptionsMenu::parse(char defaultCommand, int minIndex, int maxIndex)
|
||||
{
|
||||
bool finished = false;
|
||||
while (!finished) {
|
||||
std::stringstream oss;
|
||||
std::cout << to_string();
|
||||
std::string line;
|
||||
oss << color << "Options (";
|
||||
bool first = true;
|
||||
for (auto& option : options) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
oss << ", ";
|
||||
}
|
||||
oss << std::get<char>(option) << "=" << std::get<std::string>(option);
|
||||
}
|
||||
oss << "): ";
|
||||
std::cout << oss.str();
|
||||
getline(std::cin, line);
|
||||
line = trim(line);
|
||||
if (line.size() == 0) {
|
26
src/manage/OptionsMenu.h
Normal file
26
src/manage/OptionsMenu.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#ifndef OPTIONS_MENU_H
|
||||
#define OPTIONS_MENU_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
|
||||
namespace platform {
|
||||
// One-line command menu: renders the list of available options as a prompt
// (to_string) and reads/validates the user's answer (parse).
//
// Each option is a tuple <title, shortcut character, requires value>.
// The options vector is held BY REFERENCE, so it must outlive the menu.
class OptionsMenu {
public:
    // options      : option table shown in the prompt (not copied).
    // color_normal : color sequence used for regular prompt text.
    // color_bold   : color sequence used for the shortcut characters.
    // cols         : available screen width, in columns.
    OptionsMenu(std::vector<std::tuple<std::string, char, bool>>& options, std::string color_normal, std::string color_bold, int cols) : options(options), color_normal(color_normal), color_bold(color_bold), cols(cols) {}
    // Returns the prompt text for the current option table and width.
    std::string to_string();
    // Prompts the user and returns a <command, index, error> tuple.
    std::tuple<char, int, bool> parse(char defaultCommand, int minIndex, int maxIndex);
    char getCommand() const { return command; }
    int getIndex() const { return index; }
    std::string getErrorMessage() const { return errorMessage; }
    // Informs the menu of a new screen width.
    void updateColumns(int cols) { this->cols = cols; }
private:
    std::vector<std::tuple<std::string, char, bool>>& options;
    std::string color_normal, color_bold;
    int cols;
    std::string errorMessage;
    // Initialised so the getters are well defined before the first parse().
    char command{ '\0' };
    int index{ 0 };
};
|
||||
} /* namespace platform */
|
||||
#endif
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef PAGINATOR_HPP
|
||||
#define PAGINATOR_HPP
|
||||
#include <utility>
|
||||
|
||||
class Paginator {
|
||||
@@ -10,26 +10,37 @@ public:
|
||||
computePages();
|
||||
};
|
||||
~Paginator() = default;
|
||||
// Getters
|
||||
int getPageSize() const { return pageSize; }
|
||||
int getLines() const
|
||||
{
|
||||
auto [start, end] = getOffset();
|
||||
return std::min(pageSize, end - start + 1);
|
||||
}
|
||||
int getPage() const { return page; }
|
||||
int getTotal() const { return total; }
|
||||
void setTotal(int total) { this->total = total; computePages(); }
|
||||
int getPages() const { return numPages; }
|
||||
std::pair<int, int> getOffset() const
|
||||
{
|
||||
return { (page - 1) * pageSize, std::min(total - 1, page * pageSize - 1) };
|
||||
}
|
||||
int getPages() const { return numPages; }
|
||||
int getPage() const { return page; }
|
||||
// Setters
|
||||
void setTotal(int total) { this->total = total; computePages(); }
|
||||
void setPageSize(int page) { this->pageSize = page; computePages(); }
|
||||
bool setPage(int page) { return valid(page) ? this->page = page, true : false; }
|
||||
// Utils
|
||||
bool valid(int page) const { return page > 0 && page <= numPages; }
|
||||
bool hasPrev(int page) const { return page > 1; }
|
||||
bool hasNext(int page) const { return page < getPages(); }
|
||||
bool setPage(int page) { return valid(page) ? this->page = page, true : false; }
|
||||
bool addPage() { return page < numPages ? ++page, true : false; }
|
||||
bool subPage() { return page > 1 ? --page, true : false; }
|
||||
std::string to_string() const
|
||||
{
|
||||
auto offset = getOffset();
|
||||
return "Paginator: { pageSize: " + std::to_string(pageSize) + ", total: " + std::to_string(total)
|
||||
+ ", page: " + std::to_string(page) + ", numPages: " + std::to_string(numPages)
|
||||
+ " Offset [" + std::to_string(offset.first) + ", " + std::to_string(offset.second) + "]}";
|
||||
}
|
||||
private:
|
||||
void computePages()
|
||||
{
|
||||
@@ -43,3 +54,4 @@ private:
|
||||
int page;
|
||||
int numPages;
|
||||
};
|
||||
#endif
|
@@ -3,8 +3,8 @@
|
||||
#include "ResultsManager.h"
|
||||
|
||||
namespace platform {
|
||||
ResultsManager::ResultsManager(const std::string& model, const std::string& score, bool complete, bool partial) :
|
||||
path(Paths::results()), model(model), scoreName(score), complete(complete), partial(partial), maxModel(0), maxTitle(0)
|
||||
ResultsManager::ResultsManager(const std::string& model, const std::string& score, const std::string& platform, bool complete, bool partial) :
|
||||
path(Paths::results()), model(model), scoreName(score), platform(platform), complete(complete), partial(partial), maxModel(0), maxTitle(0)
|
||||
{
|
||||
}
|
||||
void ResultsManager::load()
|
||||
@@ -17,7 +17,11 @@ namespace platform {
|
||||
auto result = Result();
|
||||
result.load(path, filename);
|
||||
bool addResult = true;
|
||||
if (model != "any" && result.getModel() != model || scoreName != "any" && scoreName != result.getScoreName() || complete && !result.isComplete() || partial && result.isComplete())
|
||||
if (platform != "any" && result.getPlatform() != platform
|
||||
|| model != "any" && result.getModel() != model
|
||||
|| scoreName != "any" && scoreName != result.getScoreName()
|
||||
|| complete && !result.isComplete()
|
||||
|| partial && result.isComplete())
|
||||
addResult = false;
|
||||
if (addResult) {
|
||||
files.push_back(result);
|
||||
@@ -46,47 +50,79 @@ namespace platform {
|
||||
{
|
||||
return files.size();
|
||||
}
|
||||
void ResultsManager::sortDate()
|
||||
void ResultsManager::sortDate(SortType type)
|
||||
{
|
||||
if (empty())
|
||||
return;
|
||||
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
|
||||
sort(files.begin(), files.end(), [type](const Result& a, const Result& b) {
|
||||
if (a.getDate() == b.getDate()) {
|
||||
if (type == SortType::ASC)
|
||||
return a.getModel() < b.getModel();
|
||||
return a.getModel() > b.getModel();
|
||||
}
|
||||
if (type == SortType::ASC)
|
||||
return a.getDate() < b.getDate();
|
||||
return a.getDate() > b.getDate();
|
||||
});
|
||||
}
|
||||
void ResultsManager::sortModel()
|
||||
void ResultsManager::sortModel(SortType type)
|
||||
{
|
||||
if (empty())
|
||||
return;
|
||||
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
|
||||
sort(files.begin(), files.end(), [type](const Result& a, const Result& b) {
|
||||
if (a.getModel() == b.getModel()) {
|
||||
if (type == SortType::ASC)
|
||||
return a.getDate() < b.getDate();
|
||||
return a.getDate() > b.getDate();
|
||||
}
|
||||
if (type == SortType::ASC)
|
||||
return a.getModel() < b.getModel();
|
||||
return a.getModel() > b.getModel();
|
||||
});
|
||||
}
|
||||
void ResultsManager::sortDuration()
|
||||
void ResultsManager::sortDuration(SortType type)
|
||||
{
|
||||
if (empty())
|
||||
return;
|
||||
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
|
||||
sort(files.begin(), files.end(), [type](const Result& a, const Result& b) {
|
||||
if (type == SortType::ASC)
|
||||
return a.getDuration() < b.getDuration();
|
||||
return a.getDuration() > b.getDuration();
|
||||
});
|
||||
}
|
||||
void ResultsManager::sortScore()
|
||||
void ResultsManager::sortScore(SortType type)
|
||||
{
|
||||
if (files.empty())
|
||||
if (empty())
|
||||
return;
|
||||
sort(files.begin(), files.end(), [](const Result& a, const Result& b) {
|
||||
sort(files.begin(), files.end(), [type](const Result& a, const Result& b) {
|
||||
if (a.getScore() == b.getScore()) {
|
||||
if (type == SortType::ASC)
|
||||
return a.getDate() < b.getDate();
|
||||
return a.getDate() > b.getDate();
|
||||
}
|
||||
if (type == SortType::ASC)
|
||||
return a.getScore() < b.getScore();
|
||||
return a.getScore() > b.getScore();
|
||||
});
|
||||
}
|
||||
|
||||
// Sorts the loaded results by the requested field, in the requested
// direction, by delegating to the matching sortXxx(type) member.
// A field value outside the SortField enumerators leaves the list untouched.
void ResultsManager::sortResults(SortField field, SortType type)
{
    if (field == SortField::DATE) {
        sortDate(type);
        return;
    }
    if (field == SortField::MODEL) {
        sortModel(type);
        return;
    }
    if (field == SortField::SCORE) {
        sortScore(type);
        return;
    }
    if (field == SortField::DURATION) {
        sortDuration(type);
    }
}
|
||||
bool ResultsManager::empty() const
|
||||
{
|
||||
return files.empty();
|
||||
|
@@ -1,19 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef RESULTSMANAGER_H
|
||||
#define RESULTSMANAGER_H
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "results/Result.h"
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
// Direction in which the list of results is ordered.
enum class SortType {
    ASC = 0,  // ascending
    DESC = 1  // descending
};
|
||||
// Field by which the list of results is ordered.
// The numeric values are significant: callers cast a SortField to int to
// index a table of field labels, so the numbering must not change.
enum class SortField {
    DATE = 0,
    MODEL = 1,
    SCORE = 2,
    DURATION = 3
};
|
||||
class ResultsManager {
|
||||
public:
|
||||
ResultsManager(const std::string& model, const std::string& score, bool complete, bool partial);
|
||||
ResultsManager(const std::string& model, const std::string& score, const std::string& platform, bool complete, bool partial);
|
||||
void load(); // Loads the list of results
|
||||
void sortDate();
|
||||
void sortScore();
|
||||
void sortModel();
|
||||
void sortDuration();
|
||||
void sortResults(SortField field, SortType type); // Sorts the list of results
|
||||
void sortDate(SortType type);
|
||||
void sortScore(SortType type);
|
||||
void sortModel(SortType type);
|
||||
void sortDuration(SortType type);
|
||||
int maxModelSize() const { return maxModel; };
|
||||
int maxTitleSize() const { return maxTitle; };
|
||||
void hideResult(int index, const std::string& pathHidden);
|
||||
@@ -27,6 +38,7 @@ namespace platform {
|
||||
std::string path;
|
||||
std::string model;
|
||||
std::string scoreName;
|
||||
std::string platform;
|
||||
bool complete;
|
||||
bool partial;
|
||||
int maxModel;
|
||||
@@ -34,3 +46,4 @@ namespace platform {
|
||||
std::vector<Result> files;
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -1,3 +1,4 @@
|
||||
#include <algorithm>
|
||||
#include "common/Colors.h"
|
||||
#include "common/Datasets.h"
|
||||
#include "common/Paths.h"
|
||||
@@ -12,7 +13,7 @@ namespace platform {
|
||||
auto part = temp.substr(0, DatasetsConsole::BALANCE_LENGTH);
|
||||
line += part + "\n";
|
||||
body.push_back(line);
|
||||
line = string(name_len + 22, ' ');
|
||||
line = string(name_len + 28, ' ');
|
||||
temp = temp.substr(DatasetsConsole::BALANCE_LENGTH);
|
||||
}
|
||||
line += temp + "\n";
|
||||
@@ -26,8 +27,8 @@ namespace platform {
|
||||
std::stringstream sheader;
|
||||
auto datasets_names = datasets.getNames();
|
||||
int maxName = std::max(size_t(7), (*max_element(datasets_names.begin(), datasets_names.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size());
|
||||
std::vector<std::string> header_labels = { " #", "Dataset", "Sampl.", "Feat.", "Cls", "Balance" };
|
||||
std::vector<int> header_lengths = { 3, maxName, 6, 5, 3, DatasetsConsole::BALANCE_LENGTH };
|
||||
std::vector<std::string> header_labels = { " #", "Dataset", "Sampl.", "Feat.", "#Num.", "Cls", "Balance" };
|
||||
std::vector<int> header_lengths = { 3, maxName, 6, 5, 5, 3, DatasetsConsole::BALANCE_LENGTH };
|
||||
sheader << Colors::GREEN();
|
||||
for (int i = 0; i < header_labels.size(); i++) {
|
||||
sheader << setw(header_lengths[i]) << left << header_labels[i] << " ";
|
||||
@@ -41,30 +42,37 @@ namespace platform {
|
||||
sline += "\n";
|
||||
header.push_back(sline);
|
||||
int num = 0;
|
||||
for (const auto& dataset : datasets.getNames()) {
|
||||
for (const auto& dataset_name : datasets.getNames()) {
|
||||
std::stringstream line;
|
||||
line.imbue(loc);
|
||||
auto color = num % 2 ? Colors::CYAN() : Colors::BLUE();
|
||||
line << color << setw(3) << right << num++ << " ";
|
||||
line << setw(maxName) << left << dataset << " ";
|
||||
datasets.loadDataset(dataset);
|
||||
auto nSamples = datasets.getNSamples(dataset);
|
||||
line << setw(maxName) << left << dataset_name << " ";
|
||||
auto& dataset = datasets.getDataset(dataset_name);
|
||||
dataset.load();
|
||||
auto nSamples = dataset.getNSamples();
|
||||
line << setw(6) << right << nSamples << " ";
|
||||
line << setw(5) << right << datasets.getFeatures(dataset).size() << " ";
|
||||
line << setw(3) << right << datasets.getNClasses(dataset) << " ";
|
||||
auto nFeatures = dataset.getFeatures().size();
|
||||
line << setw(5) << right << nFeatures << " ";
|
||||
auto numericFeatures = dataset.getNumericFeatures();
|
||||
auto num = std::count(numericFeatures.begin(), numericFeatures.end(), true);
|
||||
line << setw(5) << right << num << " ";
|
||||
auto nClasses = dataset.getNClasses();
|
||||
line << setw(3) << right << nClasses << " ";
|
||||
std::string sep = "";
|
||||
oss.str("");
|
||||
for (auto number : datasets.getClassesCounts(dataset)) {
|
||||
for (auto number : dataset.getClassesCounts()) {
|
||||
oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")";
|
||||
sep = " / ";
|
||||
}
|
||||
split_lines(maxName, line.str(), oss.str());
|
||||
// Store data for Excel report
|
||||
data[dataset] = json::object();
|
||||
data[dataset]["samples"] = nSamples;
|
||||
data[dataset]["features"] = datasets.getFeatures(dataset).size();
|
||||
data[dataset]["classes"] = datasets.getNClasses(dataset);
|
||||
data[dataset]["balance"] = oss.str();
|
||||
data[dataset_name] = json::object();
|
||||
data[dataset_name]["samples"] = nSamples;
|
||||
data[dataset_name]["features"] = nFeatures;
|
||||
data[dataset_name]["numericFeatures"] = num;
|
||||
data[dataset_name]["classes"] = nClasses;
|
||||
data[dataset_name]["balance"] = oss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,13 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef DATASETSCONSOLE_H
|
||||
#define DATASETSCONSOLE_H
|
||||
#include <locale>
|
||||
#include <sstream>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "ReportsPaged.h"
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
class DatasetsConsole : public ReportsPaged {
|
||||
public:
|
||||
@@ -19,4 +18,4 @@ namespace platform {
|
||||
void split_lines(int name_len, std::string line, const std::string& balance);
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -1,5 +1,4 @@
|
||||
#include "DatasetsExcel.h"
|
||||
|
||||
namespace platform {
|
||||
DatasetsExcel::DatasetsExcel()
|
||||
{
|
||||
@@ -18,11 +17,11 @@ namespace platform {
|
||||
int balanceSize = 75; // Min size of the column
|
||||
worksheet = workbook_add_worksheet(workbook, "Datasets");
|
||||
// Header
|
||||
worksheet_merge_range(worksheet, 0, 0, 0, 5, "Datasets", styles["headerFirst"]);
|
||||
worksheet_merge_range(worksheet, 0, 0, 0, 6, "Datasets", styles["headerFirst"]);
|
||||
// Body header
|
||||
row = 2;
|
||||
int col = 0;
|
||||
for (const auto& name : { "Nº", "Dataset", "Samples", "Features", "Classes", "Balance" }) {
|
||||
for (const auto& name : { "#", "Dataset", "Samples", "Features", "#Numer.", "Classes", "Balance" }) {
|
||||
writeString(row, col++, name, "bodyHeader");
|
||||
}
|
||||
// Body
|
||||
@@ -35,12 +34,13 @@ namespace platform {
|
||||
writeString(row, 1, key.c_str(), "text");
|
||||
writeInt(row, 2, value["samples"], "ints");
|
||||
writeInt(row, 3, value["features"], "ints");
|
||||
writeInt(row, 4, value["classes"], "ints");
|
||||
writeString(row, 5, value["balance"].get<std::string>().c_str(), "text");
|
||||
writeInt(row, 4, value["numericFeatures"], "ints");
|
||||
writeInt(row, 5, value["classes"], "ints");
|
||||
writeString(row, 6, value["balance"].get<std::string>().c_str(), "text");
|
||||
}
|
||||
// Format columns
|
||||
worksheet_freeze_panes(worksheet, 3, 2);
|
||||
std::vector<int> columns_sizes = { 5, datasetNameSize, 10, 10, 10, balanceSize };
|
||||
std::vector<int> columns_sizes = { 5, datasetNameSize, 10, 10, 10, 10, balanceSize };
|
||||
for (int i = 0; i < columns_sizes.size(); ++i) {
|
||||
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
|
||||
}
|
||||
|
@@ -1,12 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef DATASETSEXCEL_H
|
||||
#define DATASETSEXCEL_H
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "reports/ExcelFile.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace platform {
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
class DatasetsExcel : public ExcelFile {
|
||||
public:
|
||||
DatasetsExcel();
|
||||
@@ -14,3 +13,4 @@ namespace platform {
|
||||
void report(json& data);
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -22,6 +22,27 @@ namespace platform {
|
||||
colorOdd = 0xDCE6F1;
|
||||
colorEven = 0xFDE9D9;
|
||||
}
|
||||
// Adds a worksheet to the workbook under a name that is not yet in use.
//
// The requested name is tried first; if a sheet with that name already
// exists, name + "2", name + "3", ... up to name + "100" are tried in turn.
//
// name : preferred worksheet name.
// Returns the newly added worksheet (never null).
// Throws std::invalid_argument when every candidate name is taken or when
// the library refuses to add the sheet.
lxw_worksheet* ExcelFile::createWorksheet(const std::string& name)
{
    const int maxSuffix = 100;
    std::string candidate = name;
    for (int num = 1; num <= maxSuffix; ++num) {
        if (num > 1) {
            candidate = name + std::to_string(num);
        }
        if (workbook_get_worksheet_by_name(workbook, candidate.c_str()) != nullptr) {
            continue; // Name already taken, try the next suffix
        }
        lxw_worksheet* sheet = workbook_add_worksheet(workbook, candidate.c_str());
        if (sheet == nullptr) {
            // NOTE(review): a null result is taken to mean the library rejected
            // the sheet (e.g. an invalid name) -- confirm against libxlsxwriter.
            // Fail here rather than hand a null worksheet to the caller.
            throw std::invalid_argument("Couldn't create sheet " + candidate);
        }
        return sheet;
    }
    throw std::invalid_argument("Couldn't create sheet " + candidate);
}
|
||||
|
||||
lxw_workbook* ExcelFile::getWorkbook()
|
||||
{
|
||||
@@ -75,7 +96,7 @@ namespace platform {
|
||||
}
|
||||
void ExcelFile::boldGreen()
|
||||
{
|
||||
boldFontColor(0x00FF00);
|
||||
boldFontColor(0x009900);
|
||||
}
|
||||
void ExcelFile::boldRed()
|
||||
{
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef EXCELFILE_H
|
||||
#define EXCELFILE_H
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include <map>
|
||||
@@ -24,6 +24,7 @@ namespace platform {
|
||||
void boldBlue(); //set blue color for the bold styles
|
||||
void boldGreen(); //set green color for the bold styles
|
||||
void createStyle(const std::string& name, lxw_format* style, bool odd);
|
||||
lxw_worksheet* createWorksheet(const std::string& name);
|
||||
void addColor(lxw_format* style, bool odd);
|
||||
lxw_format* efectiveStyle(const std::string& name);
|
||||
lxw_workbook* workbook;
|
||||
@@ -39,3 +40,4 @@ namespace platform {
|
||||
void setDefault();
|
||||
};
|
||||
}
|
||||
#endif
|
@@ -15,6 +15,10 @@ namespace platform {
|
||||
{Symbols::cross, "Less than or equal to ZeroR"},
|
||||
{Symbols::upward_arrow, oss.str()}
|
||||
};
|
||||
auto env = DotEnv();
|
||||
nodes_label = env.get("nodes");
|
||||
leaves_label = env.get("leaves");
|
||||
depth_label = env.get("depth");
|
||||
}
|
||||
std::string ReportBase::fromVector(const std::string& key)
|
||||
{
|
||||
@@ -57,12 +61,13 @@ namespace platform {
|
||||
}
|
||||
} else {
|
||||
if (data["score_name"].get<std::string>() == "accuracy") {
|
||||
auto dt = Datasets(false, Paths::datasets());
|
||||
dt.loadDataset(dataset);
|
||||
auto numClasses = dt.getNClasses(dataset);
|
||||
auto datasets = Datasets(false, Paths::datasets());
|
||||
auto& dt = datasets.getDataset(dataset);
|
||||
dt.load();
|
||||
auto numClasses = dt.getNClasses();
|
||||
if (numClasses == 2) {
|
||||
std::vector<int> distribution = dt.getClassesCounts(dataset);
|
||||
double nSamples = dt.getNSamples(dataset);
|
||||
std::vector<int> distribution = dt.getClassesCounts();
|
||||
double nSamples = dt.getNSamples();
|
||||
std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
|
||||
double mark = *maxValue / nSamples * (1 + margin);
|
||||
if (mark > 1) {
|
||||
|
@@ -1,14 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef REPORTBASE_H
|
||||
#define REPORTBASE_H
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "common/Paths.h"
|
||||
#include "common/Symbols.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
namespace platform {
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
class ReportBase {
|
||||
public:
|
||||
explicit ReportBase(json data_, bool compare);
|
||||
@@ -27,9 +26,13 @@ namespace platform {
|
||||
double margin;
|
||||
std::map<std::string, std::string> meaning;
|
||||
bool compare;
|
||||
std::string nodes_label;
|
||||
std::string leaves_label;
|
||||
std::string depth_label;
|
||||
private:
|
||||
double bestResult(const std::string& dataset, const std::string& model);
|
||||
json bestResults;
|
||||
bool existBestFile = true;
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -2,7 +2,9 @@
|
||||
#include <locale>
|
||||
#include "best/BestScore.h"
|
||||
#include "common/CLocale.h"
|
||||
#include "common/Timer.h"
|
||||
#include "ReportConsole.h"
|
||||
#include "main/Scores.h"
|
||||
|
||||
namespace platform {
|
||||
std::string ReportConsole::headerLine(const std::string& text, int utf = 0)
|
||||
@@ -22,12 +24,30 @@ namespace platform {
|
||||
+ " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>()
|
||||
);
|
||||
sheader << headerLine(data["title"].get<std::string>());
|
||||
std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "ORIGINAL";
|
||||
std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
|
||||
std::string smooth = data.find("smooth_strategy") != data.end() ? data["smooth_strategy"].get<std::string>() : "ORIGINAL";
|
||||
std::string stratified;
|
||||
try {
|
||||
stratified = data["stratified"].get<bool>() ? "True" : "False";
|
||||
}
|
||||
catch (nlohmann::json::type_error) {
|
||||
stratified = data["stratified"].get<int>() == 1 ? "True" : "False";
|
||||
}
|
||||
std::string discretized;
|
||||
try {
|
||||
discretized = data["discretized"].get<bool>() ? "True" : "False";
|
||||
}
|
||||
catch (nlohmann::json::type_error) {
|
||||
discretized = data["discretized"].get<int>() == 1 ? "True" : "False";
|
||||
}
|
||||
sheader << headerLine(
|
||||
"Random seeds: " + fromVector("seeds") + " Discretized: " + (data["discretized"].get<bool>() ? "True" : "False")
|
||||
+ " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False")
|
||||
"Random seeds: " + fromVector("seeds") + " Discretized: " + discretized + " " + algorithm
|
||||
+ " Stratified: " + stratified + " Smooth Strategy: " + smooth
|
||||
);
|
||||
oss << "Execution took " << std::setprecision(2) << std::fixed << data["duration"].get<float>()
|
||||
<< " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<std::string>();
|
||||
Timer timer;
|
||||
oss << "Execution took " << timer.translate2String(data["duration"].get<float>())
|
||||
<< " on " << data["platform"].get<std::string>() << " Language: " << data["language"].get<std::string>();
|
||||
sheader << headerLine(oss.str());
|
||||
sheader << headerLine("Score is " + data["score_name"].get<std::string>());
|
||||
sheader << std::string(MAXL, '*') << std::endl;
|
||||
@@ -35,7 +55,12 @@ namespace platform {
|
||||
}
|
||||
// Writes the current contents of the sheader buffer to stdout and then calls do_header().
// NOTE(review): the buffer is printed before do_header() runs; confirm this order is intended.
void ReportConsole::header()
|
||||
{
|
||||
std::cout << sheader.str();
|
||||
do_header();
|
||||
}
|
||||
// Calls do_body() and then writes the contents of the sbody buffer to stdout.
void ReportConsole::body()
|
||||
{
|
||||
do_body();
|
||||
std::cout << sbody.str();
|
||||
}
|
||||
std::string ReportConsole::fileReport()
|
||||
{
|
||||
@@ -48,16 +73,26 @@ namespace platform {
|
||||
void ReportConsole::do_body()
|
||||
{
|
||||
sbody.str("");
|
||||
vbody.clear();
|
||||
auto tmp = ConfigLocale();
|
||||
int maxHyper = 15;
|
||||
int maxDataset = 7;
|
||||
for (const auto& r : data["results"]) {
|
||||
maxHyper = std::max(maxHyper, (int)r["hyperparameters"].dump().size());
|
||||
maxDataset = std::max(maxDataset, (int)r["dataset"].get<std::string>().size());
|
||||
|
||||
}
|
||||
sbody << Colors::GREEN() << " # " << std::setw(maxDataset) << std::left << "Dataset" << " Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << std::endl;
|
||||
sbody << "=== " << std::string(maxDataset, '=') << " ====== ===== === ========= ========= ========= =============== =================== " << std::string(maxHyper, '=') << std::endl;
|
||||
std::vector<std::string> header_labels = { " #", "Dataset", "Sampl.", "Feat.", "Cls", nodes_label, leaves_label, depth_label, "Score", "Time", "Hyperparameters" };
|
||||
sheader << Colors::GREEN();
|
||||
std::vector<int> header_lengths = { 3, maxDataset, 6, 5, 3, 9, 9, 9, 15, 20, maxHyper };
|
||||
for (int i = 0; i < header_labels.size(); i++) {
|
||||
sheader << std::setw(header_lengths[i]) << std::left << header_labels[i] << " ";
|
||||
}
|
||||
sheader << std::endl;
|
||||
for (int i = 0; i < header_labels.size(); i++) {
|
||||
sheader << std::string(header_lengths[i], '=') << " ";
|
||||
}
|
||||
sheader << std::endl;
|
||||
std::cout << sheader.str();
|
||||
json lastResult;
|
||||
double totalScore = 0.0;
|
||||
int index = 0;
|
||||
@@ -67,51 +102,84 @@ namespace platform {
|
||||
continue;
|
||||
}
|
||||
auto color = (index % 2) ? Colors::CYAN() : Colors::BLUE();
|
||||
sbody << color;
|
||||
std::string separator{ " " };
|
||||
if (r.find("notes") != r.end()) {
|
||||
separator = r["notes"].size() > 0 ? Colors::YELLOW() + Symbols::notebook + color : " ";
|
||||
}
|
||||
sbody << std::setw(3) << std::right << index++ << separator;
|
||||
sbody << std::setw(maxDataset) << std::left << r["dataset"].get<std::string>() << " ";
|
||||
sbody << std::setw(6) << std::right << r["samples"].get<int>() << " ";
|
||||
sbody << std::setw(5) << std::right << r["features"].get<int>() << " ";
|
||||
sbody << std::setw(3) << std::right << r["classes"].get<int>() << " ";
|
||||
sbody << std::setw(9) << std::setprecision(2) << std::fixed << r["nodes"].get<float>() << " ";
|
||||
sbody << std::setw(9) << std::setprecision(2) << std::fixed << r["leaves"].get<float>() << " ";
|
||||
sbody << std::setw(9) << std::setprecision(2) << std::fixed << r["depth"].get<float>() << " ";
|
||||
sbody << std::setw(8) << std::right << std::setprecision(6) << std::fixed << r["score"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["score_std"].get<double>();
|
||||
std::stringstream line;
|
||||
line << color;
|
||||
line << std::setw(3) << std::right << index++ << " ";
|
||||
line << std::setw(maxDataset) << std::left << r["dataset"].get<std::string>() << " ";
|
||||
line << std::setw(6) << std::right << r["samples"].get<int>() << " ";
|
||||
line << std::setw(5) << std::right << r["features"].get<int>() << " ";
|
||||
line << std::setw(3) << std::right << r["classes"].get<int>() << " ";
|
||||
line << std::setw(9) << std::setprecision(2) << std::fixed << r["nodes"].get<float>() << " ";
|
||||
line << std::setw(9) << std::setprecision(2) << std::fixed << r["leaves"].get<float>() << " ";
|
||||
line << std::setw(9) << std::setprecision(2) << std::fixed << r["depth"].get<float>() << " ";
|
||||
line << std::setw(8) << std::right << std::setprecision(6) << std::fixed << r["score"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["score_std"].get<double>();
|
||||
const std::string status = compareResult(r["dataset"].get<std::string>(), r["score"].get<double>());
|
||||
sbody << status;
|
||||
sbody << std::setw(12) << std::right << std::setprecision(6) << std::fixed << r["time"].get<double>() << "±" << std::setw(6) << std::setprecision(4) << std::fixed << r["time_std"].get<double>() << " ";
|
||||
sbody << r["hyperparameters"].dump();
|
||||
sbody << std::endl;
|
||||
sbody << std::flush;
|
||||
line << status;
|
||||
line << std::setw(12) << std::right << std::setprecision(6) << std::fixed << r["time"].get<double>() << "±" << std::setw(7) << std::setprecision(4) << std::fixed << r["time_std"].get<double>() << " ";
|
||||
line << r["hyperparameters"].dump();
|
||||
line << std::endl;
|
||||
vbody.push_back(line.str());
|
||||
sbody << line.str();
|
||||
lastResult = r;
|
||||
totalScore += r["score"].get<double>();
|
||||
}
|
||||
if (data["results"].size() == 1 || selectedIndex != -1) {
|
||||
sbody << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
|
||||
std::stringstream line;
|
||||
line << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
|
||||
vbody.push_back(line.str());
|
||||
sbody << line.str();
|
||||
if (lastResult.find("notes") != lastResult.end()) {
|
||||
if (lastResult["notes"].size() > 0) {
|
||||
sbody << headerLine("Notes: ");
|
||||
vbody.push_back(headerLine("Notes: "));
|
||||
for (const auto& note : lastResult["notes"]) {
|
||||
sbody << headerLine(note.get<std::string>());
|
||||
line.str("");
|
||||
line << headerLine(note.get<std::string>());
|
||||
vbody.push_back(line.str());
|
||||
sbody << line.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
sbody << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
|
||||
sbody << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
|
||||
sbody << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
|
||||
sbody << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
|
||||
line.str("");
|
||||
if (lastResult.find("score_train") == lastResult.end()) {
|
||||
line << headerLine("Train score: -");
|
||||
} else {
|
||||
line << headerLine("Train score: " + std::to_string(lastResult["score_train"].get<double>()));
|
||||
}
|
||||
vbody.push_back(line.str()); sbody << line.str();
|
||||
line.str(""); line << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
|
||||
vbody.push_back(line.str()); sbody << line.str();
|
||||
line.str(""); line << headerLine("Test score: " + std::to_string(lastResult["score"].get<double>()));
|
||||
vbody.push_back(line.str()); sbody << line.str();
|
||||
line.str(""); line << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
|
||||
vbody.push_back(line.str()); sbody << line.str();
|
||||
line.str("");
|
||||
if (lastResult.find("train_time") == lastResult.end()) {
|
||||
line << headerLine("Train time: -");
|
||||
} else {
|
||||
line << headerLine("Train time: " + std::to_string(lastResult["train_time"].get<double>()));
|
||||
}
|
||||
vbody.push_back(line.str()); sbody << line.str();
|
||||
line.str(""); line << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
|
||||
vbody.push_back(line.str()); sbody << line.str();
|
||||
line.str("");
|
||||
if (lastResult.find("test_time") == lastResult.end()) {
|
||||
line << headerLine("Test time: -");
|
||||
} else {
|
||||
line << headerLine("Test time: " + std::to_string(lastResult["test_time"].get<double>()));
|
||||
}
|
||||
vbody.push_back(line.str()); sbody << line.str();
|
||||
line.str(""); line << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
|
||||
vbody.push_back(line.str()); sbody << line.str();
|
||||
|
||||
} else {
|
||||
footer(totalScore);
|
||||
}
|
||||
sbody << std::string(MAXL, '*') << Colors::RESET() << std::endl;
|
||||
vbody.push_back(std::string(MAXL, '*') + Colors::RESET() + "\n");
|
||||
if (data["results"].size() == 1 || selectedIndex != -1) {
|
||||
vbody.push_back(buildClassificationReport(lastResult, Colors::BLUE()));
|
||||
}
|
||||
void ReportConsole::body()
|
||||
{
|
||||
std::cout << sbody.str();
|
||||
}
|
||||
void ReportConsole::showSummary()
|
||||
{
|
||||
@@ -121,12 +189,15 @@ namespace platform {
|
||||
oss << std::setw(3) << std::right << item.second << " ";
|
||||
oss << std::left << meaning.at(item.first);
|
||||
sbody << headerLine(oss.str(), 2);
|
||||
vbody.push_back(headerLine(oss.str(), 2));
|
||||
}
|
||||
}
|
||||
|
||||
void ReportConsole::footer(double totalScore)
|
||||
{
|
||||
sbody << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
|
||||
std::stringstream linea;
|
||||
linea << Colors::MAGENTA() << std::string(MAXL, '*') << std::endl;
|
||||
vbody.push_back(linea.str()); sbody << linea.str();
|
||||
showSummary();
|
||||
auto score = data["score_name"].get<std::string>();
|
||||
auto best = BestScore::getScore(score);
|
||||
@@ -134,9 +205,71 @@ namespace platform {
|
||||
std::stringstream oss;
|
||||
oss << score << " compared to " << best.first << " .: " << totalScore / best.second;
|
||||
sbody << headerLine(oss.str());
|
||||
vbody.push_back(headerLine(oss.str()));
|
||||
}
|
||||
if (!getExistBestFile() && compare) {
|
||||
std::cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
|
||||
}
|
||||
}
|
||||
/// Combine every entry stored under `key` in `result` into one Scores object.
/// The first entry seeds the accumulator; each following entry is merged into it
/// with Scores::aggregate().
/// @param result JSON object that holds an array under `key`.
/// @param key    name of the array to aggregate.
/// @return the accumulated Scores.
Scores ReportConsole::aggregateScore(json& result, std::string key)
{
    auto& entries = result[key];
    auto total = Scores(entries[0]);
    for (int pos = 1; pos < entries.size(); ++pos) {
        auto partial = Scores(entries[pos]);
        total.aggregate(partial);
    }
    return total;
}
|
||||
std::string ReportConsole::buildClassificationReport(json& result, std::string color)
|
||||
{
|
||||
std::stringstream oss;
|
||||
if (result.find("confusion_matrices") == result.end())
|
||||
return "";
|
||||
bool second_header = false;
|
||||
int lines_header = 0;
|
||||
std::string color_line;
|
||||
std::string suffix = "";
|
||||
auto scores = Scores::create_aggregate(result, "confusion_matrices");
|
||||
auto output_test = scores.classification_report(color, "Test");
|
||||
int maxLine = (*std::max_element(output_test.begin(), output_test.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
|
||||
bool train_data = result.find("confusion_matrices_train") != result.end();
|
||||
std::vector<std::string> output_train;
|
||||
if (train_data) {
|
||||
auto scores_train = Scores::create_aggregate(result, "confusion_matrices_train");
|
||||
output_train = scores_train.classification_report(color, "Train");
|
||||
}
|
||||
oss << Colors::BLUE();
|
||||
for (int i = 0; i < output_test.size(); i++) {
|
||||
if (i < 2 || second_header) {
|
||||
color_line = Colors::GREEN();
|
||||
} else {
|
||||
color_line = Colors::BLUE();
|
||||
if (lines_header > 1)
|
||||
suffix = std::string(14, ' '); // compensate for the color
|
||||
}
|
||||
if (train_data) {
|
||||
oss << color_line << std::left << std::setw(maxLine) << output_train[i]
|
||||
<< suffix << Colors::BLUE() << " | " << color_line << std::left << std::setw(maxLine)
|
||||
<< output_test[i] << std::endl;
|
||||
} else {
|
||||
oss << color_line << output_test[i] << std::endl;
|
||||
}
|
||||
if (output_test[i] == "" || (second_header && lines_header < 2)) {
|
||||
lines_header++;
|
||||
second_header = true;
|
||||
} else {
|
||||
second_header = false;
|
||||
}
|
||||
}
|
||||
oss << Colors::RESET();
|
||||
return oss.str();
|
||||
}
|
||||
std::string ReportConsole::showClassificationReport(std::string color)
|
||||
{
|
||||
std::stringstream oss;
|
||||
for (auto& result : data["results"]) {
|
||||
oss << buildClassificationReport(result, color);
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
}
|
@@ -1,9 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef REPORT_CONSOLE_H
|
||||
#define REPORT_CONSOLE_H
|
||||
#include <string>
|
||||
#include "common/Colors.h"
|
||||
#include <sstream>
|
||||
#include "ReportBase.h"
|
||||
#include "main/Scores.h"
|
||||
|
||||
namespace platform {
|
||||
const int MAXL = 133;
|
||||
@@ -12,16 +13,23 @@ namespace platform {
|
||||
explicit ReportConsole(json data_, bool compare = false, int index = -1) : ReportBase(data_, compare), selectedIndex(index) {};
|
||||
virtual ~ReportConsole() = default;
|
||||
std::string fileReport();
|
||||
std::string getHeader() { do_header(); do_body(); return sheader.str(); }
|
||||
std::vector<std::string>& getBody() { return vbody; }
|
||||
std::string showClassificationReport(std::string color);
|
||||
private:
|
||||
int selectedIndex;
|
||||
std::string headerLine(const std::string& text, int utf);
|
||||
std::string buildClassificationReport(json& result, std::string color);
|
||||
void header() override;
|
||||
void do_header();
|
||||
void body() override;
|
||||
void do_body();
|
||||
void footer(double totalScore);
|
||||
void showSummary() override;
|
||||
Scores aggregateScore(json& result, std::string key);
|
||||
std::stringstream sheader;
|
||||
std::stringstream sbody;
|
||||
std::vector<std::string> vbody;
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -2,10 +2,7 @@
|
||||
#include <locale>
|
||||
#include "best/BestScore.h"
|
||||
#include "ReportExcel.h"
|
||||
|
||||
|
||||
namespace platform {
|
||||
|
||||
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet) : ReportBase(data_, compare), ExcelFile(workbook, worksheet)
|
||||
{
|
||||
createFile();
|
||||
@@ -20,26 +17,7 @@ namespace platform {
|
||||
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
|
||||
}
|
||||
}
|
||||
void ReportExcel::createWorksheet()
|
||||
{
|
||||
const std::string name = data["model"].get<std::string>();
|
||||
std::string suffix = "";
|
||||
std::string efectiveName;
|
||||
int num = 1;
|
||||
// Create a sheet with the name of the model
|
||||
while (true) {
|
||||
efectiveName = name + suffix;
|
||||
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
|
||||
suffix = std::to_string(++num);
|
||||
} else {
|
||||
worksheet = workbook_add_worksheet(workbook, efectiveName.c_str());
|
||||
break;
|
||||
}
|
||||
if (num > 100) {
|
||||
throw std::invalid_argument("Couldn't create sheet " + efectiveName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ReportExcel::createFile()
|
||||
{
|
||||
@@ -47,7 +25,8 @@ namespace platform {
|
||||
workbook = workbook_new((Paths::excel() + Paths::excelResults()).c_str());
|
||||
}
|
||||
if (worksheet == NULL) {
|
||||
createWorksheet();
|
||||
const std::string name = data["model"].get<std::string>();
|
||||
worksheet = createWorksheet(name);
|
||||
}
|
||||
setProperties(data["title"].get<std::string>());
|
||||
formatColumns();
|
||||
@@ -70,7 +49,10 @@ namespace platform {
|
||||
worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
|
||||
worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<std::string>().c_str(), styles["headerRest"]);
|
||||
worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<std::string>()).c_str(), styles["headerRest"]);
|
||||
worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]);
|
||||
writeString(2, 1, "Smooth", "headerRest");
|
||||
std::string smooth = data.find("smooth_strategy") != data.end() ? data["smooth_strategy"].get<std::string>() : "ORIGINAL";
|
||||
writeString(3, 1, smooth, "headerSmall");
|
||||
worksheet_merge_range(worksheet, 2, 2, 3, 3, "Execution time", styles["headerRest"]);
|
||||
oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() << " s";
|
||||
worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
|
||||
oss.str("");
|
||||
@@ -86,7 +68,9 @@ namespace platform {
|
||||
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
|
||||
oss.str("");
|
||||
oss.clear();
|
||||
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False");
|
||||
std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "mdlp";
|
||||
std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
|
||||
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False") << algorithm;
|
||||
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
|
||||
}
|
||||
void ReportExcel::header_notes(int row)
|
||||
@@ -196,6 +180,10 @@ namespace platform {
|
||||
writeDouble(row, ++col, item, style);
|
||||
}
|
||||
}
|
||||
// Classification report
|
||||
if (lastResult.find("confusion_matrices") != lastResult.end()) {
|
||||
create_classification_report(lastResult);
|
||||
}
|
||||
// Set width of columns to show those totals completely
|
||||
worksheet_set_column(worksheet, 1, 1, 12, NULL);
|
||||
for (int i = 2; i < 7; ++i) {
|
||||
@@ -206,7 +194,129 @@ namespace platform {
|
||||
footer(totalScore, row);
|
||||
}
|
||||
}
|
||||
void ReportExcel::create_classification_report(const json& result)
|
||||
{
|
||||
|
||||
auto matrix_sheet = createWorksheet("clf_report");
|
||||
lxw_worksheet* tmp = worksheet;
|
||||
worksheet = matrix_sheet;
|
||||
if (matrix_sheet == NULL) {
|
||||
throw std::invalid_argument("Couldn't create sheet classif_report");
|
||||
}
|
||||
row = 1;
|
||||
int col = 0;
|
||||
if (result.find("confusion_matrices_train") != result.end()) {
|
||||
// Train classification report
|
||||
auto score = Scores::create_aggregate(result, "confusion_matrices_train");
|
||||
auto train = score.classification_report_json("Train");
|
||||
std::tie(row, col) = write_classification_report(train, row, 0);
|
||||
int new_row = 0;
|
||||
int new_col = col + 1;
|
||||
for (int i = 0; i < result["confusion_matrices_train"].size(); ++i) {
|
||||
auto item = result["confusion_matrices_train"][i];
|
||||
auto score_item = Scores(item);
|
||||
auto title = "Train Fold " + std::to_string(i);
|
||||
std::tie(new_row, new_col) = write_classification_report(score_item.classification_report_json(title), 1, new_col);
|
||||
new_col++;
|
||||
}
|
||||
col = new_col;
|
||||
worksheet_merge_range(matrix_sheet, 0, 0, 0, col - 1, "Train Classification Report", efectiveStyle("headerRest"));
|
||||
}
|
||||
// Test classification report
|
||||
worksheet_merge_range(matrix_sheet, row, 0, row, col - 1, "Test Classification Report", efectiveStyle("headerRest"));
|
||||
auto score = Scores::create_aggregate(result, "confusion_matrices");
|
||||
auto test = score.classification_report_json("Test");
|
||||
int init_row = ++row;
|
||||
std::tie(row, col) = write_classification_report(test, init_row, 0);
|
||||
int new_row = 0;
|
||||
int new_col = col + 1;
|
||||
for (int i = 0; i < result["confusion_matrices"].size(); ++i) {
|
||||
auto item = result["confusion_matrices"][i];
|
||||
auto score_item = Scores(item);
|
||||
auto title = "Test Fold " + std::to_string(i);
|
||||
std::tie(new_row, new_col) = write_classification_report(score_item.classification_report_json(title), init_row, new_col);
|
||||
new_col++;
|
||||
}
|
||||
// Format columns (change size to fit the content)
|
||||
for (int i = 0; i < new_col; ++i) {
|
||||
// doesn't work with from col to col, so...
|
||||
worksheet_set_column(worksheet, i, i, 12, NULL);
|
||||
}
|
||||
worksheet = tmp;
|
||||
}
|
||||
std::pair<int, int> ReportExcel::write_classification_report(const json& result, int init_row, int init_col)
|
||||
{
|
||||
row = init_row;
|
||||
auto text = result["title"].get<std::string>();
|
||||
worksheet_merge_range(worksheet, row, init_col, row + 1, init_col + 5, text.c_str(), efectiveStyle("bodyHeader"));
|
||||
row += 2;
|
||||
int col = init_col + 2;
|
||||
// Headers
|
||||
bool first_item = true;
|
||||
for (const auto& item : result["headers"]) {
|
||||
auto text = item.get<std::string>();
|
||||
if (first_item) {
|
||||
first_item = false;
|
||||
worksheet_merge_range(worksheet, row, init_col, row, init_col + 1, text.c_str(), efectiveStyle("bodyHeader"));
|
||||
} else {
|
||||
writeString(row, col++, text, "bodyHeader");
|
||||
}
|
||||
}
|
||||
row++;
|
||||
// Classes f1-score
|
||||
for (const auto& item : result["body"]) {
|
||||
col = init_col + 2;
|
||||
for (const auto& value : item) {
|
||||
if (value.is_string()) {
|
||||
worksheet_merge_range(worksheet, row, init_col, row, init_col + 1, value.get<std::string>().c_str(), efectiveStyle("text"));
|
||||
} else {
|
||||
if (value.is_number_integer()) {
|
||||
writeInt(row, col++, value.get<int>(), "ints");
|
||||
} else {
|
||||
writeDouble(row, col++, value.get<double>(), "result");
|
||||
}
|
||||
}
|
||||
}
|
||||
row++;
|
||||
}
|
||||
// Accuracy and average f1-score
|
||||
for (const auto& item : { "accuracy", "averages", "weighted" }) {
|
||||
col = init_col + 2;
|
||||
for (const auto& value : result[item]) {
|
||||
if (value.is_string()) {
|
||||
worksheet_merge_range(worksheet, row, init_col, row, init_col + 1, value.get<std::string>().c_str(), efectiveStyle("text"));
|
||||
} else {
|
||||
if (value.is_number_integer()) {
|
||||
writeInt(row, col++, value.get<int>(), "ints");
|
||||
} else {
|
||||
writeDouble(row, col++, value.get<double>(), "result");
|
||||
}
|
||||
}
|
||||
}
|
||||
row++;
|
||||
}
|
||||
// Confusion matrix
|
||||
auto n_items = result["confusion_matrix"].size();
|
||||
worksheet_merge_range(worksheet, row, init_col, row, init_col + n_items + 1, "Confusion Matrix", efectiveStyle("bodyHeader"));
|
||||
row++;
|
||||
boldGreen();
|
||||
for (int i = 0; i < n_items; ++i) {
|
||||
col = init_col + 2;
|
||||
auto label = result["body"][i][0].get<std::string>();
|
||||
worksheet_merge_range(worksheet, row, init_col, row, init_col + 1, label.c_str(), efectiveStyle("text"));
|
||||
for (int j = 0; j < result["confusion_matrix"][i].size(); ++j) {
|
||||
auto value = result["confusion_matrix"][i][j];
|
||||
if (i == j) {
|
||||
writeInt(row, col++, value.get<int>(), "ints_bold");
|
||||
} else {
|
||||
writeInt(row, col++, value.get<int>(), "ints");
|
||||
}
|
||||
}
|
||||
row++;
|
||||
}
|
||||
int maxcol = std::max(init_col + 5, int(init_col + n_items + 1));
|
||||
return { row, maxcol };
|
||||
}
|
||||
void ReportExcel::showSummary()
|
||||
{
|
||||
for (const auto& item : summary) {
|
||||
@@ -216,7 +326,6 @@ namespace platform {
|
||||
row += 1;
|
||||
}
|
||||
}
|
||||
|
||||
void ReportExcel::footer(double totalScore, int row)
|
||||
{
|
||||
showSummary();
|
||||
|
@@ -1,11 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <xlsxwriter.h>
|
||||
#ifndef REPORT_EXCEL_H
|
||||
#define REPORT_EXCEL_H
|
||||
#include <algorithm>
|
||||
#include "main/Scores.h"
|
||||
#include "common/Colors.h"
|
||||
#include "ReportBase.h"
|
||||
#include "ExcelFile.h"
|
||||
namespace platform {
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
class ReportExcel : public ReportBase, public ExcelFile {
|
||||
public:
|
||||
explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL);
|
||||
@@ -13,12 +15,14 @@ namespace platform {
|
||||
private:
|
||||
void formatColumns();
|
||||
void createFile();
|
||||
void createWorksheet();
|
||||
void header() override;
|
||||
void body() override;
|
||||
void showSummary() override;
|
||||
void footer(double totalScore, int row);
|
||||
void append_notes(const json& r, int row);
|
||||
void create_classification_report(const json& result);
|
||||
std::pair<int, int> write_classification_report(const json& result, int init_row, int init_col);
|
||||
void header_notes(int row);
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -4,6 +4,10 @@ namespace platform {
|
||||
|
||||
/// Keep references to the two result sets being compared and read the column labels
/// stored under "nodes", "leaves" and "depth" from a DotEnv object.
/// The ExcelFile base is constructed with no workbook and no worksheet.
ReportExcelCompared::ReportExcelCompared(json& data_A, json& data_B) : ExcelFile(NULL, NULL), data_A(data_A), data_B(data_B)
{
    DotEnv env{};
    nodes_label = env.get("nodes");
    leaves_label = env.get("leaves");
    depth_label = env.get("depth");
}
|
||||
ReportExcelCompared::~ReportExcelCompared()
|
||||
{
|
||||
@@ -35,7 +39,7 @@ namespace platform {
|
||||
}
|
||||
/// Relative difference of `a` with respect to `b`, i.e. (a - b) / b.
/// @return 0.0 when `b` is zero, so the result is never the inf/NaN of a division by zero.
double diff(double a, double b)
{
    if (b == 0) {
        return 0.0;
    }
    return (a - b) / b;
}
|
||||
float compute_model_number(json& rA)
|
||||
{
|
||||
@@ -52,7 +56,7 @@ namespace platform {
|
||||
average = true;
|
||||
}
|
||||
}
|
||||
if (average)
|
||||
if (average && num > 0)
|
||||
result = models / num;
|
||||
return result;
|
||||
}
|
||||
@@ -61,7 +65,7 @@ namespace platform {
|
||||
// Body Header
|
||||
auto sizes = std::vector<int>({ 22, 10, 9, 7, 12, 12, 9, 12, 12, 9, 12, 12, 9, 12, 12, 9, 12, 12, 9, 15, 15, 9, 15, 15 });
|
||||
auto head_a = std::vector<std::string>({ "Dataset", "Samples", "Features", "Classes" });
|
||||
auto head_b = std::vector<std::string>({ "Models", "Nodes", "Edges", "States", "Score", "Time" });
|
||||
auto head_b = std::vector<std::string>({ "Models", nodes_label, leaves_label, depth_label, "Score", "Time" });
|
||||
int headerRow = 3;
|
||||
int col = 0;
|
||||
for (const auto& item : head_a) {
|
||||
@@ -104,6 +108,7 @@ namespace platform {
|
||||
totals_A[j] += r_A[key].get<double>();
|
||||
totals_B[j] += r_B[key].get<double>();
|
||||
}
|
||||
std::cout << "After comparing data " << std::endl;
|
||||
if (r_A["dataset"].get<std::string>() != r_B["dataset"].get<std::string>()) {
|
||||
throw std::runtime_error("Datasets are not the same [" + r_A["dataset"].get<std::string>() + "] vs [" + r_B["dataset"].get<std::string>() + "]");
|
||||
}
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#pragma once
|
||||
#ifndef REPORT_EXCEL_COMPARED_H
|
||||
#define REPORT_EXCEL_COMPARED_H
|
||||
#include "ReportExcel.h"
|
||||
namespace platform {
|
||||
class ReportExcelCompared : public ExcelFile {
|
||||
@@ -12,5 +13,9 @@ namespace platform {
|
||||
void footer(std::vector<double>& totals_A, std::vector<double>& totals_B, int row);
|
||||
json& data_A;
|
||||
json& data_B;
|
||||
std::string nodes_label;
|
||||
std::string leaves_label;
|
||||
std::string depth_label;
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -21,4 +21,3 @@ namespace platform {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1,11 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef REPORTS_PAGED_H
|
||||
#define REPORTS_PAGED_H
|
||||
#include <locale>
|
||||
#include <sstream>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
class ReportsPaged {
|
||||
public:
|
||||
@@ -23,4 +23,4 @@ namespace platform {
|
||||
std::locale loc;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
@@ -6,6 +6,7 @@
|
||||
#include "common/DotEnv.h"
|
||||
#include "common/CLocale.h"
|
||||
#include "common/Paths.h"
|
||||
#include "common/Symbols.h"
|
||||
#include "Result.h"
|
||||
|
||||
namespace platform {
|
||||
@@ -64,35 +65,63 @@ namespace platform {
|
||||
|
||||
void Result::save()
|
||||
{
|
||||
std::ofstream file(Paths::results() + "/" + getFilename());
|
||||
std::ofstream file(Paths::results() + getFilename());
|
||||
file << data;
|
||||
file.close();
|
||||
}
|
||||
std::string Result::getFilename() const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "results_" << data.at("score_name").get<std::string>() << "_" << data.at("model").get<std::string>() << "_"
|
||||
<< data.at("platform").get<std::string>() << "_" << data["date"].get<std::string>() << "_"
|
||||
<< data["time"].get<std::string>() << "_" << (data["stratified"] ? "1" : "0") << ".json";
|
||||
std::string stratified;
|
||||
try {
|
||||
stratified = data["stratified"].get<bool>() ? "1" : "0";
|
||||
}
|
||||
catch (nlohmann::json_abi_v3_11_3::detail::type_error) {
|
||||
stratified = data["stratified"].get<int>() == 1 ? "1" : "0";
|
||||
}
|
||||
oss << "results_"
|
||||
<< data.at("score_name").get<std::string>() << "_"
|
||||
<< data.at("model").get<std::string>() << "_"
|
||||
<< data.at("platform").get<std::string>() << "_"
|
||||
<< data["date"].get<std::string>() << "_"
|
||||
<< data["time"].get<std::string>() << "_"
|
||||
<< stratified << ".json";
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
|
||||
std::string Result::to_string(int maxModel) const
|
||||
std::string Result::to_string(int maxModel, int maxTitle) const
|
||||
{
|
||||
auto tmp = ConfigLocale();
|
||||
std::stringstream oss;
|
||||
std::string s, d;
|
||||
try {
|
||||
s = data["stratified"].get<bool>() ? "S" : " ";
|
||||
}
|
||||
catch (nlohmann::json_abi_v3_11_3::detail::type_error) {
|
||||
s = data["stratified"].get<int>() == 1 ? "S" : " ";
|
||||
}
|
||||
try {
|
||||
d = data["discretized"].get<bool>() ? "D" : " ";
|
||||
}
|
||||
catch (nlohmann::json_abi_v3_11_3::detail::type_error) {
|
||||
d = data["discretized"].get<int>() == 1 ? "D" : " ";
|
||||
}
|
||||
auto duration = data["duration"].get<double>();
|
||||
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
|
||||
std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
|
||||
oss << data["date"].get<std::string>() << " ";
|
||||
oss << std::setw(maxModel) << std::left << data["model"].get<std::string>() << " ";
|
||||
oss << std::setw(10) << std::left << data["score_name"].get<std::string>() << " ";
|
||||
oss << std::right << std::setw(9) << std::setprecision(7) << std::fixed << score << " ";
|
||||
oss << std::setw(11) << std::left << data["score_name"].get<std::string>() << " ";
|
||||
oss << std::right << std::setw(10) << std::setprecision(7) << std::fixed << score << " ";
|
||||
oss << std::left << std::setw(12) << data["platform"].get<std::string>() << " ";
|
||||
oss << s << d << " ";
|
||||
auto completeString = isComplete() ? "C" : "P";
|
||||
oss << std::setw(1) << " " << completeString << " ";
|
||||
oss << std::setw(5) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " ";
|
||||
oss << std::setw(50) << std::left << data["title"].get<std::string>() << " ";
|
||||
oss << std::setw(5) << std::right << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " ";
|
||||
auto title = data["title"].get<std::string>();
|
||||
if (title.size() > maxTitle) {
|
||||
title = title.substr(0, maxTitle - 1) + Symbols::ellipsis;
|
||||
}
|
||||
oss << std::setw(maxTitle) << std::left << title;
|
||||
return oss.str();
|
||||
}
|
||||
}
|
@@ -1,5 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef RESULT_H
|
||||
#define RESULT_H
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
@@ -9,7 +9,7 @@
|
||||
#include "main/PartialResult.h"
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
class Result {
|
||||
public:
|
||||
@@ -18,7 +18,7 @@ namespace platform {
|
||||
void save();
|
||||
// Getters
|
||||
json getJson();
|
||||
std::string to_string(int maxModel) const;
|
||||
std::string to_string(int maxModel, int maxTitle) const;
|
||||
std::string getFilename() const;
|
||||
std::string getDate() const { return data["date"].get<std::string>(); };
|
||||
std::string getTime() const { return data["time"].get<std::string>(); };
|
||||
@@ -26,11 +26,15 @@ namespace platform {
|
||||
std::string getTitle() const { return data["title"].get<std::string>(); };
|
||||
double getDuration() const { return data["duration"]; };
|
||||
std::string getModel() const { return data["model"].get<std::string>(); };
|
||||
std::string getPlatform() const { return data["platform"].get<std::string>(); };
|
||||
std::string getScoreName() const { return data["score_name"].get<std::string>(); };
|
||||
|
||||
bool isComplete() const { return complete; };
|
||||
json getData() const { return data; }
|
||||
// Setters
|
||||
void setTitle(const std::string& title) { data["title"] = title; };
|
||||
void setSmoothStrategy(const std::string& smooth_strategy) { data["smooth_strategy"] = smooth_strategy; };
|
||||
void setDiscretizationAlgorithm(const std::string& discretization_algo) { data["discretization_algorithm"] = discretization_algo; };
|
||||
void setLanguage(const std::string& language) { data["language"] = language; };
|
||||
void setLanguageVersion(const std::string& language_version) { data["language_version"] = language_version; };
|
||||
void setDuration(double duration) { data["duration"] = duration; };
|
||||
@@ -43,10 +47,10 @@ namespace platform {
|
||||
void setStratified(bool stratified) { data["stratified"] = stratified; };
|
||||
void setNFolds(int nfolds) { data["folds"] = nfolds; };
|
||||
void setPlatform(const std::string& platform_name) { data["platform"] = platform_name; };
|
||||
|
||||
private:
|
||||
json data;
|
||||
bool complete;
|
||||
double score = 0.0;
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -31,6 +31,8 @@ namespace platform {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (files.empty())
|
||||
return;
|
||||
maxModel = std::max(size_t(5), (*max_element(files.begin(), files.end(), [](const Result& a, const Result& b) { return a.getModel().size() < b.getModel().size(); })).getModel().size());
|
||||
maxFile = std::max(size_t(4), (*max_element(files.begin(), files.end(), [](const Result& a, const Result& b) { return a.getFilename().size() < b.getFilename().size(); })).getFilename().size());
|
||||
}
|
||||
|
@@ -1,11 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef RESULTSDATASET_H
|
||||
#define RESULTSDATASET_H
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "results/Result.h"
|
||||
namespace platform {
|
||||
using json = nlohmann::json;
|
||||
using json = nlohmann::ordered_json;
|
||||
class ResultsDataset {
|
||||
public:
|
||||
ResultsDataset(const std::string& dataset, const std::string& model, const std::string& score);
|
||||
@@ -32,3 +32,4 @@ namespace platform {
|
||||
std::vector<Result> files;
|
||||
};
|
||||
};
|
||||
#endif
|
@@ -4,15 +4,15 @@
|
||||
#include "results/ResultsDataset.h"
|
||||
#include "ResultsDatasetConsole.h"
|
||||
namespace platform {
|
||||
void ResultsDatasetsConsole::report(const std::string& dataset, const std::string& score, const std::string& model)
|
||||
bool ResultsDatasetsConsole::report(const std::string& dataset, const std::string& score, const std::string& model)
|
||||
{
|
||||
auto results = platform::ResultsDataset(dataset, model, score);
|
||||
results.load();
|
||||
results.sortModel();
|
||||
if (results.empty()) {
|
||||
std::cerr << Colors::RED() << "No results found for dataset " << dataset << " and model " << model << Colors::RESET() << std::endl;
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
results.sortModel();
|
||||
int maxModel = results.maxModelSize();
|
||||
int maxHyper = results.maxHyperSize();
|
||||
double maxResult = results.maxResultScore();
|
||||
@@ -76,6 +76,7 @@ namespace platform {
|
||||
oss << item["hyperparameters"].get<std::string>() << std::endl;
|
||||
body.push_back(oss.str());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@@ -1,21 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef RESULTSDATASETSCONSOLE_H
|
||||
#define RESULTSDATASETSCONSOLE_H
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "results/ResultsDataset.h"
|
||||
#include "ReportsPaged.h"
|
||||
#include "reports/ReportsPaged.h"
|
||||
#include "ResultsDataset.h"
|
||||
|
||||
namespace platform {
|
||||
class ResultsDatasetsConsole : public ReportsPaged {
|
||||
public:
|
||||
ResultsDatasetsConsole() = default;
|
||||
~ResultsDatasetsConsole() = default;
|
||||
void report(const std::string& dataset, const std::string& score, const std::string& model);
|
||||
bool report(const std::string& dataset, const std::string& score, const std::string& model);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
@@ -20,7 +20,7 @@ namespace platform {
|
||||
// Body header
|
||||
row = 2;
|
||||
int col = 0;
|
||||
for (const auto& name : { "Nº", "Model", "Date", "Time", "Score", "Hyperparameters" }) {
|
||||
for (const auto& name : { "#", "Model", "Date", "Time", "Score", "Hyperparameters" }) {
|
||||
writeString(row, col++, name, "bodyHeader");
|
||||
}
|
||||
// Body
|
||||
|
@@ -1,11 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef RESULTSDATASETEXCEL_H
|
||||
#define RESULTSDATASETEXCEL_H
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "reports/ExcelFile.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace platform {
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
class ResultsDatasetExcel : public ExcelFile {
|
||||
public:
|
||||
@@ -14,3 +14,4 @@ namespace platform {
|
||||
void report(json& data);
|
||||
};
|
||||
}
|
||||
#endif
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user