Compare commits

68 Commits

Author SHA1 Message Date
ba455bb934 Rename config.h to config_platform.h 2024-12-13 19:57:05 +01:00
a65955248a Add mdlp as dependency 2024-12-13 10:28:27 +01:00
84930b0537 Remove lib/mdlp folder 2024-12-13 10:11:45 +01:00
10c65f44a0 Add mdlp library dependency 2024-12-13 09:55:37 +01:00
6d112f01e7 Remove external library dependency 2024-12-13 09:49:46 +01:00
401296293b Add header to b_main time 2024-12-11 23:18:20 +01:00
9566ae4cf6 Fix gridsearch discretize_algo mistake 2024-12-11 12:45:16 +01:00
55187ee521 Add time to experiment seed 2024-12-11 10:05:24 +01:00
68ea06d129 Fix fimdlp library includes 2024-11-20 21:19:35 +01:00
6c1d1d0d32 Remove mdlp files 2024-11-20 21:14:42 +01:00
b0853d169b Remove mdlp submodule 2024-11-20 21:14:19 +01:00
26f8e07774 Remove Python 3.11 only requirement 2024-11-20 20:21:39 +01:00
315dfb104f Add train test time to report console 2024-10-25 09:53:31 +02:00
381f226d53 Fix pm code in tex bestresults 2024-10-15 10:32:28 +02:00
ea13835701 Add Markdown best results output 2024-10-07 18:08:42 +02:00
d75468cf78 Replace Nº with # in output labels 2024-09-28 22:55:11 +02:00
c58bd9d60d add score name to best results excel file name 2024-09-28 18:58:49 +02:00
148a3b831a Add missing \ to results.tex 2024-09-03 12:57:22 +02:00
69063badbb Fix status error in holm.tex 2024-09-03 12:54:09 +02:00
6ae2b2182a Complete Tex output with Holm test 2024-09-03 12:43:50 +02:00
4dbd76df55 Continue TeX output 2024-09-02 20:30:47 +02:00
4545f76667 Begin adding TeX output to b_best -m any command 2024-09-02 18:14:53 +02:00
8372987dae Update sample to last library version 2024-08-31 12:41:11 +02:00
d72943c749 Fix hyperparams mistake 2024-08-07 10:52:04 +02:00
800246acd2 Accept nested hyperparameters in b_main 2024-08-04 17:19:31 +02:00
0ea967dd9d Support b_main with best hyperparameters 2024-08-02 19:10:25 +02:00
97abec8b69 Fix hide result error 2024-08-02 12:02:11 +02:00
17c9522e77 Add support to old format results 2024-07-25 17:06:31 +02:00
45af550cf9 Change time showed in report 2024-07-24 18:40:59 +02:00
5d5f49777e Fix wrong columns message 2024-07-16 11:30:28 +02:00
540a8ea06d Refactor update rows 2024-07-16 10:33:44 +02:00
1924c4392b Adapt screen to resized window 2024-07-16 10:25:15 +02:00
f2556a30af Add screen width control in b_manage 2024-07-15 18:06:39 +02:00
2f2ed00ca1 Add roc-auc-ovr as score to b_main 2024-07-14 12:48:33 +02:00
28f6a0d7a7 RocAuc refactor to speed up binary classif. problems 2024-07-13 16:54:34 +02:00
028522f180 Add AUC to reportConsole 2024-07-12 17:41:23 +02:00
84adf13a79 Add AUC computing in Experiment and store in result 2024-07-12 17:23:03 +02:00
26dfe6d056 Add Graphs to results
Add bin5..bin10 q & u discretizers algos
Fix trouble in computing states
Update mdlp to 2.0.0
2024-07-11 11:23:20 +02:00
3acc34e4c6 Fix title mistake in b_main 2024-06-17 19:07:15 +02:00
8f92b74260 Change Constant smooth type 2024-06-14 10:16:32 +02:00
3d900f8c81 Update models versions 2024-06-13 12:30:31 +02:00
e628d80f4c Experiment working with smoothing and disc-algo 2024-06-11 13:52:26 +02:00
0f06f8971e Change default smooth type in Experiment 2024-06-10 15:50:54 +02:00
f800772149 Add new hyperparameters validation in b_main 2024-06-10 10:16:07 +02:00
b8a8ddaf8c Add smooth strategy to hyperparameter in b_main
Add smooth strategy to reports
2024-06-09 20:46:14 +02:00
90555489ff Add discretiz_algo to b_main as hyperparameter 2024-06-09 11:35:50 +02:00
080f3cee34 Add discretization algo to reports 2024-06-09 01:11:56 +02:00
643633e6dd fit discretizer only with train data 2024-06-09 00:50:55 +02:00
361c51d864 Add traintest split in gridsearch 2024-06-07 11:05:59 +02:00
5dd3deca1a Add discretiz algorithm management to b_main & Dataset 2024-06-07 09:00:51 +02:00
2202a81782 Add discretization algo to result 2024-06-06 18:33:01 +02:00
c4f4e332f6 Add parsing to DotEnv 2024-06-06 17:55:39 +02:00
a7ec930fa0 Add numeric features management to Dataset 2024-06-06 13:03:57 +02:00
6858b3d89a Remove model selection from b_best and b_list 2024-06-03 17:09:45 +02:00
5fb176d78a Add message of the file saved in b_main 2024-05-29 20:52:25 +02:00
f5d5c35002 Add generate-fold-files to b_main 2024-05-28 10:52:08 +02:00
b34af13eea Add new Files library 2024-05-26 17:27:42 +02:00
e3a06264a9 Remove old Files library 2024-05-26 17:25:36 +02:00
df82f82e88 Add F column to b_best in excel 2024-05-21 08:45:17 +02:00
886dde7a06 Fix various classification reports in the same excel book 2024-05-19 18:53:55 +02:00
88468434e7 Add color and fix format in classification report in excel 2024-05-19 11:12:31 +02:00
ad5c3319bd Complete excel classification report 2024-05-18 22:59:37 +02:00
594adb0534 Begin classification report in excel 2024-05-18 21:37:34 +02:00
b9e0c92334 Move ResultsDatasetConsole to results folder 2024-05-18 18:41:17 +02:00
25bd7a42c6 Replace pragma once with ifndef 2024-05-18 13:00:13 +02:00
c165a4bdda Fix refactor of static aggregate method 2024-05-17 23:38:21 +02:00
49a36904dc Refactor aggregate score to a constructor 2024-05-17 22:52:13 +02:00
577351eda5 put using json=nlohmann:ordered_json under namespace platform 2024-05-17 18:32:01 +02:00
98 changed files with 2196 additions and 1133 deletions

10
.gitmodules vendored
View File

@@ -10,10 +10,12 @@
[submodule "lib/libxlsxwriter"] [submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git url = https://github.com/jmcnamara/libxlsxwriter.git
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
update = merge
[submodule "lib/folding"] [submodule "lib/folding"]
path = lib/folding path = lib/folding
url = https://github.com/rmontanana/folding url = https://github.com/rmontanana/folding
[submodule "lib/Files"]
path = lib/Files
url = https://github.com/rmontanana/ArffFiles
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20) cmake_minimum_required(VERSION 3.20)
project(Platform project(Platform
VERSION 1.0.4 VERSION 1.1.0
DESCRIPTION "Platform to run Experiments with classifiers." DESCRIPTION "Platform to run Experiments with classifiers."
HOMEPAGE_URL "https://github.com/rmontanana/platform" HOMEPAGE_URL "https://github.com/rmontanana/platform"
LANGUAGES CXX LANGUAGES CXX
@@ -21,7 +21,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast")
set(CMAKE_CXX_FLAGS_DEBUG " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g") set(CMAKE_CXX_FLAGS_DEBUG " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
# Options # Options
@@ -46,7 +46,7 @@ if(Boost_FOUND)
endif() endif()
# Python # Python
find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED) find_package(Python3 3.11 COMPONENTS Interpreter Development REQUIRED)
message("Python3_LIBRARIES=${Python3_LIBRARIES}") message("Python3_LIBRARIES=${Python3_LIBRARIES}")
# CMakes modules # CMakes modules
@@ -88,10 +88,9 @@ message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
## Configure test data path ## Configure test data path
cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data") cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h") configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h")
add_subdirectory(lib/Files)
add_subdirectory(config) add_subdirectory(config)
add_subdirectory(src) add_subdirectory(src)
add_subdirectory(sample) # add_subdirectory(sample)
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${Platform_SOURCE_DIR}/src/*.cpp) file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${Platform_SOURCE_DIR}/src/*.cpp)
# Testing # Testing

View File

@@ -6,7 +6,6 @@ f_release = build_release
f_debug = build_debug f_debug = build_debug
app_targets = b_best b_list b_main b_manage b_grid app_targets = b_best b_list b_main b_manage b_grid
test_targets = unit_tests_platform test_targets = unit_tests_platform
n_procs = -j 16
define ClearTests define ClearTests
@for t in $(test_targets); do \ @for t in $(test_targets); do \
@@ -56,10 +55,10 @@ dependency: ## Create a dependency graph diagram of the project (build/dependenc
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
buildd: ## Build the debug targets buildd: ## Build the debug targets
cmake --build $(f_debug) -t $(app_targets) PlatformSample $(n_procs) cmake --build $(f_debug) -t $(app_targets) PlatformSample --parallel
buildr: ## Build the release targets buildr: ## Build the release targets
cmake --build $(f_release) -t $(app_targets) $(n_procs) cmake --build $(f_release) -t $(app_targets) --parallel
clean: ## Clean the tests info clean: ## Clean the tests info
@echo ">>> Cleaning Debug Platform tests..."; @echo ">>> Cleaning Debug Platform tests...";
@@ -87,7 +86,7 @@ opt = ""
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running Platform tests..."; @echo ">>> Running Platform tests...";
@$(MAKE) clean @$(MAKE) clean
@cmake --build $(f_debug) -t $(test_targets) $(n_procs) @cmake --build $(f_debug) -t $(test_targets) --parallel
@for t in $(test_targets); do \ @for t in $(test_targets); do \
if [ -f $(f_debug)/tests/$$t ]; then \ if [ -f $(f_debug)/tests/$$t ]; then \
cd $(f_debug)/tests ; \ cd $(f_debug)/tests ; \

View File

@@ -20,11 +20,18 @@ In Linux sometimes the library libstdc++ is mistaken from the miniconda installa
libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by b_xxxx) libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by b_xxxx)
``` ```
The solution is to erase the libstdc++ library from the miniconda installation: The solution is to erase the libstdc++ library from the miniconda installation and no further compilation is needed.
### MPI ### MPI
In Linux just install openmpi & openmpi-devel packages. Only if cmake can't find openmpi installation (like in Oracle Linux) set the following variable: In Linux just install openmpi & openmpi-devel packages.
```bash
source /etc/profile.d/modules.sh
module load mpi/openmpi-x86_64
```
If cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
```bash ```bash
export MPI_HOME="/usr/lib64/openmpi" export MPI_HOME="/usr/lib64/openmpi"

View File

@@ -1,4 +1,4 @@
configure_file( configure_file(
"config.h.in" "config.h.in"
"${CMAKE_BINARY_DIR}/configured_files/include/config.h" ESCAPE_QUOTES "${CMAKE_BINARY_DIR}/configured_files/include/config_platform.h" ESCAPE_QUOTES
) )

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef PLATFORM_H
#define PLATFORM_H
#include <string> #include <string>
#include <string_view> #include <string_view>
@@ -8,3 +8,4 @@ static constexpr std::string_view platform_project_version = "@PROJECT_VERSION@"
static constexpr std::string_view platform_project_description = "@PROJECT_DESCRIPTION@"; static constexpr std::string_view platform_project_description = "@PROJECT_DESCRIPTION@";
static constexpr std::string_view platform_git_sha = "@GIT_SHA@"; static constexpr std::string_view platform_git_sha = "@GIT_SHA@";
static constexpr std::string_view platform_data_path = "@Platform_SOURCE_DIR@/tests/data/"; static constexpr std::string_view platform_data_path = "@Platform_SOURCE_DIR@/tests/data/";
#endif

View File

@@ -1,8 +1,3 @@
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"] [submodule "lib/catch2"]
path = lib/catch2 path = lib/catch2
main = v2.x main = v2.x

1
lib/Files Submodule

Submodule lib/Files added at a4329f5f9d

View File

@@ -1,176 +0,0 @@
#include "ArffFiles.h"
#include <fstream>
#include <sstream>
#include <map>
#include <cctype> // std::isdigit
#include <algorithm> // std::all_of
#include <iostream>
// Default constructor: no setup needed; members default-construct empty.
ArffFiles::ArffFiles() = default;
// Returns a copy of the raw data rows read from the ARFF file (header lines excluded).
std::vector<std::string> ArffFiles::getLines() const
{
return lines;
}
// Number of data rows loaded.
unsigned long int ArffFiles::getSize() const
{
return lines.size();
}
// Attribute list as (name, type) pairs; the class attribute is removed by load().
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
{
return attributes;
}
// Name of the attribute selected as the class label.
std::string ArffFiles::getClassName() const
{
return className;
}
// ARFF type string of the class attribute.
std::string ArffFiles::getClassType() const
{
return classType;
}
// Mutable reference to the feature matrix, stored attribute-major: X[attribute][sample].
std::vector<std::vector<float>>& ArffFiles::getX()
{
return X;
}
// Mutable reference to the integer-encoded (factorized) class labels.
std::vector<int>& ArffFiles::getY()
{
return y;
}
// Shared loading logic: reads the ARFF file, collecting @attribute declarations
// into `attributes` and raw data rows into `lines`.
// @param fileName path of the .arff file to read
// @throws std::invalid_argument if the file cannot be opened or declares no attributes
void ArffFiles::loadCommon(std::string fileName)
{
    std::ifstream file(fileName);
    if (!file.is_open()) {
        throw std::invalid_argument("Unable to open file");
    }
    std::string line;
    std::string keyword;
    std::string attribute;
    std::string type;
    std::string type_w;
    while (getline(file, line)) {
        // Strip a trailing '\r' so CRLF (Windows) files don't leak '\r' into the
        // last field of each row or into class labels. The original code only
        // skipped lines that were exactly "\r".
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        // Skip blank lines and '%' comment lines.
        if (line.empty() || line[0] == '%' || line == " ") {
            continue;
        }
        if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
            std::stringstream ss(line);
            ss >> keyword >> attribute;
            // The type may contain spaces (e.g. a nominal "{a, b}" spec); re-join the rest.
            type = "";
            while (ss >> type_w)
                type += type_w + " ";
            attributes.emplace_back(trim(attribute), trim(type));
            continue;
        }
        // Any other header directive (@relation, @data, ...) is ignored.
        if (line[0] == '@') {
            continue;
        }
        lines.push_back(line);
    }
    file.close();
    if (attributes.empty())
        throw std::invalid_argument("No attributes found");
}
// Loads an ARFF file taking either the last (classLast == true) or the first
// attribute as the class label, removes it from the attribute list, and builds
// the dataset (X, y).
void ArffFiles::load(const std::string& fileName, bool classLast)
{
    loadCommon(fileName);
    int labelIndex = 0;
    if (classLast) {
        const auto& classAttr = attributes.back();
        className = classAttr.first;
        classType = classAttr.second;
        attributes.pop_back();
        labelIndex = static_cast<int>(attributes.size());
    } else {
        const auto& classAttr = attributes.front();
        className = classAttr.first;
        classType = classAttr.second;
        attributes.erase(attributes.begin());
    }
    generateDataset(labelIndex);
}
// Loads an ARFF file using the attribute called `name` as the class label.
// @param fileName path of the .arff file to read
// @param name attribute to use as the class; it is removed from `attributes`
// @throws std::invalid_argument if no attribute with that name exists
void ArffFiles::load(const std::string& fileName, const std::string& name)
{
    loadCommon(fileName);
    // Sentinel replaces the original uninitialized labelIndex + bool flag;
    // size_t index avoids the signed/unsigned comparison of the original loop.
    int labelIndex = -1;
    for (size_t i = 0; i < attributes.size(); ++i) {
        if (attributes[i].first == name) {
            className = attributes[i].first;
            classType = attributes[i].second;
            attributes.erase(attributes.begin() + i);
            labelIndex = static_cast<int>(i);
            break;
        }
    }
    if (labelIndex == -1) {
        throw std::invalid_argument("Class name not found");
    }
    generateDataset(labelIndex);
}
// Builds the numeric dataset from the raw lines: fills X (attribute-major) and
// factorizes the label column (at labelIndex) into y.
// Rows containing a missing value ("?") are dropped.
void ArffFiles::generateDataset(int labelIndex)
{
    X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
    auto yy = std::vector<std::string>(lines.size(), "");
    auto removeLines = std::vector<int>(); // Indices of rows with missing values
    for (size_t i = 0; i < lines.size(); i++) {
        std::stringstream ss(lines[i]);
        std::string value;
        int pos = 0;
        int xIndex = 0;
        while (getline(ss, value, ',')) {
            if (pos++ == labelIndex) {
                yy[i] = value;
            } else {
                if (value == "?") {
                    X[xIndex++][i] = -1;
                    removeLines.push_back(i);
                } else
                    X[xIndex++][i] = stof(value);
            }
        }
    }
    // BUG FIX: removeLines may hold duplicate indices (one per '?' in a row) and
    // the original erased in ascending order without adjusting for the shift,
    // removing the wrong rows and potentially erasing past the end (UB).
    // De-duplicate and erase from highest index to lowest instead.
    std::sort(removeLines.begin(), removeLines.end());
    removeLines.erase(std::unique(removeLines.begin(), removeLines.end()), removeLines.end());
    for (auto it = removeLines.rbegin(); it != removeLines.rend(); ++it) {
        yy.erase(yy.begin() + *it);
        for (auto& x : X) {
            x.erase(x.begin() + *it);
        }
    }
    y = factorize(yy);
}
// Removes spaces, single quotes, tabs, CR and LF from both ends of `source`.
// Returns the trimmed copy; an all-junk input yields an empty string.
std::string ArffFiles::trim(const std::string& source)
{
    static const char* junk = " '\n\r\t";
    const auto first = source.find_first_not_of(junk);
    if (first == std::string::npos)
        return "";
    const auto last = source.find_last_not_of(junk);
    return source.substr(first, last - first + 1);
}
// Maps string labels to consecutive integer codes in first-seen order.
// Side effect: rebuilds `labels` with the distinct label names; purely numeric
// labels are stored as "Class <label>" for display purposes.
// @param labels_t one label string per sample
// @return integer code per sample, parallel to labels_t
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{
    std::vector<int> yy;
    labels.clear();
    yy.reserve(labels_t.size());
    std::map<std::string, int> labelMap;
    int i = 0;
    for (const std::string& label : labels_t) {
        if (labelMap.find(label) == labelMap.end()) {
            labelMap[label] = i++;
            // Cast through unsigned char: passing a plain (possibly negative)
            // char to isdigit is undefined behavior per the C standard.
            bool allDigits = std::all_of(label.begin(), label.end(),
                [](unsigned char c) { return std::isdigit(c) != 0; });
            if (allDigits)
                labels.push_back("Class " + label);
            else
                labels.push_back(label);
        }
        yy.push_back(labelMap[label]);
    }
    return yy;
}

View File

@@ -1,34 +0,0 @@
#ifndef ARFFFILES_H
#define ARFFFILES_H
#include <string>
#include <vector>
// Minimal ARFF (Attribute-Relation File Format) reader: parses the header,
// loads the data rows, and exposes them as a float feature matrix plus
// integer-encoded class labels.
class ArffFiles {
public:
ArffFiles();
// Load taking the last (default, flag == true) or first attribute as the class.
void load(const std::string&, bool = true);
// Load using the named attribute as the class.
void load(const std::string&, const std::string&);
std::vector<std::string> getLines() const;
unsigned long int getSize() const;
std::string getClassName() const;
std::string getClassType() const;
// Distinct class label names, in first-seen order.
std::vector<std::string> getLabels() const { return labels; }
// Strips spaces, single quotes, tabs, CR and LF from both ends.
static std::string trim(const std::string&);
std::vector<std::vector<float>>& getX();
std::vector<int>& getY();
std::vector<std::pair<std::string, std::string>> getAttributes() const;
// Encodes string labels as consecutive ints; rebuilds `labels` as a side effect.
std::vector<int> factorize(const std::vector<std::string>& labels_t);
private:
std::vector<std::string> lines; // raw data rows (header lines excluded)
std::vector<std::pair<std::string, std::string>> attributes; // (name, type) pairs
std::string className;
std::string classType;
std::vector<std::vector<float>> X; // attribute-major feature matrix
std::vector<int> y; // factorized class labels
std::vector<std::string> labels; // distinct label names
void generateDataset(int);
void loadCommon(std::string);
};
#endif

View File

@@ -1 +0,0 @@
add_library(ArffFiles ArffFiles.cc)

View File

@@ -3,7 +3,7 @@ include_directories(
${Platform_SOURCE_DIR}/src/main ${Platform_SOURCE_DIR}/src/main
${Python3_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS}
${Platform_SOURCE_DIR}/lib/Files ${Platform_SOURCE_DIR}/lib/Files
${Platform_SOURCE_DIR}/lib/mdlp ${Platform_SOURCE_DIR}/lib/mdlp/src
${Platform_SOURCE_DIR}/lib/argparse/include ${Platform_SOURCE_DIR}/lib/argparse/include
${Platform_SOURCE_DIR}/lib/folding ${Platform_SOURCE_DIR}/lib/folding
${Platform_SOURCE_DIR}/lib/json/include ${Platform_SOURCE_DIR}/lib/json/include
@@ -12,4 +12,4 @@ include_directories(
${Bayesnet_INCLUDE_DIRS} ${Bayesnet_INCLUDE_DIRS}
) )
add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp) add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp)
target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy) target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)

View File

@@ -5,13 +5,13 @@
#include <torch/torch.h> #include <torch/torch.h>
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include <ArffFiles.h> #include <ArffFiles.hpp>
#include <CPPFImdlp.h> #include <fimdlp/CPPFImdlp.h>
#include <folding.hpp> #include <folding.hpp>
#include <bayesnet/utils/BayesMetrics.h> #include <bayesnet/utils/BayesMetrics.h>
#include "Models.h" #include "Models.h"
#include "modelRegister.h" #include "modelRegister.h"
#include "config.h" #include "config_platform.h"
const std::string PATH = { platform_data_path.begin(), platform_data_path.end() }; const std::string PATH = { platform_data_path.begin(), platform_data_path.end() };
@@ -79,11 +79,11 @@ int main(int argc, char** argv)
} }
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}"); throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
} }
); );
program.add_argument("-p", "--path") program.add_argument("-p", "--path")
.help(" folder where the data files are located, default") .help(" folder where the data files are located, default")
.default_value(std::string{ PATH } .default_value(std::string{ PATH }
); );
program.add_argument("-m", "--model") program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->toString()) .help("Model to use " + platform::Models::instance()->toString())
.action([](const std::string& value) { .action([](const std::string& value) {
@@ -93,7 +93,7 @@ int main(int argc, char** argv)
} }
throw runtime_error("Model must be one of " + platform::Models::instance()->toString()); throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
} }
); );
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true); program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
@@ -112,129 +112,130 @@ int main(int argc, char** argv)
catch (...) { catch (...) {
throw runtime_error("Number of folds must be an integer"); throw runtime_error("Number of folds must be an integer");
}}); }});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors, dump_cpt; bool class_last, stratified, tensors, dump_cpt;
std::string model_name, file_name, path, complete_file_name; std::string model_name, file_name, path, complete_file_name;
int nFolds, seed; int nFolds, seed;
try { try {
program.parse_args(argc, argv); program.parse_args(argc, argv);
file_name = program.get<std::string>("dataset"); file_name = program.get<std::string>("dataset");
path = program.get<std::string>("path"); path = program.get<std::string>("path");
model_name = program.get<std::string>("model"); model_name = program.get<std::string>("model");
complete_file_name = path + file_name + ".arff"; complete_file_name = path + file_name + ".arff";
stratified = program.get<bool>("stratified"); stratified = program.get<bool>("stratified");
tensors = program.get<bool>("tensors"); tensors = program.get<bool>("tensors");
nFolds = program.get<int>("folds"); nFolds = program.get<int>("folds");
seed = program.get<int>("seed"); seed = program.get<int>("seed");
dump_cpt = program.get<bool>("dumpcpt"); dump_cpt = program.get<bool>("dumpcpt");
class_last = datasets[file_name]; class_last = datasets[file_name];
if (!file_exists(complete_file_name)) { if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
}
}
catch (const exception& err) {
cerr << err.what() << std::endl;
cerr << program;
exit(1);
} }
}
catch (const exception& err) {
cerr << err.what() << std::endl;
cerr << program;
exit(1);
}
/* /*
* Begin Processing * Begin Processing
*/ */
auto handler = ArffFiles(); auto handler = ArffFiles();
handler.load(complete_file_name, class_last); handler.load(complete_file_name, class_last);
// Get Dataset X, y // Get Dataset X, y
std::vector<mdlp::samples_t>& X = handler.getX(); std::vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY(); mdlp::labels_t& y = handler.getY();
// Get className & Features // Get className & Features
auto className = handler.getClassName(); auto className = handler.getClassName();
std::vector<std::string> features; std::vector<std::string> features;
auto attributes = handler.getAttributes(); auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), transform(attributes.begin(), attributes.end(), back_inserter(features),
[](const pair<std::string, std::string>& item) { return item.first; }); [](const pair<std::string, std::string>& item) { return item.first; });
// Discretize Dataset // Discretize Dataset
auto [Xd, maxes] = discretize(X, y, features); auto [Xd, maxes] = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1; maxes[className] = *max_element(y.begin(), y.end()) + 1;
map<std::string, std::vector<int>> states; map<std::string, std::vector<int>> states;
for (auto feature : features) { for (auto feature : features) {
states[feature] = std::vector<int>(maxes[feature]); states[feature] = std::vector<int>(maxes[feature]);
}
states[className] = std::vector<int>(maxes[className]);
auto clf = platform::Models::instance()->create(model_name);
clf->fit(Xd, y, features, className, states);
if (dump_cpt) {
std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt();
}
auto lines = clf->show();
for (auto line : lines) {
std::cout << line << std::endl;
}
std::cout << "--- Topological Order ---" << std::endl;
auto order = clf->topological_order();
for (auto name : order) {
std::cout << name << ", ";
}
std::cout << "end." << std::endl;
auto score = clf->score(Xd, y);
std::cout << "Score: " << score << std::endl;
auto graph = clf->graph();
auto dot_file = model_name + "_" + file_name;
ofstream file(dot_file + ".dot");
file << graph;
file.close();
std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
std::string stratified_string = stratified ? " Stratified" : "";
std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
std::cout << "==========================================" << std::endl;
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
torch::Tensor yt = torch::tensor(y, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
}
float total_score = 0, total_score_train = 0, score_train, score_test;
folding::Fold* fold;
double nodes = 0.0;
if (stratified)
fold = new folding::StratifiedKFold(nFolds, y, seed);
else
fold = new folding::KFold(nFolds, y.size(), seed);
for (auto i = 0; i < nFolds; ++i) {
auto [train, test] = fold->getFold(i);
std::cout << "Fold: " << i + 1 << std::endl;
if (tensors) {
auto ttrain = torch::tensor(train, torch::kInt64);
auto ttest = torch::tensor(test, torch::kInt64);
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
torch::Tensor ytraint = yt.index({ ttrain });
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
torch::Tensor ytestt = yt.index({ ttest });
clf->fit(Xtraint, ytraint, features, className, states);
auto temp = clf->predict(Xtraint);
score_train = clf->score(Xtraint, ytraint);
score_test = clf->score(Xtestt, ytestt);
} else {
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
auto [Xtest, ytest] = extract_indices(test, Xd, y);
clf->fit(Xtrain, ytrain, features, className, states);
std::cout << "Nodes: " << clf->getNumberOfNodes() << std::endl;
nodes += clf->getNumberOfNodes();
score_train = clf->score(Xtrain, ytrain);
score_test = clf->score(Xtest, ytest);
} }
states[className] = std::vector<int>(maxes[className]);
auto clf = platform::Models::instance()->create(model_name);
bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::ORIGINAL;
clf->fit(Xd, y, features, className, states, smoothing);
if (dump_cpt) { if (dump_cpt) {
std::cout << "--- CPT Tables ---" << std::endl; std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt(); clf->dump_cpt();
} }
total_score_train += score_train; auto lines = clf->show();
total_score += score_test; for (auto line : lines) {
std::cout << "Score Train: " << score_train << std::endl; std::cout << line << std::endl;
std::cout << "Score Test : " << score_test << std::endl; }
std::cout << "-------------------------------------------------------------------------------" << std::endl; std::cout << "--- Topological Order ---" << std::endl;
} auto order = clf->topological_order();
std::cout << "Nodes: " << nodes / nFolds << std::endl; for (auto name : order) {
std::cout << "**********************************************************************************" << std::endl; std::cout << name << ", ";
std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl; }
std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0; std::cout << "end." << std::endl;
auto score = clf->score(Xd, y);
std::cout << "Score: " << score << std::endl;
auto graph = clf->graph();
auto dot_file = model_name + "_" + file_name;
ofstream file(dot_file + ".dot");
file << graph;
file.close();
std::cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << std::endl;
std::cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << std::endl;
std::string stratified_string = stratified ? " Stratified" : "";
std::cout << nFolds << " Folds" << stratified_string << " Cross validation" << std::endl;
std::cout << "==========================================" << std::endl;
torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
torch::Tensor yt = torch::tensor(y, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
}
float total_score = 0, total_score_train = 0, score_train, score_test;
folding::Fold* fold;
double nodes = 0.0;
if (stratified)
fold = new folding::StratifiedKFold(nFolds, y, seed);
else
fold = new folding::KFold(nFolds, y.size(), seed);
for (auto i = 0; i < nFolds; ++i) {
auto [train, test] = fold->getFold(i);
std::cout << "Fold: " << i + 1 << std::endl;
if (tensors) {
auto ttrain = torch::tensor(train, torch::kInt64);
auto ttest = torch::tensor(test, torch::kInt64);
torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
torch::Tensor ytraint = yt.index({ ttrain });
torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
torch::Tensor ytestt = yt.index({ ttest });
clf->fit(Xtraint, ytraint, features, className, states, smoothing);
auto temp = clf->predict(Xtraint);
score_train = clf->score(Xtraint, ytraint);
score_test = clf->score(Xtestt, ytestt);
} else {
auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
auto [Xtest, ytest] = extract_indices(test, Xd, y);
clf->fit(Xtrain, ytrain, features, className, states, smoothing);
std::cout << "Nodes: " << clf->getNumberOfNodes() << std::endl;
nodes += clf->getNumberOfNodes();
score_train = clf->score(Xtrain, ytrain);
score_test = clf->score(Xtest, ytest);
}
if (dump_cpt) {
std::cout << "--- CPT Tables ---" << std::endl;
clf->dump_cpt();
}
total_score_train += score_train;
total_score += score_test;
std::cout << "Score Train: " << score_train << std::endl;
std::cout << "Score Test : " << score_test << std::endl;
std::cout << "-------------------------------------------------------------------------------" << std::endl;
}
std::cout << "Nodes: " << nodes / nFolds << std::endl;
std::cout << "**********************************************************************************" << std::endl;
std::cout << "Average Score Train: " << total_score_train / nFolds << std::endl;
std::cout << "Average Score Test : " << total_score / nFolds << std::endl;return 0;
} }

View File

@@ -2,7 +2,7 @@ include_directories(
## Libs ## Libs
${Platform_SOURCE_DIR}/lib/Files ${Platform_SOURCE_DIR}/lib/Files
${Platform_SOURCE_DIR}/lib/folding ${Platform_SOURCE_DIR}/lib/folding
${Platform_SOURCE_DIR}/lib/mdlp ${Platform_SOURCE_DIR}/lib/mdlp/src
${Platform_SOURCE_DIR}/lib/argparse/include ${Platform_SOURCE_DIR}/lib/argparse/include
${Platform_SOURCE_DIR}/lib/json/include ${Platform_SOURCE_DIR}/lib/json/include
${Platform_SOURCE_DIR}/lib/libxlsxwriter/include ${Platform_SOURCE_DIR}/lib/libxlsxwriter/include
@@ -20,50 +20,50 @@ include_directories(
# b_best # b_best
add_executable( add_executable(
b_best commands/b_best.cpp best/Statistics.cpp b_best commands/b_best.cpp best/Statistics.cpp
best/BestResultsExcel.cpp best/BestResults.cpp best/BestResultsExcel.cpp best/BestResultsTex.cpp best/BestResultsMd.cpp best/BestResults.cpp
common/Datasets.cpp common/Dataset.cpp common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
main/Models.cpp main/Models.cpp main/Scores.cpp
reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp
results/Result.cpp results/Result.cpp
) )
target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}") target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
# b_grid # b_grid
set(grid_sources GridSearch.cpp GridData.cpp) set(grid_sources GridSearch.cpp GridData.cpp)
list(TRANSFORM grid_sources PREPEND grid/) list(TRANSFORM grid_sources PREPEND grid/)
add_executable(b_grid commands/b_grid.cpp ${grid_sources} add_executable(b_grid commands/b_grid.cpp ${grid_sources}
common/Datasets.cpp common/Dataset.cpp common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
main/HyperParameters.cpp main/Models.cpp main/HyperParameters.cpp main/Models.cpp
) )
target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy) target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
# b_list # b_list
add_executable(b_list commands/b_list.cpp add_executable(b_list commands/b_list.cpp
common/Datasets.cpp common/Dataset.cpp common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
main/Models.cpp main/Models.cpp main/Scores.cpp
reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ResultsDatasetConsole.cpp reports/ReportsPaged.cpp reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
) )
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}") target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
# b_main # b_main
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp) set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
list(TRANSFORM main_sources PREPEND main/) list(TRANSFORM main_sources PREPEND main/)
add_executable(b_main commands/b_main.cpp ${main_sources} add_executable(b_main commands/b_main.cpp ${main_sources}
common/Datasets.cpp common/Dataset.cpp common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
reports/ReportConsole.cpp reports/ReportBase.cpp reports/ReportConsole.cpp reports/ReportBase.cpp
results/Result.cpp results/Result.cpp
) )
target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy) target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
# b_manage # b_manage
set(manage_sources ManageScreen.cpp CommandParser.cpp ResultsManager.cpp) set(manage_sources ManageScreen.cpp OptionsMenu.cpp ResultsManager.cpp)
list(TRANSFORM manage_sources PREPEND manage/) list(TRANSFORM manage_sources PREPEND manage/)
add_executable( add_executable(
b_manage commands/b_manage.cpp ${manage_sources} b_manage commands/b_manage.cpp ${manage_sources}
common/Datasets.cpp common/Dataset.cpp common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
reports/ReportConsole.cpp reports/ReportExcel.cpp reports/ReportExcelCompared.cpp reports/ReportBase.cpp reports/ExcelFile.cpp reports/DatasetsConsole.cpp reports/ResultsDatasetConsole.cpp reports/ReportsPaged.cpp reports/ReportConsole.cpp reports/ReportExcel.cpp reports/ReportExcelCompared.cpp reports/ReportBase.cpp reports/ExcelFile.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
results/Result.cpp results/ResultsDataset.cpp results/Result.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
main/Scores.cpp main/Scores.cpp
) )
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp "${BayesNet}") target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" fimdlp "${BayesNet}")

View File

@@ -6,8 +6,12 @@
#include <algorithm> #include <algorithm>
#include "common/Colors.h" #include "common/Colors.h"
#include "common/CLocale.h" #include "common/CLocale.h"
#include "common/Paths.h"
#include "common/Utils.h" // compute_std
#include "results/Result.h" #include "results/Result.h"
#include "BestResultsExcel.h" #include "BestResultsExcel.h"
#include "BestResultsTex.h"
#include "BestResultsMd.h"
#include "best/Statistics.h" #include "best/Statistics.h"
#include "BestResults.h" #include "BestResults.h"
@@ -51,7 +55,7 @@ namespace platform {
} }
} }
if (update) { if (update) {
bests[datasetName] = { item.at("score").get<double>(), item.at("hyperparameters"), file }; bests[datasetName] = { item.at("score").get<double>(), item.at("hyperparameters"), file, item.at("score_std").get<double>() };
} }
} }
} }
@@ -59,16 +63,12 @@ namespace platform {
std::cerr << Colors::MAGENTA() << "No results found for model " << model << " and score " << score << Colors::RESET() << std::endl; std::cerr << Colors::MAGENTA() << "No results found for model " << model << " and score " << score << Colors::RESET() << std::endl;
exit(1); exit(1);
} }
std::string bestFileName = path + bestResultFile(); std::string bestFileName = path + Paths::bestResultsFile(score, model);
std::ofstream file(bestFileName); std::ofstream file(bestFileName);
file << bests; file << bests;
file.close(); file.close();
return bestFileName; return bestFileName;
} }
std::string BestResults::bestResultFile()
{
return "best_results_" + score + "_" + model + ".json";
}
std::pair<std::string, std::string> getModelScore(std::string name) std::pair<std::string, std::string> getModelScore(std::string name)
{ {
// results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json // results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
@@ -150,7 +150,7 @@ namespace platform {
} }
void BestResults::listFile() void BestResults::listFile()
{ {
std::string bestFileName = path + bestResultFile(); std::string bestFileName = path + Paths::bestResultsFile(score, model);
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest); fclose(fileTest);
} else { } else {
@@ -196,7 +196,7 @@ namespace platform {
auto maxDate = std::filesystem::file_time_type::max(); auto maxDate = std::filesystem::file_time_type::max();
for (const auto& model : models) { for (const auto& model : models) {
this->model = model; this->model = model;
std::string bestFileName = path + bestResultFile(); std::string bestFileName = path + Paths::bestResultsFile(score, model);
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest); fclose(fileTest);
} else { } else {
@@ -213,13 +213,20 @@ namespace platform {
table["dateTable"] = ftime_to_string(maxDate); table["dateTable"] = ftime_to_string(maxDate);
return table; return table;
} }
void BestResults::printTableResults(std::vector<std::string> models, json table)
void BestResults::printTableResults(std::vector<std::string> models, json table, bool tex)
{ {
std::stringstream oss; std::stringstream oss;
oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<std::string>() << std::endl; oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<std::string>() << std::endl;
std::cout << oss.str(); std::cout << oss.str();
std::cout << std::string(oss.str().size() - 8, '-') << std::endl; std::cout << std::string(oss.str().size() - 8, '-') << std::endl;
std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset"); std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset");
auto bestResultsTex = BestResultsTex();
auto bestResultsMd = BestResultsMd();
if (tex) {
bestResultsTex.results_header(models, table.at("dateTable").get<std::string>());
bestResultsMd.results_header(models, table.at("dateTable").get<std::string>());
}
for (const auto& model : models) { for (const auto& model : models) {
std::cout << std::setw(maxModelName) << std::left << model << " "; std::cout << std::setw(maxModelName) << std::left << model << " ";
} }
@@ -230,12 +237,13 @@ namespace platform {
} }
std::cout << std::endl; std::cout << std::endl;
auto i = 0; auto i = 0;
std::map<std::string, double> totals; std::map<std::string, std::vector<double>> totals;
int nDatasets = table.begin().value().size(); int nDatasets = table.begin().value().size();
for (const auto& model : models) {
totals[model] = 0.0;
}
auto datasets = getDatasets(table.begin().value()); auto datasets = getDatasets(table.begin().value());
if (tex) {
bestResultsTex.results_body(datasets, table);
bestResultsMd.results_body(datasets, table);
}
for (auto const& dataset_ : datasets) { for (auto const& dataset_ : datasets) {
auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN(); auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN();
std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " "; std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " ";
@@ -270,7 +278,7 @@ namespace platform {
if (value == -1) { if (value == -1) {
std::cout << Colors::YELLOW() << std::setw(maxModelName) << std::right << "N/A" << " "; std::cout << Colors::YELLOW() << std::setw(maxModelName) << std::right << "N/A" << " ";
} else { } else {
totals[model] += value; totals[model].push_back(value);
std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " "; std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " ";
} }
} }
@@ -281,19 +289,26 @@ namespace platform {
std::cout << std::string(maxModelName, '=') << " "; std::cout << std::string(maxModelName, '=') << " ";
} }
std::cout << std::endl; std::cout << std::endl;
std::cout << Colors::GREEN() << " Totals" << std::string(maxDatasetName - 6, '.') << " "; std::cout << Colors::GREEN() << " Average" << std::string(maxDatasetName - 7, '.') << " ";
double max_value = 0.0; double max_value = 0.0;
std::string best_model = "";
for (const auto& total : totals) { for (const auto& total : totals) {
if (total.second > max_value) { auto actual = std::reduce(total.second.begin(), total.second.end());
max_value = total.second; if (actual > max_value) {
max_value = actual;
best_model = total.first;
} }
} }
if (tex) {
bestResultsTex.results_footer(totals, best_model);
bestResultsMd.results_footer(totals, best_model);
}
for (const auto& model : models) { for (const auto& model : models) {
std::string efectiveColor = Colors::GREEN(); std::string efectiveColor = model == best_model ? Colors::RED() : Colors::GREEN();
if (totals[model] == max_value) { double value = std::reduce(totals[model].begin(), totals[model].end()) / nDatasets;
efectiveColor = Colors::RED(); double std_value = compute_std(totals[model], value);
} std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << value << " ";
std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << totals[model] << " ";
} }
std::cout << std::endl; std::cout << std::endl;
} }
@@ -306,26 +321,34 @@ namespace platform {
json table = buildTableResults(models); json table = buildTableResults(models);
std::vector<std::string> datasets = getDatasets(table.begin().value()); std::vector<std::string> datasets = getDatasets(table.begin().value());
BestResultsExcel excel_report(score, datasets); BestResultsExcel excel_report(score, datasets);
excel_report.reportSingle(model, path + bestResultFile()); excel_report.reportSingle(model, path + Paths::bestResultsFile(score, model));
messageExcelFile(excel_report.getFileName()); messageOutputFile("Excel", excel_report.getFileName());
} }
} }
void BestResults::reportAll(bool excel) void BestResults::reportAll(bool excel, bool tex)
{ {
auto models = getModels(); auto models = getModels();
// Build the table of results // Build the table of results
json table = buildTableResults(models); json table = buildTableResults(models);
std::vector<std::string> datasets = getDatasets(table.begin().value()); std::vector<std::string> datasets = getDatasets(table.begin().value());
// Print the table of results // Print the table of results
printTableResults(models, table); printTableResults(models, table, tex);
// Compute the Friedman test // Compute the Friedman test
std::map<std::string, std::map<std::string, float>> ranksModels; std::map<std::string, std::map<std::string, float>> ranksModels;
if (friedman) { if (friedman) {
Statistics stats(models, datasets, table, significance); Statistics stats(models, datasets, table, significance);
auto result = stats.friedmanTest(); auto result = stats.friedmanTest();
stats.postHocHolmTest(result); stats.postHocHolmTest(result, tex);
ranksModels = stats.getRanks(); ranksModels = stats.getRanks();
} }
if (tex) {
messageOutputFile("TeX", Paths::tex() + Paths::tex_output());
messageOutputFile("MarkDown", Paths::tex() + Paths::md_output());
if (friedman) {
messageOutputFile("TeX", Paths::tex() + Paths::tex_post_hoc());
messageOutputFile("MarkDown", Paths::tex() + Paths::md_post_hoc());
}
}
if (excel) { if (excel) {
BestResultsExcel excel(score, datasets); BestResultsExcel excel(score, datasets);
excel.reportAll(models, table, ranksModels, friedman, significance); excel.reportAll(models, table, ranksModels, friedman, significance);
@@ -346,13 +369,14 @@ namespace platform {
} }
} }
model = models.at(idx); model = models.at(idx);
excel.reportSingle(model, path + bestResultFile()); excel.reportSingle(model, path + Paths::bestResultsFile(score, model));
} }
messageExcelFile(excel.getFileName()); messageOutputFile("Excel", excel.getFileName());
} }
} }
void BestResults::messageExcelFile(const std::string& fileName) void BestResults::messageOutputFile(const std::string& title, const std::string& fileName)
{ {
std::cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << std::endl; std::cout << Colors::YELLOW() << "** " << std::setw(5) << std::left << title
<< " file generated: " << fileName << Colors::RESET() << std::endl;
} }
} }

View File

@@ -1,9 +1,10 @@
#pragma once #ifndef BESTRESULTS_H
#define BESTRESULTS_H
#include <string> #include <string>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using json = nlohmann::ordered_json;
namespace platform { namespace platform {
using json = nlohmann::ordered_json;
class BestResults { class BestResults {
public: public:
explicit BestResults(const std::string& path, const std::string& score, const std::string& model, const std::string& dataset, bool friedman, double significance = 0.05) explicit BestResults(const std::string& path, const std::string& score, const std::string& model, const std::string& dataset, bool friedman, double significance = 0.05)
@@ -12,16 +13,15 @@ namespace platform {
} }
std::string build(); std::string build();
void reportSingle(bool excel); void reportSingle(bool excel);
void reportAll(bool excel); void reportAll(bool excel, bool tex);
void buildAll(); void buildAll();
private: private:
std::vector<std::string> getModels(); std::vector<std::string> getModels();
std::vector<std::string> getDatasets(json table); std::vector<std::string> getDatasets(json table);
std::vector<std::string> loadResultFiles(); std::vector<std::string> loadResultFiles();
void messageExcelFile(const std::string& fileName); void messageOutputFile(const std::string& title, const std::string& fileName);
json buildTableResults(std::vector<std::string> models); json buildTableResults(std::vector<std::string> models);
void printTableResults(std::vector<std::string> models, json table); void printTableResults(std::vector<std::string> models, json table, bool tex);
std::string bestResultFile();
json loadFile(const std::string& fileName); json loadFile(const std::string& fileName);
void listFile(); void listFile();
std::string path; std::string path;
@@ -34,3 +34,4 @@ namespace platform {
int maxDatasetName = 0; int maxDatasetName = 0;
}; };
} }
#endif

View File

@@ -32,7 +32,7 @@ namespace platform {
} }
BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets) : score(score), datasets(datasets) BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets) : score(score), datasets(datasets)
{ {
file_name = "BestResults.xlsx"; file_name = Paths::bestResultsExcel(score);
workbook = workbook_new(getFileName().c_str()); workbook = workbook_new(getFileName().c_str());
setProperties("Best Results"); setProperties("Best Results");
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
@@ -64,19 +64,21 @@ namespace platform {
json data = loadResultData(fileName); json data = loadResultData(fileName);
std::string title = "Best results for " + model; std::string title = "Best results for " + model;
worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]); worksheet_merge_range(worksheet, 0, 0, 0, 5, title.c_str(), styles["headerFirst"]);
// Body header // Body header
row = 3; row = 3;
int col = 1; int col = 1;
writeString(row, 0, "", "bodyHeader"); writeString(row, 0, "#", "bodyHeader");
writeString(row, 1, "Dataset", "bodyHeader"); writeString(row, 1, "Dataset", "bodyHeader");
writeString(row, 2, "Score", "bodyHeader"); writeString(row, 2, "Score", "bodyHeader");
writeString(row, 3, "File", "bodyHeader"); writeString(row, 3, "File", "bodyHeader");
writeString(row, 4, "Hyperparameters", "bodyHeader"); writeString(row, 4, "Hyperparameters", "bodyHeader");
writeString(row, 5, "F", "bodyHeader");
auto i = 0; auto i = 0;
std::string hyperparameters; std::string hyperparameters;
int hypSize = 22; int hypSize = 22;
std::map<std::string, std::string> files; // map of files imported and their tabs std::map<std::string, std::string> files; // map of files imported and their tabs
int numLines = data.size();
for (auto const& item : data.items()) { for (auto const& item : data.items()) {
row++; row++;
writeInt(row, 0, i++, "ints"); writeInt(row, 0, i++, "ints");
@@ -104,6 +106,8 @@ namespace platform {
hypSize = hyperparameters.size(); hypSize = hyperparameters.size();
} }
writeString(row, 4, hyperparameters, "text"); writeString(row, 4, hyperparameters, "text");
std::string countHyperparameters = "=COUNTIF(e5:e" + std::to_string(numLines + 4) + ", e" + std::to_string(row + 1) + ")";
worksheet_write_formula(worksheet, row, 5, countHyperparameters.c_str(), efectiveStyle("ints"));
} }
row++; row++;
// Set Totals // Set Totals
@@ -180,7 +184,7 @@ namespace platform {
// Body header // Body header
row = 3; row = 3;
int col = 1; int col = 1;
writeString(row, 0, "", "bodyHeader"); writeString(row, 0, "#", "bodyHeader");
writeString(row, 1, "Dataset", "bodyHeader"); writeString(row, 1, "Dataset", "bodyHeader");
for (const auto& model : models) { for (const auto& model : models) {
writeString(row, ++col, model.c_str(), "bodyHeader"); writeString(row, ++col, model.c_str(), "bodyHeader");

View File

@@ -1,14 +1,13 @@
#pragma once #ifndef BESTRESULTSEXCEL_H
#define BESTRESULTSEXCEL_H
#include <vector> #include <vector>
#include <map> #include <map>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "reports/ExcelFile.h" #include "reports/ExcelFile.h"
using json = nlohmann::ordered_json;
namespace platform { namespace platform {
using json = nlohmann::ordered_json;
class BestResultsExcel : public ExcelFile { class BestResultsExcel : public ExcelFile {
public: public:
BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets); BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets);
@@ -34,3 +33,4 @@ namespace platform {
int datasetNameSize = 25; // Min size of the column int datasetNameSize = 25; // Min size of the column
}; };
} }
#endif

103
src/best/BestResultsMd.cpp Normal file
View File

@@ -0,0 +1,103 @@
#include <iostream>
#include "BestResultsMd.h"
#include "common/Utils.h" // compute_std
namespace platform {
using json = nlohmann::ordered_json;
// Create/truncate the Markdown output file; a failure is fatal for the report.
void BestResultsMd::openMdFile(const std::string& name)
{
    handler.open(name);
    if (handler.is_open())
        return;
    std::cerr << "Error opening file " << name << std::endl;
    exit(1);
}
// Remember the model list for the body/footer, open the output file and emit
// the generation banner (HTML comment) plus the two Markdown table header rows.
void BestResultsMd::results_header(const std::vector<std::string>& models, const std::string& date)
{
    this->models = models;
    openMdFile(Paths::tex() + Paths::md_output());
    handler << "<!-- This file has been generated by the platform program" << std::endl
        << " Date: " << date << std::endl
        << "" << std::endl
        << " Table of results" << std::endl
        << "-->" << std::endl;
    // Title row: row number, dataset name, then one column per model.
    handler << "| # | Dataset |";
    for (const auto& name : models)
        handler << " " << name << " |";
    handler << std::endl;
    // Alignment row: right-align the index, left-align the dataset, center scores.
    handler << "|--: | :--- |";
    for (size_t k = 0; k < models.size(); ++k)
        handler << " :---: |";
    handler << std::endl;
}
void BestResultsMd::results_body(const std::vector<std::string>& datasets, json& table)
{
int i = 0;
for (auto const& dataset : datasets) {
// Find out max value for this dataset
double max_value = 0;
// Find out the max value for this dataset
for (const auto& model : models) {
double value;
try {
value = table[model].at(dataset).at(0).get<double>();
}
catch (nlohmann::json_abi_v3_11_3::detail::out_of_range err) {
value = -1.0;
}
if (value > max_value) {
max_value = value;
}
}
handler << "| " << ++i << " | " << dataset.c_str() << " | ";
for (const auto& model : models) {
double value = table[model].at(dataset).at(0).get<double>();
double std_value = table[model].at(dataset).at(3).get<double>();
const char* bold = value == max_value ? "**" : "";
handler << bold << std::setprecision(4) << std::fixed << value << "±" << std::setprecision(3) << std_value << bold << " | ";
}
handler << std::endl;
}
}
// Emit the closing "Average Score" row (mean ± std per model, best model bold)
// and close the file. totals maps model -> the per-dataset scores accumulated
// by the caller (printTableResults); only non-N/A values were pushed there.
void BestResultsMd::results_footer(const std::map<std::string, std::vector<double>>& totals, const std::string& best_model)
{
    handler << "| | **Average Score** | ";
    // Guard against an empty map: dereferencing begin() of an empty container is UB.
    int nDatasets = totals.empty() ? 0 : static_cast<int>(totals.begin()->second.size());
    for (const auto& model : models) {
        double value = std::reduce(totals.at(model).begin(), totals.at(model).end()) / nDatasets;
        double std_value = compute_std(totals.at(model), value);
        const char* bold = model == best_model ? "**" : "";
        handler << bold << std::setprecision(4) << std::fixed << value << "±" << std::setprecision(3) << std::fixed << std_value << bold << " | ";
    }
    // Bug fix: terminate the last table row with a newline so the Markdown
    // file does not end mid-row.
    handler << std::endl;
    handler.close();
}
// Write the post-hoc Holm test results as a 7-column Markdown table
// (classifier, pvalue, rank, win, tie, loss, H0 verdict). The control model
// row shows dashes for pvalue, win/tie/loss and the H0 column.
void BestResultsMd::holm_test(struct HolmResult& holmResult, const std::string& date)
{
    auto file_name = Paths::tex() + Paths::md_post_hoc();
    openMdFile(file_name);
    handler << "<!-- This file has been generated by the platform program" << std::endl;
    handler << " Date: " << date << std::endl;
    handler << std::endl;
    handler << " Post-hoc handler test" << std::endl;
    handler << "-->" << std::endl;
    handler << "Post-hoc Holm test: H<sub>0</sub>: There is no significant differences between the control model and the other models." << std::endl << std::endl;
    handler << "| classifier | pvalue | rank | win | tie | loss | H<sub>0</sub> |" << std::endl;
    handler << "| :-- | --: | --: | --:| --: | --: | :--: |" << std::endl;
    for (auto const& line : holmResult.holmLines) {
        // Non-rejected p-values are emphasized (H0 accepted).
        auto textStatus = !line.reject ? "**" : " ";
        if (line.model == holmResult.model) {
            // Bug fix: the control row emitted only 6 cells for a 7-column
            // table (the H0 cell was missing), producing a malformed row.
            handler << "| " << line.model << " | - | " << std::fixed << std::setprecision(2) << line.rank << " | - | - | - | - |" << std::endl;
        } else {
            handler << "| " << line.model << " | " << textStatus << std::scientific << std::setprecision(4) << line.pvalue << textStatus << " |";
            handler << std::fixed << std::setprecision(2) << line.rank << " | " << line.wtl.win << " | " << line.wtl.tie << " | " << line.wtl.loss << " |";
            handler << (line.reject ? "rejected" : "**accepted**") << " |" << std::endl;
        }
    }
    handler << std::endl;
    handler.close();
}
}

24
src/best/BestResultsMd.h Normal file
View File

@@ -0,0 +1,24 @@
#ifndef BEST_RESULTS_MD_H
#define BEST_RESULTS_MD_H
#include <fstream>  // bug fix: std::ofstream member below needed a transitive include
#include <map>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "common/Paths.h"
#include "Statistics.h"
namespace platform {
    using json = nlohmann::ordered_json;
    // Writes the best-results comparison table and the post-hoc Holm test
    // as Markdown files (paths taken from Paths::md_output()/md_post_hoc()).
    // Usage: results_header() -> results_body() -> results_footer(), which
    // closes the file; holm_test() writes an independent file.
    class BestResultsMd {
    public:
        BestResultsMd() = default;
        ~BestResultsMd() = default;
        // Opens the output file and writes the banner plus the table header rows.
        void results_header(const std::vector<std::string>& models, const std::string& date);
        // Writes one row per dataset with score±std per model (best in bold).
        void results_body(const std::vector<std::string>& datasets, json& table);
        // Writes the average row and closes the file.
        void results_footer(const std::map<std::string, std::vector<double>>& totals, const std::string& best_model);
        // Writes the post-hoc Holm test table to its own Markdown file.
        void holm_test(struct HolmResult& holmResult, const std::string& date);
    private:
        // Opens `name` for writing; exits the program on failure.
        void openMdFile(const std::string& name);
        std::ofstream handler;               // current output stream
        std::vector<std::string> models;     // model column order, set by results_header
    };
}
#endif

117
src/best/BestResultsTex.cpp Normal file
View File

@@ -0,0 +1,117 @@
#include <iostream>
#include "BestResultsTex.h"
#include "common/Utils.h" // compute_std
namespace platform {
using json = nlohmann::ordered_json;
// Create/truncate the TeX output file; a failure is fatal for the report.
void BestResultsTex::openTexFile(const std::string& name)
{
    handler.open(name);
    if (handler.is_open())
        return;
    std::cerr << "Error opening file " << name << std::endl;
    exit(1);
}
// Remember the model list, open the output file and emit the LaTeX preamble:
// a comment banner, the table environment and the header row of the tabular.
void BestResultsTex::results_header(const std::vector<std::string>& models, const std::string& date)
{
    this->models = models;
    openTexFile(Paths::tex() + Paths::tex_output());
    handler << "%% This file has been generated by the platform program" << std::endl
        << "%% Date: " << date << std::endl
        << "%%" << std::endl
        << "%% Table of results" << std::endl
        << "%%" << std::endl;
    // Column spec: one 'r' column for the row number plus a centered
    // column per model.
    handler << "\\begin{table}[htbp] " << std::endl
        << "\\centering " << std::endl
        << "\\tiny " << std::endl
        << "\\renewcommand{\\arraystretch }{1.2} " << std::endl
        << "\\renewcommand{\\tabcolsep }{0.07cm} " << std::endl
        << "\\caption{Accuracy results(mean $\\pm$ std) for all the algorithms and datasets} " << std::endl
        << "\\label{tab:results_accuracy}" << std::endl
        << "\\begin{tabular} {{r" << std::string(models.size(), 'c') << "}}" << std::endl
        << "\\hline " << std::endl
        << "" << std::endl;
    for (const auto& name : models)
        handler << "& " << name;
    handler << "\\\\" << std::endl
        << "\\hline" << std::endl;
}
void BestResultsTex::results_body(const std::vector<std::string>& datasets, json& table)
{
int i = 0;
for (auto const& dataset : datasets) {
// Find out max value for this dataset
double max_value = 0;
// Find out the max value for this dataset
for (const auto& model : models) {
double value;
try {
value = table[model].at(dataset).at(0).get<double>();
}
catch (nlohmann::json_abi_v3_11_3::detail::out_of_range err) {
value = -1.0;
}
if (value > max_value) {
max_value = value;
}
}
handler << ++i << " ";
for (const auto& model : models) {
double value = table[model].at(dataset).at(0).get<double>();
double std_value = table[model].at(dataset).at(3).get<double>();
const char* bold = value == max_value ? "\\bfseries" : "";
handler << "& " << bold << std::setprecision(4) << std::fixed << value << "$\\pm$" << std::setprecision(3) << std_value;
}
handler << "\\\\" << std::endl;
}
}
void BestResultsTex::results_footer(const std::map<std::string, std::vector<double>>& totals, const std::string& best_model)
{
handler << "\\hline" << std::endl;
handler << "Average ";
int nDatasets = totals.begin()->second.size();
for (const auto& model : models) {
double value = std::reduce(totals.at(model).begin(), totals.at(model).end()) / nDatasets;
double std_value = compute_std(totals.at(model), value);
const char* bold = model == best_model ? "\\bfseries" : "";
handler << "& " << bold << std::setprecision(4) << std::fixed << value << "$\\pm$" << std::setprecision(3) << std::fixed << std_value;
}
handler << "\\\\" << std::endl;
handler << "\\hline " << std::endl;
handler << "\\end{tabular}" << std::endl;
handler << "\\end{table}" << std::endl;
handler.close();
}
// Write the post-hoc Holm test results as a 6-column LaTeX table
// (classifier, pvalue, rank, win, tie, loss). The control model row shows
// dashes for pvalue and win/tie/loss; non-rejected p-values are set in bold.
void BestResultsTex::holm_test(struct HolmResult& holmResult, const std::string& date)
{
    openTexFile(Paths::tex() + Paths::tex_post_hoc());
    handler << "%% This file has been generated by the platform program" << std::endl
        << "%% Date: " << date << std::endl
        << "%%" << std::endl
        << "%% Post-hoc handler test" << std::endl
        << "%%" << std::endl
        << "\\begin{table}[htbp]" << std::endl
        << "\\centering" << std::endl
        << "\\caption{Results of the post-hoc test for the mean accuracy of the algorithms.}\\label{tab:tests}" << std::endl
        << "\\begin{tabular}{lrrrrr}" << std::endl
        << "\\hline" << std::endl
        << "classifier & pvalue & rank & win & tie & loss\\\\" << std::endl
        << "\\hline" << std::endl;
    for (auto const& row : holmResult.holmLines) {
        if (row.model == holmResult.model) {
            // Control model: no p-value or win/tie/loss counts to report.
            handler << row.model << " & - & " << std::fixed << std::setprecision(2) << row.rank << " & - & - & - \\\\" << std::endl;
            continue;
        }
        const char* emphasis = row.reject ? " " : "\\bf ";
        handler << row.model << " & " << emphasis << std::scientific << std::setprecision(4) << row.pvalue << " & ";
        handler << std::fixed << std::setprecision(2) << row.rank << " & " << row.wtl.win << " & " << row.wtl.tie << " & " << row.wtl.loss << "\\\\" << std::endl;
    }
    handler << "\\hline " << std::endl
        << "\\end{tabular}" << std::endl
        << "\\end{table}" << std::endl;
    handler.close();
}
}

24
src/best/BestResultsTex.h Normal file
View File

@@ -0,0 +1,24 @@
#ifndef BEST_RESULTS_TEX_H
#define BEST_RESULTS_TEX_H
#include <fstream>  // bug fix: std::ofstream member below needed a transitive include
#include <map>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "common/Paths.h"
#include "Statistics.h"
namespace platform {
    using json = nlohmann::ordered_json;
    // Writes the best-results comparison table and the post-hoc Holm test
    // as LaTeX files (paths taken from Paths::tex_output()/tex_post_hoc()).
    // Usage: results_header() -> results_body() -> results_footer(), which
    // closes the file; holm_test() writes an independent file.
    class BestResultsTex {
    public:
        BestResultsTex() = default;
        ~BestResultsTex() = default;
        // Opens the output file and writes the banner plus the tabular header.
        void results_header(const std::vector<std::string>& models, const std::string& date);
        // Writes one row per dataset with score±std per model (best in bold).
        void results_body(const std::vector<std::string>& datasets, json& table);
        // Writes the average row, closes the environments and the file.
        void results_footer(const std::map<std::string, std::vector<double>>& totals, const std::string& best_model);
        // Writes the post-hoc Holm test table to its own TeX file.
        void holm_test(struct HolmResult& holmResult, const std::string& date);
    private:
        // Opens `name` for writing; exits the program on failure.
        void openTexFile(const std::string& name);
        std::ofstream handler;               // current output stream
        std::vector<std::string> models;     // model column order, set by results_header
    };
}
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef BESTSCORE_H
#define BESTSCORE_H
#include <string> #include <string>
#include <map> #include <map>
#include <utility> #include <utility>
@@ -24,3 +24,4 @@ namespace platform {
} }
}; };
} }
#endif

View File

@@ -4,6 +4,8 @@
#include "common/Colors.h" #include "common/Colors.h"
#include "common/Symbols.h" #include "common/Symbols.h"
#include "common/CLocale.h" #include "common/CLocale.h"
#include "BestResultsTex.h"
#include "BestResultsMd.h"
#include "Statistics.h" #include "Statistics.h"
@@ -113,7 +115,7 @@ namespace platform {
} }
} }
void Statistics::postHocHolmTest(bool friedmanResult) void Statistics::postHocHolmTest(bool friedmanResult, bool tex)
{ {
if (!fitted) { if (!fitted) {
fit(); fit();
@@ -130,7 +132,7 @@ namespace platform {
stats[i] = 0.0; stats[i] = 0.0;
continue; continue;
} }
double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff; double z = std::abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
double p_value = (long double)2 * (1 - cdf(dist, z)); double p_value = (long double)2 * (1 - cdf(dist, z));
stats[i] = p_value; stats[i] = p_value;
} }
@@ -195,6 +197,12 @@ namespace platform {
if (output) { if (output) {
std::cout << oss.str(); std::cout << oss.str();
} }
if (tex) {
BestResultsTex bestResultsTex;
BestResultsMd bestResultsMd;
bestResultsTex.holm_test(holmResult, get_date() + " " + get_time());
bestResultsMd.holm_test(holmResult, get_date() + " " + get_time());
}
} }
bool Statistics::friedmanTest() bool Statistics::friedmanTest()
{ {

View File

@@ -1,13 +1,13 @@
#pragma once #ifndef STATISTICS_H
#define STATISTICS_H
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#include <map> #include <map>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using json = nlohmann::ordered_json;
namespace platform { namespace platform {
using json = nlohmann::ordered_json;
struct WTL { struct WTL {
int win; int win;
int tie; int tie;
@@ -34,7 +34,7 @@ namespace platform {
public: public:
Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance = 0.05, bool output = true); Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance = 0.05, bool output = true);
bool friedmanTest(); bool friedmanTest();
void postHocHolmTest(bool friedmanResult); void postHocHolmTest(bool friedmanResult, bool tex=false);
FriedmanResult& getFriedmanResult(); FriedmanResult& getFriedmanResult();
HolmResult& getHolmResult(); HolmResult& getHolmResult();
std::map<std::string, std::map<std::string, float>>& getRanks(); std::map<std::string, std::map<std::string, float>>& getRanks();
@@ -60,3 +60,4 @@ namespace platform {
std::map<std::string, std::map<std::string, float>> ranksModels; std::map<std::string, std::map<std::string, float>> ranksModels;
}; };
} }
#endif

View File

@@ -5,26 +5,18 @@
#include "common/Paths.h" #include "common/Paths.h"
#include "common/Colors.h" #include "common/Colors.h"
#include "best/BestResults.h" #include "best/BestResults.h"
#include "config.h" #include "config_platform.h"
void manageArguments(argparse::ArgumentParser& program) void manageArguments(argparse::ArgumentParser& program)
{ {
program.add_argument("-m", "--model") program.add_argument("-m", "--model")
.help("Model to use: " + platform::Models::instance()->toString() + " or any") .help("Model to use or any")
.action([](const std::string& value) { .default_value("any");
std::vector<std::string> valid(platform::Models::instance()->getNames());
valid.push_back("any");
static const std::vector<std::string> choices = valid;
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString() + " or any");
}
);
program.add_argument("-d", "--dataset").default_value("any").help("Filter results of the selected model) (any for all datasets)"); program.add_argument("-d", "--dataset").default_value("any").help("Filter results of the selected model) (any for all datasets)");
program.add_argument("-s", "--score").default_value("accuracy").help("Filter results of the score name supplied"); program.add_argument("-s", "--score").default_value("accuracy").help("Filter results of the score name supplied");
program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true); program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true); program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true);
program.add_argument("--tex").help("Output result table to TeX file").default_value(false).implicit_value(true);
program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const std::string& value) { program.add_argument("--level").help("significance level").default_value(0.05).scan<'g', double>().action([](const std::string& value) {
try { try {
auto k = std::stod(value); auto k = std::stod(value);
@@ -46,7 +38,7 @@ int main(int argc, char** argv)
argparse::ArgumentParser program("b_best", { platform_project_version.begin(), platform_project_version.end() }); argparse::ArgumentParser program("b_best", { platform_project_version.begin(), platform_project_version.end() });
manageArguments(program); manageArguments(program);
std::string model, dataset, score; std::string model, dataset, score;
bool build, report, friedman, excel; bool build, report, friedman, excel, tex;
double level; double level;
try { try {
program.parse_args(argc, argv); program.parse_args(argc, argv);
@@ -55,6 +47,7 @@ int main(int argc, char** argv)
score = program.get<std::string>("score"); score = program.get<std::string>("score");
friedman = program.get<bool>("friedman"); friedman = program.get<bool>("friedman");
excel = program.get<bool>("excel"); excel = program.get<bool>("excel");
tex = program.get<bool>("tex");
level = program.get<double>("level"); level = program.get<double>("level");
if (model == "" || score == "") { if (model == "" || score == "") {
throw std::runtime_error("Model and score name must be supplied"); throw std::runtime_error("Model and score name must be supplied");
@@ -74,7 +67,7 @@ int main(int argc, char** argv)
auto results = platform::BestResults(platform::Paths::results(), score, model, dataset, friedman, level); auto results = platform::BestResults(platform::Paths::results(), score, model, dataset, friedman, level);
if (model == "any") { if (model == "any") {
results.buildAll(); results.buildAll();
results.reportAll(excel); results.reportAll(excel, tex);
} else { } else {
std::string fileName = results.build(); std::string fileName = results.build();
std::cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << std::endl; std::cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << std::endl;

View File

@@ -11,7 +11,7 @@
#include "common/Colors.h" #include "common/Colors.h"
#include "common/DotEnv.h" #include "common/DotEnv.h"
#include "grid/GridSearch.h" #include "grid/GridSearch.h"
#include "config.h" #include "config_platform.h"
using json = nlohmann::ordered_json; using json = nlohmann::ordered_json;
const int MAXL = 133; const int MAXL = 133;
@@ -93,8 +93,10 @@ void list_dump(std::string& model)
if (item.first.size() > max_dataset) { if (item.first.size() > max_dataset) {
max_dataset = item.first.size(); max_dataset = item.first.size();
} }
if (item.second.dump().size() > max_hyper) { for (auto const& [key, value] : item.second.items()) {
max_hyper = item.second.dump().size(); if (value.dump().size() > max_hyper) {
max_hyper = value.dump().size();
}
} }
} }
std::cout << Colors::GREEN() << left << " # " << left << setw(max_dataset) << "Dataset" << " #Com. " std::cout << Colors::GREEN() << left << " # " << left << setw(max_dataset) << "Dataset" << " #Com. "
@@ -106,7 +108,12 @@ void list_dump(std::string& model)
std::cout << color; std::cout << color;
auto num_combinations = data.getNumCombinations(item.first); auto num_combinations = data.getNumCombinations(item.first);
std::cout << setw(3) << fixed << right << ++index << left << " " << setw(max_dataset) << item.first std::cout << setw(3) << fixed << right << ++index << left << " " << setw(max_dataset) << item.first
<< " " << setw(5) << right << num_combinations << " " << setw(max_hyper) << left << item.second.dump() << std::endl; << " " << setw(5) << right << num_combinations << " ";
std::string prefix = "";
for (auto const& [key, value] : item.second.items()) {
std::cout << prefix << setw(max_hyper) << std::left << value.dump() << std::endl;
prefix = string(11 + max_dataset, ' ');
}
} }
std::cout << Colors::RESET() << std::endl; std::cout << Colors::RESET() << std::endl;
} }

View File

@@ -10,10 +10,10 @@
#include "common/Datasets.h" #include "common/Datasets.h"
#include "reports/DatasetsExcel.h" #include "reports/DatasetsExcel.h"
#include "reports/DatasetsConsole.h" #include "reports/DatasetsConsole.h"
#include "reports/ResultsDatasetConsole.h" #include "results/ResultsDatasetConsole.h"
#include "results/ResultsDataset.h" #include "results/ResultsDataset.h"
#include "results/ResultsDatasetExcel.h" #include "results/ResultsDatasetExcel.h"
#include "config.h" #include "config_platform.h"
void list_datasets(argparse::ArgumentParser& program) void list_datasets(argparse::ArgumentParser& program)
@@ -76,18 +76,8 @@ int main(int argc, char** argv)
} }
); );
results_command.add_argument("-m", "--model") results_command.add_argument("-m", "--model")
.help("Model to use: " + platform::Models::instance()->toString() + " or any") .help("Model to use or any")
.default_value("any") .default_value("any");
.action([](const std::string& value) {
std::vector<std::string> valid(platform::Models::instance()->getNames());
valid.push_back("any");
static const std::vector<std::string> choices = valid;
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString() + " or any");
}
);
results_command.add_argument("--excel").help("Output in Excel format").default_value(false).implicit_value(true); results_command.add_argument("--excel").help("Output in Excel format").default_value(false).implicit_value(true);
results_command.add_argument("-s", "--score").default_value("accuracy").help("Filter results of the score name supplied"); results_command.add_argument("-s", "--score").default_value("accuracy").help("Filter results of the score name supplied");

View File

@@ -7,11 +7,12 @@
#include "common/Paths.h" #include "common/Paths.h"
#include "main/Models.h" #include "main/Models.h"
#include "main/modelRegister.h" #include "main/modelRegister.h"
#include "config.h" #include "config_platform.h"
using json = nlohmann::ordered_json; using json = nlohmann::ordered_json;
void manageArguments(argparse::ArgumentParser& program) void manageArguments(argparse::ArgumentParser& program)
{ {
auto env = platform::DotEnv(); auto env = platform::DotEnv();
@@ -35,6 +36,7 @@ void manageArguments(argparse::ArgumentParser& program)
program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment"); program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \ program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \
"Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format."); "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
program.add_argument("--hyper-best").default_value(false).help("Use best results of the model as source of hyperparameters").implicit_value(true);
program.add_argument("-m", "--model") program.add_argument("-m", "--model")
.help("Model to use: " + platform::Models::instance()->toString()) .help("Model to use: " + platform::Models::instance()->toString())
.action([](const std::string& value) { .action([](const std::string& value) {
@@ -47,6 +49,23 @@ void manageArguments(argparse::ArgumentParser& program)
); );
program.add_argument("--title").default_value("").help("Experiment title"); program.add_argument("--title").default_value("").help("Experiment title");
program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true);
auto valid_choices = env.valid_tokens("discretize_algo");
auto& disc_arg = program.add_argument("--discretize-algo").help("Algorithm to use in discretization. Valid values: " + env.valid_values("discretize_algo")).default_value(env.get("discretize_algo"));
for (auto choice : valid_choices) {
disc_arg.choices(choice);
}
valid_choices = env.valid_tokens("smooth_strat");
auto& smooth_arg = program.add_argument("--smooth-strat").help("Smooth strategy used in Bayes Network node initialization. Valid values: " + env.valid_values("smooth_strat")).default_value(env.get("smooth_strat"));
for (auto choice : valid_choices) {
smooth_arg.choices(choice);
}
auto& score_arg = program.add_argument("-s", "--score").help("Score to use. Valid values: " + env.valid_values("score")).default_value(env.get("score"));
valid_choices = env.valid_tokens("score");
for (auto choice : valid_choices) {
score_arg.choices(choice);
}
program.add_argument("--generate-fold-files").help("generate fold information in datasets_experiment folder").default_value(false).implicit_value(true);
program.add_argument("--graph").help("generate graphviz dot files with the model").default_value(false).implicit_value(true);
program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true); program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true);
program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true); program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true); program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true);
@@ -66,16 +85,16 @@ void manageArguments(argparse::ArgumentParser& program)
throw std::runtime_error("Number of folds must be an integer"); throw std::runtime_error("Number of folds must be an integer");
}}); }});
auto seed_values = env.getSeeds(); auto seed_values = env.getSeeds();
program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); program.add_argument("--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
} }
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
argparse::ArgumentParser program("b_main", { platform_project_version.begin(), platform_project_version.end() }); argparse::ArgumentParser program("b_main", { platform_project_version.begin(), platform_project_version.end() });
manageArguments(program); manageArguments(program);
std::string file_name, model_name, title, hyperparameters_file, datasets_file; std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat, score;
json hyperparameters_json; json hyperparameters_json;
bool discretize_dataset, stratified, saveResults, quiet, no_train_score; bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files, graph, hyper_best;
std::vector<int> seeds; std::vector<int> seeds;
std::vector<std::string> file_names; std::vector<std::string> file_names;
std::vector<std::string> filesToTest; std::vector<std::string> filesToTest;
@@ -87,20 +106,33 @@ int main(int argc, char** argv)
datasets_file = program.get<std::string>("datasets-file"); datasets_file = program.get<std::string>("datasets-file");
model_name = program.get<std::string>("model"); model_name = program.get<std::string>("model");
discretize_dataset = program.get<bool>("discretize"); discretize_dataset = program.get<bool>("discretize");
discretize_algo = program.get<std::string>("discretize-algo");
smooth_strat = program.get<std::string>("smooth-strat");
stratified = program.get<bool>("stratified"); stratified = program.get<bool>("stratified");
quiet = program.get<bool>("quiet"); quiet = program.get<bool>("quiet");
graph = program.get<bool>("graph");
n_folds = program.get<int>("folds"); n_folds = program.get<int>("folds");
score = program.get<std::string>("score");
seeds = program.get<std::vector<int>>("seeds"); seeds = program.get<std::vector<int>>("seeds");
auto hyperparameters = program.get<std::string>("hyperparameters"); auto hyperparameters = program.get<std::string>("hyperparameters");
hyperparameters_json = json::parse(hyperparameters); hyperparameters_json = json::parse(hyperparameters);
hyperparameters_file = program.get<std::string>("hyper-file"); hyperparameters_file = program.get<std::string>("hyper-file");
no_train_score = program.get<bool>("no-train-score"); no_train_score = program.get<bool>("no-train-score");
if (hyperparameters_file != "" && hyperparameters != "{}") { hyper_best = program.get<bool>("hyper-best");
throw runtime_error("hyperparameters and hyper_file are mutually exclusive"); generate_fold_files = program.get<bool>("generate-fold-files");
if (hyper_best) {
// Build the best results file_name
hyperparameters_file = platform::Paths::results() + platform::Paths::bestResultsFile(score, model_name);
// ignore this parameter
hyperparameters = "{}";
} else {
if (hyperparameters_file != "" && hyperparameters != "{}") {
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
}
} }
title = program.get<std::string>("title"); title = program.get<std::string>("title");
if (title == "" && file_name == "") { if (title == "" && file_name == "all") {
throw runtime_error("title is mandatory if dataset is not provided"); throw runtime_error("title is mandatory if all datasets are to be tested");
} }
saveResults = program.get<bool>("save"); saveResults = program.get<bool>("save");
} }
@@ -109,7 +141,7 @@ int main(int argc, char** argv)
cerr << program; cerr << program;
exit(1); exit(1);
} }
auto datasets = platform::Datasets(discretize_dataset, platform::Paths::datasets()); auto datasets = platform::Datasets(false, platform::Paths::datasets());
if (datasets_file != "") { if (datasets_file != "") {
ifstream catalog(datasets_file); ifstream catalog(datasets_file);
if (catalog.is_open()) { if (catalog.is_open()) {
@@ -165,7 +197,7 @@ int main(int argc, char** argv)
platform::HyperParameters test_hyperparams; platform::HyperParameters test_hyperparams;
if (hyperparameters_file != "") { if (hyperparameters_file != "") {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file); test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file, hyper_best);
} else { } else {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json); test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
} }
@@ -175,24 +207,28 @@ int main(int argc, char** argv)
*/ */
auto env = platform::DotEnv(); auto env = platform::DotEnv();
auto experiment = platform::Experiment(); auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("c++").setLanguageVersion("13.2.1"); experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1");
experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat);
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy"); experiment.setStratified(stratified).setNFolds(n_folds).setScoreName(score);
experiment.setHyperparameters(test_hyperparams); experiment.setHyperparameters(test_hyperparams);
for (auto seed : seeds) { for (auto seed : seeds) {
experiment.addRandomSeed(seed); experiment.addRandomSeed(seed);
} }
platform::Timer timer; platform::Timer timer;
timer.start(); timer.start();
experiment.go(filesToTest, quiet, no_train_score); experiment.go(filesToTest, quiet, no_train_score, generate_fold_files, graph);
experiment.setDuration(timer.getDuration()); experiment.setDuration(timer.getDuration());
if (saveResults) {
experiment.saveResult();
}
if (!quiet) { if (!quiet) {
// Classification report if only one dataset is tested // Classification report if only one dataset is tested
experiment.report(filesToTest.size() == 1); experiment.report(filesToTest.size() == 1);
} }
if (saveResults) {
experiment.saveResult();
}
if (graph) {
experiment.saveGraph();
}
std::cout << "Done!" << std::endl; std::cout << "Done!" << std::endl;
return 0; return 0;
} }

View File

@@ -4,7 +4,10 @@
#include <unistd.h> #include <unistd.h>
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include "manage/ManageScreen.h" #include "manage/ManageScreen.h"
#include "config.h" #include <signal.h>
#include "config_platform.h"
platform::ManageScreen* manager = nullptr;
void manageArguments(argparse::ArgumentParser& program, int argc, char** argv) void manageArguments(argparse::ArgumentParser& program, int argc, char** argv)
{ {
@@ -42,6 +45,11 @@ std::pair<int, int> numRowsCols()
return { ts.ws_row, ts.ws_col }; return { ts.ws_row, ts.ws_col };
#endif /* TIOCGSIZE */ #endif /* TIOCGSIZE */
} }
void handleResize(int sig)
{
auto [rows, cols] = numRowsCols();
manager->updateSize(rows, cols);
}
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
@@ -50,13 +58,15 @@ int main(int argc, char** argv)
std::string model = program.get<std::string>("model"); std::string model = program.get<std::string>("model");
std::string score = program.get<std::string>("score"); std::string score = program.get<std::string>("score");
std::string platform = program.get<std::string>("platform"); std::string platform = program.get<std::string>("platform");
auto complete = program.get<bool>("complete"); bool complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial"); bool partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare"); bool compare = program.get<bool>("compare");
auto [rows, cols] = numRowsCols();
if (complete) if (complete)
partial = false; partial = false;
auto manager = platform::ManageScreen(rows, cols, model, score, platform, complete, partial, compare); signal(SIGWINCH, handleResize);
manager.doMenu(); auto [rows, cols] = numRowsCols();
manager = new platform::ManageScreen(rows, cols, model, score, platform, complete, partial, compare);
manager->doMenu();
delete manager;
return 0; return 0;
} }

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef CLOCALE_H
#define CLOCALE_H
#include <locale> #include <locale>
#include <iostream> #include <iostream>
#include <string> #include <string>
@@ -19,3 +19,4 @@ namespace platform {
} }
}; };
} }
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef COLORS_H
#define COLORS_H
#include <string> #include <string>
class Colors { class Colors {
public: public:
@@ -27,3 +27,4 @@ public:
static std::string CONCEALED() { return "\033[8m"; } static std::string CONCEALED() { return "\033[8m"; }
static std::string CLRSCR() { return "\033[2J\033[1;1H"; } static std::string CLRSCR() { return "\033[2J\033[1;1H"; }
}; };
#endif

View File

@@ -1,24 +1,26 @@
#include <ArffFiles.h> #include <ArffFiles.hpp>
#include <fstream> #include <fstream>
#include "Dataset.h" #include "Dataset.h"
namespace platform { namespace platform {
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType) const std::string message_dataset_not_loaded = "Dataset not loaded.";
Dataset::Dataset(const Dataset& dataset) :
path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples),
n_features(dataset.n_features), numericFeatures(dataset.numericFeatures), features(dataset.features),
states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y),
X_train(dataset.X_train), X_test(dataset.X_test), Xv(dataset.Xv), yv(dataset.yv),
fileType(dataset.fileType)
{ {
} }
std::string Dataset::getName() const std::string Dataset::getName() const
{ {
return name; return name;
} }
std::string Dataset::getClassName() const
{
return className;
}
std::vector<std::string> Dataset::getFeatures() const std::vector<std::string> Dataset::getFeatures() const
{ {
if (loaded) { if (loaded) {
return features; return features;
} else { } else {
throw std::invalid_argument("Dataset not loaded."); throw std::invalid_argument(message_dataset_not_loaded);
} }
} }
int Dataset::getNFeatures() const int Dataset::getNFeatures() const
@@ -26,7 +28,7 @@ namespace platform {
if (loaded) { if (loaded) {
return n_features; return n_features;
} else { } else {
throw std::invalid_argument("Dataset not loaded."); throw std::invalid_argument(message_dataset_not_loaded);
} }
} }
int Dataset::getNSamples() const int Dataset::getNSamples() const
@@ -34,7 +36,40 @@ namespace platform {
if (loaded) { if (loaded) {
return n_samples; return n_samples;
} else { } else {
throw std::invalid_argument("Dataset not loaded."); throw std::invalid_argument(message_dataset_not_loaded);
}
}
std::string Dataset::getClassName() const
{
return className;
}
int Dataset::getNClasses() const
{
if (loaded) {
return *std::max_element(yv.begin(), yv.end()) + 1;
} else {
throw std::invalid_argument(message_dataset_not_loaded);
}
}
std::vector<std::string> Dataset::getLabels() const
{
// Return the labels factorization result
if (loaded) {
return labels;
} else {
throw std::invalid_argument(message_dataset_not_loaded);
}
}
std::vector<int> Dataset::getClassesCounts() const
{
if (loaded) {
std::vector<int> counts(*std::max_element(yv.begin(), yv.end()) + 1);
for (auto y : yv) {
counts[y]++;
}
return counts;
} else {
throw std::invalid_argument(message_dataset_not_loaded);
} }
} }
std::map<std::string, std::vector<int>> Dataset::getStates() const std::map<std::string, std::vector<int>> Dataset::getStates() const
@@ -42,7 +77,7 @@ namespace platform {
if (loaded) { if (loaded) {
return states; return states;
} else { } else {
throw std::invalid_argument("Dataset not loaded."); throw std::invalid_argument(message_dataset_not_loaded);
} }
} }
pair<std::vector<std::vector<float>>&, std::vector<int>&> Dataset::getVectors() pair<std::vector<std::vector<float>>&, std::vector<int>&> Dataset::getVectors()
@@ -50,61 +85,56 @@ namespace platform {
if (loaded) { if (loaded) {
return { Xv, yv }; return { Xv, yv };
} else { } else {
throw std::invalid_argument("Dataset not loaded."); throw std::invalid_argument(message_dataset_not_loaded);
}
}
pair<std::vector<std::vector<int>>&, std::vector<int>&> Dataset::getVectorsDiscretized()
{
if (loaded) {
return { Xd, yv };
} else {
throw std::invalid_argument("Dataset not loaded.");
} }
} }
pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors() pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
{ {
if (loaded) { if (loaded) {
buildTensors();
return { X, y }; return { X, y };
} else { } else {
throw std::invalid_argument("Dataset not loaded."); throw std::invalid_argument(message_dataset_not_loaded);
} }
} }
void Dataset::load_csv() void Dataset::load_csv()
{ {
ifstream file(path + "/" + name + ".csv"); ifstream file(path + "/" + name + ".csv");
if (file.is_open()) { if (!file.is_open()) {
std::string line;
getline(file, line);
std::vector<std::string> tokens = split(line, ',');
features = std::vector<std::string>(tokens.begin(), tokens.end() - 1);
if (className == "-1") {
className = tokens.back();
}
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(std::vector<float>());
}
while (getline(file, line)) {
tokens = split(line, ',');
for (auto i = 0; i < features.size(); ++i) {
Xv[i].push_back(stof(tokens[i]));
}
yv.push_back(stoi(tokens.back()));
}
labels.clear();
file.close();
} else {
throw std::invalid_argument("Unable to open dataset file."); throw std::invalid_argument("Unable to open dataset file.");
} }
labels.clear();
std::string line;
getline(file, line);
std::vector<std::string> tokens = split(line, ',');
features = std::vector<std::string>(tokens.begin(), tokens.end() - 1);
if (className == "-1") {
className = tokens.back();
}
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(std::vector<float>());
}
while (getline(file, line)) {
tokens = split(line, ',');
for (auto i = 0; i < features.size(); ++i) {
Xv[i].push_back(stof(tokens[i]));
}
auto label = trim(tokens.back());
if (find(labels.begin(), labels.end(), label) == labels.end()) {
labels.push_back(label);
}
yv.push_back(stoi(label));
}
file.close();
} }
void Dataset::computeStates() void Dataset::computeStates()
{ {
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
states[features[i]] = std::vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1); auto [max_value, idx] = torch::max(X_train.index({ i, "..." }), 0);
auto item = states.at(features[i]); states[features[i]] = std::vector<int>(max_value.item<int>() + 1);
iota(begin(item), end(item), 0); iota(begin(states.at(features[i])), end(states.at(features[i])), 0);
} }
states[className] = std::vector<int>(*max_element(yv.begin(), yv.end()) + 1); auto [max_value, idx] = torch::max(y_train, 0);
states[className] = std::vector<int>(max_value.item<int>() + 1);
iota(begin(states.at(className)), end(states.at(className)), 0); iota(begin(states.at(className)), end(states.at(className)), 0);
} }
void Dataset::load_arff() void Dataset::load_arff()
@@ -141,32 +171,35 @@ namespace platform {
void Dataset::load_rdata() void Dataset::load_rdata()
{ {
ifstream file(path + "/" + name + "_R.dat"); ifstream file(path + "/" + name + "_R.dat");
if (file.is_open()) { if (!file.is_open()) {
std::string line;
getline(file, line);
line = ArffFiles::trim(line);
std::vector<std::string> tokens = tokenize(line);
transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
if (className == "-1") {
className = ArffFiles::trim(tokens.back());
}
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(std::vector<float>());
}
while (getline(file, line)) {
tokens = tokenize(line);
// We have to skip the first token, which is the instance number.
for (auto i = 1; i < features.size() + 1; ++i) {
const float value = stof(tokens[i]);
Xv[i - 1].push_back(value);
}
yv.push_back(stoi(tokens.back()));
}
labels.clear();
file.close();
} else {
throw std::invalid_argument("Unable to open dataset file."); throw std::invalid_argument("Unable to open dataset file.");
} }
std::string line;
labels.clear();
getline(file, line);
line = ArffFiles::trim(line);
std::vector<std::string> tokens = tokenize(line);
transform(tokens.begin(), tokens.end() - 1, back_inserter(features), [](const auto& attribute) { return ArffFiles::trim(attribute); });
if (className == "-1") {
className = ArffFiles::trim(tokens.back());
}
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(std::vector<float>());
}
while (getline(file, line)) {
tokens = tokenize(line);
// We have to skip the first token, which is the instance number.
for (auto i = 1; i < features.size() + 1; ++i) {
const float value = stof(tokens[i]);
Xv[i - 1].push_back(value);
}
auto label = trim(tokens.back());
if (find(labels.begin(), labels.end(), label) == labels.end()) {
labels.push_back(label);
}
yv.push_back(stoi(label));
}
file.close();
} }
void Dataset::load() void Dataset::load()
{ {
@@ -180,39 +213,66 @@ namespace platform {
} else if (fileType == RDATA) { } else if (fileType == RDATA) {
load_rdata(); load_rdata();
} }
if (discretize) {
Xd = discretizeDataset(Xv, yv);
computeStates();
}
n_samples = Xv[0].size(); n_samples = Xv[0].size();
n_features = Xv.size(); n_features = Xv.size();
loaded = true; if (numericFeaturesIdx.size() == 0) {
} numericFeatures = std::vector<bool>(n_features, false);
void Dataset::buildTensors()
{
if (discretize) {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
} else { } else {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32); if (numericFeaturesIdx.at(0) == -1) {
} numericFeatures = std::vector<bool>(n_features, true);
for (int i = 0; i < features.size(); ++i) {
if (discretize) {
X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
} else { } else {
X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32)); numericFeatures = std::vector<bool>(n_features, false);
for (auto i : numericFeaturesIdx) {
numericFeatures[i] = true;
}
} }
} }
y = torch::tensor(yv, torch::kInt32); // Build Tensors
} X = torch::zeros({ n_features, n_samples }, torch::kFloat32);
std::vector<mdlp::labels_t> Dataset::discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y) for (int i = 0; i < features.size(); ++i) {
{ X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
std::vector<mdlp::labels_t> Xd;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
Xd.push_back(xd);
} }
return Xd; y = torch::tensor(yv, torch::kInt32);
loaded = true;
}
std::tuple<torch::Tensor&, torch::Tensor&, torch::Tensor&, torch::Tensor&> Dataset::getTrainTestTensors(std::vector<int>& train, std::vector<int>& test)
{
if (!loaded) {
throw std::invalid_argument(message_dataset_not_loaded);
}
auto train_t = torch::tensor(train);
int samples_train = train.size();
int samples_test = test.size();
auto test_t = torch::tensor(test);
X_train = X.index({ "...", train_t });
y_train = y.index({ train_t });
X_test = X.index({ "...", test_t });
y_test = y.index({ test_t });
if (discretize) {
auto discretizer = Discretization::instance()->create(discretizer_algorithm);
auto X_train_d = torch::zeros({ n_features, samples_train }, torch::kInt32);
auto X_test_d = torch::zeros({ n_features, samples_test }, torch::kInt32);
for (auto feature = 0; feature < n_features; ++feature) {
if (numericFeatures[feature]) {
auto feature_train = X_train.index({ feature, "..." });
auto feature_test = X_test.index({ feature, "..." });
auto feature_train_disc = discretizer->fit_transform_t(feature_train, y_train);
auto feature_test_disc = discretizer->transform_t(feature_test);
X_train_d.index_put_({ feature, "..." }, feature_train_disc);
X_test_d.index_put_({ feature, "..." }, feature_test_disc);
} else {
X_train_d.index_put_({ feature, "..." }, X_train.index({ feature, "..." }).to(torch::kInt32));
X_test_d.index_put_({ feature, "..." }, X_test.index({ feature, "..." }).to(torch::kInt32));
}
}
X_train = X_train_d;
X_test = X_test_d;
assert(X_train.dtype() == torch::kInt32);
assert(X_test.dtype() == torch::kInt32);
computeStates();
}
assert(y_train.dtype() == torch::kInt32);
assert(y_test.dtype() == torch::kInt32);
return { X_train, X_test, y_train, y_test };
} }
} }

View File

@@ -1,28 +1,35 @@
#pragma once #ifndef DATASET_H
#define DATASET_H
#include <torch/torch.h> #include <torch/torch.h>
#include <map> #include <map>
#include <vector> #include <vector>
#include <string> #include <string>
#include <CPPFImdlp.h> #include <tuple>
#include <common/DiscretizationRegister.h>
#include "Utils.h" #include "Utils.h"
#include "SourceData.h" #include "SourceData.h"
namespace platform { namespace platform {
class Dataset { class Dataset {
public: public:
Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {}; Dataset(const std::string& path, const std::string& name, const std::string& className, bool discretize, fileType_t fileType, std::vector<int> numericFeaturesIdx, std::string discretizer_algo = "none") :
path(path), name(name), className(className), discretize(discretize),
loaded(false), fileType(fileType), numericFeaturesIdx(numericFeaturesIdx), discretizer_algorithm(discretizer_algo)
{
};
explicit Dataset(const Dataset&); explicit Dataset(const Dataset&);
std::string getName() const; std::string getName() const;
std::string getClassName() const; std::string getClassName() const;
std::vector<std::string> getLabels() const { return labels; } int getNClasses() const;
std::vector<std::string> getLabels() const; // return the labels factorization result
std::vector<int> getClassesCounts() const;
std::vector<string> getFeatures() const; std::vector<string> getFeatures() const;
std::map<std::string, std::vector<int>> getStates() const; std::map<std::string, std::vector<int>> getStates() const;
std::pair<vector<std::vector<float>>&, std::vector<int>&> getVectors(); std::pair<vector<std::vector<float>>&, std::vector<int>&> getVectors();
std::pair<vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized();
std::pair<torch::Tensor&, torch::Tensor&> getTensors(); std::pair<torch::Tensor&, torch::Tensor&> getTensors();
std::tuple<torch::Tensor&, torch::Tensor&, torch::Tensor&, torch::Tensor&> getTrainTestTensors(std::vector<int>& train, std::vector<int>& test);
int getNFeatures() const; int getNFeatures() const;
int getNSamples() const; int getNSamples() const;
std::vector<bool>& getNumericFeatures() { return numericFeatures; }
void load(); void load();
const bool inline isLoaded() const { return loaded; }; const bool inline isLoaded() const { return loaded; };
private: private:
@@ -31,16 +38,18 @@ namespace platform {
fileType_t fileType; fileType_t fileType;
std::string className; std::string className;
int n_samples{ 0 }, n_features{ 0 }; int n_samples{ 0 }, n_features{ 0 };
std::vector<int> numericFeaturesIdx;
std::string discretizer_algorithm;
std::vector<bool> numericFeatures; // true if feature is numeric
std::vector<std::string> features; std::vector<std::string> features;
std::vector<std::string> labels; std::vector<std::string> labels;
std::map<std::string, std::vector<int>> states; std::map<std::string, std::vector<int>> states;
bool loaded; bool loaded;
bool discretize; bool discretize;
torch::Tensor X, y; torch::Tensor X, y;
torch::Tensor X_train, X_test, y_train, y_test;
std::vector<std::vector<float>> Xv; std::vector<std::vector<float>> Xv;
std::vector<std::vector<int>> Xd;
std::vector<int> yv; std::vector<int> yv;
void buildTensors();
void load_csv(); void load_csv();
void load_arff(); void load_arff();
void load_rdata(); void load_rdata();
@@ -48,4 +57,4 @@ namespace platform {
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y); std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y);
}; };
}; };
#endif

View File

@@ -1,32 +1,70 @@
#include <fstream> #include <fstream>
#include "Datasets.h" #include "Datasets.h"
#include <nlohmann/json.hpp>
namespace platform { namespace platform {
using json = nlohmann::ordered_json;
const std::string message_dataset_not_loaded = "dataset not loaded.";
Datasets::Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm) :
discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm)
{
if ((discretizer_algorithm == "none" || discretizer_algorithm == "") && discretize) {
throw std::runtime_error("Can't discretize without discretization algorithm");
}
load();
}
void Datasets::load() void Datasets::load()
{ {
auto sd = SourceData(sfileType); auto sd = SourceData(sfileType);
fileType = sd.getFileType(); fileType = sd.getFileType();
path = sd.getPath(); path = sd.getPath();
ifstream catalog(path + "all.txt"); ifstream catalog(path + "all.txt");
if (catalog.is_open()) { std::vector<int> numericFeaturesIdx;
std::string line; if (!catalog.is_open()) {
while (getline(catalog, line)) {
if (line.empty() || line[0] == '#') {
continue;
}
std::vector<std::string> tokens = split(line, ',');
std::string name = tokens[0];
std::string className;
if (tokens.size() == 1) {
className = "-1";
} else {
className = tokens[1];
}
datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType);
}
catalog.close();
} else {
throw std::invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]"); throw std::invalid_argument("Unable to open catalog file. [" + path + "all.txt" + "]");
} }
std::string line;
while (getline(catalog, line)) {
if (line.empty() || line[0] == '#') {
continue;
}
std::vector<std::string> tokens = split(line, ';');
std::string name = tokens[0];
std::string className;
numericFeaturesIdx.clear();
int size = tokens.size();
switch (size) {
case 1:
className = "-1";
numericFeaturesIdx.push_back(-1);
break;
case 2:
className = tokens[1];
numericFeaturesIdx.push_back(-1);
break;
case 3:
{
className = tokens[1];
auto numericFeatures = tokens[2];
if (numericFeatures == "all") {
numericFeaturesIdx.push_back(-1);
} else {
if (numericFeatures != "none") {
auto features = json::parse(numericFeatures);
for (auto& f : features) {
numericFeaturesIdx.push_back(f);
}
}
}
}
break;
default:
throw std::invalid_argument("Invalid catalog file format.");
}
datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType, numericFeaturesIdx, discretizer_algorithm);
}
catalog.close();
} }
std::vector<std::string> Datasets::getNames() std::vector<std::string> Datasets::getNames()
{ {
@@ -34,102 +72,6 @@ namespace platform {
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; }); transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
return result; return result;
} }
std::vector<std::string> Datasets::getFeatures(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getFeatures();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
std::vector<std::string> Datasets::getLabels(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getLabels();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
map<std::string, std::vector<int>> Datasets::getStates(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getStates();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
void Datasets::loadDataset(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return;
} else {
datasets.at(name)->load();
}
}
std::string Datasets::getClassName(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getClassName();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNSamples(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
return datasets.at(name)->getNSamples();
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNClasses(const std::string& name)
{
if (datasets.at(name)->isLoaded()) {
auto className = datasets.at(name)->getClassName();
if (discretize) {
auto states = getStates(name);
return states.at(className).size();
}
auto [Xv, yv] = getVectors(name);
return *std::max_element(yv.begin(), yv.end()) + 1;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
std::vector<int> Datasets::getClassesCounts(const std::string& name) const
{
if (datasets.at(name)->isLoaded()) {
auto [Xv, yv] = datasets.at(name)->getVectors();
std::vector<int> counts(*std::max_element(yv.begin(), yv.end()) + 1);
for (auto y : yv) {
counts[y]++;
}
return counts;
} else {
throw std::invalid_argument("Dataset not loaded.");
}
}
pair<std::vector<std::vector<float>>&, std::vector<int>&> Datasets::getVectors(const std::string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectors();
}
pair<std::vector<std::vector<int>>&, std::vector<int>&> Datasets::getVectorsDiscretized(const std::string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectorsDiscretized();
}
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(const std::string& name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getTensors();
}
bool Datasets::isDataset(const std::string& name) const bool Datasets::isDataset(const std::string& name) const
{ {
return datasets.find(name) != datasets.end(); return datasets.find(name) != datasets.end();

View File

@@ -1,30 +1,22 @@
#pragma once #ifndef DATASETS_H
#define DATASETS_H
#include "Dataset.h" #include "Dataset.h"
namespace platform { namespace platform {
class Datasets { class Datasets {
public: public:
explicit Datasets(bool discretize, std::string sfileType) : discretize(discretize), sfileType(sfileType) { load(); }; explicit Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm = "none");
std::vector<std::string> getNames(); std::vector<std::string> getNames();
std::vector<std::string> getFeatures(const std::string& name) const;
int getNSamples(const std::string& name) const;
std::vector<std::string> getLabels(const std::string& name) const;
std::string getClassName(const std::string& name) const;
int getNClasses(const std::string& name);
std::vector<int> getClassesCounts(const std::string& name) const;
std::map<std::string, std::vector<int>> getStates(const std::string& name) const;
std::pair<std::vector<std::vector<float>>&, std::vector<int>&> getVectors(const std::string& name);
std::pair<std::vector<std::vector<int>>&, std::vector<int>&> getVectorsDiscretized(const std::string& name);
std::pair<torch::Tensor&, torch::Tensor&> getTensors(const std::string& name);
bool isDataset(const std::string& name) const; bool isDataset(const std::string& name) const;
void loadDataset(const std::string& name) const; Dataset& getDataset(const std::string& name) const { return *datasets.at(name); }
std::string toString() const; std::string toString() const;
private: private:
std::string path; std::string path;
fileType_t fileType; fileType_t fileType;
std::string sfileType; std::string sfileType;
std::string discretizer_algorithm;
std::map<std::string, std::unique_ptr<Dataset>> datasets; std::map<std::string, std::unique_ptr<Dataset>> datasets;
bool discretize; bool discretize;
void load(); // Loads the list of datasets void load(); // Loads the list of datasets
}; };
}; };
#endif

View File

@@ -0,0 +1,55 @@
#include "Discretization.h"
namespace platform {
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
Discretization* Discretization::factory = nullptr;
Discretization* Discretization::instance()
{
//manages singleton
if (factory == nullptr)
factory = new Discretization();
return factory;
}
void Discretization::registerFactoryFunction(const std::string& name,
function<mdlp::Discretizer* (void)> classFactoryFunction)
{
// register the class factory function
functionRegistry[name] = classFactoryFunction;
}
std::shared_ptr<mdlp::Discretizer> Discretization::create(const std::string& name)
{
mdlp::Discretizer* instance = nullptr;
// find name in the registry and call factory method.
auto it = functionRegistry.find(name);
if (it != functionRegistry.end())
instance = it->second();
// wrap instance in a shared ptr and return
if (instance != nullptr)
return std::unique_ptr<mdlp::Discretizer>(instance);
else
throw std::runtime_error("Discretizer not found: " + name);
}
std::vector<std::string> Discretization::getNames()
{
std::vector<std::string> names;
transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names),
[](const pair<std::string, function<mdlp::Discretizer* (void)>>& pair) { return pair.first; });
return names;
}
std::string Discretization::toString()
{
std::string result = "";
std::string sep = "";
for (const auto& pair : functionRegistry) {
result += sep + pair.first;
sep = ", ";
}
return "{" + result + "}";
}
RegistrarDiscretization::RegistrarDiscretization(const std::string& name, function<mdlp::Discretizer* (void)> classFactoryFunction)
{
// register the class factory function
Discretization::instance()->registerFactoryFunction(name, classFactoryFunction);
}
}

View File

@@ -0,0 +1,33 @@
#ifndef DISCRETIZATION_H
#define DISCRETIZATION_H
#include <map>
#include <memory>
#include <string>
#include <functional>
#include <vector>
#include <fimdlp/Discretizer.h>
#include <fimdlp/BinDisc.h>
#include <fimdlp/CPPFImdlp.h>
namespace platform {
    // Singleton registry of discretizer factories ("mdlp", "bin3u", "bin3q", ...).
    // Factories are registered at static-initialization time through
    // RegistrarDiscretization instances (see DiscretizationRegister.h).
    // Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
    class Discretization {
    public:
        Discretization(Discretization&) = delete;
        void operator=(const Discretization&) = delete;
        // Returns the process-wide singleton, creating it on first use.
        static Discretization* instance();
        // Builds the discretizer registered under `name`;
        // throws std::runtime_error when the name is unknown.
        std::shared_ptr<mdlp::Discretizer> create(const std::string& name);
        // Registers (or replaces) the factory callable for `name`.
        void registerFactoryFunction(const std::string& name,
            std::function<mdlp::Discretizer* (void)> classFactoryFunction);
        // Lists the names of every registered discretizer.
        std::vector<std::string> getNames();
        // Renders the registered names as "{a, b, ...}".
        std::string toString();
    private:
        std::map<std::string, std::function<mdlp::Discretizer* (void)>> functionRegistry;
        static Discretization* factory; // singleton instance
        Discretization() {};
    };
    // Helper whose constructor registers a factory with the singleton.
    class RegistrarDiscretization {
    public:
        RegistrarDiscretization(const std::string& className, std::function<mdlp::Discretizer* (void)> classFactoryFunction);
    };
}
#endif

View File

@@ -0,0 +1,38 @@
#ifndef DISCRETIZATIONREGISTER_H
#define DISCRETIZATIONREGISTER_H
#include <common/Discretization.h>
// Registers every supported discretizer with the Discretization singleton at
// static-initialization time. C++17 `inline` variables keep a single registrar
// instance per name across all translation units that include this header;
// the previous `static` storage re-ran each registration once per including TU
// (harmless map overwrites, but redundant objects and init work).
inline platform::RegistrarDiscretization registrarM("mdlp",
    [](void) -> mdlp::Discretizer* { return new mdlp::CPPFImdlp();});
inline platform::RegistrarDiscretization registrarBU3("bin3u",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(3, mdlp::strategy_t::UNIFORM);});
inline platform::RegistrarDiscretization registrarBQ3("bin3q",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(3, mdlp::strategy_t::QUANTILE);});
inline platform::RegistrarDiscretization registrarBU4("bin4u",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(4, mdlp::strategy_t::UNIFORM);});
inline platform::RegistrarDiscretization registrarBQ4("bin4q",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(4, mdlp::strategy_t::QUANTILE);});
inline platform::RegistrarDiscretization registrarBU5("bin5u",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(5, mdlp::strategy_t::UNIFORM);});
inline platform::RegistrarDiscretization registrarBQ5("bin5q",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(5, mdlp::strategy_t::QUANTILE);});
inline platform::RegistrarDiscretization registrarBU6("bin6u",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(6, mdlp::strategy_t::UNIFORM);});
inline platform::RegistrarDiscretization registrarBQ6("bin6q",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(6, mdlp::strategy_t::QUANTILE);});
inline platform::RegistrarDiscretization registrarBU7("bin7u",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(7, mdlp::strategy_t::UNIFORM);});
inline platform::RegistrarDiscretization registrarBQ7("bin7q",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(7, mdlp::strategy_t::QUANTILE);});
inline platform::RegistrarDiscretization registrarBU8("bin8u",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(8, mdlp::strategy_t::UNIFORM);});
inline platform::RegistrarDiscretization registrarBQ8("bin8q",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(8, mdlp::strategy_t::QUANTILE);});
inline platform::RegistrarDiscretization registrarBU9("bin9u",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(9, mdlp::strategy_t::UNIFORM);});
inline platform::RegistrarDiscretization registrarBQ9("bin9q",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(9, mdlp::strategy_t::QUANTILE);});
inline platform::RegistrarDiscretization registrarBU10("bin10u",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(10, mdlp::strategy_t::UNIFORM);});
inline platform::RegistrarDiscretization registrarBQ10("bin10q",
    [](void) -> mdlp::Discretizer* { return new mdlp::BinDisc(10, mdlp::strategy_t::QUANTILE);});
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef DOTENV_H
#define DOTENV_H
#include <string> #include <string>
#include <map> #include <map>
#include <fstream> #include <fstream>
@@ -13,14 +13,53 @@ namespace platform {
class DotEnv { class DotEnv {
private: private:
std::map<std::string, std::string> env; std::map<std::string, std::string> env;
std::map<std::string, std::vector<std::string>> valid;
public: public:
DotEnv(bool create = false) DotEnv(bool create = false)
{ {
valid =
{
{"depth", {"any"}},
{"discretize", {"0", "1"}},
{"discretize_algo", {"mdlp", "bin3u", "bin3q", "bin4u", "bin4q", "bin5q", "bin5u", "bin6q", "bin6u", "bin7q", "bin7u", "bin8q", "bin8u", "bin9q", "bin9u", "bin10q", "bin10u"}},
{"experiment", {"discretiz", "odte", "covid", "Test"}},
{"fit_features", {"0", "1"}},
{"framework", {"bulma", "bootstrap"}},
{"ignore_nan", {"0", "1"}},
{"leaves", {"any"}},
{"margin", {"0.1", "0.2", "0.3"}},
{"model", {"any"}},
{"n_folds", {"5", "10"}},
{"nodes", {"any"}},
{"platform", {"any"}},
{"stratified", {"0", "1"}},
{"score", {"accuracy", "roc-auc-ovr"}},
{"seeds", {"any"}},
{"smooth_strat", {"ORIGINAL", "LAPLACE", "CESTNIK"}},
{"source_data", {"Arff", "Tanveer", "Surcov", "Test"}},
};
if (create) { if (create) {
// For testing purposes // For testing purposes
std::ofstream file(".env"); std::ofstream file(".env");
file << "source_data = Test" << std::endl; file << "experiment=Test" << std::endl;
file << "margin = 0.1" << std::endl; file << "source_data=Test" << std::endl;
file << "margin=0.1" << std::endl;
file << "score=accuracy" << std::endl;
file << "platform=um790Linux" << std::endl;
file << "n_folds=5" << std::endl;
file << "discretize_algo=mdlp" << std::endl;
file << "smooth_strat=ORIGINAL" << std::endl;
file << "stratified=0" << std::endl;
file << "model=TAN" << std::endl;
file << "seeds=[271]" << std::endl;
file << "discretize=0" << std::endl;
file << "ignore_nan=0" << std::endl;
file << "nodes=Nodes" << std::endl;
file << "leaves=Edges" << std::endl;
file << "depth=States" << std::endl;
file << "fit_features=0" << std::endl;
file << "framework=bulma" << std::endl;
file << "margin=0.1" << std::endl;
file.close(); file.close();
} }
std::ifstream file(".env"); std::ifstream file(".env");
@@ -37,12 +76,62 @@ namespace platform {
std::istringstream iss(line); std::istringstream iss(line);
std::string key, value; std::string key, value;
if (std::getline(iss, key, '=') && std::getline(iss, value)) { if (std::getline(iss, key, '=') && std::getline(iss, value)) {
env[trim(key)] = trim(value); key = trim(key);
value = trim(value);
parse(key, value);
env[key] = value;
}
}
parseEnv();
}
// Validates one .env entry against the table of accepted keys/values.
// Exits the process (status 1) on an unknown key or an out-of-range value;
// keys whose accepted list is {"any"} accept every value.
// Single map lookup (the original performed find() plus two operator[] lookups).
void parse(const std::string& key, const std::string& value)
{
    const auto entry = valid.find(key);
    if (entry == valid.end()) {
        std::cerr << "Invalid key in .env: " << key << std::endl;
        exit(1);
    }
    const auto& accepted = entry->second;
    if (accepted.front() == "any") {
        return; // unconstrained key
    }
    if (std::find(accepted.begin(), accepted.end(), value) == accepted.end()) {
        std::cerr << "Invalid value in .env: " << key << " = " << value << std::endl;
        exit(1);
    }
}
// Returns a copy of the accepted values for `key`, or an empty vector for an
// unknown key. Single lookup (the original did find() followed by at()).
std::vector<std::string> valid_tokens(const std::string& key)
{
    const auto entry = valid.find(key);
    return entry == valid.end() ? std::vector<std::string>{} : entry->second;
}
// Renders the accepted values for `key` as "{v1, v2, ...}" for error messages;
// unknown keys produce "{}". The original's local variable shadowed the
// function name (`valid_values`) and looked the key up twice (find + at).
std::string valid_values(const std::string& key)
{
    const auto entry = valid.find(key);
    if (entry == valid.end()) {
        return "{}";
    }
    std::string result = "{";
    std::string sep = "";
    for (const auto& token : entry->second) {
        result += sep + token;
        sep = ", ";
    }
    return result + "}";
}
void parseEnv()
{
for (auto& [key, values] : valid) {
if (env.find(key) == env.end()) {
std::cerr << "Key not found in .env: " << key << ", valid values: " << valid_values(key) << std::endl;
exit(1);
} }
} }
} }
std::string get(const std::string& key) std::string get(const std::string& key)
{ {
if (env.find(key) == env.end()) {
std::cerr << "Key not found in .env: " << key << std::endl;
exit(1);
}
return env.at(key); return env.at(key);
} }
std::vector<int> getSeeds() std::vector<int> getSeeds()
@@ -59,3 +148,4 @@ namespace platform {
} }
}; };
} }
#endif

View File

@@ -1,20 +1,35 @@
#pragma once #ifndef PATHS_H
#define PATHS_H
#include <string> #include <string>
#include <filesystem> #include <filesystem>
#include "DotEnv.h" #include "DotEnv.h"
namespace platform { namespace platform {
class Paths { class Paths {
public: public:
static std::string results() { return "results/"; } static std::string createIfNotExists(const std::string& folder)
static std::string hiddenResults() { return "hidden_results/"; } {
static std::string excel() { return "excel/"; } if (!std::filesystem::exists(folder)) {
static std::string grid() { return "grid/"; } std::filesystem::create_directory(folder);
}
return folder;
}
static std::string results() { return createIfNotExists("results/"); }
static std::string hiddenResults() { return createIfNotExists("hidden_results/"); }
static std::string excel() { return createIfNotExists("excel/"); }
static std::string grid() { return createIfNotExists("grid/"); }
static std::string graphs() { return createIfNotExists("graphs/"); }
static std::string tex() { return createIfNotExists("tex/"); }
static std::string datasets() static std::string datasets()
{ {
auto env = platform::DotEnv(); auto env = platform::DotEnv();
return env.get("source_data"); return env.get("source_data");
} }
static std::string experiment_file(const std::string& fileName, bool discretize, bool stratified, int seed, int nfold)
{
std::string disc = discretize ? "_disc_" : "_ndisc_";
std::string strat = stratified ? "strat_" : "nstrat_";
return "datasets_experiment/" + fileName + disc + strat + std::to_string(seed) + "_" + std::to_string(nfold) + ".json";
}
static void createPath(const std::string& path) static void createPath(const std::string& path)
{ {
// Create directory if it does not exist // Create directory if it does not exist
@@ -25,6 +40,14 @@ namespace platform {
throw std::runtime_error("Could not create directory " + path); throw std::runtime_error("Could not create directory " + path);
} }
} }
static std::string bestResultsFile(const std::string& score, const std::string& model)
{
return "best_results_" + score + "_" + model + ".json";
}
static std::string bestResultsExcel(const std::string& score)
{
return "BestResults_" + score + ".xlsx";
}
static std::string excelResults() { return "some_results.xlsx"; } static std::string excelResults() { return "some_results.xlsx"; }
static std::string grid_input(const std::string& model) static std::string grid_input(const std::string& model)
{ {
@@ -34,5 +57,22 @@ namespace platform {
{ {
return grid() + "grid_" + model + "_output.json"; return grid() + "grid_" + model + "_output.json";
} }
static std::string tex_output()
{
return "results.tex";
}
static std::string md_output()
{
return "results.md";
}
static std::string tex_post_hoc()
{
return "post_hoc.tex";
}
static std::string md_post_hoc()
{
return "post_hoc.md";
}
}; };
} }
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef SYMBOLS_H
#define SYMBOLS_H
#include <string> #include <string>
namespace platform { namespace platform {
class Symbols { class Symbols {
@@ -12,8 +12,10 @@ namespace platform {
inline static const std::string downward_arrow{ "\u27B4" }; inline static const std::string downward_arrow{ "\u27B4" };
inline static const std::string up_arrow{ "\u2B06" }; inline static const std::string up_arrow{ "\u2B06" };
inline static const std::string down_arrow{ "\u2B07" }; inline static const std::string down_arrow{ "\u2B07" };
inline static const std::string ellipsis{ "\u2026" };
inline static const std::string equal_best{ check_mark }; inline static const std::string equal_best{ check_mark };
inline static const std::string better_best{ black_star }; inline static const std::string better_best{ black_star };
inline static const std::string notebook{ "\U0001F5C8" }; inline static const std::string notebook{ "\U0001F5C8" };
}; };
} }
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef TIMER_H
#define TIMER_H
#include <chrono> #include <chrono>
#include <string> #include <string>
#include <sstream> #include <sstream>
@@ -40,3 +40,4 @@ namespace platform {
} }
}; };
} /* namespace platform */ } /* namespace platform */
#endif

View File

@@ -1,18 +1,18 @@
#pragma once #ifndef UTILS_H
#define UTILS_H
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector> #include <vector>
#include <algorithm>
#include <torch/torch.h>
namespace platform { namespace platform {
//static std::vector<std::string> split(const std::string& text, char delimiter); template <typename T>
static std::vector<std::string> split(const std::string& text, char delimiter) std::vector<T> tensorToVector(const torch::Tensor& tensor)
{ {
std::vector<std::string> result; torch::Tensor contig_tensor = tensor.contiguous();
std::stringstream ss(text); auto num_elements = contig_tensor.numel();
std::string token; const T* tensor_data = contig_tensor.data_ptr<T>();
while (std::getline(ss, token, delimiter)) { std::vector<T> result(tensor_data, tensor_data + num_elements);
result.push_back(token);
}
return result; return result;
} }
static std::string trim(const std::string& str) static std::string trim(const std::string& str)
@@ -26,4 +26,45 @@ namespace platform {
}).base(), result.end()); }).base(), result.end());
return result; return result;
} }
// Splits `text` on `delimiter`, trimming surrounding whitespace from each piece.
static std::vector<std::string> split(const std::string& text, char delimiter)
{
    std::vector<std::string> pieces;
    std::stringstream stream(text);
    std::string piece;
    while (std::getline(stream, piece, delimiter)) {
        pieces.push_back(trim(piece));
    }
    return pieces;
}
// Population standard deviation of `values` around the caller-supplied mean
// (the mean is passed in so it is not recomputed here).
// Takes a const reference (the original copied the whole vector), squares with
// a multiply instead of std::pow(x, 2), and returns 0.0 for an empty input
// (the naive formula divided by zero and produced NaN).
inline double compute_std(const std::vector<double>& values, double mean)
{
    if (values.empty()) {
        return 0.0;
    }
    double sum = 0.0;
    for (const auto& value : values) {
        const double diff = value - mean;
        sum += diff * diff;
    }
    return std::sqrt(sum / values.size());
}
// Current local date formatted as "YYYY-MM-DD".
inline std::string get_date()
{
    std::time_t now = std::time(nullptr);
    std::ostringstream buffer;
    buffer << std::put_time(std::localtime(&now), "%Y-%m-%d");
    return buffer.str();
}
// Current local time formatted as "HH:MM:SS".
inline std::string get_time()
{
    std::time_t now = std::time(nullptr);
    std::ostringstream buffer;
    buffer << std::put_time(std::localtime(&now), "%H:%M:%S");
    return buffer.str();
}
} }
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef GRIDDATA_H
#define GRIDDATA_H
#include <string> #include <string>
#include <vector> #include <vector>
#include <map> #include <map>
@@ -23,3 +23,4 @@ namespace platform {
std::map<std::string, json> grid; std::map<std::string, json> grid;
}; };
} /* namespace platform */ } /* namespace platform */
#endif

View File

@@ -5,29 +5,11 @@
#include "main/Models.h" #include "main/Models.h"
#include "common/Paths.h" #include "common/Paths.h"
#include "common/Colors.h" #include "common/Colors.h"
#include "common/Utils.h"
#include "GridSearch.h" #include "GridSearch.h"
namespace platform { namespace platform {
std::string get_date()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%Y-%m-%d");
return oss.str();
}
std::string get_time()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str();
}
std::string get_color_rank(int rank) std::string get_color_rank(int rank)
{ {
auto colors = { Colors::WHITE(), Colors::RED(), Colors::GREEN(), Colors::BLUE(), Colors::MAGENTA(), Colors::CYAN() }; auto colors = { Colors::WHITE(), Colors::RED(), Colors::GREEN(), Colors::BLUE(), Colors::MAGENTA(), Colors::CYAN() };
@@ -103,11 +85,11 @@ namespace platform {
std::mt19937 g{ 271 }; // Use fixed seed to obtain the same shuffle std::mt19937 g{ 271 }; // Use fixed seed to obtain the same shuffle
std::shuffle(tasks.begin(), tasks.end(), g); std::shuffle(tasks.begin(), tasks.end(), g);
std::cout << get_color_rank(rank) << "* Number of tasks: " << tasks.size() << std::endl; std::cout << get_color_rank(rank) << "* Number of tasks: " << tasks.size() << std::endl;
std::cout << "|"; std::cout << separator;
for (int i = 0; i < tasks.size(); ++i) { for (int i = 0; i < tasks.size(); ++i) {
std::cout << (i + 1) % 10; std::cout << (i + 1) % 10;
} }
std::cout << "|" << std::endl << "|" << std::flush; std::cout << separator << std::endl << separator << std::flush;
return tasks; return tasks;
} }
void process_task_mpi_consumer(struct ConfigGrid& config, struct ConfigMPI& config_mpi, json& tasks, int n_task, Datasets& datasets, Task_Result* result) void process_task_mpi_consumer(struct ConfigGrid& config, struct ConfigMPI& config_mpi, json& tasks, int n_task, Datasets& datasets, Task_Result* result)
@@ -118,17 +100,18 @@ namespace platform {
json task = tasks[n_task]; json task = tasks[n_task];
auto model = config.model; auto model = config.model;
auto grid = GridData(Paths::grid_input(model)); auto grid = GridData(Paths::grid_input(model));
auto dataset = task["dataset"].get<std::string>(); auto dataset_name = task["dataset"].get<std::string>();
auto idx_dataset = task["idx_dataset"].get<int>(); auto idx_dataset = task["idx_dataset"].get<int>();
auto seed = task["seed"].get<int>(); auto seed = task["seed"].get<int>();
auto n_fold = task["fold"].get<int>(); auto n_fold = task["fold"].get<int>();
bool stratified = config.stratified; bool stratified = config.stratified;
// Generate the hyperparameters combinations // Generate the hyperparameters combinations
auto combinations = grid.getGrid(dataset); auto& dataset = datasets.getDataset(dataset_name);
auto [X, y] = datasets.getTensors(dataset); auto combinations = grid.getGrid(dataset_name);
auto states = datasets.getStates(dataset); dataset.load();
auto features = datasets.getFeatures(dataset); auto [X, y] = dataset.getTensors();
auto className = datasets.getClassName(dataset); auto features = dataset.getFeatures();
auto className = dataset.getClassName();
// //
// Start working on task // Start working on task
// //
@@ -138,14 +121,11 @@ namespace platform {
else else
fold = new folding::KFold(config.n_folds, y.size(0), seed); fold = new folding::KFold(config.n_folds, y.size(0), seed);
auto [train, test] = fold->getFold(n_fold); auto [train, test] = fold->getFold(n_fold);
auto train_t = torch::tensor(train); auto [X_train, X_test, y_train, y_test] = dataset.getTrainTestTensors(train, test);
auto test_t = torch::tensor(test); auto states = dataset.getStates(); // Get the states of the features Once they are discretized
auto X_train = X.index({ "...", train_t });
auto y_train = y.index({ train_t });
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
double best_fold_score = 0.0; double best_fold_score = 0.0;
int best_idx_combination = -1; int best_idx_combination = -1;
bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE;
json best_fold_hyper; json best_fold_hyper;
for (int idx_combination = 0; idx_combination < combinations.size(); ++idx_combination) { for (int idx_combination = 0; idx_combination < combinations.size(); ++idx_combination) {
auto hyperparam_line = combinations[idx_combination]; auto hyperparam_line = combinations[idx_combination];
@@ -168,10 +148,10 @@ namespace platform {
// Build Classifier with selected hyperparameters // Build Classifier with selected hyperparameters
auto clf = Models::instance()->create(config.model); auto clf = Models::instance()->create(config.model);
auto valid = clf->getValidHyperparameters(); auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, dataset); hyperparameters.check(valid, dataset_name);
clf->setHyperparameters(hyperparameters.get(dataset)); clf->setHyperparameters(hyperparameters.get(dataset_name));
// Train model // Train model
clf->fit(X_nested_train, y_nested_train, features, className, states); clf->fit(X_nested_train, y_nested_train, features, className, states, smoothing);
// Test model // Test model
score += clf->score(X_nested_test, y_nested_test); score += clf->score(X_nested_test, y_nested_test);
} }
@@ -188,9 +168,9 @@ namespace platform {
auto hyperparameters = platform::HyperParameters(datasets.getNames(), best_fold_hyper); auto hyperparameters = platform::HyperParameters(datasets.getNames(), best_fold_hyper);
auto clf = Models::instance()->create(config.model); auto clf = Models::instance()->create(config.model);
auto valid = clf->getValidHyperparameters(); auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, dataset); hyperparameters.check(valid, dataset_name);
clf->setHyperparameters(best_fold_hyper); clf->setHyperparameters(best_fold_hyper);
clf->fit(X_train, y_train, features, className, states); clf->fit(X_train, y_train, features, className, states, smoothing);
best_fold_score = clf->score(X_test, y_test); best_fold_score = clf->score(X_test, y_test);
// Return the result // Return the result
result->idx_dataset = task["idx_dataset"].get<int>(); result->idx_dataset = task["idx_dataset"].get<int>();
@@ -373,14 +353,16 @@ namespace platform {
MPI_Bcast(msg, tasks_size + 1, MPI_CHAR, config_mpi.manager, MPI_COMM_WORLD); MPI_Bcast(msg, tasks_size + 1, MPI_CHAR, config_mpi.manager, MPI_COMM_WORLD);
tasks = json::parse(msg); tasks = json::parse(msg);
delete[] msg; delete[] msg;
auto datasets = Datasets(config.discretize, Paths::datasets()); auto env = platform::DotEnv();
auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretize_algo"));
if (config_mpi.rank == config_mpi.manager) { if (config_mpi.rank == config_mpi.manager) {
// //
// 2a. Producer delivers the tasks to the consumers // 2a. Producer delivers the tasks to the consumers
// //
auto datasets_names = filterDatasets(datasets); auto datasets_names = filterDatasets(datasets);
json all_results = producer(datasets_names, tasks, config_mpi, MPI_Result); json all_results = producer(datasets_names, tasks, config_mpi, MPI_Result);
std::cout << get_color_rank(config_mpi.rank) << "|" << std::endl; std::cout << get_color_rank(config_mpi.rank) << separator << std::endl;
// //
// 3. Manager selects the best scores for each dataset // 3. Manager selects the best scores for each dataset
// //

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef GRIDSEARCH_H
#define GRIDSEARCH_H
#include <string> #include <string>
#include <map> #include <map>
#include <mpi.h> #include <mpi.h>
@@ -55,5 +55,7 @@ namespace platform {
struct ConfigGrid config; struct ConfigGrid config;
json build_tasks_mpi(int rank); json build_tasks_mpi(int rank);
Timer timer; // used to measure the time of the whole process Timer timer; // used to measure the time of the whole process
const std::string separator = "|";
}; };
} /* namespace platform */ } /* namespace platform */
#endif

View File

@@ -10,6 +10,7 @@ namespace platform {
void Experiment::saveResult() void Experiment::saveResult()
{ {
result.save(); result.save();
std::cout << "Result saved in " << Paths::results() << result.getFilename() << std::endl;
} }
void Experiment::report(bool classification_report) void Experiment::report(bool classification_report)
{ {
@@ -23,7 +24,24 @@ namespace platform {
{ {
std::cout << result.getJson().dump(4) << std::endl; std::cout << result.getJson().dump(4) << std::endl;
} }
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score) void Experiment::saveGraph()
{
std::cout << "Saving graphs..." << std::endl;
auto data = result.getJson();
for (const auto& item : data["results"]) {
auto graphs = item["graph"];
int i = 0;
for (const auto& graph : graphs) {
i++;
auto fileName = Paths::graphs() + result.getFilename() + "_graph_" + item["dataset"].get<std::string>() + "_" + std::to_string(i) + ".dot";
auto file = std::ofstream(fileName);
file << graph.get<std::string>();
file.close();
std::cout << "Graph saved in " << fileName << std::endl;
}
}
}
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph)
{ {
for (auto fileName : filesToProcess) { for (auto fileName : filesToProcess) {
if (fileName.size() > max_name) if (fileName.size() > max_name)
@@ -40,14 +58,14 @@ namespace platform {
std::cout << " ( " << Colors::GREEN() << "b" << Colors::RESET() << " ) Scoring train dataset" << std::endl; std::cout << " ( " << Colors::GREEN() << "b" << Colors::RESET() << " ) Scoring train dataset" << std::endl;
std::cout << " ( " << Colors::GREEN() << "c" << Colors::RESET() << " ) Scoring test dataset" << std::endl << std::endl; std::cout << " ( " << Colors::GREEN() << "c" << Colors::RESET() << " ) Scoring test dataset" << std::endl << std::endl;
std::cout << Colors::YELLOW() << "Note: fold number in this color means fitting had issues such as not using all features in BoostAODE classifier" << std::endl << std::endl; std::cout << Colors::YELLOW() << "Note: fold number in this color means fitting had issues such as not using all features in BoostAODE classifier" << std::endl << std::endl;
std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << std::endl; std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << string(3 * nfolds - 2, ' ') << " Time" << std::endl;
std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << Colors::RESET() << std::endl; std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << " ----------" << Colors::RESET() << std::endl;
} }
int num = 0; int num = 0;
for (auto fileName : filesToProcess) { for (auto fileName : filesToProcess) {
if (!quiet) if (!quiet)
std::cout << " " << setw(3) << right << num++ << " " << setw(max_name) << left << fileName << right << flush; std::cout << " " << setw(3) << right << num++ << " " << setw(max_name) << left << fileName << right << flush;
cross_validation(fileName, quiet, no_train_score); cross_validation(fileName, quiet, no_train_score, generate_fold_files, graph);
if (!quiet) if (!quiet)
std::cout << std::endl; std::cout << std::endl;
} }
@@ -67,36 +85,88 @@ namespace platform {
return Colors::RESET(); return Colors::RESET();
} }
} }
score_t Experiment::parse_score() const
{
if (result.getScoreName() == "accuracy")
return score_t::ACCURACY;
if (result.getScoreName() == "roc-auc-ovr")
return score_t::ROC_AUC_OVR;
throw std::runtime_error("Unknown score: " + result.getScoreName());
}
void showProgress(int fold, const std::string& color, const std::string& phase) void showProgress(int fold, const std::string& color, const std::string& phase)
{ {
std::string prefix = phase == "a" ? "" : "\b\b\b\b"; std::string prefix = phase == "-" ? "" : "\b\b\b\b";
std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush; std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush;
} }
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score) void generate_files(const std::string& fileName, bool discretize, bool stratified, int seed, int nfold, torch::Tensor X_train, torch::Tensor y_train, torch::Tensor X_test, torch::Tensor y_test, std::vector<int>& train, std::vector<int>& test)
{ {
auto datasets = Datasets(discretized, Paths::datasets()); std::string file_name = Paths::experiment_file(fileName, discretize, stratified, seed, nfold);
// Get dataset auto file = std::ofstream(file_name);
auto [X, y] = datasets.getTensors(fileName); json output;
auto states = datasets.getStates(fileName); output["seed"] = seed;
auto features = datasets.getFeatures(fileName); output["nfold"] = nfold;
auto samples = datasets.getNSamples(fileName); output["X_train"] = json::array();
auto className = datasets.getClassName(fileName); auto n = X_train.size(1);
auto labels = datasets.getLabels(fileName); for (int i = 0; i < X_train.size(0); i++) {
int num_classes = states[className].size() == 0 ? labels.size() : states[className].size(); if (X_train.dtype() == torch::kFloat32) {
if (!quiet) { auto xvf_ptr = X_train.index({ i }).data_ptr<float>();
std::cout << " " << setw(5) << samples << " " << setw(5) << features.size() << flush; auto feature = std::vector<float>(xvf_ptr, xvf_ptr + n);
output["X_train"].push_back(feature);
} else {
auto feature = std::vector<int>(X_train.index({ i }).data_ptr<int>(), X_train.index({ i }).data_ptr<int>() + n);
output["X_train"].push_back(feature);
}
} }
output["y_train"] = std::vector<int>(y_train.data_ptr<int>(), y_train.data_ptr<int>() + n);
output["X_test"] = json::array();
n = X_test.size(1);
for (int i = 0; i < X_test.size(0); i++) {
if (X_train.dtype() == torch::kFloat32) {
auto xvf_ptr = X_test.index({ i }).data_ptr<float>();
auto feature = std::vector<float>(xvf_ptr, xvf_ptr + n);
output["X_test"].push_back(feature);
} else {
auto feature = std::vector<int>(X_test.index({ i }).data_ptr<int>(), X_test.index({ i }).data_ptr<int>() + n);
output["X_test"].push_back(feature);
}
}
output["y_test"] = std::vector<int>(y_test.data_ptr<int>(), y_test.data_ptr<int>() + n);
output["train"] = train;
output["test"] = test;
file << output.dump(4);
file.close();
}
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph)
{
//
// Load dataset and prepare data
//
auto datasets = Datasets(discretized, Paths::datasets(), discretization_algo);
auto& dataset = datasets.getDataset(fileName);
dataset.load();
auto [X, y] = dataset.getTensors(); // Only need y for folding
auto features = dataset.getFeatures();
auto n_features = dataset.getNFeatures();
auto n_samples = dataset.getNSamples();
auto className = dataset.getClassName();
auto labels = dataset.getLabels();
int num_classes = dataset.getNClasses();
if (!quiet) {
std::cout << " " << setw(5) << n_samples << " " << setw(5) << n_features << flush;
}
//
// Prepare Result // Prepare Result
//
auto partial_result = PartialResult(); auto partial_result = PartialResult();
auto [values, counts] = at::_unique(y); partial_result.setSamples(n_samples).setFeatures(n_features).setClasses(num_classes);
partial_result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
partial_result.setHyperparameters(hyperparameters.get(fileName)); partial_result.setHyperparameters(hyperparameters.get(fileName));
//
// Initialize results std::vectors // Initialize results std::vectors
//
int nResults = nfolds * static_cast<int>(randomSeeds.size()); int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64); auto score_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64); auto score_train = torch::zeros({ nResults }, torch::kFloat64);
auto train_time = torch::zeros({ nResults }, torch::kFloat64); auto train_time = torch::zeros({ nResults }, torch::kFloat64);
auto test_time = torch::zeros({ nResults }, torch::kFloat64); auto test_time = torch::zeros({ nResults }, torch::kFloat64);
auto nodes = torch::zeros({ nResults }, torch::kFloat64); auto nodes = torch::zeros({ nResults }, torch::kFloat64);
@@ -105,10 +175,16 @@ namespace platform {
json confusion_matrices = json::array(); json confusion_matrices = json::array();
json confusion_matrices_train = json::array(); json confusion_matrices_train = json::array();
std::vector<std::string> notes; std::vector<std::string> notes;
Timer train_timer, test_timer; std::vector<std::string> graphs;
Timer train_timer, test_timer, seed_timer;
int item = 0; int item = 0;
bool first_seed = true; bool first_seed = true;
//
// Loop over random seeds
//
auto score = parse_score();
for (auto seed : randomSeeds) { for (auto seed : randomSeeds) {
seed_timer.start();
if (!quiet) { if (!quiet) {
string prefix = " "; string prefix = " ";
if (!first_seed) { if (!first_seed) {
@@ -121,26 +197,33 @@ namespace platform {
if (stratified) if (stratified)
fold = new folding::StratifiedKFold(nfolds, y, seed); fold = new folding::StratifiedKFold(nfolds, y, seed);
else else
fold = new folding::KFold(nfolds, y.size(0), seed); fold = new folding::KFold(nfolds, n_samples, seed);
//
// Loop over folds
//
for (int nfold = 0; nfold < nfolds; nfold++) { for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(result.getModel()); auto clf = Models::instance()->create(result.getModel());
if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "-");
setModelVersion(clf->getVersion()); setModelVersion(clf->getVersion());
auto valid = clf->getValidHyperparameters(); auto valid = clf->getValidHyperparameters();
hyperparameters.check(valid, fileName); hyperparameters.check(valid, fileName);
clf->setHyperparameters(hyperparameters.get(fileName)); clf->setHyperparameters(hyperparameters.get(fileName));
//
// Split train - test dataset // Split train - test dataset
//
train_timer.start(); train_timer.start();
auto [train, test] = fold->getFold(nfold); auto [train, test] = fold->getFold(nfold);
auto train_t = torch::tensor(train); auto [X_train, X_test, y_train, y_test] = dataset.getTrainTestTensors(train, test);
auto test_t = torch::tensor(test); auto states = dataset.getStates(); // Get the states of the features Once they are discretized
auto X_train = X.index({ "...", train_t }); if (generate_fold_files)
auto y_train = y.index({ train_t }); generate_files(fileName, discretized, stratified, seed, nfold, X_train, y_train, X_test, y_test, train, test);
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
if (!quiet) if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "a"); showProgress(nfold + 1, getColor(clf->getStatus()), "a");
//
// Train model // Train model
clf->fit(X_train, y_train, features, className, states); //
clf->fit(X_train, y_train, features, className, states, smooth_type);
if (!quiet) if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "b"); showProgress(nfold + 1, getColor(clf->getStatus()), "b");
auto clf_notes = clf->getNotes(); auto clf_notes = clf->getNotes();
@@ -150,40 +233,60 @@ namespace platform {
edges[item] = clf->getNumberOfEdges(); edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates(); num_states[item] = clf->getNumberOfStates();
train_time[item] = train_timer.getDuration(); train_time[item] = train_timer.getDuration();
double accuracy_train_value = 0.0; double score_train_value = 0.0;
//
// Score train // Score train
//
if (!no_train_score) { if (!no_train_score) {
auto y_predict = clf->predict(X_train); auto y_proba_train = clf->predict_proba(X_train);
Scores scores(y_train, y_predict, num_classes, labels); Scores scores(y_train, y_proba_train, num_classes, labels);
accuracy_train_value = scores.accuracy(); score_train_value = score == score_t::ACCURACY ? scores.accuracy() : scores.auc();
confusion_matrices_train.push_back(scores.get_confusion_matrix_json(true)); confusion_matrices_train.push_back(scores.get_confusion_matrix_json(true));
} }
//
// Test model // Test model
//
if (!quiet) if (!quiet)
showProgress(nfold + 1, getColor(clf->getStatus()), "c"); showProgress(nfold + 1, getColor(clf->getStatus()), "c");
test_timer.start(); test_timer.start();
auto y_predict = clf->predict(X_test); // auto y_predict = clf->predict(X_test);
Scores scores(y_test, y_predict, num_classes, labels); auto y_proba_test = clf->predict_proba(X_test);
auto accuracy_test_value = scores.accuracy(); Scores scores(y_test, y_proba_test, num_classes, labels);
auto score_test_value = score == score_t::ACCURACY ? scores.accuracy() : scores.auc();
test_time[item] = test_timer.getDuration(); test_time[item] = test_timer.getDuration();
accuracy_train[item] = accuracy_train_value; score_train[item] = score_train_value;
accuracy_test[item] = accuracy_test_value; score_test[item] = score_test_value;
confusion_matrices.push_back(scores.get_confusion_matrix_json(true)); confusion_matrices.push_back(scores.get_confusion_matrix_json(true));
if (!quiet) if (!quiet)
std::cout << "\b\b\b, " << flush; std::cout << "\b\b\b, " << flush;
//
// Store results and times in std::vector // Store results and times in std::vector
partial_result.addScoreTrain(accuracy_train_value); //
partial_result.addScoreTest(accuracy_test_value); partial_result.addScoreTrain(score_train_value);
partial_result.addScoreTest(score_test_value);
partial_result.addTimeTrain(train_time[item].item<double>()); partial_result.addTimeTrain(train_time[item].item<double>());
partial_result.addTimeTest(test_time[item].item<double>()); partial_result.addTimeTest(test_time[item].item<double>());
item++; item++;
if (graph) {
std::string result = "";
for (const auto& line : clf->graph()) {
result += line + "\n";
}
graphs.push_back(result);
}
}
if (!quiet) {
seed_timer.stop();
std::cout << "end. [" << seed_timer.getDurationString() << "]" << std::endl;
} }
if (!quiet)
std::cout << "end. " << flush;
delete fold; delete fold;
} }
partial_result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>()); //
partial_result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>()); // Store result totals in Result
//
partial_result.setGraph(graphs);
partial_result.setScoreTest(torch::mean(score_test).item<double>()).setScoreTrain(torch::mean(score_train).item<double>());
partial_result.setScoreTestStd(torch::std(score_test).item<double>()).setScoreTrainStd(torch::std(score_train).item<double>());
partial_result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>()); partial_result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
partial_result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>()); partial_result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
partial_result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>()); partial_result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef EXPERIMENT_H
#define EXPERIMENT_H
#include <torch/torch.h> #include <torch/torch.h>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include <string> #include <string>
@@ -7,10 +7,11 @@
#include "bayesnet/BaseClassifier.h" #include "bayesnet/BaseClassifier.h"
#include "HyperParameters.h" #include "HyperParameters.h"
#include "results/Result.h" #include "results/Result.h"
#include "bayesnet/network/Network.h"
namespace platform { namespace platform {
using json = nlohmann::ordered_json; using json = nlohmann::ordered_json;
enum class score_t { NONE, ACCURACY, ROC_AUC_OVR };
class Experiment { class Experiment {
public: public:
Experiment() = default; Experiment() = default;
@@ -20,6 +21,25 @@ namespace platform {
Experiment& setModelVersion(const std::string& model_version) { this->result.setModelVersion(model_version); return *this; } Experiment& setModelVersion(const std::string& model_version) { this->result.setModelVersion(model_version); return *this; }
Experiment& setModel(const std::string& model) { this->result.setModel(model); return *this; } Experiment& setModel(const std::string& model) { this->result.setModel(model); return *this; }
Experiment& setLanguage(const std::string& language) { this->result.setLanguage(language); return *this; } Experiment& setLanguage(const std::string& language) { this->result.setLanguage(language); return *this; }
Experiment& setDiscretizationAlgorithm(const std::string& discretization_algo)
{
this->discretization_algo = discretization_algo; this->result.setDiscretizationAlgorithm(discretization_algo); return *this;
}
Experiment& setSmoothSrategy(const std::string& smooth_strategy)
{
this->smooth_strategy = smooth_strategy; this->result.setSmoothStrategy(smooth_strategy);
if (smooth_strategy == "ORIGINAL")
smooth_type = bayesnet::Smoothing_t::ORIGINAL;
else if (smooth_strategy == "LAPLACE")
smooth_type = bayesnet::Smoothing_t::LAPLACE;
else if (smooth_strategy == "CESTNIK")
smooth_type = bayesnet::Smoothing_t::CESTNIK;
else {
std::cerr << "Experiment: Unknown smoothing strategy: " << smooth_strategy << std::endl;
exit(1);
}
return *this;
}
Experiment& setLanguageVersion(const std::string& language_version) { this->result.setLanguageVersion(language_version); return *this; } Experiment& setLanguageVersion(const std::string& language_version) { this->result.setLanguageVersion(language_version); return *this; }
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; result.setDiscretized(discretized); return *this; } Experiment& setDiscretized(bool discretized) { this->discretized = discretized; result.setDiscretized(discretized); return *this; }
Experiment& setStratified(bool stratified) { this->stratified = stratified; result.setStratified(stratified); return *this; } Experiment& setStratified(bool stratified) { this->stratified = stratified; result.setStratified(stratified); return *this; }
@@ -28,18 +48,24 @@ namespace platform {
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); result.addSeed(randomSeed); return *this; } Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); result.addSeed(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->result.setDuration(duration); return *this; } Experiment& setDuration(float duration) { this->result.setDuration(duration); return *this; }
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; } Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
void cross_validation(const std::string& fileName, bool quiet, bool no_train_score); void cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph);
void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score); void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph);
void saveResult(); void saveResult();
void show(); void show();
void saveGraph();
void report(bool classification_report = false); void report(bool classification_report = false);
private: private:
score_t parse_score() const;
Result result; Result result;
bool discretized{ false }, stratified{ false }; bool discretized{ false }, stratified{ false };
std::vector<PartialResult> results; std::vector<PartialResult> results;
std::vector<int> randomSeeds; std::vector<int> randomSeeds;
std::string discretization_algo;
std::string smooth_strategy;
bayesnet::Smoothing_t smooth_type{ bayesnet::Smoothing_t::NONE };
HyperParameters hyperparameters; HyperParameters hyperparameters;
int nfolds{ 0 }; int nfolds{ 0 };
int max_name{ 7 }; // max length of dataset name for formatting (default 7) int max_name{ 7 }; // max length of dataset name for formatting (default 7)
}; };
} }
#endif

View File

@@ -10,16 +10,9 @@ namespace platform {
for (const auto& item : datasets) { for (const auto& item : datasets) {
hyperparameters[item] = hyperparameters_; hyperparameters[item] = hyperparameters_;
} }
normalize_nested(datasets);
} }
// https://www.techiedelight.com/implode-a-vector-of-strings-into-a-comma-separated-string-in-cpp/ HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file, bool best)
std::string join(std::vector<std::string> const& strings, std::string delim)
{
std::stringstream ss;
std::copy(strings.begin(), strings.end(),
std::ostream_iterator<std::string>(ss, delim.c_str()));
return ss.str();
}
HyperParameters::HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file)
{ {
// Check if file exists // Check if file exists
std::ifstream file(hyperparameters_file); std::ifstream file(hyperparameters_file);
@@ -28,7 +21,14 @@ namespace platform {
} }
// Check if file is a json // Check if file is a json
json file_hyperparameters = json::parse(file); json file_hyperparameters = json::parse(file);
auto input_hyperparameters = file_hyperparameters["results"]; json input_hyperparameters;
if (best) {
for (const auto& [key, value] : file_hyperparameters.items()) {
input_hyperparameters[key]["hyperparameters"] = value[1];
}
} else {
input_hyperparameters = file_hyperparameters["results"];
}
// Check if hyperparameters are valid // Check if hyperparameters are valid
for (const auto& dataset : datasets) { for (const auto& dataset : datasets) {
if (!input_hyperparameters.contains(dataset)) { if (!input_hyperparameters.contains(dataset)) {
@@ -38,6 +38,24 @@ namespace platform {
} }
hyperparameters[dataset] = input_hyperparameters[dataset]["hyperparameters"].get<json>(); hyperparameters[dataset] = input_hyperparameters[dataset]["hyperparameters"].get<json>();
} }
normalize_nested(datasets);
}
void HyperParameters::normalize_nested(const std::vector<std::string>& datasets)
{
// for (const auto& dataset : datasets) {
// if (hyperparameters[dataset].contains("be_hyperparams")) {
// // Odte has base estimator hyperparameters set this way
// hyperparameters[dataset]["be_hyperparams"] = hyperparameters[dataset]["be_hyperparams"].dump();
// }
// }
}
// https://www.techiedelight.com/implode-a-vector-of-strings-into-a-comma-separated-string-in-cpp/
std::string join(std::vector<std::string> const& strings, std::string delim)
{
std::stringstream ss;
std::copy(strings.begin(), strings.end(),
std::ostream_iterator<std::string>(ss, delim.c_str()));
return ss.str();
} }
void HyperParameters::check(const std::vector<std::string>& valid, const std::string& fileName) void HyperParameters::check(const std::vector<std::string>& valid, const std::string& fileName)
{ {

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef HYPERPARAMETERS_H
#define HYPERPARAMETERS_H
#include <string> #include <string>
#include <map> #include <map>
#include <vector> #include <vector>
@@ -10,13 +10,18 @@ namespace platform {
class HyperParameters { class HyperParameters {
public: public:
HyperParameters() = default; HyperParameters() = default;
// Constructor to use command line hyperparameters
explicit HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_); explicit HyperParameters(const std::vector<std::string>& datasets, const json& hyperparameters_);
explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file); // Constructor to use hyperparameters file generated by grid or by best results
explicit HyperParameters(const std::vector<std::string>& datasets, const std::string& hyperparameters_file, bool best = false);
~HyperParameters() = default; ~HyperParameters() = default;
bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); } bool notEmpty(const std::string& key) const { return !hyperparameters.at(key).empty(); }
void check(const std::vector<std::string>& valid, const std::string& fileName); void check(const std::vector<std::string>& valid, const std::string& fileName);
json get(const std::string& fileName); json get(const std::string& fileName);
private: private:
void normalize_nested(const std::vector<std::string>& datasets);
std::map<std::string, json> hyperparameters; std::map<std::string, json> hyperparameters;
bool best = false; // Used to separate grid/best hyperparameters as the format of those files are different
}; };
} /* namespace platform */ } /* namespace platform */
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef MODELS_H
#define MODELS_H
#include <map> #include <map>
#include <bayesnet/BaseClassifier.h> #include <bayesnet/BaseClassifier.h>
#include <bayesnet/ensembles/AODE.h> #include <bayesnet/ensembles/AODE.h>
@@ -42,3 +42,4 @@ namespace platform {
Registrar(const std::string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction); Registrar(const std::string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
}; };
} }
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef PARTIAL_RESULT_H
#define PARTIAL_RESULT_H
#include <string> #include <string>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
@@ -15,6 +15,7 @@ namespace platform {
data["times_train"] = json::array(); data["times_train"] = json::array();
data["times_test"] = json::array(); data["times_test"] = json::array();
data["notes"] = json::array(); data["notes"] = json::array();
data["graph"] = json::array();
data["train_time"] = 0.0; data["train_time"] = 0.0;
data["train_time_std"] = 0.0; data["train_time_std"] = 0.0;
data["test_time"] = 0.0; data["test_time"] = 0.0;
@@ -27,6 +28,12 @@ namespace platform {
data["notes"].insert(data["notes"].end(), notes_.begin(), notes_.end()); data["notes"].insert(data["notes"].end(), notes_.begin(), notes_.end());
return *this; return *this;
} }
PartialResult& setGraph(const std::vector<std::string>& graph)
{
json graph_ = graph;
data["graph"].insert(data["graph"].end(), graph_.begin(), graph_.end());
return *this;
}
PartialResult& setConfusionMatrices(const json& confusion_matrices) { data["confusion_matrices"] = confusion_matrices; return *this; } PartialResult& setConfusionMatrices(const json& confusion_matrices) { data["confusion_matrices"] = confusion_matrices; return *this; }
PartialResult& setConfusionMatricesTrain(const json& confusion_matrices) { data["confusion_matrices_train"] = confusion_matrices; return *this; } PartialResult& setConfusionMatricesTrain(const json& confusion_matrices) { data["confusion_matrices_train"] = confusion_matrices; return *this; }
PartialResult& setHyperparameters(const json& hyperparameters) { data["hyperparameters"] = hyperparameters; return *this; } PartialResult& setHyperparameters(const json& hyperparameters) { data["hyperparameters"] = hyperparameters; return *this; }
@@ -73,3 +80,4 @@ namespace platform {
json data; json data;
}; };
} }
#endif

67
src/main/RocAuc.cpp Normal file
View File

@@ -0,0 +1,67 @@
#include <sstream>
#include <algorithm>
#include <numeric>
#include <utility>
#include "RocAuc.h"
namespace platform {
double RocAuc::compute(const torch::Tensor& y_proba, const torch::Tensor& labels)
{
    // Macro-averaged one-vs-rest ROC AUC from a (nSamples x nClasses) probability
    // tensor and a tensor of integer true labels.
    size_t nClasses = y_proba.size(1);
    // In binary classification problem there's no need to calculate the average of the AUCs
    if (nClasses == 2)
        nClasses = 1;
    size_t nSamples = y_proba.size(0);
    // Cache the true labels; compute_common reads y_test to count the positives.
    y_test = tensorToVector(labels);
    std::vector<double> aucScores(nClasses, 0.0);
    for (size_t classIdx = 0; classIdx < nClasses; ++classIdx) {
        // Collect (score for this class, 1 if the sample truly belongs to it).
        scoresAndLabels.clear();
        for (size_t i = 0; i < nSamples; ++i) {
            scoresAndLabels.emplace_back(y_proba[i][classIdx].item<float>(), y_test[i] == classIdx ? 1 : 0);
        }
        aucScores[classIdx] = compute_common(nSamples, classIdx);
    }
    // Average the per-class AUCs (nClasses == 1 in the binary case).
    return std::accumulate(aucScores.begin(), aucScores.end(), 0.0) / nClasses;
}
double RocAuc::compute(const std::vector<std::vector<double>>& y_proba, const std::vector<int>& labels)
{
y_test = labels;
size_t nClasses = y_proba[0].size();
// In binary classification problem there's no need to calculate the average of the AUCs
if (nClasses == 2)
nClasses = 1;
size_t nSamples = y_proba.size();
std::vector<double> aucScores(nClasses, 0.0);
for (size_t classIdx = 0; classIdx < nClasses; ++classIdx) {
scoresAndLabels.clear();
for (size_t i = 0; i < nSamples; ++i) {
scoresAndLabels.emplace_back(y_proba[i][classIdx], labels[i] == classIdx ? 1 : 0);
}
aucScores[classIdx] = compute_common(nSamples, classIdx);
}
return std::accumulate(aucScores.begin(), aucScores.end(), 0.0) / nClasses;
}
double RocAuc::compute_common(size_t nSamples, size_t classIdx)
{
    // Computes the one-vs-rest ROC AUC for class `classIdx` from the (score,
    // binary label) pairs previously collected in `scoresAndLabels` by the
    // compute() overloads, integrating the ROC curve with the trapezoidal rule.
    // Fix: the class count comparison is cast to int (y_test holds ints), and a
    // degenerate class — absent from y_test, or covering every sample — now
    // returns 0.0 instead of dividing by zero, which produced NaN and poisoned
    // the macro-averaged AUC in the callers.
    double totalPos = std::count(y_test.begin(), y_test.end(), static_cast<int>(classIdx));
    double totalNeg = nSamples - totalPos;
    if (totalPos == 0.0 || totalNeg == 0.0)
        return 0.0; // ROC curve undefined for a one-sided class
    // Sort by descending score; pairs compare lexicographically, so score ties are
    // broken by label (positives first).
    // NOTE(review): this tie-breaking is slightly optimistic compared with
    // implementations that average over tied scores — confirm it is acceptable.
    std::sort(scoresAndLabels.begin(), scoresAndLabels.end(), std::greater<>());
    std::vector<double> tpr, fpr;
    double tp = 0, fp = 0;
    for (const auto& [score, label] : scoresAndLabels) {
        if (label == 1) {
            tp += 1;
        } else {
            fp += 1;
        }
        tpr.push_back(tp / totalPos);
        fpr.push_back(fp / totalNeg);
    }
    // Trapezoidal integration over consecutive ROC points.
    double auc = 0.0;
    for (size_t i = 1; i < tpr.size(); ++i) {
        auc += 0.5 * (fpr[i] - fpr[i - 1]) * (tpr[i] + tpr[i - 1]);
    }
    return auc;
}
}

21
src/main/RocAuc.h Normal file
View File

@@ -0,0 +1,21 @@
#ifndef ROCAUC_H
#define ROCAUC_H
#include <torch/torch.h>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
namespace platform {
    using json = nlohmann::ordered_json;
    // Computes the ROC AUC (area under the receiver-operating-characteristic
    // curve), one-vs-rest and macro-averaged across classes; a binary problem
    // yields the single positive-class AUC.
    class RocAuc {
    public:
        RocAuc() = default;
        // y_proba: per-sample class probabilities (nSamples x nClasses);
        // y_test: true integer class labels. Returns the (macro-averaged) AUC.
        double compute(const std::vector<std::vector<double>>& y_proba, const std::vector<int>& y_test);
        // Tensor overload of the above.
        double compute(const torch::Tensor& y_proba, const torch::Tensor& y_test);
    private:
        // Trapezoidal AUC for one class; reads scoresAndLabels and y_test below.
        double compute_common(size_t nSamples, size_t classIdx);
        std::vector<std::pair<double, int>> scoresAndLabels; // (score, 1 if sample belongs to the class)
        std::vector<int> y_test; // true labels cached by the last compute() call
    };
}
#endif

View File

@@ -1,13 +1,15 @@
#include <sstream> #include <sstream>
#include "Scores.h" #include "Scores.h"
#include "common/Utils.h" // tensorToVector
#include "common/Colors.h" #include "common/Colors.h"
namespace platform { namespace platform {
Scores::Scores(torch::Tensor& y_test, torch::Tensor& y_pred, int num_classes, std::vector<std::string> labels) : num_classes(num_classes), labels(labels) Scores::Scores(torch::Tensor& y_test, torch::Tensor& y_proba, int num_classes, std::vector<std::string> labels) : num_classes(num_classes), labels(labels), y_test(y_test), y_proba(y_proba)
{ {
if (labels.size() == 0) { if (labels.size() == 0) {
init_default_labels(); init_default_labels();
} }
total = y_test.size(0); total = y_test.size(0);
auto y_pred = y_proba.argmax(1);
accuracy_value = (y_pred == y_test).sum().item<float>() / total; accuracy_value = (y_pred == y_test).sum().item<float>() / total;
init_confusion_matrix(); init_confusion_matrix();
for (int i = 0; i < total; i++) { for (int i = 0; i < total; i++) {
@@ -16,7 +18,7 @@ namespace platform {
confusion_matrix[actual][predicted] += 1; confusion_matrix[actual][predicted] += 1;
} }
} }
Scores::Scores(json& confusion_matrix_) Scores::Scores(const json& confusion_matrix_)
{ {
json values; json values;
total = 0; total = 0;
@@ -40,6 +42,57 @@ namespace platform {
} }
compute_accuracy_value(); compute_accuracy_value();
} }
float Scores::auc()
{
    // One-vs-rest ROC AUC, macro-averaged over classes, computed from the
    // y_test / y_proba tensors captured at construction. A binary problem
    // yields the single positive-class AUC. Returns 0 when no samples are
    // available or a class index exceeds the probability matrix width.
    // NOTE(review): this duplicates the algorithm in RocAuc — consider sharing
    // one implementation.
    // Fix: degenerate classes (absent from y_test, or covering every sample)
    // previously divided by zero, turning the whole average into NaN; the loop
    // index is now int to match nClasses and the int labels.
    size_t nSamples = y_test.numel();
    if (nSamples == 0) return 0;
    // In binary classification problem there's no need to calculate the average of the AUCs
    auto nClasses = num_classes;
    if (num_classes == 2)
        nClasses = 1;
    auto y_testv = tensorToVector<int>(y_test);
    std::vector<double> aucScores(nClasses, 0.0);
    std::vector<std::pair<double, int>> scoresAndLabels;
    for (int classIdx = 0; classIdx < nClasses; ++classIdx) {
        if (classIdx >= y_proba.size(1)) {
            std::cerr << "AUC warning - class index out of range" << std::endl;
            return 0;
        }
        // Collect (score for this class, 1 if the sample truly belongs to it).
        scoresAndLabels.clear();
        for (size_t i = 0; i < nSamples; ++i) {
            scoresAndLabels.emplace_back(y_proba[i][classIdx].item<float>(), y_testv[i] == classIdx ? 1 : 0);
        }
        double totalPos = std::count(y_testv.begin(), y_testv.end(), classIdx);
        double totalNeg = nSamples - totalPos;
        if (totalPos == 0 || totalNeg == 0) {
            // ROC curve undefined for a one-sided class; score it 0 instead of NaN.
            aucScores[classIdx] = 0.0;
            continue;
        }
        // Sort by descending score (ties broken by label, positives first).
        std::sort(scoresAndLabels.begin(), scoresAndLabels.end(), std::greater<>());
        std::vector<double> tpr, fpr;
        double tp = 0, fp = 0;
        for (const auto& [score, label] : scoresAndLabels) {
            if (label == 1) {
                tp += 1;
            } else {
                fp += 1;
            }
            tpr.push_back(tp / totalPos);
            fpr.push_back(fp / totalNeg);
        }
        // Trapezoidal integration over consecutive ROC points.
        double auc = 0.0;
        for (size_t i = 1; i < tpr.size(); ++i) {
            auc += 0.5 * (fpr[i] - fpr[i - 1]) * (tpr[i] + tpr[i - 1]);
        }
        aucScores[classIdx] = auc;
    }
    return std::accumulate(aucScores.begin(), aucScores.end(), 0.0) / nClasses;
}
Scores Scores::create_aggregate(const json& data, const std::string key)
{
    // Builds a single Scores object by constructing one Scores per entry of the
    // JSON array data[key] (confusion-matrix objects) and aggregating them all
    // into the first one.
    // NOTE(review): assumes data[key] contains at least one element — an empty
    // array makes data[key][0] fail. Confirm callers guarantee this.
    auto scores = Scores(data[key][0]);
    for (int i = 1; i < data[key].size(); i++) {
        auto score = Scores(data[key][i]);
        scores.aggregate(score);
    }
    return scores;
}
void Scores::compute_accuracy_value() void Scores::compute_accuracy_value()
{ {
accuracy_value = 0; accuracy_value = 0;
@@ -129,6 +182,25 @@ namespace platform {
<< std::setw(dlen) << std::right << support; << std::setw(dlen) << std::right << support;
return oss.str(); return oss.str();
} }
std::tuple<float, float, float, float> Scores::compute_averages()
{
float precision_avg = 0;
float recall_avg = 0;
float precision_wavg = 0;
float recall_wavg = 0;
for (int i = 0; i < num_classes; i++) {
int support = confusion_matrix[i].sum().item<int>();
precision_avg += precision(i);
precision_wavg += precision(i) * support;
recall_avg += recall(i);
recall_wavg += recall(i) * support;
}
precision_wavg /= total;
recall_wavg /= total;
precision_avg /= num_classes;
recall_avg /= num_classes;
return { precision_avg, recall_avg, precision_wavg, recall_wavg };
}
std::vector<std::string> Scores::classification_report(std::string color, std::string title) std::vector<std::string> Scores::classification_report(std::string color, std::string title)
{ {
std::stringstream oss; std::stringstream oss;
@@ -148,21 +220,7 @@ namespace platform {
report.push_back(" "); report.push_back(" ");
oss << classification_report_line("accuracy", 0, 0, accuracy(), total); oss << classification_report_line("accuracy", 0, 0, accuracy(), total);
report.push_back(oss.str()); oss.str(""); report.push_back(oss.str()); oss.str("");
float precision_avg = 0; auto [precision_avg, recall_avg, precision_wavg, recall_wavg] = compute_averages();
float recall_avg = 0;
float precision_wavg = 0;
float recall_wavg = 0;
for (int i = 0; i < num_classes; i++) {
int support = confusion_matrix[i].sum().item<int>();
precision_avg += precision(i);
precision_wavg += precision(i) * support;
recall_avg += recall(i);
recall_wavg += recall(i) * support;
}
precision_wavg /= total;
recall_wavg /= total;
precision_avg /= num_classes;
recall_avg /= num_classes;
report.push_back(classification_report_line("macro avg", precision_avg, recall_avg, f1_macro(), total)); report.push_back(classification_report_line("macro avg", precision_avg, recall_avg, f1_macro(), total));
report.push_back(classification_report_line("weighted avg", precision_wavg, recall_wavg, f1_weighted(), total)); report.push_back(classification_report_line("weighted avg", precision_wavg, recall_wavg, f1_weighted(), total));
report.push_back(""); report.push_back("");
@@ -180,17 +238,33 @@ namespace platform {
} }
return report; return report;
} }
json Scores::classification_report_json(std::string title)
{
    // Builds the classification report as a JSON document: per-class rows,
    // accuracy, macro/weighted averages and the confusion matrix.
    json report;
    report["title"] = "Classification Report using " + title + " dataset";
    report["headers"] = { " ", "precision", "recall", "f1-score", "support" };
    report["body"] = {};
    for (int cls = 0; cls < num_classes; ++cls) {
        // One row per class: label, precision, recall, f1 and support.
        report["body"].push_back({ labels[cls], precision(cls), recall(cls), f1_score(cls), confusion_matrix[cls].sum().item<int>() });
    }
    report["accuracy"] = { "accuracy", 0, 0, accuracy(), total };
    auto [prec_macro, rec_macro, prec_weighted, rec_weighted] = compute_averages();
    report["averages"] = { "macro avg", prec_macro, rec_macro, f1_macro(), total };
    report["weighted"] = { "weighted avg", prec_weighted, rec_weighted, f1_weighted(), total };
    report["confusion_matrix"] = get_confusion_matrix_json();
    return report;
}
json Scores::get_confusion_matrix_json(bool labels_as_keys) json Scores::get_confusion_matrix_json(bool labels_as_keys)
{ {
json j; json output;
for (int i = 0; i < num_classes; i++) { for (int i = 0; i < num_classes; i++) {
auto r_ptr = confusion_matrix[i].data_ptr<int>(); auto r_ptr = confusion_matrix[i].data_ptr<int>();
if (labels_as_keys) { if (labels_as_keys) {
j[labels[i]] = std::vector<int>(r_ptr, r_ptr + num_classes); output[labels[i]] = std::vector<int>(r_ptr, r_ptr + num_classes);
} else { } else {
j[i] = std::vector<int>(r_ptr, r_ptr + num_classes); output[i] = std::vector<int>(r_ptr, r_ptr + num_classes);
} }
} }
return j; return output;
} }
} }

View File

@@ -1,16 +1,19 @@
#ifndef SCORES_H #ifndef SCORES_H
#define SCORES_H #define SCORES_H
#include <torch/torch.h>
#include <vector> #include <vector>
#include <string> #include <string>
#include <torch/torch.h>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
namespace platform { namespace platform {
using json = nlohmann::ordered_json; using json = nlohmann::ordered_json;
class Scores { class Scores {
public: public:
Scores(torch::Tensor& y_test, torch::Tensor& y_pred, int num_classes, std::vector<std::string> labels = {}); Scores(torch::Tensor& y_test, torch::Tensor& y_proba, int num_classes, std::vector<std::string> labels = {});
explicit Scores(json& confusion_matrix_); explicit Scores(const json& confusion_matrix_);
static Scores create_aggregate(const json& data, const std::string key);
float accuracy(); float accuracy();
float auc();
float f1_score(int num_class); float f1_score(int num_class);
float f1_weighted(); float f1_weighted();
float f1_macro(); float f1_macro();
@@ -18,6 +21,7 @@ namespace platform {
float recall(int num_class); float recall(int num_class);
torch::Tensor get_confusion_matrix() { return confusion_matrix; } torch::Tensor get_confusion_matrix() { return confusion_matrix; }
std::vector<std::string> classification_report(std::string color = "", std::string title = ""); std::vector<std::string> classification_report(std::string color = "", std::string title = "");
json classification_report_json(std::string title = "");
json get_confusion_matrix_json(bool labels_as_keys = false); json get_confusion_matrix_json(bool labels_as_keys = false);
void aggregate(const Scores& a); void aggregate(const Scores& a);
private: private:
@@ -25,11 +29,15 @@ namespace platform {
void init_confusion_matrix(); void init_confusion_matrix();
void init_default_labels(); void init_default_labels();
void compute_accuracy_value(); void compute_accuracy_value();
std::tuple<float, float, float, float> compute_averages();
int num_classes; int num_classes;
float accuracy_value; float accuracy_value;
int total; int total;
std::vector<std::string> labels; std::vector<std::string> labels;
torch::Tensor confusion_matrix; // Rows ar actual, columns are predicted torch::Tensor confusion_matrix; // Rows ar actual, columns are predicted
torch::Tensor null_t; // Covenient null tensor needed when confusion_matrix constructor is used
torch::Tensor& y_test = null_t; // for ROC AUC
torch::Tensor& y_proba = null_t; // for ROC AUC
int label_len = 16; int label_len = 16;
int dlen = 9; int dlen = 9;
int ndec = 7; int ndec = 7;

View File

@@ -1,4 +1,5 @@
#pragma once #ifndef MODELREGISTER_H
#define MODELREGISTER_H
static platform::Registrar registrarT("TAN", static platform::Registrar registrarT("TAN",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();}); [](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();});
@@ -33,4 +34,6 @@ static platform::Registrar registrarSvc("SVC",
static platform::Registrar registrarRaF("RandomForest", static platform::Registrar registrarRaF("RandomForest",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::RandomForest();}); [](void) -> bayesnet::BaseClassifier* { return new pywrap::RandomForest();});
static platform::Registrar registrarXGB("XGBoost", static platform::Registrar registrarXGB("XGBoost",
[](void) -> bayesnet::BaseClassifier* { return new pywrap::XGBoost();}); [](void) -> bayesnet::BaseClassifier* { return new pywrap::XGBoost();});
#endif

View File

@@ -1,20 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include <tuple>
namespace platform {
    // Reads a one-character command (optionally followed by an index) from the
    // user for the interactive results screen.
    // NOTE(review): this header is deleted in this changeset, replaced by
    // OptionsMenu.h — callers were migrated to OptionsMenu::parse.
    class CommandParser {
    public:
        CommandParser() = default;
        // Prompts using `color`, accepts one of `options` (label, key char,
        // needs-index flag) with `defaultCommand` as fallback; an accompanying
        // index must lie within [minIndex, maxIndex].
        // Returns {command, index, error flag}; on error getErrorMessage()
        // describes the problem.
        std::tuple<char, int, bool> parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int minIndex, const int maxIndex);
        char getCommand() const { return command; };
        int getIndex() const { return index; };
        std::string getErrorMessage() const { return errorMessage; };
    private:
        std::string errorMessage;
        char command;
        int index;
    };
} /* namespace platform */

View File

@@ -3,29 +3,33 @@
#include <string> #include <string>
#include <algorithm> #include <algorithm>
#include "folding.hpp" #include "folding.hpp"
#include "common/Colors.h"
#include "common/CLocale.h" #include "common/CLocale.h"
#include "common/Paths.h" #include "common/Paths.h"
#include "CommandParser.h" #include "OptionsMenu.h"
#include "ManageScreen.h" #include "ManageScreen.h"
#include "reports/DatasetsConsole.h" #include "reports/DatasetsConsole.h"
#include "reports/ReportConsole.h" #include "reports/ReportConsole.h"
#include "reports/ReportExcel.h" #include "reports/ReportExcel.h"
#include "reports/ReportExcelCompared.h" #include "reports/ReportExcelCompared.h"
#include <bayesnet/classifiers/TAN.h> #include <bayesnet/classifiers/TAN.h>
#include "CPPFImdlp.h" #include <fimdlp/CPPFImdlp.h>
namespace platform { namespace platform {
const std::string STATUS_OK = "Ok."; const std::string STATUS_OK = "Ok.";
const std::string STATUS_COLOR = Colors::GREEN(); const std::string STATUS_COLOR = Colors::GREEN();
ManageScreen::ManageScreen(int rows, int cols, const std::string& model, const std::string& score, const std::string& platform, bool complete, bool partial, bool compare) : ManageScreen::ManageScreen(int rows, int cols, const std::string& model, const std::string& score, const std::string& platform, bool complete, bool partial, bool compare) :
rows{ rows }, cols{ cols }, complete{ complete }, partial{ partial }, compare{ compare }, didExcel(false), results(ResultsManager(model, score, platform, complete, partial)) rows{ rows }, cols{ cols }, complete{ complete }, partial{ partial }, compare{ compare }, didExcel(false), results(ResultsManager(model, score, platform, complete, partial))
{ {
results.load(); results.load();
openExcel = false; openExcel = false;
workbook = NULL; workbook = NULL;
this->rows = std::max(0, rows - 6); // 6 is the number of lines used by the menu & header maxModel = results.maxModelSize();
cols = std::max(cols, 140); maxTitle = results.maxTitleSize();
header_lengths = { 3, 10, maxModel, 11, 10, 12, 2, 3, 7, maxTitle };
header_labels = { " #", "Date", "Model", "Score Name", "Score", "Platform", "SD", "C/P", "Time", "Title" };
sort_fields = { "Date", "Model", "Score", "Time" };
updateSize(rows, cols);
// Initializes the paginator for each output type (experiments, datasets, result) // Initializes the paginator for each output type (experiments, datasets, result)
for (int i = 0; i < static_cast<int>(OutputType::Count); i++) { for (int i = 0; i < static_cast<int>(OutputType::Count); i++) {
paginator.push_back(Paginator(this->rows, results.size())); paginator.push_back(Paginator(this->rows, results.size()));
@@ -36,12 +40,41 @@ namespace platform {
subIndex = -1; subIndex = -1;
output_type = OutputType::EXPERIMENTS; output_type = OutputType::EXPERIMENTS;
} }
void ManageScreen::computeSizes()
{
int minTitle = 10;
// set 10 chars as minimum for Title
auto header_title = header_lengths[header_lengths.size() - 1];
min_columns = std::accumulate(header_lengths.begin(), header_lengths.end(), 0) + header_lengths.size() - header_title + minTitle;
maxTitle = minTitle + cols - min_columns;
header_lengths[header_lengths.size() - 1] = maxTitle;
cols = std::min(cols, min_columns + maxTitle);
for (auto& paginator_ : paginator) {
paginator_.setPageSize(rows);
}
}
bool ManageScreen::checkWrongColumns()
{
if (min_columns > cols) {
std::cerr << Colors::MAGENTA() << "Make screen bigger to fit the results! " + std::to_string(min_columns - cols) + " columns needed! " << std::endl;
return true;
}
return false;
}
void ManageScreen::updateSize(int rows_, int cols_)
{
rows = std::max(6, rows_ - 6); // 6 is the number of lines used by the menu & header
cols = cols_;
computeSizes();
}
void ManageScreen::doMenu() void ManageScreen::doMenu()
{ {
if (results.empty()) { if (results.empty()) {
std::cout << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl; std::cerr << Colors::MAGENTA() << "No results found!" << Colors::RESET() << std::endl;
return; return;
} }
if (checkWrongColumns())
return;
results.sortResults(sort_field, sort_type); results.sortResults(sort_field, sort_type);
list(STATUS_OK, STATUS_COLOR); list(STATUS_OK, STATUS_COLOR);
menu(); menu();
@@ -115,7 +148,6 @@ namespace platform {
} }
void ManageScreen::list_result(const std::string& status_message, const std::string& status_color) void ManageScreen::list_result(const std::string& status_message, const std::string& status_color)
{ {
auto data = results.at(index).getJson(); auto data = results.at(index).getJson();
ReportConsole report(data, compare); ReportConsole report(data, compare);
auto header_text = report.getHeader(); auto header_text = report.getHeader();
@@ -140,11 +172,9 @@ namespace platform {
// Status Area // Status Area
// //
footer(status_message, status_color); footer(status_message, status_color);
} }
void ManageScreen::list_detail(const std::string& status_message, const std::string& status_color) void ManageScreen::list_detail(const std::string& status_message, const std::string& status_color)
{ {
auto data = results.at(index).getJson(); auto data = results.at(index).getJson();
ReportConsole report(data, compare, subIndex); ReportConsole report(data, compare, subIndex);
auto header_text = report.getHeader(); auto header_text = report.getHeader();
@@ -169,7 +199,6 @@ namespace platform {
// Status Area // Status Area
// //
footer(status_message, status_color); footer(status_message, status_color);
} }
void ManageScreen::list_datasets(const std::string& status_message, const std::string& status_color) void ManageScreen::list_datasets(const std::string& status_message, const std::string& status_color)
{ {
@@ -193,7 +222,6 @@ namespace platform {
// Status Area // Status Area
// //
footer(status_message, status_color); footer(status_message, status_color);
} }
void ManageScreen::list_experiments(const std::string& status_message, const std::string& status_color) void ManageScreen::list_experiments(const std::string& status_message, const std::string& status_color)
{ {
@@ -201,17 +229,9 @@ namespace platform {
// header // header
// //
header(); header();
//
// Field names
//
int maxModel = results.maxModelSize();
int maxTitle = results.maxTitleSize();
std::vector<int> header_lengths = { 3, 10, maxModel, 11, 10, 12, 2, 3, 7, maxTitle };
std::cout << Colors::RESET(); std::cout << Colors::RESET();
std::string arrow_dn = Symbols::down_arrow + " "; std::string arrow_dn = Symbols::down_arrow + " ";
std::string arrow_up = Symbols::up_arrow + " "; std::string arrow_up = Symbols::up_arrow + " ";
std::vector<std::string> header_labels = { " #", "Date", "Model", "Score Name", "Score", "Platform", "SD", "C/P", "Time", "Title" };
std::vector<std::string> sort_fields = { "Date", "Model", "Score", "Time" };
for (int i = 0; i < header_labels.size(); i++) { for (int i = 0; i < header_labels.size(); i++) {
std::string suffix = "", color = Colors::GREEN(); std::string suffix = "", color = Colors::GREEN();
int diff = 0; int diff = 0;
@@ -230,11 +250,15 @@ namespace platform {
// //
// Results // Results
// //
if (results.empty()) {
std::cout << "No results found!" << std::endl;
return;
}
auto [index_from, index_to] = paginator[static_cast<int>(output_type)].getOffset(); auto [index_from, index_to] = paginator[static_cast<int>(output_type)].getOffset();
for (int i = index_from; i <= index_to; i++) { for (int i = index_from; i <= index_to; i++) {
auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN(); auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN();
std::cout << color << std::setw(3) << std::fixed << std::right << i << " "; std::cout << color << std::setw(3) << std::fixed << std::right << i << " ";
std::cout << results.at(i).to_string(maxModel) << std::endl; std::cout << results.at(i).to_string(maxModel, maxTitle) << std::endl;
} }
// //
// Status Area // Status Area
@@ -254,7 +278,7 @@ namespace platform {
while (!finished) { while (!finished) {
std::cout << color << "Really want to " << intent << " " << fileName << "? (y/n): "; std::cout << color << "Really want to " << intent << " " << fileName << "? (y/n): ";
getline(std::cin, line); getline(std::cin, line);
finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0] == 'n')); finished = line.size() == 1 && (tolower(line[0]) == 'y' || tolower(line[0]) == 'n');
} }
if (tolower(line[0]) == 'y') { if (tolower(line[0]) == 'y') {
return true; return true;
@@ -289,16 +313,28 @@ namespace platform {
} }
std::pair<std::string, std::string> ManageScreen::sortList() std::pair<std::string, std::string> ManageScreen::sortList()
{ {
std::cout << Colors::YELLOW() << "Choose sorting field (date='d', score='s', time='t', model='m', ascending='+', descending='-'): "; std::vector<std::tuple<std::string, char, bool>> sortOptions = {
std::vector<std::string> fields = { "Date", "Model", "Score", "Time" }; {"date", 'd', false},
{"score", 's', false},
{"time", 't', false},
{"model", 'm', false},
{"ascending+", '+', false},
{"descending-", '-', false}
};
auto sortMenu = OptionsMenu(sortOptions, Colors::YELLOW(), Colors::RED(), cols);
std::string invalid_option = "Invalid sorting option"; std::string invalid_option = "Invalid sorting option";
std::string line;
char option; char option;
getline(std::cin, line); bool parserError = true; // force the first iteration
if (line.size() == 0 || line.size() > 1) { while (parserError) {
return { Colors::RED(), invalid_option }; if (checkWrongColumns())
return { Colors::RED(), "Invalid column size" };
auto [min_index, max_index] = paginator[static_cast<int>(output_type)].getOffset();
std::tie(option, index, parserError) = sortMenu.parse(' ', 0, 0);
sortMenu.updateColumns(cols);
if (parserError) {
return { Colors::RED(), invalid_option };
}
} }
option = line[0];
switch (option) { switch (option) {
case 'd': case 'd':
sort_field = SortField::DATE; sort_field = SortField::DATE;
@@ -322,7 +358,7 @@ namespace platform {
return { Colors::RED(), invalid_option }; return { Colors::RED(), invalid_option };
} }
results.sortResults(sort_field, sort_type); results.sortResults(sort_field, sort_type);
return { Colors::GREEN(), "Sorted by " + fields[static_cast<int>(sort_field)] + " " + (sort_type == SortType::ASC ? "ascending" : "descending") }; return { Colors::GREEN(), "Sorted by " + sort_fields[static_cast<int>(sort_field)] + " " + (sort_type == SortType::ASC ? "ascending" : "descending") };
} }
void ManageScreen::menu() void ManageScreen::menu()
{ {
@@ -333,17 +369,17 @@ namespace platform {
std::vector<std::tuple<std::string, char, bool>> mainOptions = { std::vector<std::tuple<std::string, char, bool>> mainOptions = {
{"quit", 'q', false}, {"quit", 'q', false},
{"list", 'l', false}, {"list", 'l', false},
{"delete", 'D', true}, {"Delete", 'D', true},
{"datasets", 'd', false}, {"datasets", 'd', false},
{"hide", 'h', true}, {"hide", 'h', true},
{"sort", 's', false}, {"sort", 's', false},
{"report", 'r', true}, {"report", 'r', true},
{"excel", 'e', true}, {"excel", 'e', true},
{"title", 't', true}, {"title", 't', true},
{"set A", 'a', true}, {"set A", 'A', true},
{"set B", 'b', true}, {"set B", 'B', true},
{"compare A~B", 'c', false}, {"compare A~B", 'c', false},
{"Page", 'p', true}, {"page", 'p', true},
{"Page+", '+', false }, {"Page+", '+', false },
{"Page-", '-', false} {"Page-", '-', false}
}; };
@@ -352,25 +388,33 @@ namespace platform {
{"quit", 'q', false}, {"quit", 'q', false},
{"report", 'r', true}, {"report", 'r', true},
{"list", 'l', false}, {"list", 'l', false},
{"excel", 'e', false}, {"excel", 'e', true},
{"back", 'b', false}, {"back", 'b', false},
{"Page", 'p', true}, {"page", 'p', true},
{"Page+", '+', false}, {"Page+", '+', false},
{"Page-", '-', false} {"Page-", '-', false}
}; };
auto parser = CommandParser();
while (!finished) { while (!finished) {
auto main_menu = OptionsMenu(mainOptions, Colors::IGREEN(), Colors::YELLOW(), cols);
auto list_menu = OptionsMenu(listOptions, Colors::IBLUE(), Colors::YELLOW(), cols);
OptionsMenu& menu = output_type == OutputType::EXPERIMENTS ? main_menu : list_menu;
bool parserError = true; // force the first iteration bool parserError = true; // force the first iteration
while (parserError) { while (parserError) {
int index_menu;
auto [min_index, max_index] = paginator[static_cast<int>(output_type)].getOffset(); auto [min_index, max_index] = paginator[static_cast<int>(output_type)].getOffset();
std::tie(option, index_menu, parserError) = menu.parse('r', min_index, max_index);
if (output_type == OutputType::EXPERIMENTS) { if (output_type == OutputType::EXPERIMENTS) {
std::tie(option, index, parserError) = parser.parse(Colors::IGREEN(), mainOptions, 'r', min_index, max_index); index = index_menu;
} else { } else {
std::tie(option, subIndex, parserError) = parser.parse(Colors::IBLUE(), listOptions, 'r', min_index, max_index); subIndex = index_menu;
} }
if (min_columns > cols) {
std::cerr << "Make screen bigger to fit the results! " + std::to_string(min_columns - cols) + " columns needed! " << std::endl;
return;
}
menu.updateColumns(cols);
if (parserError) { if (parserError) {
list(parser.getErrorMessage(), Colors::RED()); list(menu.getErrorMessage(), Colors::RED());
} }
} }
switch (option) { switch (option) {
@@ -405,7 +449,7 @@ namespace platform {
case 'q': case 'q':
finished = true; finished = true;
break; break;
case 'a': case 'A':
if (index == index_B) { if (index == index_B) {
list("A and B cannot be the same!", Colors::RED()); list("A and B cannot be the same!", Colors::RED());
break; break;
@@ -413,7 +457,7 @@ namespace platform {
index_A = index; index_A = index;
list("A set to " + std::to_string(index), Colors::GREEN()); list("A set to " + std::to_string(index), Colors::GREEN());
break; break;
case 'b': // set_b or back to list case 'B': // set_b or back to list
if (output_type == OutputType::EXPERIMENTS) { if (output_type == OutputType::EXPERIMENTS) {
if (index == index_A) { if (index == index_A) {
list("A and B cannot be the same!", Colors::RED()); list("A and B cannot be the same!", Colors::RED());
@@ -465,6 +509,7 @@ namespace platform {
std::cout << "Hiding " << filename << std::endl; std::cout << "Hiding " << filename << std::endl;
results.hideResult(index, Paths::hiddenResults()); results.hideResult(index, Paths::hiddenResults());
status_message = filename + " hidden! (moved to " + Paths::hiddenResults() + ")"; status_message = filename + " hidden! (moved to " + Paths::hiddenResults() + ")";
paginator[static_cast<int>(OutputType::EXPERIMENTS)].setTotal(results.size());
list(status_message, Colors::YELLOW()); list(status_message, Colors::YELLOW());
} }
break; break;

View File

@@ -1,7 +1,8 @@
#pragma once #ifndef MANAGE_SCREEN_H
#define MANAGE_SCREEN_H
#include <xlsxwriter.h> #include <xlsxwriter.h>
#include "ResultsManager.h" #include "ResultsManager.h"
#include "common/Colors.h"
#include "Paginator.hpp" #include "Paginator.hpp"
namespace platform { namespace platform {
@@ -17,6 +18,7 @@ namespace platform {
ManageScreen(int rows, int cols, const std::string& model, const std::string& score, const std::string& platform, bool complete, bool partial, bool compare); ManageScreen(int rows, int cols, const std::string& model, const std::string& score, const std::string& platform, bool complete, bool partial, bool compare);
~ManageScreen() = default; ~ManageScreen() = default;
void doMenu(); void doMenu();
void updateSize(int rows, int cols);
private: private:
void list(const std::string& status, const std::string& color); void list(const std::string& status, const std::string& color);
void list_experiments(const std::string& status, const std::string& color); void list_experiments(const std::string& status, const std::string& color);
@@ -28,12 +30,15 @@ namespace platform {
std::string report_compared(); std::string report_compared();
std::pair<std::string, std::string> sortList(); std::pair<std::string, std::string> sortList();
std::string getVersions(); std::string getVersions();
void computeSizes();
bool checkWrongColumns();
void menu(); void menu();
void header(); void header();
void footer(const std::string& status, const std::string& color); void footer(const std::string& status, const std::string& color);
OutputType output_type; OutputType output_type;
int rows; int rows;
int cols; int cols;
int min_columns;
int index; int index;
int subIndex; int subIndex;
int index_A, index_B; // used for comparison of experiments int index_A, index_B; // used for comparison of experiments
@@ -43,6 +48,10 @@ namespace platform {
bool complete; bool complete;
bool partial; bool partial;
bool compare; bool compare;
int maxModel, maxTitle;
std::vector<std::string> header_labels;
std::vector<int> header_lengths;
std::vector<std::string> sort_fields;
SortField sort_field = SortField::DATE; SortField sort_field = SortField::DATE;
SortType sort_type = SortType::DESC; SortType sort_type = SortType::DESC;
std::vector<Paginator> paginator; std::vector<Paginator> paginator;
@@ -50,3 +59,4 @@ namespace platform {
lxw_workbook* workbook; lxw_workbook* workbook;
}; };
} }
#endif

View File

@@ -1,30 +1,46 @@
#include "CommandParser.h" #include "OptionsMenu.h"
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <algorithm> #include <algorithm>
#include "common/Colors.h"
#include "common/Utils.h" #include "common/Utils.h"
namespace platform { namespace platform {
std::string OptionsMenu::to_string()
std::tuple<char, int, bool> CommandParser::parse(const std::string& color, const std::vector<std::tuple<std::string, char, bool>>& options, const char defaultCommand, const int minIndex, const int maxIndex) {
bool first = true;
std::string result = color_normal + "Options: (";
size_t size = 10; // Size of "Options: ("
for (auto& option : options) {
if (!first) {
result += ", ";
size += 2;
}
std::string title = std::get<0>(option);
auto pos = title.find(std::get<1>(option));
result += color_normal + title.substr(0, pos) + color_bold + title.substr(pos, 1) + color_normal + title.substr(pos + 1);
size += title.size();
first = false;
}
if (size + 3 > cols) { // 3 is the size of the "): " at the end
result = "";
first = true;
for (auto& option : options) {
if (!first) {
result += color_normal + ", ";
}
result += color_bold + std::get<1>(option);
first = false;
}
}
result += "): ";
return result;
}
std::tuple<char, int, bool> OptionsMenu::parse(char defaultCommand, int minIndex, int maxIndex)
{ {
bool finished = false; bool finished = false;
while (!finished) { while (!finished) {
std::stringstream oss; std::cout << to_string();
std::string line; std::string line;
oss << color << "Options (";
bool first = true;
for (auto& option : options) {
if (first) {
first = false;
} else {
oss << ", ";
}
oss << std::get<char>(option) << "=" << std::get<std::string>(option);
}
oss << "): ";
std::cout << oss.str();
getline(std::cin, line); getline(std::cin, line);
line = trim(line); line = trim(line);
if (line.size() == 0) { if (line.size() == 0) {

26
src/manage/OptionsMenu.h Normal file
View File

@@ -0,0 +1,26 @@
#ifndef OPTIONS_MENU_H
#define OPTIONS_MENU_H
#include <string>
#include <vector>
#include <tuple>
namespace platform {
class OptionsMenu {
public:
OptionsMenu(std::vector<std::tuple<std::string, char, bool>>& options, std::string color_normal, std::string color_bold, int cols) : options(options), color_normal(color_normal), color_bold(color_bold), cols(cols) {}
std::string to_string();
std::tuple<char, int, bool> parse(char defaultCommand, int minIndex, int maxIndex);
char getCommand() const { return command; };
int getIndex() const { return index; };
std::string getErrorMessage() const { return errorMessage; };
void updateColumns(int cols) { this->cols = cols; }
private:
std::vector<std::tuple<std::string, char, bool>>& options;
std::string color_normal, color_bold;
int cols;
std::string errorMessage;
char command;
int index;
};
} /* namespace platform */
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef PAGINATOR_HPP
#define PAGINATOR_HPP
#include <utility> #include <utility>
class Paginator { class Paginator {
@@ -53,4 +53,5 @@ private:
int total; int total;
int page; int page;
int numPages; int numPages;
}; };
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef RESULTSMANAGER_H
#define RESULTSMANAGER_H
#include <vector> #include <vector>
#include <string> #include <string>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
@@ -45,4 +45,5 @@ namespace platform {
int maxTitle; int maxTitle;
std::vector<Result> files; std::vector<Result> files;
}; };
}; };
#endif

View File

@@ -1,3 +1,4 @@
#include <algorithm>
#include "common/Colors.h" #include "common/Colors.h"
#include "common/Datasets.h" #include "common/Datasets.h"
#include "common/Paths.h" #include "common/Paths.h"
@@ -12,7 +13,7 @@ namespace platform {
auto part = temp.substr(0, DatasetsConsole::BALANCE_LENGTH); auto part = temp.substr(0, DatasetsConsole::BALANCE_LENGTH);
line += part + "\n"; line += part + "\n";
body.push_back(line); body.push_back(line);
line = string(name_len + 22, ' '); line = string(name_len + 28, ' ');
temp = temp.substr(DatasetsConsole::BALANCE_LENGTH); temp = temp.substr(DatasetsConsole::BALANCE_LENGTH);
} }
line += temp + "\n"; line += temp + "\n";
@@ -26,8 +27,8 @@ namespace platform {
std::stringstream sheader; std::stringstream sheader;
auto datasets_names = datasets.getNames(); auto datasets_names = datasets.getNames();
int maxName = std::max(size_t(7), (*max_element(datasets_names.begin(), datasets_names.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size()); int maxName = std::max(size_t(7), (*max_element(datasets_names.begin(), datasets_names.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size());
std::vector<std::string> header_labels = { " #", "Dataset", "Sampl.", "Feat.", "Cls", "Balance" }; std::vector<std::string> header_labels = { " #", "Dataset", "Sampl.", "Feat.", "#Num.", "Cls", "Balance" };
std::vector<int> header_lengths = { 3, maxName, 6, 5, 3, DatasetsConsole::BALANCE_LENGTH }; std::vector<int> header_lengths = { 3, maxName, 6, 5, 5, 3, DatasetsConsole::BALANCE_LENGTH };
sheader << Colors::GREEN(); sheader << Colors::GREEN();
for (int i = 0; i < header_labels.size(); i++) { for (int i = 0; i < header_labels.size(); i++) {
sheader << setw(header_lengths[i]) << left << header_labels[i] << " "; sheader << setw(header_lengths[i]) << left << header_labels[i] << " ";
@@ -41,30 +42,37 @@ namespace platform {
sline += "\n"; sline += "\n";
header.push_back(sline); header.push_back(sline);
int num = 0; int num = 0;
for (const auto& dataset : datasets.getNames()) { for (const auto& dataset_name : datasets.getNames()) {
std::stringstream line; std::stringstream line;
line.imbue(loc); line.imbue(loc);
auto color = num % 2 ? Colors::CYAN() : Colors::BLUE(); auto color = num % 2 ? Colors::CYAN() : Colors::BLUE();
line << color << setw(3) << right << num++ << " "; line << color << setw(3) << right << num++ << " ";
line << setw(maxName) << left << dataset << " "; line << setw(maxName) << left << dataset_name << " ";
datasets.loadDataset(dataset); auto& dataset = datasets.getDataset(dataset_name);
auto nSamples = datasets.getNSamples(dataset); dataset.load();
auto nSamples = dataset.getNSamples();
line << setw(6) << right << nSamples << " "; line << setw(6) << right << nSamples << " ";
line << setw(5) << right << datasets.getFeatures(dataset).size() << " "; auto nFeatures = dataset.getFeatures().size();
line << setw(3) << right << datasets.getNClasses(dataset) << " "; line << setw(5) << right << nFeatures << " ";
auto numericFeatures = dataset.getNumericFeatures();
auto num = std::count(numericFeatures.begin(), numericFeatures.end(), true);
line << setw(5) << right << num << " ";
auto nClasses = dataset.getNClasses();
line << setw(3) << right << nClasses << " ";
std::string sep = ""; std::string sep = "";
oss.str(""); oss.str("");
for (auto number : datasets.getClassesCounts(dataset)) { for (auto number : dataset.getClassesCounts()) {
oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")";
sep = " / "; sep = " / ";
} }
split_lines(maxName, line.str(), oss.str()); split_lines(maxName, line.str(), oss.str());
// Store data for Excel report // Store data for Excel report
data[dataset] = json::object(); data[dataset_name] = json::object();
data[dataset]["samples"] = nSamples; data[dataset_name]["samples"] = nSamples;
data[dataset]["features"] = datasets.getFeatures(dataset).size(); data[dataset_name]["features"] = nFeatures;
data[dataset]["classes"] = datasets.getNClasses(dataset); data[dataset_name]["numericFeatures"] = num;
data[dataset]["balance"] = oss.str(); data[dataset_name]["classes"] = nClasses;
data[dataset_name]["balance"] = oss.str();
} }
} }
} }

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef DATASETSCONSOLE_H
#define DATASETSCONSOLE_H
#include <locale> #include <locale>
#include <sstream> #include <sstream>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
@@ -8,7 +8,6 @@
namespace platform { namespace platform {
using json = nlohmann::ordered_json; using json = nlohmann::ordered_json;
class DatasetsConsole : public ReportsPaged { class DatasetsConsole : public ReportsPaged {
public: public:
static const int BALANCE_LENGTH; static const int BALANCE_LENGTH;
@@ -19,4 +18,4 @@ namespace platform {
void split_lines(int name_len, std::string line, const std::string& balance); void split_lines(int name_len, std::string line, const std::string& balance);
}; };
} }
#endif

View File

@@ -1,5 +1,4 @@
#include "DatasetsExcel.h" #include "DatasetsExcel.h"
namespace platform { namespace platform {
DatasetsExcel::DatasetsExcel() DatasetsExcel::DatasetsExcel()
{ {
@@ -18,11 +17,11 @@ namespace platform {
int balanceSize = 75; // Min size of the column int balanceSize = 75; // Min size of the column
worksheet = workbook_add_worksheet(workbook, "Datasets"); worksheet = workbook_add_worksheet(workbook, "Datasets");
// Header // Header
worksheet_merge_range(worksheet, 0, 0, 0, 5, "Datasets", styles["headerFirst"]); worksheet_merge_range(worksheet, 0, 0, 0, 6, "Datasets", styles["headerFirst"]);
// Body header // Body header
row = 2; row = 2;
int col = 0; int col = 0;
for (const auto& name : { "", "Dataset", "Samples", "Features", "Classes", "Balance" }) { for (const auto& name : { "#", "Dataset", "Samples", "Features", "#Numer.", "Classes", "Balance" }) {
writeString(row, col++, name, "bodyHeader"); writeString(row, col++, name, "bodyHeader");
} }
// Body // Body
@@ -35,12 +34,13 @@ namespace platform {
writeString(row, 1, key.c_str(), "text"); writeString(row, 1, key.c_str(), "text");
writeInt(row, 2, value["samples"], "ints"); writeInt(row, 2, value["samples"], "ints");
writeInt(row, 3, value["features"], "ints"); writeInt(row, 3, value["features"], "ints");
writeInt(row, 4, value["classes"], "ints"); writeInt(row, 4, value["numericFeatures"], "ints");
writeString(row, 5, value["balance"].get<std::string>().c_str(), "text"); writeInt(row, 5, value["classes"], "ints");
writeString(row, 6, value["balance"].get<std::string>().c_str(), "text");
} }
// Format columns // Format columns
worksheet_freeze_panes(worksheet, 3, 2); worksheet_freeze_panes(worksheet, 3, 2);
std::vector<int> columns_sizes = { 5, datasetNameSize, 10, 10, 10, balanceSize }; std::vector<int> columns_sizes = { 5, datasetNameSize, 10, 10, 10, 10, balanceSize };
for (int i = 0; i < columns_sizes.size(); ++i) { for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
} }

View File

@@ -1,12 +1,11 @@
#pragma once #ifndef DATASETSEXCEL_H
#define DATASETSEXCEL_H
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "reports/ExcelFile.h" #include "reports/ExcelFile.h"
using json = nlohmann::ordered_json;
namespace platform { namespace platform {
using json = nlohmann::ordered_json;
class DatasetsExcel : public ExcelFile { class DatasetsExcel : public ExcelFile {
public: public:
DatasetsExcel(); DatasetsExcel();
@@ -14,3 +13,4 @@ namespace platform {
void report(json& data); void report(json& data);
}; };
} }
#endif

View File

@@ -22,6 +22,27 @@ namespace platform {
colorOdd = 0xDCE6F1; colorOdd = 0xDCE6F1;
colorEven = 0xFDE9D9; colorEven = 0xFDE9D9;
} }
lxw_worksheet* ExcelFile::createWorksheet(const std::string& name)
{
lxw_worksheet* sheet;
std::string suffix = "";
std::string efectiveName;
int num = 1;
// Create a sheet with the name of the model
while (true) {
efectiveName = name + suffix;
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
suffix = std::to_string(++num);
} else {
sheet = workbook_add_worksheet(workbook, efectiveName.c_str());
break;
}
if (num > 100) {
throw std::invalid_argument("Couldn't create sheet " + efectiveName);
}
}
return sheet;
}
lxw_workbook* ExcelFile::getWorkbook() lxw_workbook* ExcelFile::getWorkbook()
{ {
@@ -75,7 +96,7 @@ namespace platform {
} }
void ExcelFile::boldGreen() void ExcelFile::boldGreen()
{ {
boldFontColor(0x00FF00); boldFontColor(0x009900);
} }
void ExcelFile::boldRed() void ExcelFile::boldRed()
{ {

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef EXCELFILE_H
#define EXCELFILE_H
#include <locale> #include <locale>
#include <string> #include <string>
#include <map> #include <map>
@@ -24,6 +24,7 @@ namespace platform {
void boldBlue(); //set blue color for the bold styles void boldBlue(); //set blue color for the bold styles
void boldGreen(); //set green color for the bold styles void boldGreen(); //set green color for the bold styles
void createStyle(const std::string& name, lxw_format* style, bool odd); void createStyle(const std::string& name, lxw_format* style, bool odd);
lxw_worksheet* createWorksheet(const std::string& name);
void addColor(lxw_format* style, bool odd); void addColor(lxw_format* style, bool odd);
lxw_format* efectiveStyle(const std::string& name); lxw_format* efectiveStyle(const std::string& name);
lxw_workbook* workbook; lxw_workbook* workbook;
@@ -39,3 +40,4 @@ namespace platform {
void setDefault(); void setDefault();
}; };
} }
#endif

View File

@@ -61,12 +61,13 @@ namespace platform {
} }
} else { } else {
if (data["score_name"].get<std::string>() == "accuracy") { if (data["score_name"].get<std::string>() == "accuracy") {
auto dt = Datasets(false, Paths::datasets()); auto datasets = Datasets(false, Paths::datasets());
dt.loadDataset(dataset); auto& dt = datasets.getDataset(dataset);
auto numClasses = dt.getNClasses(dataset); dt.load();
auto numClasses = dt.getNClasses();
if (numClasses == 2) { if (numClasses == 2) {
std::vector<int> distribution = dt.getClassesCounts(dataset); std::vector<int> distribution = dt.getClassesCounts();
double nSamples = dt.getNSamples(dataset); double nSamples = dt.getNSamples();
std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end()); std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
double mark = *maxValue / nSamples * (1 + margin); double mark = *maxValue / nSamples * (1 + margin);
if (mark > 1) { if (mark > 1) {

View File

@@ -1,14 +1,13 @@
#pragma once #ifndef REPORTBASE_H
#define REPORTBASE_H
#include <string> #include <string>
#include <iostream> #include <map>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "common/Paths.h" #include "common/Paths.h"
#include "common/Symbols.h" #include "common/Symbols.h"
using json = nlohmann::ordered_json;
namespace platform { namespace platform {
using json = nlohmann::ordered_json;
class ReportBase { class ReportBase {
public: public:
explicit ReportBase(json data_, bool compare); explicit ReportBase(json data_, bool compare);
@@ -36,3 +35,4 @@ namespace platform {
bool existBestFile = true; bool existBestFile = true;
}; };
}; };
#endif

View File

@@ -2,7 +2,9 @@
#include <locale> #include <locale>
#include "best/BestScore.h" #include "best/BestScore.h"
#include "common/CLocale.h" #include "common/CLocale.h"
#include "common/Timer.h"
#include "ReportConsole.h" #include "ReportConsole.h"
#include "main/Scores.h"
namespace platform { namespace platform {
std::string ReportConsole::headerLine(const std::string& text, int utf = 0) std::string ReportConsole::headerLine(const std::string& text, int utf = 0)
@@ -22,12 +24,30 @@ namespace platform {
+ " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>() + " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>()
); );
sheader << headerLine(data["title"].get<std::string>()); sheader << headerLine(data["title"].get<std::string>());
std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "ORIGINAL";
std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
std::string smooth = data.find("smooth_strategy") != data.end() ? data["smooth_strategy"].get<std::string>() : "ORIGINAL";
std::string stratified;
try {
stratified = data["stratified"].get<bool>() ? "True" : "False";
}
catch (nlohmann::json::type_error) {
stratified = data["stratified"].get<int>() == 1 ? "True" : "False";
}
std::string discretized;
try {
discretized = data["discretized"].get<bool>() ? "True" : "False";
}
catch (nlohmann::json::type_error) {
discretized = data["discretized"].get<int>() == 1 ? "True" : "False";
}
sheader << headerLine( sheader << headerLine(
"Random seeds: " + fromVector("seeds") + " Discretized: " + (data["discretized"].get<bool>() ? "True" : "False") "Random seeds: " + fromVector("seeds") + " Discretized: " + discretized + " " + algorithm
+ " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False") + " Stratified: " + stratified + " Smooth Strategy: " + smooth
); );
oss << "Execution took " << std::setprecision(2) << std::fixed << data["duration"].get<float>() Timer timer;
<< " seconds, " << data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<std::string>(); oss << "Execution took " << timer.translate2String(data["duration"].get<float>())
<< " on " << data["platform"].get<std::string>() << " Language: " << data["language"].get<std::string>();
sheader << headerLine(oss.str()); sheader << headerLine(oss.str());
sheader << headerLine("Score is " + data["score_name"].get<std::string>()); sheader << headerLine("Score is " + data["score_name"].get<std::string>());
sheader << std::string(MAXL, '*') << std::endl; sheader << std::string(MAXL, '*') << std::endl;
@@ -120,12 +140,35 @@ namespace platform {
} }
} }
} }
line.str("");
if (lastResult.find("score_train") == lastResult.end()) {
line << headerLine("Train score: -");
} else {
line << headerLine("Train score: " + std::to_string(lastResult["score_train"].get<double>()));
}
vbody.push_back(line.str()); sbody << line.str();
line.str(""); line << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12)); line.str(""); line << headerLine(fVector("Train scores: ", lastResult["scores_train"], 14, 12));
vbody.push_back(line.str()); sbody << line.str(); vbody.push_back(line.str()); sbody << line.str();
line.str(""); line << headerLine("Test score: " + std::to_string(lastResult["score"].get<double>()));
vbody.push_back(line.str()); sbody << line.str();
line.str(""); line << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12)); line.str(""); line << headerLine(fVector("Test scores: ", lastResult["scores_test"], 14, 12));
vbody.push_back(line.str()); sbody << line.str(); vbody.push_back(line.str()); sbody << line.str();
line.str("");
if (lastResult.find("train_time") == lastResult.end()) {
line << headerLine("Train time: -");
} else {
line << headerLine("Train time: " + std::to_string(lastResult["train_time"].get<double>()));
}
vbody.push_back(line.str()); sbody << line.str();
line.str(""); line << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3)); line.str(""); line << headerLine(fVector("Train times: ", lastResult["times_train"], 10, 3));
vbody.push_back(line.str()); sbody << line.str(); vbody.push_back(line.str()); sbody << line.str();
line.str("");
if (lastResult.find("test_time") == lastResult.end()) {
line << headerLine("Test time: -");
} else {
line << headerLine("Test time: " + std::to_string(lastResult["test_time"].get<double>()));
}
vbody.push_back(line.str()); sbody << line.str();
line.str(""); line << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3)); line.str(""); line << headerLine(fVector("Test times: ", lastResult["times_test"], 10, 3));
vbody.push_back(line.str()); sbody << line.str(); vbody.push_back(line.str()); sbody << line.str();
@@ -186,13 +229,13 @@ namespace platform {
int lines_header = 0; int lines_header = 0;
std::string color_line; std::string color_line;
std::string suffix = ""; std::string suffix = "";
auto scores = aggregateScore(result, "confusion_matrices"); auto scores = Scores::create_aggregate(result, "confusion_matrices");
auto output_test = scores.classification_report(color, "Test"); auto output_test = scores.classification_report(color, "Test");
int maxLine = (*std::max_element(output_test.begin(), output_test.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); int maxLine = (*std::max_element(output_test.begin(), output_test.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
bool train_data = result.find("confusion_matrices_train") != result.end(); bool train_data = result.find("confusion_matrices_train") != result.end();
std::vector<std::string> output_train; std::vector<std::string> output_train;
if (train_data) { if (train_data) {
auto scores_train = aggregateScore(result, "confusion_matrices_train"); auto scores_train = Scores::create_aggregate(result, "confusion_matrices_train");
output_train = scores_train.classification_report(color, "Train"); output_train = scores_train.classification_report(color, "Train");
} }
oss << Colors::BLUE(); oss << Colors::BLUE();

View File

@@ -1,12 +1,11 @@
#pragma once #ifndef REPORT_CONSOLE_H
#define REPORT_CONSOLE_H
#include <string> #include <string>
#include "common/Colors.h" #include "common/Colors.h"
#include <sstream> #include <sstream>
#include "ReportBase.h" #include "ReportBase.h"
#include "main/Scores.h" #include "main/Scores.h"
namespace platform { namespace platform {
const int MAXL = 133; const int MAXL = 133;
class ReportConsole : public ReportBase { class ReportConsole : public ReportBase {
@@ -33,3 +32,4 @@ namespace platform {
std::vector<std::string> vbody; std::vector<std::string> vbody;
}; };
}; };
#endif

View File

@@ -2,10 +2,7 @@
#include <locale> #include <locale>
#include "best/BestScore.h" #include "best/BestScore.h"
#include "ReportExcel.h" #include "ReportExcel.h"
namespace platform { namespace platform {
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet) : ReportBase(data_, compare), ExcelFile(workbook, worksheet) ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet) : ReportBase(data_, compare), ExcelFile(workbook, worksheet)
{ {
createFile(); createFile();
@@ -20,26 +17,7 @@ namespace platform {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
} }
} }
void ReportExcel::createWorksheet()
{
const std::string name = data["model"].get<std::string>();
std::string suffix = "";
std::string efectiveName;
int num = 1;
// Create a sheet with the name of the model
while (true) {
efectiveName = name + suffix;
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
suffix = std::to_string(++num);
} else {
worksheet = workbook_add_worksheet(workbook, efectiveName.c_str());
break;
}
if (num > 100) {
throw std::invalid_argument("Couldn't create sheet " + efectiveName);
}
}
}
void ReportExcel::createFile() void ReportExcel::createFile()
{ {
@@ -47,7 +25,8 @@ namespace platform {
workbook = workbook_new((Paths::excel() + Paths::excelResults()).c_str()); workbook = workbook_new((Paths::excel() + Paths::excelResults()).c_str());
} }
if (worksheet == NULL) { if (worksheet == NULL) {
createWorksheet(); const std::string name = data["model"].get<std::string>();
worksheet = createWorksheet(name);
} }
setProperties(data["title"].get<std::string>()); setProperties(data["title"].get<std::string>());
formatColumns(); formatColumns();
@@ -70,7 +49,10 @@ namespace platform {
worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]); worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<std::string>().c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<std::string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<std::string>()).c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<std::string>()).c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]); writeString(2, 1, "Smooth", "headerRest");
std::string smooth = data.find("smooth_strategy") != data.end() ? data["smooth_strategy"].get<std::string>() : "ORIGINAL";
writeString(3, 1, smooth, "headerSmall");
worksheet_merge_range(worksheet, 2, 2, 3, 3, "Execution time", styles["headerRest"]);
oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() << " s"; oss << std::setprecision(2) << std::fixed << data["duration"].get<float>() << " s";
worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]); worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
oss.str(""); oss.str("");
@@ -86,7 +68,9 @@ namespace platform {
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]); worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
oss.str(""); oss.str("");
oss.clear(); oss.clear();
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False"); std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "mdlp";
std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False") << algorithm;
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]); worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
} }
void ReportExcel::header_notes(int row) void ReportExcel::header_notes(int row)
@@ -196,6 +180,10 @@ namespace platform {
writeDouble(row, ++col, item, style); writeDouble(row, ++col, item, style);
} }
} }
// Classificacion report
if (lastResult.find("confusion_matrices") != lastResult.end()) {
create_classification_report(lastResult);
}
// Set with of columns to show those totals completely // Set with of columns to show those totals completely
worksheet_set_column(worksheet, 1, 1, 12, NULL); worksheet_set_column(worksheet, 1, 1, 12, NULL);
for (int i = 2; i < 7; ++i) { for (int i = 2; i < 7; ++i) {
@@ -206,7 +194,129 @@ namespace platform {
footer(totalScore, row); footer(totalScore, row);
} }
} }
void ReportExcel::create_classification_report(const json& result)
{
auto matrix_sheet = createWorksheet("clf_report");
lxw_worksheet* tmp = worksheet;
worksheet = matrix_sheet;
if (matrix_sheet == NULL) {
throw std::invalid_argument("Couldn't create sheet classif_report");
}
row = 1;
int col = 0;
if (result.find("confusion_matrices_train") != result.end()) {
// Train classification report
auto score = Scores::create_aggregate(result, "confusion_matrices_train");
auto train = score.classification_report_json("Train");
std::tie(row, col) = write_classification_report(train, row, 0);
int new_row = 0;
int new_col = col + 1;
for (int i = 0; i < result["confusion_matrices_train"].size(); ++i) {
auto item = result["confusion_matrices_train"][i];
auto score_item = Scores(item);
auto title = "Train Fold " + std::to_string(i);
std::tie(new_row, new_col) = write_classification_report(score_item.classification_report_json(title), 1, new_col);
new_col++;
}
col = new_col;
worksheet_merge_range(matrix_sheet, 0, 0, 0, col - 1, "Train Classification Report", efectiveStyle("headerRest"));
}
// Test classification report
worksheet_merge_range(matrix_sheet, row, 0, row, col - 1, "Test Classification Report", efectiveStyle("headerRest"));
auto score = Scores::create_aggregate(result, "confusion_matrices");
auto test = score.classification_report_json("Test");
int init_row = ++row;
std::tie(row, col) = write_classification_report(test, init_row, 0);
int new_row = 0;
int new_col = col + 1;
for (int i = 0; i < result["confusion_matrices"].size(); ++i) {
auto item = result["confusion_matrices"][i];
auto score_item = Scores(item);
auto title = "Test Fold " + std::to_string(i);
std::tie(new_row, new_col) = write_classification_report(score_item.classification_report_json(title), init_row, new_col);
new_col++;
}
// Format columns (change size to fit the content)
for (int i = 0; i < new_col; ++i) {
// doesn't work with from col to col, so...
worksheet_set_column(worksheet, i, i, 12, NULL);
}
worksheet = tmp;
}
std::pair<int, int> ReportExcel::write_classification_report(const json& result, int init_row, int init_col)
{
row = init_row;
auto text = result["title"].get<std::string>();
worksheet_merge_range(worksheet, row, init_col, row + 1, init_col + 5, text.c_str(), efectiveStyle("bodyHeader"));
row += 2;
int col = init_col + 2;
// Headers
bool first_item = true;
for (const auto& item : result["headers"]) {
auto text = item.get<std::string>();
if (first_item) {
first_item = false;
worksheet_merge_range(worksheet, row, init_col, row, init_col + 1, text.c_str(), efectiveStyle("bodyHeader"));
} else {
writeString(row, col++, text, "bodyHeader");
}
}
row++;
// Classes f1-score
for (const auto& item : result["body"]) {
col = init_col + 2;
for (const auto& value : item) {
if (value.is_string()) {
worksheet_merge_range(worksheet, row, init_col, row, init_col + 1, value.get<std::string>().c_str(), efectiveStyle("text"));
} else {
if (value.is_number_integer()) {
writeInt(row, col++, value.get<int>(), "ints");
} else {
writeDouble(row, col++, value.get<double>(), "result");
}
}
}
row++;
}
// Accuracy and average f1-score
for (const auto& item : { "accuracy", "averages", "weighted" }) {
col = init_col + 2;
for (const auto& value : result[item]) {
if (value.is_string()) {
worksheet_merge_range(worksheet, row, init_col, row, init_col + 1, value.get<std::string>().c_str(), efectiveStyle("text"));
} else {
if (value.is_number_integer()) {
writeInt(row, col++, value.get<int>(), "ints");
} else {
writeDouble(row, col++, value.get<double>(), "result");
}
}
}
row++;
}
// Confusion matrix
auto n_items = result["confusion_matrix"].size();
worksheet_merge_range(worksheet, row, init_col, row, init_col + n_items + 1, "Confusion Matrix", efectiveStyle("bodyHeader"));
row++;
boldGreen();
for (int i = 0; i < n_items; ++i) {
col = init_col + 2;
auto label = result["body"][i][0].get<std::string>();
worksheet_merge_range(worksheet, row, init_col, row, init_col + 1, label.c_str(), efectiveStyle("text"));
for (int j = 0; j < result["confusion_matrix"][i].size(); ++j) {
auto value = result["confusion_matrix"][i][j];
if (i == j) {
writeInt(row, col++, value.get<int>(), "ints_bold");
} else {
writeInt(row, col++, value.get<int>(), "ints");
}
}
row++;
}
int maxcol = std::max(init_col + 5, int(init_col + n_items + 1));
return { row, maxcol };
}
void ReportExcel::showSummary() void ReportExcel::showSummary()
{ {
for (const auto& item : summary) { for (const auto& item : summary) {
@@ -216,7 +326,6 @@ namespace platform {
row += 1; row += 1;
} }
} }
void ReportExcel::footer(double totalScore, int row) void ReportExcel::footer(double totalScore, int row)
{ {
showSummary(); showSummary();

View File

@@ -1,11 +1,13 @@
#pragma once #ifndef REPORT_EXCEL_H
#define REPORT_EXCEL_H
#include <map> #include <algorithm>
#include <xlsxwriter.h> #include "main/Scores.h"
#include "common/Colors.h" #include "common/Colors.h"
#include "ReportBase.h" #include "ReportBase.h"
#include "ExcelFile.h" #include "ExcelFile.h"
namespace platform { namespace platform {
using json = nlohmann::ordered_json;
class ReportExcel : public ReportBase, public ExcelFile { class ReportExcel : public ReportBase, public ExcelFile {
public: public:
explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL); explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook, lxw_worksheet* worksheet = NULL);
@@ -13,12 +15,14 @@ namespace platform {
private: private:
void formatColumns(); void formatColumns();
void createFile(); void createFile();
void createWorksheet();
void header() override; void header() override;
void body() override; void body() override;
void showSummary() override; void showSummary() override;
void footer(double totalScore, int row); void footer(double totalScore, int row);
void append_notes(const json& r, int row); void append_notes(const json& r, int row);
void create_classification_report(const json& result);
std::pair<int, int> write_classification_report(const json& result, int init_row, int init_col);
void header_notes(int row); void header_notes(int row);
}; };
}; };
#endif

View File

@@ -1,4 +1,5 @@
#pragma once #ifndef REPORT_EXCEL_COMPARED_H
#define REPORT_EXCEL_COMPARED_H
#include "ReportExcel.h" #include "ReportExcel.h"
namespace platform { namespace platform {
class ReportExcelCompared : public ExcelFile { class ReportExcelCompared : public ExcelFile {
@@ -16,4 +17,5 @@ namespace platform {
std::string leaves_label; std::string leaves_label;
std::string depth_label; std::string depth_label;
}; };
}; };
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef REPORTS_PAGED_H
#define REPORTS_PAGED_H
#include <locale> #include <locale>
#include <sstream> #include <sstream>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
@@ -23,3 +23,4 @@ namespace platform {
std::locale loc; std::locale loc;
}; };
} }
#endif

View File

@@ -6,6 +6,7 @@
#include "common/DotEnv.h" #include "common/DotEnv.h"
#include "common/CLocale.h" #include "common/CLocale.h"
#include "common/Paths.h" #include "common/Paths.h"
#include "common/Symbols.h"
#include "Result.h" #include "Result.h"
namespace platform { namespace platform {
@@ -64,27 +65,46 @@ namespace platform {
void Result::save() void Result::save()
{ {
std::ofstream file(Paths::results() + "/" + getFilename()); std::ofstream file(Paths::results() + getFilename());
file << data; file << data;
file.close(); file.close();
} }
std::string Result::getFilename() const std::string Result::getFilename() const
{ {
std::ostringstream oss; std::ostringstream oss;
oss << "results_" << data.at("score_name").get<std::string>() << "_" << data.at("model").get<std::string>() << "_" std::string stratified;
<< data.at("platform").get<std::string>() << "_" << data["date"].get<std::string>() << "_" try {
<< data["time"].get<std::string>() << "_" << (data["stratified"] ? "1" : "0") << ".json"; stratified = data["stratified"].get<bool>() ? "1" : "0";
}
catch (nlohmann::json_abi_v3_11_3::detail::type_error) {
stratified = data["stratified"].get<int>() == 1 ? "1" : "0";
}
oss << "results_"
<< data.at("score_name").get<std::string>() << "_"
<< data.at("model").get<std::string>() << "_"
<< data.at("platform").get<std::string>() << "_"
<< data["date"].get<std::string>() << "_"
<< data["time"].get<std::string>() << "_"
<< stratified << ".json";
return oss.str(); return oss.str();
} }
std::string Result::to_string(int maxModel, int maxTitle) const
std::string Result::to_string(int maxModel) const
{ {
auto tmp = ConfigLocale(); auto tmp = ConfigLocale();
std::stringstream oss; std::stringstream oss;
std::string s = data["stratified"].get<bool>() ? "S" : ""; std::string s, d;
std::string d = data["discretized"].get<bool>() ? "D" : ""; try {
std::string sd = s + d; s = data["stratified"].get<bool>() ? "S" : " ";
}
catch (nlohmann::json_abi_v3_11_3::detail::type_error) {
s = data["stratified"].get<int>() == 1 ? "S" : " ";
}
try {
d = data["discretized"].get<bool>() ? "D" : " ";
}
catch (nlohmann::json_abi_v3_11_3::detail::type_error) {
d = data["discretized"].get<int>() == 1 ? "D" : " ";
}
auto duration = data["duration"].get<double>(); auto duration = data["duration"].get<double>();
double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration; double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration;
std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s"; std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s";
@@ -93,11 +113,15 @@ namespace platform {
oss << std::setw(11) << std::left << data["score_name"].get<std::string>() << " "; oss << std::setw(11) << std::left << data["score_name"].get<std::string>() << " ";
oss << std::right << std::setw(10) << std::setprecision(7) << std::fixed << score << " "; oss << std::right << std::setw(10) << std::setprecision(7) << std::fixed << score << " ";
oss << std::left << std::setw(12) << data["platform"].get<std::string>() << " "; oss << std::left << std::setw(12) << data["platform"].get<std::string>() << " ";
oss << std::left << std::setw(2) << sd << " "; oss << s << d << " ";
auto completeString = isComplete() ? "C" : "P"; auto completeString = isComplete() ? "C" : "P";
oss << std::setw(1) << " " << completeString << " "; oss << std::setw(1) << " " << completeString << " ";
oss << std::setw(5) << std::right << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " "; oss << std::setw(5) << std::right << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " ";
oss << std::setw(50) << std::left << data["title"].get<std::string>() << " "; auto title = data["title"].get<std::string>();
if (title.size() > maxTitle) {
title = title.substr(0, maxTitle - 1) + Symbols::ellipsis;
}
oss << std::setw(maxTitle) << std::left << title;
return oss.str(); return oss.str();
} }
} }

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef RESULT_H
#define RESULT_H
#include <map> #include <map>
#include <vector> #include <vector>
#include <string> #include <string>
@@ -18,7 +18,7 @@ namespace platform {
void save(); void save();
// Getters // Getters
json getJson(); json getJson();
std::string to_string(int maxModel) const; std::string to_string(int maxModel, int maxTitle) const;
std::string getFilename() const; std::string getFilename() const;
std::string getDate() const { return data["date"].get<std::string>(); }; std::string getDate() const { return data["date"].get<std::string>(); };
std::string getTime() const { return data["time"].get<std::string>(); }; std::string getTime() const { return data["time"].get<std::string>(); };
@@ -28,10 +28,13 @@ namespace platform {
std::string getModel() const { return data["model"].get<std::string>(); }; std::string getModel() const { return data["model"].get<std::string>(); };
std::string getPlatform() const { return data["platform"].get<std::string>(); }; std::string getPlatform() const { return data["platform"].get<std::string>(); };
std::string getScoreName() const { return data["score_name"].get<std::string>(); }; std::string getScoreName() const { return data["score_name"].get<std::string>(); };
bool isComplete() const { return complete; }; bool isComplete() const { return complete; };
json getData() const { return data; } json getData() const { return data; }
// Setters // Setters
void setTitle(const std::string& title) { data["title"] = title; }; void setTitle(const std::string& title) { data["title"] = title; };
void setSmoothStrategy(const std::string& smooth_strategy) { data["smooth_strategy"] = smooth_strategy; };
void setDiscretizationAlgorithm(const std::string& discretization_algo) { data["discretization_algorithm"] = discretization_algo; };
void setLanguage(const std::string& language) { data["language"] = language; }; void setLanguage(const std::string& language) { data["language"] = language; };
void setLanguageVersion(const std::string& language_version) { data["language_version"] = language_version; }; void setLanguageVersion(const std::string& language_version) { data["language_version"] = language_version; };
void setDuration(double duration) { data["duration"] = duration; }; void setDuration(double duration) { data["duration"] = duration; };
@@ -44,10 +47,10 @@ namespace platform {
void setStratified(bool stratified) { data["stratified"] = stratified; }; void setStratified(bool stratified) { data["stratified"] = stratified; };
void setNFolds(int nfolds) { data["folds"] = nfolds; }; void setNFolds(int nfolds) { data["folds"] = nfolds; };
void setPlatform(const std::string& platform_name) { data["platform"] = platform_name; }; void setPlatform(const std::string& platform_name) { data["platform"] = platform_name; };
private: private:
json data; json data;
bool complete; bool complete;
double score = 0.0; double score = 0.0;
}; };
}; };
#endif

View File

@@ -1,5 +1,5 @@
#pragma once #ifndef RESULTSDATASET_H
#define RESULTSDATASET_H
#include <vector> #include <vector>
#include <string> #include <string>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
@@ -31,4 +31,5 @@ namespace platform {
double maxResult; double maxResult;
std::vector<Result> files; std::vector<Result> files;
}; };
}; };
#endif

View File

@@ -1,11 +1,11 @@
#pragma once #ifndef RESULTSDATASETSCONSOLE_H
#define RESULTSDATASETSCONSOLE_H
#include <locale> #include <locale>
#include <string> #include <string>
#include <sstream> #include <sstream>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "results/ResultsDataset.h" #include "reports/ReportsPaged.h"
#include "ReportsPaged.h" #include "ResultsDataset.h"
namespace platform { namespace platform {
class ResultsDatasetsConsole : public ReportsPaged { class ResultsDatasetsConsole : public ReportsPaged {
@@ -15,7 +15,4 @@ namespace platform {
bool report(const std::string& dataset, const std::string& score, const std::string& model); bool report(const std::string& dataset, const std::string& score, const std::string& model);
}; };
} }
#endif

View File

@@ -20,7 +20,7 @@ namespace platform {
// Body header // Body header
row = 2; row = 2;
int col = 0; int col = 0;
for (const auto& name : { "", "Model", "Date", "Time", "Score", "Hyperparameters" }) { for (const auto& name : { "#", "Model", "Date", "Time", "Score", "Hyperparameters" }) {
writeString(row, col++, name, "bodyHeader"); writeString(row, col++, name, "bodyHeader");
} }
// Body // Body

View File

@@ -1,11 +1,11 @@
#pragma once #ifndef RESULTSDATASETEXCEL_H
#define RESULTSDATASETEXCEL_H
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "reports/ExcelFile.h" #include "reports/ExcelFile.h"
using json = nlohmann::ordered_json;
namespace platform { namespace platform {
using json = nlohmann::ordered_json;
class ResultsDatasetExcel : public ExcelFile { class ResultsDatasetExcel : public ExcelFile {
public: public:
@@ -14,3 +14,4 @@ namespace platform {
void report(json& data); void report(json& data);
}; };
} }
#endif

View File

@@ -3,7 +3,7 @@ if(ENABLE_TESTING)
include_directories( include_directories(
${Platform_SOURCE_DIR}/src ${Platform_SOURCE_DIR}/src
${Platform_SOURCE_DIR}/lib/argparse/include ${Platform_SOURCE_DIR}/lib/argparse/include
${Platform_SOURCE_DIR}/lib/mdlp ${Platform_SOURCE_DIR}/lib/mdlp/src
${Platform_SOURCE_DIR}/lib/Files ${Platform_SOURCE_DIR}/lib/Files
${Platform_SOURCE_DIR}/lib/json/include ${Platform_SOURCE_DIR}/lib/json/include
${Platform_SOURCE_DIR}/lib/folding ${Platform_SOURCE_DIR}/lib/folding
@@ -13,10 +13,10 @@ if(ENABLE_TESTING)
) )
set(TEST_SOURCES_PLATFORM set(TEST_SOURCES_PLATFORM
TestUtils.cpp TestPlatform.cpp TestResult.cpp TestScores.cpp TestUtils.cpp TestPlatform.cpp TestResult.cpp TestScores.cpp
${Platform_SOURCE_DIR}/src/common/Datasets.cpp ${Platform_SOURCE_DIR}/src/common/Dataset.cpp ${Platform_SOURCE_DIR}/src/common/Datasets.cpp ${Platform_SOURCE_DIR}/src/common/Dataset.cpp ${Platform_SOURCE_DIR}/src/common/Discretization.cpp
${Platform_SOURCE_DIR}/src/main/Scores.cpp ${Platform_SOURCE_DIR}/src/main/Scores.cpp
) )
add_executable(${TEST_PLATFORM} ${TEST_SOURCES_PLATFORM}) add_executable(${TEST_PLATFORM} ${TEST_SOURCES_PLATFORM})
target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain BayesNet) target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" mdlp Catch2::Catch2WithMain BayesNet)
add_test(NAME ${TEST_PLATFORM} COMMAND ${TEST_PLATFORM}) add_test(NAME ${TEST_PLATFORM} COMMAND ${TEST_PLATFORM})
endif(ENABLE_TESTING) endif(ENABLE_TESTING)

View File

@@ -7,14 +7,15 @@
#include <string> #include <string>
#include "TestUtils.h" #include "TestUtils.h"
#include "folding.hpp" #include "folding.hpp"
#include <ArffFiles.hpp>
#include <bayesnet/classifiers/TAN.h> #include <bayesnet/classifiers/TAN.h>
#include "config.h" #include "config_platform.h"
TEST_CASE("Test Platform version", "[Platform]") TEST_CASE("Test Platform version", "[Platform]")
{ {
std::string version = { platform_project_version.begin(), platform_project_version.end() }; std::string version = { platform_project_version.begin(), platform_project_version.end() };
REQUIRE(version == "1.0.4"); REQUIRE(version == "1.1.0");
} }
TEST_CASE("Test Folding library version", "[Folding]") TEST_CASE("Test Folding library version", "[Folding]")
{ {
@@ -24,10 +25,15 @@ TEST_CASE("Test Folding library version", "[Folding]")
TEST_CASE("Test BayesNet version", "[BayesNet]") TEST_CASE("Test BayesNet version", "[BayesNet]")
{ {
std::string version = bayesnet::TAN().getVersion(); std::string version = bayesnet::TAN().getVersion();
REQUIRE(version == "1.0.5.1"); REQUIRE(version == "1.0.6");
} }
TEST_CASE("Test mdlp version", "[mdlp]") TEST_CASE("Test mdlp version", "[mdlp]")
{ {
std::string version = mdlp::CPPFImdlp::version(); std::string version = mdlp::CPPFImdlp::version();
REQUIRE(version == "1.1.2"); REQUIRE(version == "2.0.0");
}
TEST_CASE("Test Arff version", "[Arff]")
{
std::string version = ArffFiles().version();
REQUIRE(version == "1.1.0");
} }

View File

@@ -7,19 +7,20 @@
#include "common/DotEnv.h" #include "common/DotEnv.h"
#include "common/Datasets.h" #include "common/Datasets.h"
#include "common/Paths.h" #include "common/Paths.h"
#include "config.h" #include "config_platform.h"
TEST_CASE("ZeroR comparison in reports", "[Report]") TEST_CASE("ZeroR comparison in reports", "[Report]")
{ {
auto dotEnv = platform::DotEnv(true); auto dotEnv = platform::DotEnv(true);
auto margin = 1e-2; auto margin = 1e-4;
std::string dataset = "liver-disorders"; std::string dataset_name = "liver-disorders";
auto dt = platform::Datasets(false, platform::Paths::datasets()); auto dt = platform::Datasets(false, platform::Paths::datasets());
dt.loadDataset(dataset); auto& dataset = dt.getDataset(dataset_name);
std::vector<int> distribution = dt.getClassesCounts(dataset); dataset.load();
double nSamples = dt.getNSamples(dataset); std::vector<int> distribution = dataset.getClassesCounts();
double nSamples = dataset.getNSamples();
std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end()); std::vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
double mark = *maxValue / nSamples * (1 + margin); double mark = *maxValue / nSamples * (1 + margin);
REQUIRE(mark == Catch::Approx(0.585507f).epsilon(1e-5)); REQUIRE(mark == Catch::Approx(0.57976811f).epsilon(margin));
} }

View File

@@ -9,7 +9,7 @@
#include "common/Paths.h" #include "common/Paths.h"
#include "common/Colors.h" #include "common/Colors.h"
#include "main/Scores.h" #include "main/Scores.h"
#include "config.h" #include "config_platform.h"
using json = nlohmann::ordered_json; using json = nlohmann::ordered_json;
auto epsilon = 1e-4; auto epsilon = 1e-4;
@@ -128,7 +128,7 @@ TEST_CASE("Confusion Matrix JSON", "[Scores]")
REQUIRE(res_json_str["Car"][1] == 2); REQUIRE(res_json_str["Car"][1] == 2);
REQUIRE(res_json_str["Car"][2] == 3); REQUIRE(res_json_str["Car"][2] == 3);
} }
TEST_CASE("Classification Report", "[Scores]") - TEST_CASE("Classification Report", "[Scores]")
{ {
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 }; std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 }; std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };

View File

@@ -1,5 +1,5 @@
#include "TestUtils.h" #include "TestUtils.h"
#include "config.h" #include "config_platform.h"
class Paths { class Paths {
public: public:

View File

@@ -1,12 +1,12 @@
#pragma once #ifndef TESTUTILS_H
#define TESTUTILS_H
#include <torch/torch.h> #include <torch/torch.h>
#include <string> #include <string>
#include <vector> #include <vector>
#include <map> #include <map>
#include <tuple> #include <tuple>
#include "ArffFiles.h" #include <ArffFiles.hpp>
#include "CPPFImdlp.h" #include <fimdlp/CPPFImdlp.h>
bool file_exists(const std::string& name); bool file_exists(const std::string& name);
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features); std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);
@@ -39,4 +39,4 @@ public:
int nSamples, classNumStates; int nSamples, classNumStates;
double epsilon = 1e-5; double epsilon = 1e-5;
}; };
#endif

View File

@@ -1,8 +1,8 @@
diabetes,class, all diabetes;class;all
ecoli,class, all ecoli;class;all
glass,Type, all glass;Type,all
iris,class, all iris;class;all
kdd_JapaneseVowels,speaker, [2,3,4,5,6,7,8,9,10,11,12,13] kdd_JapaneseVowels;speaker;[2,3,4,5,6,7,8,9,10,11,12,13]
letter,class, all letter;class;all
liver-disorders,selector, all liver-disorders;selector;all
mfeat-factors,class, all mfeat-factors;class;all