Compare commits

..

41 Commits

Author SHA1 Message Date
926de2bebd Add boost info to README 2023-09-28 09:44:33 +02:00
71704e3547 Enhance output info in Statistics 2023-09-28 01:27:18 +02:00
3b06534327 Remove duplicated code in BestResults 2023-09-28 00:59:34 +02:00
ac89a451e3 Duplicate statistics tests in class 2023-09-28 00:45:15 +02:00
00c6cf663b Fix order of output in posthoc 2023-09-27 19:11:47 +02:00
5043c12be8 Complete posthoc with Holm adjust 2023-09-27 18:34:16 +02:00
11320e2cc7 Complete friedman test as in exreport 2023-09-27 12:36:03 +02:00
ce66483b65 Update boost version requirement for Linux 2023-09-26 14:12:53 +02:00
cab8e14b2d Add friedman hyperparameter 2023-09-26 11:26:59 +02:00
f0d0abe891 Add boost library link to linux build 2023-09-26 01:07:50 +02:00
dcba146e12 Begin adding Friedman test to BestResults 2023-09-26 01:04:59 +02:00
3ea0285119 Fix ranks to match friedman test ranks 2023-09-25 18:38:12 +02:00
e3888e1503 Merge pull request 'bestResults' (#9) from bestResults into main
Reviewed-on: https://gitea.rmontanana.es:3000/rmontanana/BayesNet/pulls/9

Add best results management, build, report, build all & report all
2023-09-25 12:02:17 +00:00
06de13df98 Add date/time to header of report best 2023-09-25 10:04:53 +02:00
de4fa6a04f Add color to totals 2023-09-23 10:30:39 +02:00
3a7bf4e672 Fix ranking order mistake 2023-09-23 01:33:23 +02:00
cd0bc02a74 Add report/build all with totals and ranks 2023-09-23 01:14:02 +02:00
c8597a794e Begin report all models 2023-09-22 18:13:32 +02:00
b30416364d Fix mistake in best results file name 2023-09-22 14:14:39 +02:00
3a16589220 Add best config for debug in vscode 2023-09-22 01:04:36 +02:00
c4f9187e2a Complete best build and report 2023-09-22 01:03:55 +02:00
c4d0a5b4e6 Split Result from Results 2023-09-21 23:30:17 +02:00
7bfafe555f Begin BestResults build 2023-09-21 23:04:11 +02:00
337b6f7e79 Rename BestResult to BestScore 2023-09-21 19:30:07 +02:00
5fa0b957dd Fix mistake in idx range in manage 2023-09-20 19:12:07 +02:00
67252fc41d Fix CMakeLists libxlsxwriter for Linux 2023-09-20 19:02:53 +02:00
94ae9456a0 Fix libxslxwriter linking problem 2023-09-20 18:50:11 +02:00
781993e326 Resolve some warnings 2023-09-20 17:54:15 +02:00
8257a6ae39 Add message of not exist Best Results 2023-09-20 13:50:34 +02:00
fc81730dfc Merge pull request 'Exchange OpenXLSX to libxlsxwriter' (#8) from libxlsxwriter into main
Add multiple sheets to excel file
Add format and color to sheets
Add comparison with ZeroR
Add comparison with Best Results
Separate contextual menu from general in manage
2023-09-20 11:17:16 +00:00
d8734ff082 Separate contextual menu from general 2023-09-20 13:15:33 +02:00
03533461c8 Add compare to best results in manage 2023-09-20 12:51:19 +02:00
68f22a673d Add comparison to report console 2023-09-20 11:40:01 +02:00
b9bc0088f3 Add format to unique dataset results summary 2023-09-20 10:30:45 +02:00
c280e254ca Remove OpenXLSX submodule 2023-09-20 01:09:58 +02:00
3d0f29fda3 Remove .vscode/settings.json from repository 2023-09-20 01:01:40 +02:00
20a6ebab7c Support to add any number of sheets to excel 2023-09-20 00:58:01 +02:00
925f71166c Fix mistake in comparison 2023-09-19 23:46:49 +02:00
f69f415b92 Complete comparison with ZeroR 2023-09-19 17:55:03 +02:00
1bdfbd1620 Complete adding color to format 2023-09-19 14:07:41 +02:00
06fb135526 First approach 2023-09-18 23:26:22 +02:00
32 changed files with 1463 additions and 437 deletions

1
.gitignore vendored
View File

@@ -36,3 +36,4 @@ build/
cmake-build*/** cmake-build*/**
.idea .idea
puml/** puml/**
.vscode/settings.json

6
.gitmodules vendored
View File

@@ -10,6 +10,6 @@
[submodule "lib/json"] [submodule "lib/json"]
path = lib/json path = lib/json
url = https://github.com/nlohmann/json.git url = https://github.com/nlohmann/json.git
[submodule "lib/openXLSX"] [submodule "lib/libxlsxwriter"]
path = lib/openXLSX path = lib/libxlsxwriter
url = https://github.com/troldal/OpenXLSX.git url = https://github.com/jmcnamara/libxlsxwriter.git

14
.vscode/launch.json vendored
View File

@@ -37,6 +37,20 @@
], ],
"cwd": "/Users/rmontanana/Code/discretizbench", "cwd": "/Users/rmontanana/Code/discretizbench",
}, },
{
"type": "lldb",
"request": "launch",
"name": "best",
"program": "${workspaceFolder}/build/src/Platform/best",
"args": [
"-m",
"BoostAODE",
"-s",
"accuracy",
"--build",
],
"cwd": "/Users/rmontanana/Code/discretizbench",
},
{ {
"type": "lldb", "type": "lldb",
"request": "launch", "request": "launch",

109
.vscode/settings.json vendored
View File

@@ -1,109 +0,0 @@
{
"files.associations": {
"*.rmd": "markdown",
"*.py": "python",
"vector": "cpp",
"__bit_reference": "cpp",
"__bits": "cpp",
"__config": "cpp",
"__debug": "cpp",
"__errc": "cpp",
"__hash_table": "cpp",
"__locale": "cpp",
"__mutex_base": "cpp",
"__node_handle": "cpp",
"__nullptr": "cpp",
"__split_buffer": "cpp",
"__string": "cpp",
"__threading_support": "cpp",
"__tuple": "cpp",
"array": "cpp",
"atomic": "cpp",
"bitset": "cpp",
"cctype": "cpp",
"chrono": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"compare": "cpp",
"complex": "cpp",
"concepts": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"exception": "cpp",
"initializer_list": "cpp",
"ios": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"locale": "cpp",
"memory": "cpp",
"mutex": "cpp",
"new": "cpp",
"optional": "cpp",
"ostream": "cpp",
"ratio": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"string": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"typeinfo": "cpp",
"unordered_map": "cpp",
"variant": "cpp",
"algorithm": "cpp",
"iostream": "cpp",
"iomanip": "cpp",
"numeric": "cpp",
"set": "cpp",
"__tree": "cpp",
"deque": "cpp",
"list": "cpp",
"map": "cpp",
"unordered_set": "cpp",
"any": "cpp",
"condition_variable": "cpp",
"forward_list": "cpp",
"fstream": "cpp",
"stack": "cpp",
"thread": "cpp",
"__memory": "cpp",
"filesystem": "cpp",
"*.toml": "toml",
"utility": "cpp",
"__verbose_abort": "cpp",
"bit": "cpp",
"random": "cpp",
"*.tcc": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory_resource": "cpp",
"format": "cpp",
"valarray": "cpp",
"regex": "cpp",
"span": "cpp",
"cfenv": "cpp",
"cinttypes": "cpp",
"csetjmp": "cpp",
"future": "cpp",
"queue": "cpp",
"typeindex": "cpp",
"shared_mutex": "cpp",
"*.ipp": "cpp",
"cassert": "cpp",
"charconv": "cpp",
"source_location": "cpp",
"ranges": "cpp"
},
"cmake.configureOnOpen": false,
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools"
}

View File

@@ -30,6 +30,17 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF) option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" OFF) option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF) option(CODE_COVERAGE "Collect coverage from test library" OFF)
# Boost Library
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost 1.78.0 REQUIRED)
if(Boost_FOUND)
message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
include_directories(${Boost_INCLUDE_DIRS})
endif()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# CMakes modules # CMakes modules
# -------------- # --------------
@@ -54,7 +65,7 @@ endif (ENABLE_CLANG_TIDY)
add_git_submodule("lib/mdlp") add_git_submodule("lib/mdlp")
add_git_submodule("lib/argparse") add_git_submodule("lib/argparse")
add_git_submodule("lib/json") add_git_submodule("lib/json")
add_git_submodule("lib/openXLSX") find_library(XLSXWRITER_LIB libxlsxwriter.dylib PATHS /usr/local/lib)
# Subdirectories # Subdirectories
# -------------- # --------------

View File

@@ -19,13 +19,14 @@ copy: ## Copy binary files to selected folder
@cp build/src/Platform/main $(dest) @cp build/src/Platform/main $(dest)
@cp build/src/Platform/list $(dest) @cp build/src/Platform/list $(dest)
@cp build/src/Platform/manage $(dest) @cp build/src/Platform/manage $(dest)
@cp build/src/Platform/best $(dest)
@echo ">>> Done" @echo ">>> Done"
dependency: ## Create a dependency graph diagram of the project (build/dependency.png) dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
build: ## Build the main and BayesNetSample build: ## Build the main and BayesNetSample
cmake --build build -t main -t BayesNetSample -t manage -t list -j 32 cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32
clean: ## Clean the debug info clean: ## Clean the debug info
@echo ">>> Cleaning Debug BayesNet ..."; @echo ">>> Cleaning Debug BayesNet ...";
@@ -40,7 +41,7 @@ debug: ## Build a debug version of the project
@if [ -d ./build ]; then rm -rf ./build; fi @if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build; @mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \ cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \
cmake --build build -t main -t BayesNetSample -t manage -t list unit_tests -j 32; cmake --build build -t main -t BayesNetSample -t manage -t list -t best -t unit_tests -j 32;
@echo ">>> Done"; @echo ">>> Done";
release: ## Build a Release version of the project release: ## Build a Release version of the project
@@ -48,7 +49,7 @@ release: ## Build a Release version of the project
@if [ -d ./build ]; then rm -rf ./build; fi @if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build; @mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \ cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \
cmake --build build -t main -t BayesNetSample -t manage -t list -j 32; cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32;
@echo ">>> Done"; @echo ">>> Done";
test: ## Run tests test: ## Run tests

View File

@@ -2,4 +2,40 @@
Bayesian Network Classifier with libtorch from scratch Bayesian Network Classifier with libtorch from scratch
## 0. Setup
The following dependencies must be installed before compiling BayesNet.
### boost library
[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
### libxlsxwriter
```bash
cd lib/libxlsxwriter
make
sudo make install
```
The library has to be installed in /usr/local/lib; otherwise CMakeLists.txt has to be modified accordingly.
The following environment variable has to be set:
```bash
export LD_LIBRARY_PATH=/usr/local/lib
```
### Release
```bash
make release
```
### Debug & Tests
```bash
make debug
```
## 1. Introduction ## 1. Introduction

1
lib/libxlsxwriter Submodule

Submodule lib/libxlsxwriter added at 44e72c5862

Submodule lib/openXLSX deleted from b80da42d14

View File

@@ -104,180 +104,180 @@ int main(int argc, char** argv)
for (int i = 0; i < 10; i++) { for (int i = 0; i < 10; i++) {
cout << weights_.index({ i }).item<double>() << endl; cout << weights_.index({ i }).item<double>() << endl;
} }
// map<string, bool> datasets = { map<string, bool> datasets = {
// {"diabetes", true}, {"diabetes", true},
// {"ecoli", true}, {"ecoli", true},
// {"glass", true}, {"glass", true},
// {"iris", true}, {"iris", true},
// {"kdd_JapaneseVowels", false}, {"kdd_JapaneseVowels", false},
// {"letter", true}, {"letter", true},
// {"liver-disorders", true}, {"liver-disorders", true},
// {"mfeat-factors", true}, {"mfeat-factors", true},
// }; };
// auto valid_datasets = vector<string>(); auto valid_datasets = vector<string>();
// transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets), transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
// [](const pair<string, bool>& pair) { return pair.first; }); [](const pair<string, bool>& pair) { return pair.first; });
// argparse::ArgumentParser program("BayesNetSample"); argparse::ArgumentParser program("BayesNetSample");
// program.add_argument("-d", "--dataset") program.add_argument("-d", "--dataset")
// .help("Dataset file name") .help("Dataset file name")
// .action([valid_datasets](const std::string& value) { .action([valid_datasets](const std::string& value) {
// if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
// return value; return value;
// } }
// throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}"); throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
// } }
// ); );
// program.add_argument("-p", "--path") program.add_argument("-p", "--path")
// .help(" folder where the data files are located, default") .help(" folder where the data files are located, default")
// .default_value(string{ PATH } .default_value(string{ PATH }
// ); );
// program.add_argument("-m", "--model") program.add_argument("-m", "--model")
// .help("Model to use " + platform::Models::instance()->toString()) .help("Model to use " + platform::Models::instance()->toString())
// .action([](const std::string& value) { .action([](const std::string& value) {
// static const vector<string> choices = platform::Models::instance()->getNames(); static const vector<string> choices = platform::Models::instance()->getNames();
// if (find(choices.begin(), choices.end(), value) != choices.end()) { if (find(choices.begin(), choices.end(), value) != choices.end()) {
// return value; return value;
// } }
// throw runtime_error("Model must be one of " + platform::Models::instance()->toString()); throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
// } }
// ); );
// program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
// program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true); program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
// program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
// program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true); program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
// program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
// try { try {
// auto k = stoi(value); auto k = stoi(value);
// if (k < 2) { if (k < 2) {
// throw runtime_error("Number of folds must be greater than 1"); throw runtime_error("Number of folds must be greater than 1");
// } }
// return k; return k;
// } }
// catch (const runtime_error& err) { catch (const runtime_error& err) {
// throw runtime_error(err.what()); throw runtime_error(err.what());
// } }
// catch (...) { catch (...) {
// throw runtime_error("Number of folds must be an integer"); throw runtime_error("Number of folds must be an integer");
// }}); }});
// program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
// bool class_last, stratified, tensors, dump_cpt; bool class_last, stratified, tensors, dump_cpt;
// string model_name, file_name, path, complete_file_name; string model_name, file_name, path, complete_file_name;
// int nFolds, seed; int nFolds, seed;
// try { try {
// program.parse_args(argc, argv); program.parse_args(argc, argv);
// file_name = program.get<string>("dataset"); file_name = program.get<string>("dataset");
// path = program.get<string>("path"); path = program.get<string>("path");
// model_name = program.get<string>("model"); model_name = program.get<string>("model");
// complete_file_name = path + file_name + ".arff"; complete_file_name = path + file_name + ".arff";
// stratified = program.get<bool>("stratified"); stratified = program.get<bool>("stratified");
// tensors = program.get<bool>("tensors"); tensors = program.get<bool>("tensors");
// nFolds = program.get<int>("folds"); nFolds = program.get<int>("folds");
// seed = program.get<int>("seed"); seed = program.get<int>("seed");
// dump_cpt = program.get<bool>("dumpcpt"); dump_cpt = program.get<bool>("dumpcpt");
// class_last = datasets[file_name]; class_last = datasets[file_name];
// if (!file_exists(complete_file_name)) { if (!file_exists(complete_file_name)) {
// throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
// } }
// } }
// catch (const exception& err) { catch (const exception& err) {
// cerr << err.what() << endl; cerr << err.what() << endl;
// cerr << program; cerr << program;
// exit(1); exit(1);
// } }
/* /*
* Begin Processing * Begin Processing
*/ */
// auto handler = ArffFiles(); auto handler = ArffFiles();
// handler.load(complete_file_name, class_last); handler.load(complete_file_name, class_last);
// // Get Dataset X, y // Get Dataset X, y
// vector<mdlp::samples_t>& X = handler.getX(); vector<mdlp::samples_t>& X = handler.getX();
// mdlp::labels_t& y = handler.getY(); mdlp::labels_t& y = handler.getY();
// // Get className & Features // Get className & Features
// auto className = handler.getClassName(); auto className = handler.getClassName();
// vector<string> features; vector<string> features;
// auto attributes = handler.getAttributes(); auto attributes = handler.getAttributes();
// transform(attributes.begin(), attributes.end(), back_inserter(features), transform(attributes.begin(), attributes.end(), back_inserter(features),
// [](const pair<string, string>& item) { return item.first; }); [](const pair<string, string>& item) { return item.first; });
// // Discretize Dataset // Discretize Dataset
// auto [Xd, maxes] = discretize(X, y, features); auto [Xd, maxes] = discretize(X, y, features);
// maxes[className] = *max_element(y.begin(), y.end()) + 1; maxes[className] = *max_element(y.begin(), y.end()) + 1;
// map<string, vector<int>> states; map<string, vector<int>> states;
// for (auto feature : features) { for (auto feature : features) {
// states[feature] = vector<int>(maxes[feature]); states[feature] = vector<int>(maxes[feature]);
// } }
// states[className] = vector<int>(maxes[className]); states[className] = vector<int>(maxes[className]);
// auto clf = platform::Models::instance()->create(model_name); auto clf = platform::Models::instance()->create(model_name);
// clf->fit(Xd, y, features, className, states); clf->fit(Xd, y, features, className, states);
// if (dump_cpt) { if (dump_cpt) {
// cout << "--- CPT Tables ---" << endl; cout << "--- CPT Tables ---" << endl;
// clf->dump_cpt(); clf->dump_cpt();
// } }
// auto lines = clf->show(); auto lines = clf->show();
// for (auto line : lines) { for (auto line : lines) {
// cout << line << endl; cout << line << endl;
// } }
// cout << "--- Topological Order ---" << endl; cout << "--- Topological Order ---" << endl;
// auto order = clf->topological_order(); auto order = clf->topological_order();
// for (auto name : order) { for (auto name : order) {
// cout << name << ", "; cout << name << ", ";
// } }
// cout << "end." << endl; cout << "end." << endl;
// auto score = clf->score(Xd, y); auto score = clf->score(Xd, y);
// cout << "Score: " << score << endl; cout << "Score: " << score << endl;
// auto graph = clf->graph(); auto graph = clf->graph();
// auto dot_file = model_name + "_" + file_name; auto dot_file = model_name + "_" + file_name;
// ofstream file(dot_file + ".dot"); ofstream file(dot_file + ".dot");
// file << graph; file << graph;
// file.close(); file.close();
// cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl; cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
// cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl; cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
// string stratified_string = stratified ? " Stratified" : ""; string stratified_string = stratified ? " Stratified" : "";
// cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl; cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
// cout << "==========================================" << endl; cout << "==========================================" << endl;
// torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32); torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
// torch::Tensor yt = torch::tensor(y, torch::kInt32); torch::Tensor yt = torch::tensor(y, torch::kInt32);
// for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
// Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32)); Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
// } }
// float total_score = 0, total_score_train = 0, score_train, score_test; float total_score = 0, total_score_train = 0, score_train, score_test;
// platform::Fold* fold; platform::Fold* fold;
// if (stratified) if (stratified)
// fold = new platform::StratifiedKFold(nFolds, y, seed); fold = new platform::StratifiedKFold(nFolds, y, seed);
// else else
// fold = new platform::KFold(nFolds, y.size(), seed); fold = new platform::KFold(nFolds, y.size(), seed);
// for (auto i = 0; i < nFolds; ++i) { for (auto i = 0; i < nFolds; ++i) {
// auto [train, test] = fold->getFold(i); auto [train, test] = fold->getFold(i);
// cout << "Fold: " << i + 1 << endl; cout << "Fold: " << i + 1 << endl;
// if (tensors) { if (tensors) {
// auto ttrain = torch::tensor(train, torch::kInt64); auto ttrain = torch::tensor(train, torch::kInt64);
// auto ttest = torch::tensor(test, torch::kInt64); auto ttest = torch::tensor(test, torch::kInt64);
// torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain); torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
// torch::Tensor ytraint = yt.index({ ttrain }); torch::Tensor ytraint = yt.index({ ttrain });
// torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest); torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
// torch::Tensor ytestt = yt.index({ ttest }); torch::Tensor ytestt = yt.index({ ttest });
// clf->fit(Xtraint, ytraint, features, className, states); clf->fit(Xtraint, ytraint, features, className, states);
// auto temp = clf->predict(Xtraint); auto temp = clf->predict(Xtraint);
// score_train = clf->score(Xtraint, ytraint); score_train = clf->score(Xtraint, ytraint);
// score_test = clf->score(Xtestt, ytestt); score_test = clf->score(Xtestt, ytestt);
// } else { } else {
// auto [Xtrain, ytrain] = extract_indices(train, Xd, y); auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
// auto [Xtest, ytest] = extract_indices(test, Xd, y); auto [Xtest, ytest] = extract_indices(test, Xd, y);
// clf->fit(Xtrain, ytrain, features, className, states); clf->fit(Xtrain, ytrain, features, className, states);
// score_train = clf->score(Xtrain, ytrain); score_train = clf->score(Xtrain, ytrain);
// score_test = clf->score(Xtest, ytest); score_test = clf->score(Xtest, ytest);
// } }
// if (dump_cpt) { if (dump_cpt) {
// cout << "--- CPT Tables ---" << endl; cout << "--- CPT Tables ---" << endl;
// clf->dump_cpt(); clf->dump_cpt();
// } }
// total_score_train += score_train; total_score_train += score_train;
// total_score += score_test; total_score += score_test;
// cout << "Score Train: " << score_train << endl; cout << "Score Train: " << score_train << endl;
// cout << "Score Test : " << score_test << endl; cout << "Score Test : " << score_test << endl;
// cout << "-------------------------------------------------------------------------------" << endl; cout << "-------------------------------------------------------------------------------" << endl;
// } }
// cout << "**********************************************************************************" << endl; cout << "**********************************************************************************" << endl;
// cout << "Average Score Train: " << total_score_train / nFolds << endl; cout << "Average Score Train: " << total_score_train / nFolds << endl;
// cout << "Average Score Test : " << total_score / nFolds << endl;return 0; cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
} }

292
src/Platform/BestResults.cc Normal file
View File

@@ -0,0 +1,292 @@
#include <filesystem>
#include <fstream>
#include <iostream>
#include <sstream>
#include "BestResults.h"
#include "Result.h"
#include "Colors.h"
#include "Statistics.h"
namespace fs = std::filesystem;
// Format a time point of any clock (for instance the value returned by
// std::filesystem::last_write_time) as "YYYY-MM-DD HH:MM" in UTC.
// Technique taken from https://stackoverflow.com/a/58237530/1389271
template <typename TP>
std::string ftime_to_string(TP tp)
{
    namespace chr = std::chrono;
    // Re-base the value onto system_clock: measure its distance from "now" on
    // its own clock and apply that distance to system_clock's "now".
    const auto offsetFromNow = tp - TP::clock::now();
    const auto systemTime = chr::time_point_cast<chr::system_clock::duration>(offsetFromNow + chr::system_clock::now());
    const std::time_t calendarTime = chr::system_clock::to_time_t(systemTime);
    std::stringstream formatted;
    formatted << std::put_time(std::gmtime(&calendarTime), "%Y-%m-%d %H:%M");
    return formatted.str();
}
namespace platform {
// Scan every result file selected by loadResultFiles(), keep the highest score
// found for each dataset and write the winners to the best-results file.
//
// The JSON written maps each dataset name to the array
//   [score, hyperparameters, name of the result file it came from].
// On equal scores the entry found first is kept (strict '>' comparison).
//
// Terminates the process with exit(1) when no result file is found or when the
// best-results file cannot be written.
// Returns the path of the best-results file.
string BestResults::build()
{
    auto files = loadResultFiles();
    if (files.size() == 0) {
        cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
        exit(1);
    }
    json bests;
    for (const auto& resultFile : files) {
        auto result = Result(path, resultFile);
        auto data = result.load();
        for (auto const& item : data.at("results")) {
            // Read each field once instead of repeating the JSON lookups.
            const auto dataset = item.at("dataset").get<string>();
            const auto itemScore = item.at("score").get<double>();
            const bool improves = !bests.contains(dataset)
                || itemScore > bests.at(dataset).at(0).get<double>();
            if (improves) {
                bests[dataset] = { itemScore, item.at("hyperparameters"), resultFile };
            }
        }
    }
    string bestFileName = path + bestResultFile();
    if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
        fclose(fileTest);
        cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << endl;
    }
    ofstream output(bestFileName);
    output << bests;
    output.close();
    // A stream that failed to open, write or flush is left in a failed state;
    // report it instead of returning the name of a file that was not written.
    if (!output) {
        cerr << Colors::MAGENTA() << "Unable to write file " << bestFileName << Colors::RESET() << endl;
        exit(1);
    }
    return bestFileName;
}
// Name (without directory) of the best-results file for the current score and
// model: best_results_<score>_<model>.json
string BestResults::bestResultFile()
{
    string fileName = "best_results_" + score;
    fileName += "_" + model;
    fileName += ".json";
    return fileName;
}
// Extract the model and score fields from a result file name.
//
// The name is split on '_'; the score is the second field and the model the
// third one, e.g.
//   results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
// gives { "BoostAODE", "accuracy" }.
//
// Returns { model, score }. When the name holds fewer than two '_' separators
// it cannot contain both fields, so both are returned empty. When there is no
// third separator the model is everything after the second one.
pair<string, string> getModelScore(string name)
{
    const auto firstSep = name.find('_');
    if (firstSep == string::npos) {
        return { string(), string() };
    }
    const auto secondSep = name.find('_', firstSep + 1);
    if (secondSep == string::npos) {
        return { string(), string() };
    }
    const auto thirdSep = name.find('_', secondSep + 1);
    string score = name.substr(firstSep + 1, secondSep - firstSep - 1);
    string model = thirdSep == string::npos
        ? name.substr(secondSep + 1)
        : name.substr(secondSep + 1, thirdSep - secondSep - 1);
    return { model, score };
}
// List the result files found in `path` that belong to the current score and
// model. A file qualifies when its name starts with "results_", contains
// ".json", its score field equals `score` and its model field equals `model`
// (or `model` is "any"). Only file names, without directory, are returned.
vector<string> BestResults::loadResultFiles()
{
    vector<string> selected;
    for (const auto& entry : std::filesystem::directory_iterator(path)) {
        const string candidate = entry.path().filename().string();
        const bool isResultFile = candidate.find("results_") == 0 && candidate.find(".json") != string::npos;
        if (!isResultFile) {
            continue;
        }
        const auto [fileModel, fileScore] = getModelScore(candidate);
        if (score != fileScore) {
            continue;
        }
        if (model == fileModel || model == "any") {
            selected.push_back(candidate);
        }
    }
    return selected;
}
// Parse the JSON document stored in fileName and return it.
// Throws invalid_argument when the file cannot be opened.
json BestResults::loadFile(const string& fileName)
{
    ifstream resultData(fileName);
    if (!resultData.is_open()) {
        throw invalid_argument("Unable to open result file. [" + fileName + "]");
    }
    return json::parse(resultData);
}
// Return the distinct model names that appear in the selected result files,
// in ascending order. Terminates the process with exit(1) when there are no
// result files.
vector<string> BestResults::getModels()
{
    const auto files = loadResultFiles();
    if (files.empty()) {
        cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
        exit(1);
    }
    // A set removes duplicates and keeps the names sorted.
    set<string> uniqueModels;
    for (const auto& fileName : files) {
        uniqueModels.insert(getModelScore(fileName).first);
    }
    return vector<string>(uniqueModels.begin(), uniqueModels.end());
}
void BestResults::buildAll()
{
auto models = getModels();
for (const auto& model : models) {
cout << "Building best results for model: " << model << endl;
this->model = model;
build();
}
model = "any";
}
void BestResults::reportSingle()
{
string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
} else {
cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
exit(1);
}
auto date = ftime_to_string(filesystem::last_write_time(bestFileName));
auto data = loadFile(bestFileName);
cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl;
cout << "--------------------------------------------------------" << endl;
cout << Colors::GREEN() << " # Dataset Score File Hyperparameters" << endl;
cout << "=== ========================= =========== ================================================================== ================================================= " << endl;
auto i = 0;
bool odd = true;
for (auto const& item : data.items()) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
cout << color << setw(3) << fixed << right << i++ << " ";
cout << setw(25) << left << item.key() << " ";
cout << setw(11) << setprecision(9) << fixed << item.value().at(0).get<double>() << " ";
cout << setw(66) << item.value().at(2).get<string>() << " ";
cout << item.value().at(1) << " ";
cout << endl;
odd = !odd;
}
}
// Load the best-results file of every model in `models` into a single JSON
// table: table[<model>] holds that model's file contents and
// table["dateTable"] holds a formatted date taken from the files' write times
// (empty when `models` is empty).
//
// Every file must contain the same number of datasets as the first one;
// otherwise, or when a file cannot be opened, the process ends with exit(1).
// The `model` member is overwritten with each model name and is left set to
// the last one processed.
json BestResults::buildTableResults(vector<string> models)
{
    size_t numberOfDatasets = 0;
    bool first = true;
    json table;
    // NOTE(review): the original local was called maxDate, but it starts at
    // max() and is replaced by any smaller value, so it ends up holding the
    // EARLIEST write time. Confirm which of the two is intended.
    auto tableDate = filesystem::file_time_type::max();
    for (const auto& modelName : models) {
        // bestResultFile() builds the name from the `model` member.
        this->model = modelName;
        string bestFileName = path + bestResultFile();
        if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
            fclose(fileTest);
        } else {
            cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
            exit(1);
        }
        auto dateWrite = filesystem::last_write_time(bestFileName);
        if (dateWrite < tableDate) {
            tableDate = dateWrite;
        }
        auto data = loadFile(bestFileName);
        if (first) {
            // The first file sets the number of datasets every other file must match.
            first = false;
            numberOfDatasets = data.size();
        } else if (numberOfDatasets != data.size()) {
            cerr << Colors::MAGENTA() << "The number of datasets in the best results files is not the same for all the models." << Colors::RESET() << endl;
            exit(1);
        }
        table[modelName] = data;
    }
    // With no models tableDate is still file_time_type::max(), and converting
    // that value overflows inside ftime_to_string; store an empty date instead.
    table["dateTable"] = models.empty() ? string() : ftime_to_string(tableDate);
    return table;
}
// Print the comparison table built by buildTableResults(): one column per
// model, one row per dataset and a final row with the sum of each column.
// In every row the highest score(s) are printed in red, and so is the highest
// total.
//
// models: column order; every name must be a key of `table`.
// table:  table[<model>][<dataset>] = [score, ...] plus the "dateTable" entry.
void BestResults::printTableResults(vector<string> models, json table)
{
    cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
    cout << "------------------------------------------------" << endl;
    cout << Colors::GREEN() << " # Dataset ";
    for (const auto& model : models) {
        cout << setw(12) << left << model << " ";
    }
    cout << endl;
    cout << "=== ========================= ";
    for (const auto& model : models) {
        cout << "============ ";
    }
    cout << endl;
    auto i = 0;
    bool odd = true;
    map<string, double> totals;
    for (const auto& model : models) {
        totals[model] = 0.0;
    }
    // The dataset list is taken from the first model. table.begin() must not be
    // used for this: the object is iterated in key order, so its first entry is
    // the "dateTable" string whenever every model name sorts after that key.
    json origin = models.empty() ? json::object() : table.at(models.front());
    vector<double> rowValues;
    for (auto const& item : origin.items()) {
        auto color = odd ? Colors::BLUE() : Colors::CYAN();
        cout << color << setw(3) << fixed << right << i++ << " ";
        cout << setw(25) << left << item.key() << " ";
        // Read each model's score once and find the highest of the row.
        rowValues.clear();
        double maxValue = 0;
        for (const auto& model : models) {
            double value = table[model].at(item.key()).at(0).get<double>();
            rowValues.push_back(value);
            if (value > maxValue) {
                maxValue = value;
            }
        }
        // Print the row with red color on the highest values.
        for (size_t column = 0; column < models.size(); ++column) {
            string efectiveColor = color;
            double value = rowValues[column];
            if (value == maxValue) {
                efectiveColor = Colors::RED();
            }
            totals[models[column]] += value;
            cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
        }
        cout << endl;
        odd = !odd;
    }
    cout << Colors::GREEN() << "=== ========================= ";
    for (const auto& model : models) {
        cout << "============ ";
    }
    cout << endl;
    cout << Colors::GREEN() << setw(30) << " Totals...................";
    double maxTotal = 0.0;
    for (const auto& total : totals) {
        if (total.second > maxTotal) {
            maxTotal = total.second;
        }
    }
    for (const auto& model : models) {
        string efectiveColor = Colors::GREEN();
        if (totals[model] == maxTotal) {
            efectiveColor = Colors::RED();
        }
        cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " ";
    }
    cout << endl;
}
/**
 * Report the best results of every model returned by getModels().
 *
 * Builds the comparison table, prints it and, when the friedman flag is set,
 * runs the Friedman test followed by the post-hoc Holm test.
 */
void BestResults::reportAll()
{
    auto models = getModels();
    // Build the table of results
    json table = buildTableResults(models);
    // Print the table of results
    printTableResults(models, table);
    // Nothing to compare without models or when the test was not requested
    if (!friedman || models.empty()) {
        return;
    }
    // Compute the Friedman test
    // The dataset names are read from the first model entry. table.begin() is
    // avoided because JSON object keys are ordered and the first entry may be
    // the "dateTable" string instead of a model.
    vector<string> datasets;
    for (const auto& dataset : table.at(models.front()).items()) {
        datasets.push_back(dataset.key());
    }
    double significance = 0.05;
    Statistics stats(models, datasets, table, significance);
    auto result = stats.friedmanTest();
    stats.postHocHolmTest(result);
}
}

View File

@@ -0,0 +1,29 @@
#ifndef BESTRESULTS_H
#define BESTRESULTS_H
#include <string>
#include <set>
#include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json;
namespace platform {
// Manages the best-results files of the models: building them and reporting
// them for a single model or for all the models at once.
class BestResults {
public:
// Stores the arguments in the members of the same name; no other work is done.
explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
string build();
void reportSingle();
// Builds and prints the table with every model and, if friedman is set,
// runs the Friedman test and the post-hoc Holm test on it.
void reportAll();
void buildAll();
private:
vector<string> getModels();
vector<string> loadResultFiles();
// Loads the best-results file of each model into one JSON object that also
// carries a "dateTable" entry. Overwrites the model member while it runs.
json buildTableResults(vector<string> models);
// Prints the table built by buildTableResults with per-model totals.
void printTableResults(vector<string> models, json table);
string bestResultFile();
json loadFile(const string& fileName);
// Prefix prepended to the name returned by bestResultFile()
string path;
// Name of the score, shown in the header of the printed table
string score;
// Model currently being processed
string model;
// Whether reportAll() has to run the Friedman test
bool friedman;
};
}
#endif //BESTRESULTS_H

View File

@@ -1,7 +1,7 @@
#ifndef BESTRESULT_H #ifndef BESTSCORE_H
#define BESTRESULT_H #define BESTSCORE_H
#include <string> #include <string>
class BestResult { class BestScore {
public: public:
static std::string title() { return "STree_default (linear-ovo)"; } static std::string title() { return "STree_default (linear-ovo)"; }
static double score() { return 22.109799; } static double score() { return 22.109799; }

View File

@@ -4,13 +4,17 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp) include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc) add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
add_executable(list list.cc platformUtils Datasets.cc) add_executable(list list.cc platformUtils Datasets.cc)
add_executable(best best.cc BestResults.cc Result.cc Statistics.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
target_link_libraries(manage "${TORCH_LIBRARIES}" OpenXLSX::OpenXLSX stdc++fs) target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
target_link_libraries(best Boost::boost stdc++fs)
else() else()
target_link_libraries(manage "${TORCH_LIBRARIES}" OpenXLSX::OpenXLSX) target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
target_link_libraries(best Boost::boost)
endif() endif()
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -1,10 +1,22 @@
#include <sstream> #include <sstream>
#include <locale> #include <locale>
#include "Datasets.h"
#include "ReportBase.h" #include "ReportBase.h"
#include "BestResult.h" #include "BestScore.h"
namespace platform { namespace platform {
/**
 * Store the report data and the compare flag, set the margin to 0.1 and fill
 * the legend that maps each status symbol to its description.
 *
 * @param data_   JSON contents of the result to report
 * @param compare value stored in the compare member, which compareResult() checks
 */
ReportBase::ReportBase(json data_, bool compare) : data(data_), compare(compare), margin(0.1)
{
    // The margin is shown as a percentage with one decimal
    stringstream aboveZeroR;
    aboveZeroR << "Better than ZeroR + " << setprecision(1) << fixed << margin * 100 << "%";
    meaning.emplace(Symbols::equal_best, "Equal to best");
    meaning.emplace(Symbols::better_best, "Better than best");
    meaning.emplace(Symbols::cross, "Less than or equal to ZeroR");
    meaning.emplace(Symbols::upward_arrow, aboveZeroR.str());
}
string ReportBase::fromVector(const string& key) string ReportBase::fromVector(const string& key)
{ {
stringstream oss; stringstream oss;
@@ -34,4 +46,69 @@ namespace platform {
header(); header();
body(); body();
} }
/**
 * Compute the status symbol of a result and count it in the summary.
 *
 * With compare set, the result is checked against the best result stored for
 * the dataset. Otherwise, only for the accuracy score and two-class datasets,
 * it is checked against the share of the largest class increased by the
 * margin (capped at 0.9995 when it exceeds 1).
 *
 * @param dataset name of the dataset the result belongs to
 * @param result  score obtained in that dataset
 * @return the status symbol, or a single blank when no status applies
 */
string ReportBase::compareResult(const string& dataset, double result)
{
    const string noStatus = " ";
    string status = noStatus;
    if (compare) {
        const double best = bestResult(dataset, data["model"].get<string>());
        if (result == best) {
            status = Symbols::equal_best;
        } else if (result > best) {
            status = Symbols::better_best;
        }
    } else if (data["score_name"].get<string>() == "accuracy") {
        auto dt = Datasets(Paths::datasets(), false);
        dt.loadDataset(dataset);
        auto numClasses = dt.getNClasses(dataset);
        if (numClasses == 2) {
            vector<int> distribution = dt.getClassesCounts(dataset);
            double nSamples = dt.getNSamples(dataset);
            vector<int>::iterator maxValue = max_element(distribution.begin(), distribution.end());
            double mark = *maxValue / nSamples * (1 + margin);
            if (mark > 1) {
                mark = 0.9995;
            }
            if (result < mark) {
                status = Symbols::cross;
            } else if (result > mark) {
                status = Symbols::upward_arrow;
            } else {
                status = "=";
            }
        }
    }
    if (status != noStatus) {
        // A missing key is created with a zero count before being incremented
        summary[status]++;
    }
    return status;
}
double ReportBase::bestResult(const string& dataset, const string& model)
{
double value = 0.0;
if (bestResults.size() == 0) {
// try to load the best results
string score = data["score_name"];
replace(score.begin(), score.end(), '_', '-');
string fileName = "best_results_" + score + "_" + model + ".json";
ifstream resultData(Paths::results() + "/" + fileName);
if (resultData.is_open()) {
bestResults = json::parse(resultData);
} else {
existBestFile = false;
}
}
try {
value = bestResults.at(dataset).at(0);
}
catch (exception) {
value = 1.0;
}
return value;
}
// Returns false once bestResult() has failed to open the best-results file;
// true otherwise (including when no lookup has been attempted yet).
bool ReportBase::getExistBestFile()
{
return existBestFile;
}
} }

View File

@@ -2,22 +2,36 @@
#define REPORTBASE_H #define REPORTBASE_H
#include <string> #include <string>
#include <iostream> #include <iostream>
#include "Paths.h"
#include "Symbols.h"
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using json = nlohmann::json; using json = nlohmann::json;
namespace platform { namespace platform {
using namespace std; using namespace std;
class ReportBase { class ReportBase {
public: public:
explicit ReportBase(json data_) { data = data_; }; explicit ReportBase(json data_, bool compare);
virtual ~ReportBase() = default; virtual ~ReportBase() = default;
void show(); void show();
protected: protected:
json data; json data;
string fromVector(const string& key); string fromVector(const string& key);
string fVector(const string& title, const json& data, const int width, const int precision); string fVector(const string& title, const json& data, const int width, const int precision);
bool getExistBestFile();
virtual void header() = 0; virtual void header() = 0;
virtual void body() = 0; virtual void body() = 0;
virtual void showSummary() = 0;
string compareResult(const string& dataset, double result);
map<string, int> summary;
double margin;
map<string, string> meaning;
bool compare;
private:
double bestResult(const string& dataset, const string& model);
json bestResults;
bool existBestFile = true;
}; };
}; };
#endif #endif

View File

@@ -1,7 +1,7 @@
#include <sstream> #include <sstream>
#include <locale> #include <locale>
#include "ReportConsole.h" #include "ReportConsole.h"
#include "BestResult.h" #include "BestScore.h"
namespace platform { namespace platform {
@@ -11,11 +11,11 @@ namespace platform {
string do_grouping() const { return "\03"; } string do_grouping() const { return "\03"; }
}; };
string ReportConsole::headerLine(const string& text) string ReportConsole::headerLine(const string& text, int utf = 0)
{ {
int n = MAXL - text.length() - 3; int n = MAXL - text.length() - 3;
n = n < 0 ? 0 : n; n = n < 0 ? 0 : n;
return "* " + text + string(n, ' ') + "*\n"; return "* " + text + string(n + utf, ' ') + "*\n";
} }
void ReportConsole::header() void ReportConsole::header()
@@ -36,8 +36,8 @@ namespace platform {
} }
void ReportConsole::body() void ReportConsole::body()
{ {
cout << Colors::GREEN() << " # Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl; cout << Colors::GREEN() << " # Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl;
cout << "=== ============================== ====== ===== === ========= ========= ========= =============== ================== ===============" << endl; cout << "=== ========================= ====== ===== === ========= ========= ========= =============== =================== ====================" << endl;
json lastResult; json lastResult;
double totalScore = 0.0; double totalScore = 0.0;
bool odd = true; bool odd = true;
@@ -50,15 +50,17 @@ namespace platform {
auto color = odd ? Colors::CYAN() : Colors::BLUE(); auto color = odd ? Colors::CYAN() : Colors::BLUE();
cout << color; cout << color;
cout << setw(3) << index++ << " "; cout << setw(3) << index++ << " ";
cout << setw(30) << left << r["dataset"].get<string>() << " "; cout << setw(25) << left << r["dataset"].get<string>() << " ";
cout << setw(6) << right << r["samples"].get<int>() << " "; cout << setw(6) << right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " "; cout << setw(5) << right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " "; cout << setw(3) << right << r["classes"].get<int>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["nodes"].get<float>() << " "; cout << setw(9) << setprecision(2) << fixed << r["nodes"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["leaves"].get<float>() << " "; cout << setw(9) << setprecision(2) << fixed << r["leaves"].get<float>() << " ";
cout << setw(9) << setprecision(2) << fixed << r["depth"].get<float>() << " "; cout << setw(9) << setprecision(2) << fixed << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get<double>() << " "; cout << setw(8) << right << setprecision(6) << fixed << r["score"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_std"].get<double>();
cout << setw(11) << right << setprecision(6) << fixed << r["time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get<double>() << " "; const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>());
cout << status;
cout << setw(12) << right << setprecision(6) << fixed << r["time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["time_std"].get<double>() << " ";
try { try {
cout << r["hyperparameters"].get<string>(); cout << r["hyperparameters"].get<string>();
} }
@@ -81,15 +83,30 @@ namespace platform {
footer(totalScore); footer(totalScore);
} }
} }
void ReportConsole::showSummary()
{
for (const auto& item : summary) {
stringstream oss;
oss << setw(3) << left << item.first;
oss << setw(3) << right << item.second << " ";
oss << left << meaning.at(item.first);
cout << headerLine(oss.str(), 2);
}
}
void ReportConsole::footer(double totalScore) void ReportConsole::footer(double totalScore)
{ {
cout << Colors::MAGENTA() << string(MAXL, '*') << endl; cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
showSummary();
auto score = data["score_name"].get<string>(); auto score = data["score_name"].get<string>();
if (score == BestResult::scoreName()) { if (score == BestScore::scoreName()) {
stringstream oss; stringstream oss;
oss << score << " compared to " << BestResult::title() << " .: " << totalScore / BestResult::score(); oss << score << " compared to " << BestScore::title() << " .: " << totalScore / BestScore::score();
cout << headerLine(oss.str()); cout << headerLine(oss.str());
} }
if (!getExistBestFile() && compare) {
cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
}
cout << string(MAXL, '*') << endl << Colors::RESET(); cout << string(MAXL, '*') << endl << Colors::RESET();
} }
} }

View File

@@ -7,17 +7,18 @@
namespace platform { namespace platform {
using namespace std; using namespace std;
const int MAXL = 132; const int MAXL = 133;
class ReportConsole : public ReportBase { class ReportConsole : public ReportBase {
public: public:
explicit ReportConsole(json data_, int index = -1) : ReportBase(data_), selectedIndex(index) {}; explicit ReportConsole(json data_, bool compare = false, int index = -1) : ReportBase(data_, compare), selectedIndex(index) {};
virtual ~ReportConsole() = default; virtual ~ReportConsole() = default;
private: private:
int selectedIndex; int selectedIndex;
string headerLine(const string& text); string headerLine(const string& text, int utf);
void header() override; void header() override;
void body() override; void body() override;
void footer(double totalScore); void footer(double totalScore);
void showSummary() override;
}; };
}; };
#endif #endif

View File

@@ -1,7 +1,7 @@
#include <sstream> #include <sstream>
#include <locale> #include <locale>
#include "ReportExcel.h" #include "ReportExcel.h"
#include "BestResult.h" #include "BestScore.h"
namespace platform { namespace platform {
@@ -13,17 +13,195 @@ namespace platform {
string do_grouping() const { return "\03"; } string do_grouping() const { return "\03"; }
}; };
/**
 * Set up the fonts and colors of the report and add its sheet to the workbook.
 *
 * @param data_    JSON contents of the result to report
 * @param compare  passed through to the ReportBase constructor
 * @param workbook workbook to add the sheet to; when NULL createFile() creates one
 */
ReportExcel::ReportExcel(json data_, bool compare, lxw_workbook* workbook)
    : ReportBase(data_, compare)
    , workbook(workbook)
    , row(0)
    , normalSize(14) //font size for report body
    , colorTitle(0xB1A0C7)
    , colorOdd(0xDCE6F1)
    , colorEven(0xFDE9D9)
{
    createFile();
}
// Returns the workbook this report writes to (the one received in the
// constructor or the one created by createFile()).
lxw_workbook* ReportExcel::getWorkbook()
{
return workbook;
}
/**
 * Look up the format to apply for a style name in the current row.
 *
 * @param style base name of the style; an empty name means no format
 * @return NULL for an empty name, otherwise the "_odd" or "_even" variant of
 *         the style chosen from the parity of the row member
 */
lxw_format* ReportExcel::efectiveStyle(const string& style)
{
    if (style.empty()) {
        return NULL;
    }
    const string parity = (row % 2 != 0) ? "_odd" : "_even";
    return styles.at(style + parity);
}
void ReportExcel::writeString(int row, int col, const string& text, const string& style)
{
worksheet_write_string(worksheet, row, col, text.c_str(), efectiveStyle(style));
}
void ReportExcel::writeInt(int row, int col, const int number, const string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ReportExcel::writeDouble(int row, int col, const double number, const string& style)
{
worksheet_write_number(worksheet, row, col, number, efectiveStyle(style));
}
void ReportExcel::formatColumns()
{
worksheet_freeze_panes(worksheet, 6, 1);
vector<int> columns_sizes = { 22, 10, 9, 7, 12, 12, 12, 12, 12, 3, 15, 12, 23 };
for (int i = 0; i < columns_sizes.size(); ++i) {
worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL);
}
}
/**
 * Set the background color of a format.
 *
 * @param style format to modify
 * @param odd   when true colorEven is applied, otherwise colorOdd
 */
void ReportExcel::addColor(lxw_format* style, bool odd)
{
    uint32_t background = colorOdd;
    if (odd) {
        background = colorEven;
    }
    format_set_bg_color(style, lxw_color_t(background));
}
void ReportExcel::createStyle(const string& name, lxw_format* style, bool odd)
{
addColor(style, odd);
if (name == "textCentered") {
format_set_align(style, LXW_ALIGN_CENTER);
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "text") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "bodyHeader") {
format_set_bold(style);
format_set_font_size(style, normalSize);
format_set_align(style, LXW_ALIGN_CENTER);
format_set_align(style, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(style, LXW_BORDER_THIN);
format_set_bg_color(style, lxw_color_t(colorTitle));
} else if (name == "result") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "0.0000000");
} else if (name == "time") {
format_set_font_size(style, normalSize);
format_set_border(style, LXW_BORDER_THIN);
format_set_num_format(style, "#,##0.000000");
} else if (name == "ints") {
format_set_font_size(style, normalSize);
format_set_num_format(style, "###,##0");
format_set_border(style, LXW_BORDER_THIN);
} else if (name == "floats") {
format_set_border(style, LXW_BORDER_THIN);
format_set_font_size(style, normalSize);
format_set_num_format(style, "#,##0.00");
}
}
void ReportExcel::createFormats()
{
auto styleNames = { "text", "textCentered", "bodyHeader", "result", "time", "ints", "floats" };
lxw_format* style;
for (string name : styleNames) {
lxw_format* style = workbook_add_format(workbook);
style = workbook_add_format(workbook);
createStyle(name, style, true);
styles[name + "_odd"] = style;
style = workbook_add_format(workbook);
createStyle(name, style, false);
styles[name + "_even"] = style;
}
// Header 1st line
lxw_format* headerFirst = workbook_add_format(workbook);
format_set_bold(headerFirst);
format_set_font_size(headerFirst, 18);
format_set_align(headerFirst, LXW_ALIGN_CENTER);
format_set_align(headerFirst, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerFirst, LXW_BORDER_THIN);
format_set_bg_color(headerFirst, lxw_color_t(colorTitle));
// Header rest
lxw_format* headerRest = workbook_add_format(workbook);
format_set_bold(headerRest);
format_set_align(headerRest, LXW_ALIGN_CENTER);
format_set_font_size(headerRest, 16);
format_set_align(headerRest, LXW_ALIGN_VERTICAL_CENTER);
format_set_border(headerRest, LXW_BORDER_THIN);
format_set_bg_color(headerRest, lxw_color_t(colorOdd));
// Header small
lxw_format* headerSmall = workbook_add_format(workbook);
format_set_bold(headerSmall);
format_set_align(headerSmall, LXW_ALIGN_LEFT);
format_set_font_size(headerSmall, 12);
format_set_border(headerSmall, LXW_BORDER_THIN);
format_set_align(headerSmall, LXW_ALIGN_VERTICAL_CENTER);
format_set_bg_color(headerSmall, lxw_color_t(colorOdd));
// Summary style
lxw_format* summaryStyle = workbook_add_format(workbook);
format_set_bold(summaryStyle);
format_set_font_size(summaryStyle, 16);
format_set_border(summaryStyle, LXW_BORDER_THIN);
format_set_align(summaryStyle, LXW_ALIGN_VERTICAL_CENTER);
styles["headerFirst"] = headerFirst;
styles["headerRest"] = headerRest;
styles["headerSmall"] = headerSmall;
styles["summaryStyle"] = summaryStyle;
}
/**
 * Set the document properties of the workbook; the title is taken from the
 * "title" entry of the report data and the rest are fixed texts.
 */
void ReportExcel::setProperties()
{
    // Owns the title characters while the properties are handed over; this
    // replaces a variable-length array, which is not standard C++.
    string title = data["title"].get<string>();
    lxw_doc_properties properties = {
        // &title[0] points at the string's own null-terminated buffer
        .title = &title[0],
        .subject = const_cast<char*>("Machine learning results"),
        .author = const_cast<char*>("Ricardo Montañana Gómez"),
        .manager = const_cast<char*>("Dr. J. A. Gámez, Dr. J. M. Puerta"),
        .company = const_cast<char*>("UCLM"),
        .comments = const_cast<char*>("Created with libxlsxwriter and c++"),
    };
    workbook_set_properties(workbook, &properties);
}
void ReportExcel::createFile() void ReportExcel::createFile()
{ {
doc.create(Paths::excel() + "some_results.xlsx"); if (workbook == NULL) {
wks = doc.workbook().worksheet("Sheet1"); workbook = workbook_new((Paths::excel() + fileName).c_str());
wks.setName(data["model"].get<string>()); }
const string name = data["model"].get<string>();
string suffix = "";
string efectiveName;
int num = 1;
// Create a sheet with the name of the model
while (true) {
efectiveName = name + suffix;
if (workbook_get_worksheet_by_name(workbook, efectiveName.c_str())) {
suffix = to_string(++num);
} else {
worksheet = workbook_add_worksheet(workbook, efectiveName.c_str());
break;
}
if (num > 100) {
throw invalid_argument("Couldn't create sheet " + efectiveName);
}
}
cout << "Adding sheet " << efectiveName << " to " << Paths::excel() + fileName << endl;
setProperties();
createFormats();
formatColumns();
} }
void ReportExcel::closeFile() void ReportExcel::closeFile()
{ {
doc.save(); workbook_close(workbook);
doc.close();
} }
void ReportExcel::header() void ReportExcel::header()
@@ -32,45 +210,62 @@ namespace platform {
locale::global(mylocale); locale::global(mylocale);
cout.imbue(mylocale); cout.imbue(mylocale);
stringstream oss; stringstream oss;
wks.cell("A1").value().set( string message = data["model"].get<string>() + " ver. " + data["version"].get<string>() + " " +
"Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + data["language"].get<string>() + " ver. " + data["language_version"].get<string>() +
to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) +
" random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>()); " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>();
wks.cell("A2").value() = data["title"].get<string>(); worksheet_merge_range(worksheet, 0, 0, 0, 12, message.c_str(), styles["headerFirst"]);
wks.cell("A3").value() = "Random seeds: " + fromVector("seeds") + " Stratified: " + worksheet_merge_range(worksheet, 1, 0, 1, 12, data["title"].get<string>().c_str(), styles["headerRest"]);
(data["stratified"].get<bool>() ? "True" : "False"); worksheet_merge_range(worksheet, 2, 0, 3, 0, ("Score is " + data["score_name"].get<string>()).c_str(), styles["headerRest"]);
oss << "Execution took " << setprecision(2) << fixed << data["duration"].get<float>() << " seconds, " worksheet_merge_range(worksheet, 2, 1, 3, 3, "Execution time", styles["headerRest"]);
<< data["duration"].get<float>() / 3600 << " hours, on " << data["platform"].get<string>(); oss << setprecision(2) << fixed << data["duration"].get<float>() << " s";
wks.cell("A4").value() = oss.str(); worksheet_merge_range(worksheet, 2, 4, 2, 5, oss.str().c_str(), styles["headerRest"]);
wks.cell("A5").value() = "Score is " + data["score_name"].get<string>(); oss.str("");
oss.clear();
oss << setprecision(2) << fixed << data["duration"].get<float>() / 3600 << " h";
worksheet_merge_range(worksheet, 3, 4, 3, 5, oss.str().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 6, 3, 7, "Platform", styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 8, 3, 9, data["platform"].get<string>().c_str(), styles["headerRest"]);
worksheet_merge_range(worksheet, 2, 10, 2, 12, ("Random seeds: " + fromVector("seeds")).c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Stratified: " << (data["stratified"].get<bool>() ? "True" : "False");
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False");
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
} }
void ReportExcel::body() void ReportExcel::body()
{ {
auto head = vector<string>( auto head = vector<string>(
{ "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "Time", { "Dataset", "Samples", "Features", "Classes", "Nodes", "Edges", "States", "Score", "Score Std.", "St.", "Time",
"Time Std.", "Hyperparameters" }); "Time Std.", "Hyperparameters" });
int col = 1; int col = 0;
for (const auto& item : head) { for (const auto& item : head) {
wks.cell(8, col++).value() = item; writeString(5, col++, item, "bodyHeader");
} }
int row = 9; row = 6;
col = 1; col = 0;
int hypSize = 22;
json lastResult; json lastResult;
double totalScore = 0.0; double totalScore = 0.0;
string hyperparameters; string hyperparameters;
for (const auto& r : data["results"]) { for (const auto& r : data["results"]) {
wks.cell(row, col).value() = r["dataset"].get<string>(); writeString(row, col, r["dataset"].get<string>(), "text");
wks.cell(row, col + 1).value() = r["samples"].get<int>(); writeInt(row, col + 1, r["samples"].get<int>(), "ints");
wks.cell(row, col + 2).value() = r["features"].get<int>(); writeInt(row, col + 2, r["features"].get<int>(), "ints");
wks.cell(row, col + 3).value() = r["classes"].get<int>(); writeInt(row, col + 3, r["classes"].get<int>(), "ints");
wks.cell(row, col + 4).value() = r["nodes"].get<float>(); writeDouble(row, col + 4, r["nodes"].get<float>(), "floats");
wks.cell(row, col + 5).value() = r["leaves"].get<float>(); writeDouble(row, col + 5, r["leaves"].get<float>(), "floats");
wks.cell(row, col + 6).value() = r["depth"].get<float>(); writeDouble(row, col + 6, r["depth"].get<double>(), "floats");
wks.cell(row, col + 7).value() = r["score"].get<double>(); writeDouble(row, col + 7, r["score"].get<double>(), "result");
wks.cell(row, col + 8).value() = r["score_std"].get<double>(); writeDouble(row, col + 8, r["score_std"].get<double>(), "result");
wks.cell(row, col + 9).value() = r["time"].get<double>(); const string status = compareResult(r["dataset"].get<string>(), r["score"].get<double>());
wks.cell(row, col + 10).value() = r["time_std"].get<double>(); writeString(row, col + 9, status, "textCentered");
writeDouble(row, col + 10, r["time"].get<double>(), "time");
writeDouble(row, col + 11, r["time_std"].get<double>(), "time");
try { try {
hyperparameters = r["hyperparameters"].get<string>(); hyperparameters = r["hyperparameters"].get<string>();
} }
@@ -79,31 +274,60 @@ namespace platform {
oss << r["hyperparameters"]; oss << r["hyperparameters"];
hyperparameters = oss.str(); hyperparameters = oss.str();
} }
wks.cell(row, col + 11).value() = hyperparameters; if (hyperparameters.size() > hypSize) {
hypSize = hyperparameters.size();
}
writeString(row, col + 12, hyperparameters, "text");
lastResult = r; lastResult = r;
totalScore += r["score"].get<double>(); totalScore += r["score"].get<double>();
row++; row++;
} }
// Set the right column width of hyperparameters with the maximum length
worksheet_set_column(worksheet, 12, 12, hypSize + 5, NULL);
// Show totals if only one dataset is present in the result
if (data["results"].size() == 1) { if (data["results"].size() == 1) {
for (const string& group : { "scores_train", "scores_test", "times_train", "times_test" }) { for (const string& group : { "scores_train", "scores_test", "times_train", "times_test" }) {
row++; row++;
col = 1; col = 1;
wks.cell(row, col).value() = group; writeString(row, col, group, "text");
for (double item : lastResult[group]) { for (double item : lastResult[group]) {
wks.cell(row, ++col).value() = item; string style = group.find("scores") != string::npos ? "result" : "time";
writeDouble(row, ++col, item, style);
} }
} }
// Set width of columns to show those totals completely
worksheet_set_column(worksheet, 1, 1, 12, NULL);
for (int i = 2; i < 7; ++i) {
// doesn't work with from col to col, so...
worksheet_set_column(worksheet, i, i, 15, NULL);
}
} else { } else {
footer(totalScore, row); footer(totalScore, row);
} }
} }
void ReportExcel::showSummary()
{
for (const auto& item : summary) {
worksheet_write_string(worksheet, row + 2, 1, item.first.c_str(), styles["summaryStyle"]);
worksheet_write_number(worksheet, row + 2, 2, item.second, styles["summaryStyle"]);
worksheet_merge_range(worksheet, row + 2, 3, row + 2, 5, meaning.at(item.first).c_str(), styles["summaryStyle"]);
row += 1;
}
}
void ReportExcel::footer(double totalScore, int row) void ReportExcel::footer(double totalScore, int row)
{ {
showSummary();
row += 4 + summary.size();
auto score = data["score_name"].get<string>(); auto score = data["score_name"].get<string>();
if (score == BestResult::scoreName()) { if (score == BestScore::scoreName()) {
wks.cell(row + 2, 1).value() = score + " compared to " + BestResult::title() + " .: "; worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestScore::title() + " .:").c_str(), efectiveStyle("text"));
wks.cell(row + 2, 5).value() = totalScore / BestResult::score(); writeDouble(row, 6, totalScore / BestScore::score(), "result");
}
if (!getExistBestFile() && compare) {
worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]);
} }
} }
} }

View File

@@ -1,25 +1,42 @@
#ifndef REPORTEXCEL_H #ifndef REPORTEXCEL_H
#define REPORTEXCEL_H #define REPORTEXCEL_H
#include <OpenXLSX.hpp> #include<map>
#include "xlsxwriter.h"
#include "ReportBase.h" #include "ReportBase.h"
#include "Paths.h"
#include "Colors.h" #include "Colors.h"
namespace platform { namespace platform {
using namespace std; using namespace std;
using namespace OpenXLSX;
const int MAXLL = 128; const int MAXLL = 128;
class ReportExcel : public ReportBase{
class ReportExcel : public ReportBase {
public: public:
explicit ReportExcel(json data_) : ReportBase(data_) {createFile();}; explicit ReportExcel(json data_, bool compare, lxw_workbook* workbook);
virtual ~ReportExcel() {closeFile();}; lxw_workbook* getWorkbook();
private: private:
void writeString(int row, int col, const string& text, const string& style = "");
void writeInt(int row, int col, const int number, const string& style = "");
void writeDouble(int row, int col, const double number, const string& style = "");
void formatColumns();
void createFormats();
void setProperties();
void createFile(); void createFile();
void closeFile(); void closeFile();
XLDocument doc; lxw_workbook* workbook;
XLWorksheet wks; lxw_worksheet* worksheet;
map<string, lxw_format*> styles;
int row;
int normalSize; //font size for report body
uint32_t colorTitle;
uint32_t colorOdd;
uint32_t colorEven;
const string fileName = "some_results.xlsx";
void header() override; void header() override;
void body() override; void body() override;
void showSummary() override;
void footer(double totalScore, int row); void footer(double totalScore, int row);
void createStyle(const string& name, lxw_format* style, bool odd);
void addColor(lxw_format* style, bool odd);
lxw_format* efectiveStyle(const string& name);
}; };
}; };
#endif // !REPORTEXCEL_H #endif // !REPORTEXCEL_H

51
src/Platform/Result.cc Normal file
View File

@@ -0,0 +1,51 @@
#include <filesystem>
#include <fstream>
#include <sstream>
#include "Result.h"
#include "Colors.h"
#include "BestScore.h"
namespace platform {
/**
 * Load a result file and keep the fields needed to list it.
 *
 * The score is the sum of the scores of every dataset in the file, divided by
 * BestScore::score() when the score name matches BestScore::scoreName().
 *
 * @param path     directory of the result file
 * @param filename name of the result file inside path
 */
Result::Result(const string& path, const string& filename)
    : path(path), filename(filename)
{
    json contents = load();
    date = contents["date"].get<string>();
    double total = 0.0;
    for (const auto& entry : contents["results"]) {
        total += entry["score"].get<double>();
    }
    scoreName = contents["score_name"].get<string>();
    const bool normalize = scoreName == BestScore::scoreName();
    score = normalize ? total / BestScore::score() : total;
    title = contents["title"].get<string>();
    duration = contents["duration"].get<double>();
    model = contents["model"].get<string>();
    // A file holding a single dataset is not considered complete
    complete = contents["results"].size() > 1;
}
/**
 * Parse the result file as JSON.
 *
 * @return the parsed contents of path + "/" + filename
 * @throws invalid_argument when the file cannot be opened
 */
json Result::load() const
{
    const string fullName = path + "/" + filename;
    ifstream resultData(fullName);
    if (!resultData.is_open()) {
        throw invalid_argument("Unable to open result file. [" + fullName + "]");
    }
    return json::parse(resultData);
}
/**
 * Format the result as one fixed-width line: date, model, score name, score,
 * "C" or "P" depending on isComplete(), duration and title.
 */
string Result::to_string() const
{
    const char* completeness = isComplete() ? "C" : "P";
    stringstream line;
    line << date << " "
        << setw(12) << left << model << " "
        << setw(11) << left << scoreName << " "
        << right << setw(11) << setprecision(7) << fixed << score << " "
        << setw(1) << " " << completeness << " "
        << setw(9) << setprecision(3) << fixed << duration << " "
        << setw(50) << left << title << " ";
    return line.str();
}
}

37
src/Platform/Result.h Normal file
View File

@@ -0,0 +1,37 @@
#ifndef RESULT_H
#define RESULT_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
namespace platform {
using namespace std;
using json = nlohmann::json;
// Summary of one stored result file. The constructor (Result.cc) parses the
// file once and caches the fields exposed by the getters below.
class Result {
public:
// Loads <path>/<filename> and fills every cached field from its JSON content.
Result(const string& path, const string& filename);
// Re-reads the file and returns its whole JSON content; throws
// invalid_argument if the file cannot be opened.
json load() const;
// One fixed-width text line describing this result.
string to_string() const;
string getFilename() const { return filename; };
string getDate() const { return date; };
// Sum of the "score" values of the entries in "results" (divided by
// BestScore::score() when the score name equals BestScore::scoreName()).
double getScore() const { return score; };
string getTitle() const { return title; };
double getDuration() const { return duration; };
string getModel() const { return model; };
string getScoreName() const { return scoreName; };
// True when the file stores more than one entry in "results".
bool isComplete() const { return complete; };
private:
// Location of the file: directory and file name
string path;
string filename;
// Values cached from the JSON content by the constructor
string date;
double score;
string title;
double duration;
string model;
string scoreName;
bool complete;
};
};
#endif

View File

@@ -3,37 +3,9 @@
#include "Results.h" #include "Results.h"
#include "ReportConsole.h" #include "ReportConsole.h"
#include "ReportExcel.h" #include "ReportExcel.h"
#include "BestResult.h" #include "BestScore.h"
#include "Colors.h" #include "Colors.h"
namespace platform { namespace platform {
Result::Result(const string& path, const string& filename)
: path(path)
, filename(filename)
{
auto data = load();
date = data["date"];
score = 0;
for (const auto& result : data["results"]) {
score += result["score"].get<double>();
}
scoreName = data["score_name"];
if (scoreName == BestResult::scoreName()) {
score /= BestResult::score();
}
title = data["title"];
duration = data["duration"];
model = data["model"];
complete = data["results"].size() > 1;
}
json Result::load() const
{
ifstream resultData(path + "/" + filename);
if (resultData.is_open()) {
json data = json::parse(resultData);
return data;
}
throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
}
void Results::load() void Results::load()
{ {
using std::filesystem::directory_iterator; using std::filesystem::directory_iterator;
@@ -48,19 +20,9 @@ namespace platform {
files.push_back(result); files.push_back(result);
} }
} }
} if (max == 0) {
string Result::to_string() const max = files.size();
{ }
stringstream oss;
oss << date << " ";
oss << setw(12) << left << model << " ";
oss << setw(11) << left << scoreName << " ";
oss << right << setw(11) << setprecision(7) << fixed << score << " ";
auto completeString = isComplete() ? "C" : "P";
oss << setw(1) << " " << completeString << " ";
oss << setw(9) << setprecision(3) << fixed << duration << " ";
oss << setw(50) << left << title << " ";
return oss.str();
} }
void Results::show() const void Results::show() const
{ {
@@ -104,15 +66,17 @@ namespace platform {
cout << "Invalid index" << endl; cout << "Invalid index" << endl;
return -1; return -1;
} }
void Results::report(const int index, const bool excelReport) const void Results::report(const int index, const bool excelReport)
{ {
cout << Colors::YELLOW() << "Reporting " << files.at(index).getFilename() << endl; cout << Colors::YELLOW() << "Reporting " << files.at(index).getFilename() << endl;
auto data = files.at(index).load(); auto data = files.at(index).load();
if (excelReport) { if (excelReport) {
ReportExcel reporter(data); ReportExcel reporter(data, compare, workbook);
reporter.show(); reporter.show();
openExcel = true;
workbook = reporter.getWorkbook();
} else { } else {
ReportConsole reporter(data); ReportConsole reporter(data, compare);
reporter.show(); reporter.show();
} }
} }
@@ -124,7 +88,7 @@ namespace platform {
return; return;
} }
cout << Colors::YELLOW() << "Showing " << files.at(index).getFilename() << endl; cout << Colors::YELLOW() << "Showing " << files.at(index).getFilename() << endl;
ReportConsole reporter(data, idx); ReportConsole reporter(data, compare, idx);
reporter.show(); reporter.show();
} }
void Results::menu() void Results::menu()
@@ -132,9 +96,21 @@ namespace platform {
char option; char option;
int index; int index;
bool finished = false; bool finished = false;
string color, context;
string filename, line, options = "qldhsre"; string filename, line, options = "qldhsre";
while (!finished) { while (!finished) {
cout << Colors::RESET() << "Choose option (quit='q', list='l', delete='d', hide='h', sort='s', report='r', excel='e'): "; if (indexList) {
color = Colors::GREEN();
context = " (quit='q', list='l', delete='d', hide='h', sort='s', report='r', excel='e'): ";
options = "qldhsre";
} else {
color = Colors::MAGENTA();
context = " (quit='q', list='l'): ";
options = "ql";
}
cout << Colors::RESET() << color;
cout << "Choose option " << context;
getline(cin, line); getline(cin, line);
if (line.size() == 0) if (line.size() == 0)
continue; continue;
@@ -148,13 +124,15 @@ namespace platform {
if (all_of(line.begin(), line.end(), ::isdigit)) { if (all_of(line.begin(), line.end(), ::isdigit)) {
int idx = stoi(line); int idx = stoi(line);
if (indexList) { if (indexList) {
// The value is about the files list
index = idx; index = idx;
if (index >= 0 && index < files.size()) { if (index >= 0 && index < max) {
report(index, false); report(index, false);
indexList = false; indexList = false;
continue; continue;
} }
} else { } else {
// The value is about the result showed on screen
showIndex(index, idx); showIndex(index, idx);
continue; continue;
} }
@@ -281,7 +259,10 @@ namespace platform {
sortDate(); sortDate();
show(); show();
menu(); menu();
cout << "Done!" << endl; if (openExcel) {
workbook_close(workbook);
}
cout << Colors::RESET() << "Done!" << endl;
} }
} }

View File

@@ -1,40 +1,22 @@
#ifndef RESULTS_H #ifndef RESULTS_H
#define RESULTS_H #define RESULTS_H
#include "xlsxwriter.h"
#include <map> #include <map>
#include <vector> #include <vector>
#include <string> #include <string>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include "Result.h"
namespace platform { namespace platform {
using namespace std; using namespace std;
using json = nlohmann::json; using json = nlohmann::json;
class Result {
public:
Result(const string& path, const string& filename);
json load() const;
string to_string() const;
string getFilename() const { return filename; };
string getDate() const { return date; };
double getScore() const { return score; };
string getTitle() const { return title; };
double getDuration() const { return duration; };
string getModel() const { return model; };
string getScoreName() const { return scoreName; };
bool isComplete() const { return complete; };
private:
string path;
string filename;
string date;
double score;
string title;
double duration;
string model;
string scoreName;
bool complete;
};
class Results { class Results {
public: public:
Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial) : path(path), max(max), model(model), scoreName(score), complete(complete), partial(partial) { load(); }; Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial, bool compare) :
path(path), max(max), model(model), scoreName(score), complete(complete), partial(partial), compare(compare)
{
load();
};
void manage(); void manage();
private: private:
string path; string path;
@@ -44,10 +26,13 @@ namespace platform {
bool complete; bool complete;
bool partial; bool partial;
bool indexList = true; bool indexList = true;
bool openExcel = false;
bool compare;
lxw_workbook* workbook = NULL;
vector<Result> files; vector<Result> files;
void load(); // Loads the list of results void load(); // Loads the list of results
void show() const; void show() const;
void report(const int index, const bool excelReport) const; void report(const int index, const bool excelReport);
void showIndex(const int index, const int idx) const; void showIndex(const int index, const int idx) const;
int getIndex(const string& intent) const; int getIndex(const string& intent) const;
void menu(); void menu();

215
src/Platform/Statistics.cc Normal file
View File

@@ -0,0 +1,215 @@
#include "Statistics.h"
#include "Colors.h"
#include "Symbols.h"
#include <boost/math/distributions/chi_squared.hpp>
#include <boost/math/distributions/normal.hpp>
namespace platform {
// Stores copies of the model names, dataset names and score data together with
// the significance level, and caches how many models and datasets there are.
Statistics::Statistics(vector<string>& models, vector<string>& datasets, json data, double significance)
    : models(models)
    , datasets(datasets)
    , data(data)
    , significance(significance)
    , nModels(models.size())
    , nDatasets(datasets.size())
{
}
void Statistics::fit()
{
if (nModels < 3 || nDatasets < 3) {
cerr << "nModels: " << nModels << endl;
cerr << "nDatasets: " << nDatasets << endl;
throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
}
computeRanks();
// Set the control model as the one with the lowest average rank
controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
computeWTL();
fitted = true;
}
// Ranks the (name, score) pairs of ranksOrder: the highest score gets rank 1
// and entries with equal scores share the average of the ranks they span.
// ranksOrder is sorted in place (descending by score) as a side effect.
map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder)
{
    // Best score first, so that position 0 receives rank 1
    sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& lhs, const pair<string, double>& rhs) {
        return lhs.second > rhs.second;
        });
    map<string, float> ranks;
    // Provisional rank: 1-based position in the sorted list
    for (int pos = 0; pos < ranksOrder.size(); pos++) {
        ranks[ranksOrder[pos].first] = pos + 1.0;
    }
    const int total = static_cast<int>(ranksOrder.size());
    // Walk the list one group of equal scores at a time
    for (int first = 0, next = 0; first < total; first = next) {
        int rankSum = ranks[ranksOrder[first].first];
        for (next = first + 1; next < total && ranksOrder[first].second == ranksOrder[next].second; ++next) {
            rankSum += ranks[ranksOrder[next].first];
        }
        const int groupSize = next - first;
        if (groupSize < 2) {
            continue;
        }
        // Tied entries all receive the mean of their provisional ranks
        const float shared = (float)rankSum / groupSize;
        for (int member = first; member < next; member++) {
            ranks[ranksOrder[member].first] = shared;
        }
    }
    return ranks;
}
void Statistics::computeRanks()
{
map<string, float> ranksLine;
for (const auto& dataset : datasets) {
vector<pair<string, double>> ranksOrder;
for (const auto& model : models) {
double value = data[model].at(dataset).at(0).get<double>();
ranksOrder.push_back({ model, value });
}
// Assign the ranks
ranksLine = assignRanks(ranksOrder);
if (ranks.size() == 0) {
ranks = ranksLine;
} else {
for (const auto& rank : ranksLine) {
ranks[rank.first] += rank.second;
}
}
}
// Average the ranks
for (const auto& rank : ranks) {
ranks[rank.first] /= nDatasets;
}
}
// Fills wtl with the win/tie/loss counters of every model against the control
// model. A model scoring below the control counts as a win, an equal score as
// a tie and anything else as a loss.
void Statistics::computeWTL()
{
    // Every model starts with empty counters
    for (int m = 0; m < nModels; ++m) {
        wtl[m] = { 0, 0, 0 };
    }
    // The datasets to compare are the keys stored under the first entry of data
    json origin = data.begin().value();
    for (auto const& item : origin.items()) {
        const auto controlModel = models.at(controlIdx);
        const double controlValue = data[controlModel].at(item.key()).at(0).get<double>();
        for (int m = 0; m < nModels; ++m) {
            if (m != controlIdx) {
                const double value = data[models[m]].at(item.key()).at(0).get<double>();
                auto& counters = wtl[m];
                if (value < controlValue) {
                    counters.win++;
                } else if (value == controlValue) {
                    counters.tie++;
                } else {
                    counters.loss++;
                }
            }
        }
    }
}
// Post-hoc Holm test: compares every model with the control model (the one
// selected in fit()) and prints a table with the adjusted p-value, average
// rank, win/tie/loss counters and the accepted/rejected status of H0.
// friedmanResult only selects the colour of the banner (cyan when true,
// yellow otherwise).
void Statistics::postHocHolmTest(bool friedmanResult)
{
if (!fitted) {
fit();
}
// Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
// Post-hoc Holm test
// Calculate the p-value for the models paired with the control model
map<int, double> stats; // p-value of each model paired with the control model
boost::math::normal dist(0.0, 1.0);
// Denominator of the z statistic: sqrt(k * (k + 1) / (6 * N)) with k models and N datasets
double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
for (int i = 0; i < nModels; i++) {
if (i == controlIdx) {
// The control model is stored with a p-value of 0 and takes part in the ordering below
stats[i] = 0.0;
continue;
}
// Two-sided p-value of the difference of average ranks under a standard normal
double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
double p_value = (long double)2 * (1 - cdf(dist, z));
stats[i] = p_value;
}
// Sort the models by p-value
vector<pair<int, double>> statsOrder;
for (const auto& stat : stats) {
statsOrder.push_back({ stat.first, stat.second });
}
sort(statsOrder.begin(), statsOrder.end(), [](const pair<int, double>& a, const pair<int, double>& b) {
return a.second < b.second;
});
// Holm adjustment
for (int i = 0; i < statsOrder.size(); ++i) {
auto item = statsOrder.at(i);
// Adjusted p-value of the previous (smaller) entry, already updated in place
double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
// Entry i is multiplied by (nModels - i) and capped at 1
double p_value = min((double)1.0, item.second * (nModels - i));
// Keep the adjusted p-values non-decreasing along the ordering
p_value = max(before, p_value);
statsOrder[i] = { item.first, p_value };
}
auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
cout << color;
cout << " *************************************************************************************************************" << endl;
cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
cout << " Control model: " << models[controlIdx] << endl;
cout << " Model p-value rank win tie loss Status" << endl;
cout << " ============ ============ ========= === === ==== =============" << endl;
// sort ranks from lowest to highest
vector<pair<string, float>> ranksOrder;
for (const auto& rank : ranks) {
ranksOrder.push_back({ rank.first, rank.second });
}
sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, float>& a, const pair<string, float>& b) {
return a.second < b.second;
});
// One row per model, in ascending order of average rank; the control model has no row
for (const auto& item : ranksOrder) {
if (item.first == models.at(controlIdx)) {
continue;
}
// Position of the model inside models, which is the key used by stats and wtl
auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
double pvalue = 0.0;
for (const auto& stat : statsOrder) {
if (stat.first == idx) {
pvalue = stat.second;
}
}
// H0 is accepted when the adjusted p-value is greater than the significance level
auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
cout << " " << colorStatus << left << setw(12) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second;
cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss;
cout << " " << status << textStatus << endl;
}
cout << color << " *************************************************************************************************************" << endl;
cout << Colors::RESET();
}
bool Statistics::friedmanTest()
{
if (!fitted) {
fit();
}
// Friedman test
// Calculate the Friedman statistic
cout << Colors::BLUE() << endl;
cout << "***************************************************************************************************************" << endl;
cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
double degreesOfFreedom = nModels - 1.0;
double sumSquared = 0;
for (const auto& rank : ranks) {
sumSquared += pow(rank.second, 2);
}
// Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
cout << "Friedman statistic: " << friedmanQ << endl;
// Calculate the critical value
boost::math::chi_squared chiSquared(degreesOfFreedom);
long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
double criticalValue = quantile(chiSquared, 1 - significance);
std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
<< " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl;
bool result;
if (p_value < significance) {
cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl;
result = true;
} else {
cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl;
result = false;
}
cout << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << endl;
return result;
}
} // namespace platform

37
src/Platform/Statistics.h Normal file
View File

@@ -0,0 +1,37 @@
#ifndef STATISTICS_H
#define STATISTICS_H
#include <iostream>
#include <vector>
#include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json;
namespace platform {
// Win / tie / loss counters of one model.
// Every counter starts at zero, so a default-constructed WTL is safe to read
// and increment; aggregate initialisation ({ w, t, l }) keeps working.
struct WTL {
    int win = 0;
    int tie = 0;
    int loss = 0;
};
// Statistical comparison of several models over several datasets:
// Friedman test plus post-hoc Holm test against a control model.
class Statistics {
public:
// models / datasets: names used as keys of data; significance: level used
// by both tests to accept or reject H0 (0.05 by default).
Statistics(vector<string>& models, vector<string>& datasets, json data, double significance = 0.05);
// Prints the Friedman test report; returns true when H0 is rejected.
bool friedmanTest();
// Prints the post-hoc Holm report; friedmanResult selects the banner colour.
void postHocHolmTest(bool friedmanResult);
private:
// Computes ranks, control model and win/tie/loss counters; both tests call
// it on first use.
void fit();
void computeRanks();
void computeWTL();
vector<string> models;
vector<string> datasets;
// Indexed as data[model][dataset][0] to obtain a score
json data;
double significance;
// Set to true once fit() has completed
bool fitted = false;
int nModels = 0;
int nDatasets = 0;
// Index, inside models, of the control model
int controlIdx = 0;
// Win/tie/loss counters keyed by the model index inside models
map<int, WTL> wtl;
// Average rank of every model, keyed by model name
map<string, float> ranks;
};
}
#endif // !STATISTICS_H

18
src/Platform/Symbols.h Normal file
View File

@@ -0,0 +1,18 @@
#ifndef SYMBOLS_H
#define SYMBOLS_H
#include <string>
using namespace std;
namespace platform {
// Unicode symbols used to decorate the console reports. Each constant holds
// the character written as a universal character name.
class Symbols {
public:
inline static const string check_mark{ "\u2714" }; // U+2714 heavy check mark
inline static const string exclamation{ "\u2757" }; // U+2757 heavy exclamation mark symbol
inline static const string black_star{ "\u2605" }; // U+2605 black star
inline static const string cross{ "\u2717" }; // U+2717 ballot x
inline static const string upward_arrow{ "\u27B6" }; // U+27B6 black-feathered north east arrow
inline static const string down_arrow{ "\u27B4" }; // U+27B4 black-feathered south east arrow
// Aliases: equal_best reuses the check mark and better_best the black star
inline static const string equal_best{ check_mark };
inline static const string better_best{ black_star };
};
}
#endif // !SYMBOLS_H

71
src/Platform/best.cc Normal file
View File

@@ -0,0 +1,71 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "Paths.h"
#include "BestResults.h"
#include "Colors.h"
using namespace std;
// Declares and parses the command line of the "best" tool.
// Options: -m/--model and -s/--score (both mandatory, empty by default) and
// the flags --build, --report and --friedman (false by default).
// On a parsing error, or when model or score are missing, the error and the
// usage text are written to cerr and the process exits with status 1.
// Returns the parser so the caller can read the parsed values.
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
    argparse::ArgumentParser program("best");
    program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model (any for all models)");
    program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
    program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
    program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
    program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
    try {
        program.parse_args(argc, argv);
        auto model = program.get<string>("model");
        auto score = program.get<string>("score");
        // The defaults are empty strings, so an empty value means "not supplied"
        if (model == "" || score == "") {
            throw runtime_error("Model and score name must be supplied");
        }
    }
    catch (const exception& err) {
        cerr << err.what() << endl;
        cerr << program;
        exit(1);
    }
    return program;
}
// Entry point of the "best" tool: builds and/or reports the best results of a
// single model, or of every model when the model argument is "any".
int main(int argc, char** argv)
{
    auto program = manageArguments(argc, argv);
    auto model = program.get<string>("model");
    auto score = program.get<string>("score");
    auto build = program.get<bool>("build");
    auto report = program.get<bool>("report");
    auto friedman = program.get<bool>("friedman");
    const bool allModels = model == "any";
    // The Friedman test is only accepted together with the "any" model
    if (friedman && !allModels) {
        cerr << "Friedman test can only be used with all models" << endl;
        cerr << program;
        exit(1);
    }
    // At least one action has to be requested
    if (!(report || build)) {
        cerr << "Either build, report or both, have to be selected to do anything!" << endl;
        cerr << program;
        exit(1);
    }
    auto results = platform::BestResults(platform::Paths::results(), score, model, friedman);
    if (build && allModels) {
        results.buildAll();
    } else if (build) {
        string fileName = results.build();
        cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl;
    }
    if (report && allModels) {
        results.reportAll();
    } else if (report) {
        results.reportSingle();
    }
    return 0;
}

View File

@@ -87,7 +87,7 @@ int main(int argc, char** argv)
auto stratified = program.get<bool>("stratified"); auto stratified = program.get<bool>("stratified");
auto n_folds = program.get<int>("folds"); auto n_folds = program.get<int>("folds");
auto seeds = program.get<vector<int>>("seeds"); auto seeds = program.get<vector<int>>("seeds");
auto hyperparameters =program.get<string>("hyperparameters"); auto hyperparameters = program.get<string>("hyperparameters");
vector<string> filesToTest; vector<string> filesToTest;
auto datasets = platform::Datasets(path, true, platform::ARFF); auto datasets = platform::Datasets(path, true, platform::ARFF);
auto title = program.get<string>("title"); auto title = program.get<string>("title");
@@ -102,7 +102,7 @@ int main(int argc, char** argv)
} }
filesToTest.push_back(file_name); filesToTest.push_back(file_name);
} else { } else {
filesToTest = platform::Datasets(path, true, platform::ARFF).getNames(); filesToTest = datasets.getNames();
saveResults = true; saveResults = true;
} }
/* /*

View File

@@ -14,6 +14,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied"); program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true); program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true);
program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true); program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true);
program.add_argument("--compare").help("Compare with best results").default_value(false).implicit_value(true);
try { try {
program.parse_args(argc, argv); program.parse_args(argc, argv);
auto number = program.get<int>("number"); auto number = program.get<int>("number");
@@ -24,6 +25,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
auto score = program.get<string>("score"); auto score = program.get<string>("score");
auto complete = program.get<bool>("complete"); auto complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial"); auto partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare");
} }
catch (const exception& err) { catch (const exception& err) {
cerr << err.what() << endl; cerr << err.what() << endl;
@@ -41,9 +43,10 @@ int main(int argc, char** argv)
auto score = program.get<string>("score"); auto score = program.get<string>("score");
auto complete = program.get<bool>("complete"); auto complete = program.get<bool>("complete");
auto partial = program.get<bool>("partial"); auto partial = program.get<bool>("partial");
auto compare = program.get<bool>("compare");
if (complete) if (complete)
partial = false; partial = false;
auto results = platform::Results(platform::Paths::results(), number, model, score, complete, partial); auto results = platform::Results(platform::Paths::results(), number, model, score, complete, partial, compare);
results.manage(); results.manage();
return 0; return 0;
} }

View File

@@ -8,7 +8,6 @@
#include "ArffFiles.h" #include "ArffFiles.h"
#include "CPPFImdlp.h" #include "CPPFImdlp.h"
using namespace std; using namespace std;
const string PATH = "../../data/";
bool file_exists(const std::string& name); bool file_exists(const std::string& name);
vector<string> split(const string& text, char delimiter); vector<string> split(const string& text, char delimiter);