Add boost info to README

Enhance output info in Statistics
Remove duplicated code in BestResults
2023-09-28 09:44:33 +02:00 · 2023-09-28 01:27:18 +02:00 · 2023-09-28 00:59:34 +02:00 · 2023-09-28 00:45:15 +02:00 · 2023-09-27 19:11:47 +02:00 · 2023-09-27 18:34:16 +02:00
24 changed files with 1007 additions and 278 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -37,6 +37,20 @@
            ],
            "cwd": "/Users/rmontanana/Code/discretizbench",
        },
+        {
+            "type": "lldb",
+            "request": "launch",
+            "name": "best",
+            "program": "${workspaceFolder}/build/src/Platform/best",
+            "args": [
+                "-m",
+                "BoostAODE",
+                "-s",
+                "accuracy",
+                "--build",
+            ],
+            "cwd": "/Users/rmontanana/Code/discretizbench",
+        },
        {
            "type": "lldb",
            "request": "launch",
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,17 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 option(ENABLE_CLANG_TIDY "Enable to add clang tidy."              OFF)
 option(ENABLE_TESTING "Unit testing build"                        OFF)
 option(CODE_COVERAGE "Collect coverage from test library"         OFF)
+
+# Boost Library
+# No COMPONENTS are requested, so only the Boost headers are located.
+set(Boost_USE_STATIC_LIBS OFF)
+set(Boost_USE_MULTITHREADED ON)
+set(Boost_USE_STATIC_RUNTIME OFF)
+# REQUIRED already aborts the configuration when Boost is missing: no Boost_FOUND check needed.
+find_package(Boost 1.78.0 REQUIRED)
+message(STATUS "Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
+include_directories(${Boost_INCLUDE_DIRS})
+
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
 # CMakes modules
 # --------------
@@ -54,6 +65,7 @@ endif (ENABLE_CLANG_TIDY)
 add_git_submodule("lib/mdlp")
 add_git_submodule("lib/argparse")
 add_git_submodule("lib/json")
+find_library(XLSXWRITER_LIB NAMES xlsxwriter libxlsxwriter.dylib PATHS /usr/local/lib) # bare name: CMake adds the platform prefix/suffix

 # Subdirectories
 # --------------
--- a/7
+++ b/7
@@ -19,13 +19,14 @@ copy: ## Copy binary files to selected folder
 	@cp build/src/Platform/main $(dest)
 	@cp build/src/Platform/list $(dest)
 	@cp build/src/Platform/manage $(dest)
+	@cp build/src/Platform/best $(dest)
 	@echo ">>> Done"

 dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
 	cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png

 build: ## Build the main and BayesNetSample
-	cmake --build build -t main -t BayesNetSample -t manage -t list -j 32
+	cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32

 clean: ## Clean the debug info
 	@echo ">>> Cleaning Debug BayesNet ...";
@@ -40,7 +41,7 @@ debug: ## Build a debug version of the project
 	@if [ -d ./build ]; then rm -rf ./build; fi
 	@mkdir build; 
 	cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \
-	cmake --build build -t main -t BayesNetSample -t manage -t list  unit_tests -j 32;
+	cmake --build build -t main -t BayesNetSample -t manage -t list -t best -t unit_tests -j 32;
 	@echo ">>> Done";

 release: ## Build a Release version of the project
@@ -48,7 +49,7 @@ release: ## Build a Release version of the project
 	@if [ -d ./build ]; then rm -rf ./build; fi
 	@mkdir build; 
 	cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \
-	cmake --build build -t main -t BayesNetSample -t manage -t list -j 32;
+	cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32;
 	@echo ">>> Done";	

 test: ## Run tests
--- a/README.md
+++ b/README.md
@@ -4,10 +4,14 @@ Bayesian Network Classifier with libtorch from scratch

 ## 0. Setup

-### libxlswriter
-
 Before compiling BayesNet.

+### boost library
+
+[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
+
+### libxlsxwriter
+
 ```bash
 cd lib/libxlsxwriter
 make
--- a/sample/sample.cc
+++ b/sample/sample.cc
@@ -104,180 +104,180 @@ int main(int argc, char** argv)
    for (int i = 0; i < 10; i++) {
        cout << weights_.index({ i }).item<double>() << endl;
    }
-    // map<string, bool> datasets = {
-    //         {"diabetes",           true},
-    //         {"ecoli",              true},
-    //         {"glass",              true},
-    //         {"iris",               true},
-    //         {"kdd_JapaneseVowels", false},
-    //         {"letter",             true},
-    //         {"liver-disorders",    true},
-    //         {"mfeat-factors",      true},
-    // };
-    // auto valid_datasets = vector<string>();
-    // transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
-    //     [](const pair<string, bool>& pair) { return pair.first; });
-    // argparse::ArgumentParser program("BayesNetSample");
-    // program.add_argument("-d", "--dataset")
-    //     .help("Dataset file name")
-    //     .action([valid_datasets](const std::string& value) {
-    //     if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
-    //         return value;
-    //     }
-    //     throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
-    //         }
-    // );
-    // program.add_argument("-p", "--path")
-    //     .help(" folder where the data files are located, default")
-    //     .default_value(string{ PATH }
-    // );
-    // program.add_argument("-m", "--model")
-    //     .help("Model to use " + platform::Models::instance()->toString())
-    //     .action([](const std::string& value) {
-    //     static const vector<string> choices = platform::Models::instance()->getNames();
-    //     if (find(choices.begin(), choices.end(), value) != choices.end()) {
-    //         return value;
-    //     }
-    //     throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
-    //         }
-    // );
-    // program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
-    // program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
-    // program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
-    // program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
-    // program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
-    //     try {
-    //         auto k = stoi(value);
-    //         if (k < 2) {
-    //             throw runtime_error("Number of folds must be greater than 1");
-    //         }
-    //         return k;
-    //     }
-    //     catch (const runtime_error& err) {
-    //         throw runtime_error(err.what());
-    //     }
-    //     catch (...) {
-    //         throw runtime_error("Number of folds must be an integer");
-    //     }});
-    // program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
-    // bool class_last, stratified, tensors, dump_cpt;
-    // string model_name, file_name, path, complete_file_name;
-    // int nFolds, seed;
-    // try {
-    //     program.parse_args(argc, argv);
-    //     file_name = program.get<string>("dataset");
-    //     path = program.get<string>("path");
-    //     model_name = program.get<string>("model");
-    //     complete_file_name = path + file_name + ".arff";
-    //     stratified = program.get<bool>("stratified");
-    //     tensors = program.get<bool>("tensors");
-    //     nFolds = program.get<int>("folds");
-    //     seed = program.get<int>("seed");
-    //     dump_cpt = program.get<bool>("dumpcpt");
-    //     class_last = datasets[file_name];
-    //     if (!file_exists(complete_file_name)) {
-    //         throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
-    //     }
-    // }
-    // catch (const exception& err) {
-    //     cerr << err.what() << endl;
-    //     cerr << program;
-    //     exit(1);
-    // }
+    map<string, bool> datasets = {
+            {"diabetes",           true},
+            {"ecoli",              true},
+            {"glass",              true},
+            {"iris",               true},
+            {"kdd_JapaneseVowels", false},
+            {"letter",             true},
+            {"liver-disorders",    true},
+            {"mfeat-factors",      true},
+    };
+    auto valid_datasets = vector<string>();
+    transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
+        [](const pair<string, bool>& pair) { return pair.first; });
+    argparse::ArgumentParser program("BayesNetSample");
+    program.add_argument("-d", "--dataset")
+        .help("Dataset file name")
+        .action([valid_datasets](const std::string& value) {
+        if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
+            return value;
+        }
+        throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
+            }
+    );
+    program.add_argument("-p", "--path")
+        .help(" folder where the data files are located, default")
+        .default_value(string{ PATH }
+    );
+    program.add_argument("-m", "--model")
+        .help("Model to use " + platform::Models::instance()->toString())
+        .action([](const std::string& value) {
+        static const vector<string> choices = platform::Models::instance()->getNames();
+        if (find(choices.begin(), choices.end(), value) != choices.end()) {
+            return value;
+        }
+        throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
+            }
+    );
+    program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
+    program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
+    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
+    program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
+    program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
+        try {
+            auto k = stoi(value);
+            if (k < 2) {
+                throw runtime_error("Number of folds must be greater than 1");
+            }
+            return k;
+        }
+        catch (const runtime_error& err) {
+            throw runtime_error(err.what());
+        }
+        catch (...) {
+            throw runtime_error("Number of folds must be an integer");
+        }});
+    program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
+    bool class_last, stratified, tensors, dump_cpt;
+    string model_name, file_name, path, complete_file_name;
+    int nFolds, seed;
+    try {
+        program.parse_args(argc, argv);
+        file_name = program.get<string>("dataset");
+        path = program.get<string>("path");
+        model_name = program.get<string>("model");
+        complete_file_name = path + file_name + ".arff";
+        stratified = program.get<bool>("stratified");
+        tensors = program.get<bool>("tensors");
+        nFolds = program.get<int>("folds");
+        seed = program.get<int>("seed");
+        dump_cpt = program.get<bool>("dumpcpt");
+        class_last = datasets[file_name];
+        if (!file_exists(complete_file_name)) {
+            throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
+        }
+    }
+    catch (const exception& err) {
+        cerr << err.what() << endl;
+        cerr << program;
+        exit(1);
+    }

    /*
    * Begin Processing
    */
-    // auto handler = ArffFiles();
-    // handler.load(complete_file_name, class_last);
-    // // Get Dataset X, y
-    // vector<mdlp::samples_t>& X = handler.getX();
-    // mdlp::labels_t& y = handler.getY();
-    // // Get className & Features
-    // auto className = handler.getClassName();
-    // vector<string> features;
-    // auto attributes = handler.getAttributes();
-    // transform(attributes.begin(), attributes.end(), back_inserter(features),
-    //     [](const pair<string, string>& item) { return item.first; });
-    // // Discretize Dataset
-    // auto [Xd, maxes] = discretize(X, y, features);
-    // maxes[className] = *max_element(y.begin(), y.end()) + 1;
-    // map<string, vector<int>> states;
-    // for (auto feature : features) {
-    //     states[feature] = vector<int>(maxes[feature]);
-    // }
-    // states[className] = vector<int>(maxes[className]);
-    // auto clf = platform::Models::instance()->create(model_name);
-    // clf->fit(Xd, y, features, className, states);
-    // if (dump_cpt) {
-    //     cout << "--- CPT Tables ---" << endl;
-    //     clf->dump_cpt();
-    // }
-    // auto lines = clf->show();
-    // for (auto line : lines) {
-    //     cout << line << endl;
-    // }
-    // cout << "--- Topological Order ---" << endl;
-    // auto order = clf->topological_order();
-    // for (auto name : order) {
-    //     cout << name << ", ";
-    // }
-    // cout << "end." << endl;
-    // auto score = clf->score(Xd, y);
-    // cout << "Score: " << score << endl;
-    // auto graph = clf->graph();
-    // auto dot_file = model_name + "_" + file_name;
-    // ofstream file(dot_file + ".dot");
-    // file << graph;
-    // file.close();
-    // cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
-    // cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
-    // string stratified_string = stratified ? " Stratified" : "";
-    // cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
-    // cout << "==========================================" << endl;
-    // torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
-    // torch::Tensor yt = torch::tensor(y, torch::kInt32);
-    // for (int i = 0; i < features.size(); ++i) {
-    //     Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
-    // }
-    // float total_score = 0, total_score_train = 0, score_train, score_test;
-    // platform::Fold* fold;
-    // if (stratified)
-    //     fold = new platform::StratifiedKFold(nFolds, y, seed);
-    // else
-    //     fold = new platform::KFold(nFolds, y.size(), seed);
-    // for (auto i = 0; i < nFolds; ++i) {
-    //     auto [train, test] = fold->getFold(i);
-    //     cout << "Fold: " << i + 1 << endl;
-    //     if (tensors) {
-    //         auto ttrain = torch::tensor(train, torch::kInt64);
-    //         auto ttest = torch::tensor(test, torch::kInt64);
-    //         torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
-    //         torch::Tensor ytraint = yt.index({ ttrain });
-    //         torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
-    //         torch::Tensor ytestt = yt.index({ ttest });
-    //         clf->fit(Xtraint, ytraint, features, className, states);
-    //         auto temp = clf->predict(Xtraint);
-    //         score_train = clf->score(Xtraint, ytraint);
-    //         score_test = clf->score(Xtestt, ytestt);
-    //     } else {
-    //         auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
-    //         auto [Xtest, ytest] = extract_indices(test, Xd, y);
-    //         clf->fit(Xtrain, ytrain, features, className, states);
-    //         score_train = clf->score(Xtrain, ytrain);
-    //         score_test = clf->score(Xtest, ytest);
-    //     }
-    //     if (dump_cpt) {
-    //         cout << "--- CPT Tables ---" << endl;
-    //         clf->dump_cpt();
-    //     }
-    //     total_score_train += score_train;
-    //     total_score += score_test;
-    //     cout << "Score Train: " << score_train << endl;
-    //     cout << "Score Test : " << score_test << endl;
-    //     cout << "-------------------------------------------------------------------------------" << endl;
-    // }
-    // cout << "**********************************************************************************" << endl;
-    // cout << "Average Score Train: " << total_score_train / nFolds << endl;
-    // cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
+    auto handler = ArffFiles();
+    handler.load(complete_file_name, class_last);
+    // Get Dataset X, y
+    vector<mdlp::samples_t>& X = handler.getX();
+    mdlp::labels_t& y = handler.getY();
+    // Get className & Features
+    auto className = handler.getClassName();
+    vector<string> features;
+    auto attributes = handler.getAttributes();
+    transform(attributes.begin(), attributes.end(), back_inserter(features),
+        [](const pair<string, string>& item) { return item.first; });
+    // Discretize Dataset
+    auto [Xd, maxes] = discretize(X, y, features);
+    maxes[className] = *max_element(y.begin(), y.end()) + 1;
+    map<string, vector<int>> states;
+    for (auto feature : features) {
+        states[feature] = vector<int>(maxes[feature]);
+    }
+    states[className] = vector<int>(maxes[className]);
+    auto clf = platform::Models::instance()->create(model_name);
+    clf->fit(Xd, y, features, className, states);
+    if (dump_cpt) {
+        cout << "--- CPT Tables ---" << endl;
+        clf->dump_cpt();
+    }
+    auto lines = clf->show();
+    for (auto line : lines) {
+        cout << line << endl;
+    }
+    cout << "--- Topological Order ---" << endl;
+    auto order = clf->topological_order();
+    for (auto name : order) {
+        cout << name << ", ";
+    }
+    cout << "end." << endl;
+    auto score = clf->score(Xd, y);
+    cout << "Score: " << score << endl;
+    auto graph = clf->graph();
+    auto dot_file = model_name + "_" + file_name;
+    ofstream file(dot_file + ".dot");
+    file << graph;
+    file.close();
+    cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
+    cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
+    string stratified_string = stratified ? " Stratified" : "";
+    cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
+    cout << "==========================================" << endl;
+    torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
+    torch::Tensor yt = torch::tensor(y, torch::kInt32);
+    for (int i = 0; i < features.size(); ++i) {
+        Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
+    }
+    float total_score = 0, total_score_train = 0, score_train, score_test;
+    platform::Fold* fold;
+    if (stratified)
+        fold = new platform::StratifiedKFold(nFolds, y, seed);
+    else
+        fold = new platform::KFold(nFolds, y.size(), seed);
+    for (auto i = 0; i < nFolds; ++i) {
+        auto [train, test] = fold->getFold(i);
+        cout << "Fold: " << i + 1 << endl;
+        if (tensors) {
+            auto ttrain = torch::tensor(train, torch::kInt64);
+            auto ttest = torch::tensor(test, torch::kInt64);
+            torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
+            torch::Tensor ytraint = yt.index({ ttrain });
+            torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
+            torch::Tensor ytestt = yt.index({ ttest });
+            clf->fit(Xtraint, ytraint, features, className, states);
+            auto temp = clf->predict(Xtraint);
+            score_train = clf->score(Xtraint, ytraint);
+            score_test = clf->score(Xtestt, ytestt);
+        } else {
+            auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
+            auto [Xtest, ytest] = extract_indices(test, Xd, y);
+            clf->fit(Xtrain, ytrain, features, className, states);
+            score_train = clf->score(Xtrain, ytrain);
+            score_test = clf->score(Xtest, ytest);
+        }
+        if (dump_cpt) {
+            cout << "--- CPT Tables ---" << endl;
+            clf->dump_cpt();
+        }
+        total_score_train += score_train;
+        total_score += score_test;
+        cout << "Score Train: " << score_train << endl;
+        cout << "Score Test : " << score_test << endl;
+        cout << "-------------------------------------------------------------------------------" << endl;
+    }
+    cout << "**********************************************************************************" << endl;
+    cout << "Average Score Train: " << total_score_train / nFolds << endl;
+    cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
 }
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -0,0 +1,292 @@
+#include <chrono>
+#include <ctime>
+#include <filesystem>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include "BestResults.h"
+#include "Result.h"
+#include "Colors.h"
+#include "Statistics.h"
+namespace fs = std::filesystem;
+// ftime_to_string: renders a time point of any clock as "YYYY-MM-DD HH:MM" in UTC.
+// Code taken from https://stackoverflow.com/a/58237530/1389271
+template <typename TP>
+std::string ftime_to_string(TP tp)
+{
+    namespace chr = std::chrono;
+    // Re-base the time point on system_clock by carrying over its distance from "now"
+    const auto elapsed = tp - TP::clock::now();
+    const auto onSystemClock = chr::time_point_cast<chr::system_clock::duration>(elapsed + chr::system_clock::now());
+    const std::time_t seconds = chr::system_clock::to_time_t(onSystemClock);
+    std::ostringstream out;
+    out << std::put_time(std::gmtime(&seconds), "%Y-%m-%d %H:%M");
+    return out.str();
+}
+namespace platform {
+
+    // Collects, for every dataset, the highest score found in the matching result files
+    // and writes { dataset: [score, hyperparameters, file] } to the best results file.
+    // Returns the name of the file written; exits if nothing can be read or written.
+    string BestResults::build()
+    {
+        auto files = loadResultFiles();
+        if (files.empty()) {
+            cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
+            exit(1);
+        }
+        json bests;
+        for (const auto& file : files) {
+            auto data = Result(path, file).load();
+            for (auto const& item : data.at("results")) {
+                const auto dataset = item.at("dataset").get<string>();
+                const auto value = item.at("score").get<double>();
+                // Strictly greater: on ties the first file processed keeps the record
+                if (!bests.contains(dataset) || value > bests[dataset].at(0).get<double>()) {
+                    bests[dataset] = { value, item.at("hyperparameters"), file };
+                }
+            }
+        }
+        string bestFileName = path + bestResultFile();
+        if (ifstream(bestFileName).is_open()) {
+            cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << endl;
+        }
+        ofstream file(bestFileName);
+        if (!file.is_open()) {
+            cerr << Colors::MAGENTA() << "Unable to write file " << bestFileName << Colors::RESET() << endl;
+            exit(1);
+        }
+        file << bests;
+        file.close();
+        return bestFileName;
+    }
+
+    string BestResults::bestResultFile() // file name only; callers prepend `path`
+    {
+        return "best_results_" + score + "_" + model + ".json";
+    }
+
+    // Extracts { model, score } from a result file name such as
+    // results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
+    // Returns two empty strings when the name has fewer than three '_' separators.
+    pair<string, string> getModelScore(string name)
+    {
+        auto pos = name.find("_");
+        auto pos2 = pos == string::npos ? pos : name.find("_", pos + 1);
+        auto pos3 = pos2 == string::npos ? pos2 : name.find("_", pos2 + 1);
+        if (pos3 == string::npos) return { "", "" };
+        return { name.substr(pos2 + 1, pos3 - pos2 - 1), name.substr(pos + 1, pos2 - pos - 1) };
+    }
+
+    // Lists the result files in `path` that match the requested score and model.
+    // A model of "any" accepts every model found for that score.
+    vector<string> BestResults::loadResultFiles()
+    {
+        vector<string> selected;
+        for (const auto& entry : fs::directory_iterator(path)) {
+            const auto name = entry.path().filename().string();
+            const bool isResult = name.rfind("results_", 0) == 0 && name.find(".json") != string::npos;
+            if (!isResult)
+                continue;
+            const auto [fileModel, fileScore] = getModelScore(name);
+            if (fileScore == score && (model == "any" || fileModel == model))
+                selected.push_back(name);
+        }
+        return selected;
+    }
+
+    // Parses the JSON document stored in fileName; throws invalid_argument if it cannot be opened.
+    json BestResults::loadFile(const string& fileName)
+    {
+        ifstream resultData(fileName);
+        if (!resultData.is_open()) {
+            throw invalid_argument("Unable to open result file. [" + fileName + "]");
+        }
+        return json::parse(resultData);
+    }
+    // Returns the distinct model names, sorted, of the matching result files.
+    // Exits the program when no result file is found.
+    vector<string> BestResults::getModels()
+    {
+        const auto files = loadResultFiles();
+        if (files.empty()) {
+            cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
+            exit(1);
+        }
+        // A set drops the duplicates and keeps the names sorted
+        set<string> models;
+        for (const auto& file : files) {
+            // Only the model part of the file name is needed here
+            const auto [fileModel, fileScore] = getModelScore(file);
+            models.insert(fileModel);
+        }
+        vector<string> result(models.begin(), models.end());
+        return result;
+    }
+
+    // Builds the best results file of every model found, then resets the selection to "any".
+    void BestResults::buildAll()
+    {
+        for (const auto& name : getModels()) {
+            cout << "Building best results for model: " << name << endl;
+            model = name;  // build() reads the member to choose files and output name
+            build();
+        }
+        model = "any";
+    }
+
+    // Prints the content of the best results file of the current model and score:
+    // one row per dataset with its score, the file it comes from and the hyperparameters.
+    void BestResults::reportSingle()
+    {
+        string bestFileName = path + bestResultFile();
+        FILE* fileTest = fopen(bestFileName.c_str(), "r");
+        if (fileTest == nullptr) {
+            cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
+            exit(1);
+        }
+        fclose(fileTest);
+        auto date = ftime_to_string(filesystem::last_write_time(bestFileName));
+        auto data = loadFile(bestFileName);
+        cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl;
+        cout << "--------------------------------------------------------" << endl;
+        cout << Colors::GREEN() << " #  Dataset                   Score       File                                                               Hyperparameters" << endl;
+        cout << "=== ========================= =========== ================================================================== ================================================= " << endl;
+        int row = 0;
+        for (auto const& item : data.items()) {
+            // Alternate the row color to make the listing easier to read
+            auto color = row % 2 == 0 ? Colors::BLUE() : Colors::CYAN();
+            cout << color << setw(3) << fixed << right << row++ << " ";
+            cout << setw(25) << left << item.key() << " ";
+            cout << setw(11) << setprecision(9) << fixed << item.value().at(0).get<double>() << " ";
+            cout << setw(66) << item.value().at(2).get<string>() << " ";
+            cout << item.value().at(1) << " " << endl;
+        }
+    }
+    // Loads the best results file of every model into { model: data, ..., "dateTable": date }.
+    // "dateTable" is the oldest modification time among the files read. Exits if a file is
+    // missing or if the models do not share the same number of datasets.
+    json BestResults::buildTableResults(vector<string> models)
+    {
+        size_t numberOfDatasets = 0;
+        bool first = true;
+        json table;
+        // Start from the largest representable time so any real write time replaces it
+        auto oldestDate = filesystem::file_time_type::max();
+        for (const auto& model : models) {
+            this->model = model;
+            string bestFileName = path + bestResultFile();
+            if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
+                fclose(fileTest);
+            } else {
+                cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
+                exit(1);
+            }
+            auto dateWrite = filesystem::last_write_time(bestFileName);
+            if (dateWrite < oldestDate) {
+                oldestDate = dateWrite;
+            }
+            auto data = loadFile(bestFileName);
+            if (first) {
+                // The first file fixes the number of datasets every other model must match
+                first = false;
+                numberOfDatasets = data.size();
+            } else if (numberOfDatasets != data.size()) {
+                cerr << Colors::MAGENTA() << "The number of datasets in the best results files is not the same for all the models." << Colors::RESET() << endl;
+                exit(1);
+            }
+            table[model] = data;
+        }
+        table["dateTable"] = ftime_to_string(oldestDate);
+        return table;
+    }
+
+    // Prints one row per dataset and one column per model with the scores stored in table,
+    // showing in red the highest score of every row and the highest total.
+    void BestResults::printTableResults(vector<string> models, json table)
+    {
+        if (models.empty()) {
+            return; // without models there are no columns and no entry to read the datasets from
+        }
+        cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
+        cout << "------------------------------------------------" << endl;
+        cout << Colors::GREEN() << " #  Dataset                   ";
+        for (const auto& model : models) {
+            cout << setw(12) << left << model << " ";
+        }
+        cout << endl;
+        cout << "=== ========================= ";
+        for (const auto& model : models) {
+            cout << "============ ";
+        }
+        cout << endl;
+        auto i = 0;
+        bool odd = true;
+        map<string, double> totals;
+        for (const auto& model : models) {
+            totals[model] = 0.0;
+        }
+        // Read the dataset names from a model entry. table.begin() is not safe for this because
+        // json objects are ordered by key, so the "dateTable" string could be the first element.
+        const json& origin = table.at(models.front());
+        for (auto const& item : origin.items()) {
+            auto color = odd ? Colors::BLUE() : Colors::CYAN();
+            cout << color << setw(3) << fixed << right << i++ << " ";
+            cout << setw(25) << left << item.key() << " ";
+            double maxValue = 0;
+            // Find out the max value for this dataset
+            for (const auto& model : models) {
+                double value = table[model].at(item.key()).at(0).get<double>();
+                if (value > maxValue) {
+                    maxValue = value;
+                }
+            }
+            // Print the row with red colors on max values
+            for (const auto& model : models) {
+                double value = table[model].at(item.key()).at(0).get<double>();
+                string efectiveColor = value == maxValue ? Colors::RED() : color;
+                totals[model] += value;
+                cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
+            }
+            cout << endl;
+            odd = !odd;
+        }
+        cout << Colors::GREEN() << "=== ========================= ";
+        for (const auto& model : models) {
+            cout << "============ ";
+        }
+        cout << endl;
+        cout << Colors::GREEN() << setw(30) << "    Totals...................";
+        double max = 0.0;
+        for (const auto& total : totals) {
+            if (total.second > max) {
+                max = total.second;
+            }
+        }
+        for (const auto& model : models) {
+            string efectiveColor = totals[model] == max ? Colors::RED() : Colors::GREEN();
+            cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " ";
+        }
+        cout << endl;
+    }
+    // Prints the best results of every model; if friedman is set, runs the Friedman and Holm tests.
+    void BestResults::reportAll()
+    {
+        auto models = getModels();
+        // Build and print the table of results
+        json table = buildTableResults(models);
+        printTableResults(models, table);
+        if (friedman) {
+            vector<string> datasets;
+            // Read the dataset names from a model entry; table.begin() could be "dateTable"
+            for (const auto& dataset : table.at(models.front()).items()) {
+                datasets.push_back(dataset.key());
+            }
+            double significance = 0.05;
+            Statistics stats(models, datasets, table, significance);
+            auto result = stats.friedmanTest();
+            stats.postHocHolmTest(result);
+        }
+    }
+}
--- a/src/Platform/BestResults.h
+++ b/src/Platform/BestResults.h
@@ -0,0 +1,29 @@
+#ifndef BESTRESULTS_H
+#define BESTRESULTS_H
+#include <string>
+#include <set>
+#include <nlohmann/json.hpp>
+using namespace std;
+using json = nlohmann::json;
+namespace platform {
+    // Builds and reports "best results" data for a score name and a model.
+    // best.cc creates it with Paths::results() as path and uses the model name
+    // "any" to select the *All() variants.
+    class BestResults {
+    public:
+        // Only stores its arguments; no file is touched at construction time.
+        explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
+        // Returns a string that best.cc prints as the name of the created file.
+        string build();
+        // Single-model report (best.cc calls it when model is not "any").
+        void reportSingle();
+        // Builds and prints the table of results of all the models and, if
+        // friedman is set, runs the Friedman and post-hoc Holm tests on it.
+        void reportAll();
+        // All-models build (best.cc calls it when model is "any").
+        void buildAll();
+    private:
+        vector<string> getModels();
+        vector<string> loadResultFiles();
+        // NOTE(review): models and table are taken by value, so every call copies them.
+        json buildTableResults(vector<string> models);
+        void printTableResults(vector<string> models, json table);
+        string bestResultFile();
+        json loadFile(const string& fileName);
+        string path; // folder passed by the caller (Paths::results() in best.cc)
+        string score; // score name given on the command line
+        string model; // model name given on the command line, or "any"
+        bool friedman; // when true reportAll() also runs the statistical tests
+    };
+}
+#endif //BESTRESULTS_H
--- a/src/Platform/BestResult.h
+++ b/src/Platform/BestResult.h
@@ -1,7 +1,7 @@
-#ifndef BESTRESULT_H
-#define BESTRESULT_H
+#ifndef BESTSCORE_H
+#define BESTSCORE_H
 #include <string>
-class BestResult {
+class BestScore {
 public:
    static std::string title() { return "STree_default (linear-ovo)"; }
    static double score() { return 22.109799; }
--- a/src/Platform/CMakeLists.txt
+++ b/src/Platform/CMakeLists.txt
@@ -4,13 +4,17 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
 include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
 include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
 include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
+include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
 add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
-add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
+add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
 add_executable(list list.cc platformUtils Datasets.cc)
+add_executable(best best.cc BestResults.cc Result.cc Statistics.cc)
 target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
 if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
    target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
+    target_link_libraries(best Boost::boost stdc++fs)
 else()
-    target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp)
+    target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
+    target_link_libraries(best Boost::boost)
 endif()
 target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")
--- a/src/Platform/ReportBase.cc
+++ b/src/Platform/ReportBase.cc
@@ -2,7 +2,7 @@
 #include <locale>
 #include "Datasets.h"
 #include "ReportBase.h"
-#include "BestResult.h"
+#include "BestScore.h"


 namespace platform {
@@ -94,6 +94,8 @@ namespace platform {
            ifstream resultData(Paths::results() + "/" + fileName);
            if (resultData.is_open()) {
                bestResults = json::parse(resultData);
+            } else {
+                existBestFile = false;
            }
        }
        try {
@@ -101,7 +103,12 @@ namespace platform {
        }
        catch (exception) {
            value = 1.0;
+
        }
        return value;
    }
+    bool ReportBase::getExistBestFile()
+    {
+        return existBestFile;
+    }
 }
--- a/src/Platform/ReportBase.h
+++ b/src/Platform/ReportBase.h
@@ -3,22 +3,13 @@
 #include <string>
 #include <iostream>
 #include "Paths.h"
+#include "Symbols.h"
 #include <nlohmann/json.hpp>

 using json = nlohmann::json;
 namespace platform {
    using namespace std;
-    class Symbols {
-    public:
-        inline static const string check_mark{ "\u2714" };
-        inline static const string exclamation{ "\u2757" };
-        inline static const string black_star{ "\u2605" };
-        inline static const string cross{ "\u2717" };
-        inline static const string upward_arrow{ "\u27B6" };
-        inline static const string down_arrow{ "\u27B4" };
-        inline static const string equal_best{ check_mark };
-        inline static const string better_best{ black_star };
-    };
+
    class ReportBase {
    public:
        explicit ReportBase(json data_, bool compare);
@@ -28,6 +19,7 @@ namespace platform {
        json data;
        string fromVector(const string& key);
        string fVector(const string& title, const json& data, const int width, const int precision);
+        bool getExistBestFile();
        virtual void header() = 0;
        virtual void body() = 0;
        virtual void showSummary() = 0;
@@ -35,10 +27,11 @@ namespace platform {
        map<string, int> summary;
        double margin;
        map<string, string> meaning;
+        bool compare;
    private:
        double bestResult(const string& dataset, const string& model);
-        bool compare;
        json bestResults;
+        bool existBestFile = true;
    };
 };
 #endif
--- a/src/Platform/ReportConsole.cc
+++ b/src/Platform/ReportConsole.cc
@@ -1,7 +1,7 @@
 #include <sstream>
 #include <locale>
 #include "ReportConsole.h"
-#include "BestResult.h"
+#include "BestScore.h"


 namespace platform {
@@ -99,11 +99,14 @@ namespace platform {
        cout << Colors::MAGENTA() << string(MAXL, '*') << endl;
        showSummary();
        auto score = data["score_name"].get<string>();
-        if (score == BestResult::scoreName()) {
+        if (score == BestScore::scoreName()) {
            stringstream oss;
-            oss << score << " compared to " << BestResult::title() << " .:  " << totalScore / BestResult::score();
+            oss << score << " compared to " << BestScore::title() << " .:  " << totalScore / BestScore::score();
            cout << headerLine(oss.str());
        }
+        if (!getExistBestFile() && compare) {
+            cout << headerLine("*** Best Results File not found. Couldn't compare any result!");
+        }
        cout << string(MAXL, '*') << endl << Colors::RESET();
    }
 }
--- a/src/Platform/ReportConsole.h
+++ b/src/Platform/ReportConsole.h
@@ -18,7 +18,7 @@ namespace platform {
        void header() override;
        void body() override;
        void footer(double totalScore);
-        void showSummary();
+        void showSummary() override;
    };
 };
 #endif
--- a/src/Platform/ReportExcel.cc
+++ b/src/Platform/ReportExcel.cc
@@ -1,7 +1,7 @@
 #include <sstream>
 #include <locale>
 #include "ReportExcel.h"
-#include "BestResult.h"
+#include "BestScore.h"


 namespace platform {
@@ -162,11 +162,11 @@ namespace platform {
        strcpy(line, data["title"].get<string>().c_str());
        lxw_doc_properties properties = {
            .title = line,
-            .subject = "Machine learning results",
-            .author = "Ricardo Montañana Gómez",
-            .manager = "Dr. J. A. Gámez, Dr. J. M. Puerta",
-            .company = "UCLM",
-            .comments = "Created with libxlsxwriter and c++",
+            .subject = (char*)"Machine learning results",
+            .author = (char*)"Ricardo Montañana Gómez",
+            .manager = (char*)"Dr. J. A. Gámez, Dr. J. M. Puerta",
+            .company = (char*)"UCLM",
+            .comments = (char*)"Created with libxlsxwriter and c++",
        };
        workbook_set_properties(workbook, &properties);
    }
@@ -322,9 +322,12 @@ namespace platform {
        showSummary();
        row += 4 + summary.size();
        auto score = data["score_name"].get<string>();
-        if (score == BestResult::scoreName()) {
-            worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestResult::title() + " .:").c_str(), efectiveStyle("text"));
-            writeDouble(row, 6, totalScore / BestResult::score(), "result");
+        if (score == BestScore::scoreName()) {
+            worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestScore::title() + " .:").c_str(), efectiveStyle("text"));
+            writeDouble(row, 6, totalScore / BestScore::score(), "result");
+        }
+        if (!getExistBestFile() && compare) {
+            worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]);
        }
    }
 }
--- a/src/Platform/ReportExcel.h
+++ b/src/Platform/ReportExcel.h
@@ -21,7 +21,6 @@ namespace platform {
        void setProperties();
        void createFile();
        void closeFile();
-        void showSummary();
        lxw_workbook* workbook;
        lxw_worksheet* worksheet;
        map<string, lxw_format*> styles;
@@ -33,6 +32,7 @@ namespace platform {
        const string fileName = "some_results.xlsx";
        void header() override;
        void body() override;
+        void showSummary() override;
        void footer(double totalScore, int row);
        void createStyle(const string& name, lxw_format* style, bool odd);
        void addColor(lxw_format* style, bool odd);
--- a/src/Platform/Result.cc
+++ b/src/Platform/Result.cc
@@ -0,0 +1,51 @@
+#include <filesystem>
+#include <fstream>
+#include <sstream>
+#include "Result.h"
+#include "Colors.h"
+#include "BestScore.h"
+namespace platform {
+    // Reads the result file path/filename and keeps a summary of its contents.
+    // Propagates the exception thrown by load() when the file can't be opened.
+    Result::Result(const string& path, const string& filename)
+        : path(path)
+        , filename(filename)
+    {
+        auto contents = load();
+        date = contents["date"];
+        // The score of the file is the sum of the scores of all its entries
+        double total = 0;
+        for (const auto& entry : contents["results"]) {
+            total += entry["score"].get<double>();
+        }
+        score = total;
+        scoreName = contents["score_name"];
+        // Scores named as the reference score are stored relative to the reference value
+        if (scoreName == BestScore::scoreName()) {
+            score /= BestScore::score();
+        }
+        title = contents["title"];
+        duration = contents["duration"];
+        model = contents["model"];
+        // A file with more than one entry in "results" is flagged as complete
+        complete = contents["results"].size() > 1;
+    }
+
+    // Parses the file path/filename as JSON and returns it.
+    // Throws invalid_argument when the file can't be opened.
+    json Result::load() const
+    {
+        const string fullName = path + "/" + filename;
+        ifstream resultData(fullName);
+        if (!resultData.is_open()) {
+            throw invalid_argument("Unable to open result file. [" + fullName + "]");
+        }
+        return json::parse(resultData);
+    }
+
+    // Formats the summary as one fixed-width line:
+    // date, model, score name, score, C/P flag, duration and title.
+    string Result::to_string() const
+    {
+        // "C" when isComplete() is true, "P" otherwise
+        const char* completeFlag = isComplete() ? "C" : "P";
+        stringstream line;
+        line << date << " "
+            << setw(12) << left << model << " "
+            << setw(11) << left << scoreName << " "
+            << right << setw(11) << setprecision(7) << fixed << score << " "
+            << setw(1) << " " << completeFlag << "  "
+            << setw(9) << setprecision(3) << fixed << duration << " "
+            << setw(50) << left << title << " ";
+        return line.str();
+    }
+}
--- a/src/Platform/Result.h
+++ b/src/Platform/Result.h
@@ -0,0 +1,37 @@
+#ifndef RESULT_H
+#define RESULT_H
+#include <map>
+#include <vector>
+#include <string>
+#include <nlohmann/json.hpp>
+namespace platform {
+    using namespace std;
+    using json = nlohmann::json;
+
+    // Summary of a single result file. Every field is filled by the constructor
+    // from the JSON stored in path/filename (see Result.cc).
+    class Result {
+    public:
+        // Loads the file and computes the summary; throws if the file can't be opened.
+        Result(const string& path, const string& filename);
+        // Re-reads and parses the file; throws invalid_argument if it can't be opened.
+        json load() const;
+        // One fixed-width line with date, model, score name, score, C/P flag, duration and title.
+        string to_string() const;
+        string getFilename() const { return filename; };
+        string getDate() const { return date; };
+        double getScore() const { return score; };
+        string getTitle() const { return title; };
+        double getDuration() const { return duration; };
+        string getModel() const { return model; };
+        string getScoreName() const { return scoreName; };
+        bool isComplete() const { return complete; };
+    private:
+        string path; // folder that holds the file
+        string filename; // name of the file inside path
+        string date; // "date" entry of the file
+        double score; // sum of the "score" of every entry in "results", divided by BestScore::score() when the score name matches BestScore::scoreName()
+        string title; // "title" entry of the file
+        double duration; // "duration" entry of the file
+        string model; // "model" entry of the file
+        string scoreName; // "score_name" entry of the file
+        bool complete; // true when "results" has more than one entry
+    };
+};
+
+#endif
--- a/src/Platform/Results.cc
+++ b/src/Platform/Results.cc
@@ -3,37 +3,9 @@
 #include "Results.h"
 #include "ReportConsole.h"
 #include "ReportExcel.h"
-#include "BestResult.h"
+#include "BestScore.h"
 #include "Colors.h"
 namespace platform {
-    Result::Result(const string& path, const string& filename)
-        : path(path)
-        , filename(filename)
-    {
-        auto data = load();
-        date = data["date"];
-        score = 0;
-        for (const auto& result : data["results"]) {
-            score += result["score"].get<double>();
-        }
-        scoreName = data["score_name"];
-        if (scoreName == BestResult::scoreName()) {
-            score /= BestResult::score();
-        }
-        title = data["title"];
-        duration = data["duration"];
-        model = data["model"];
-        complete = data["results"].size() > 1;
-    }
-    json Result::load() const
-    {
-        ifstream resultData(path + "/" + filename);
-        if (resultData.is_open()) {
-            json data = json::parse(resultData);
-            return data;
-        }
-        throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
-    }
    void Results::load()
    {
        using std::filesystem::directory_iterator;
@@ -48,19 +20,9 @@ namespace platform {
                    files.push_back(result);
            }
        }
-    }
-    string Result::to_string() const
-    {
-        stringstream oss;
-        oss << date << " ";
-        oss << setw(12) << left << model << " ";
-        oss << setw(11) << left << scoreName << " ";
-        oss << right << setw(11) << setprecision(7) << fixed << score << " ";
-        auto completeString = isComplete() ? "C" : "P";
-        oss << setw(1) << " " << completeString << "  ";
-        oss << setw(9) << setprecision(3) << fixed << duration << " ";
-        oss << setw(50) << left << title << " ";
-        return  oss.str();
+        if (max == 0) {
+            max = files.size();
+        }
    }
    void Results::show() const
    {
@@ -164,7 +126,7 @@ namespace platform {
                    if (indexList) {
                        // The value is about the files list
                        index = idx;
-                        if (index >= 0 && index < files.size()) {
+                        if (index >= 0 && index < max) {
                            report(index, false);
                            indexList = false;
                            continue;
@@ -300,7 +262,7 @@ namespace platform {
        if (openExcel) {
            workbook_close(workbook);
        }
-        cout << "Done!" << endl;
+        cout << Colors::RESET() << "Done!" << endl;
    }

 }
--- a/src/Platform/Results.h
+++ b/src/Platform/Results.h
@@ -5,34 +5,11 @@
 #include <vector>
 #include <string>
 #include <nlohmann/json.hpp>
+#include "Result.h"
 namespace platform {
    using namespace std;
    using json = nlohmann::json;

-    class Result {
-    public:
-        Result(const string& path, const string& filename);
-        json load() const;
-        string to_string() const;
-        string getFilename() const { return filename; };
-        string getDate() const { return date; };
-        double getScore() const { return score; };
-        string getTitle() const { return title; };
-        double getDuration() const { return duration; };
-        string getModel() const { return model; };
-        string getScoreName() const { return scoreName; };
-        bool isComplete() const { return complete; };
-    private:
-        string path;
-        string filename;
-        string date;
-        double score;
-        string title;
-        double duration;
-        string model;
-        string scoreName;
-        bool complete;
-    };
    class Results {
    public:
        Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial, bool compare) :
--- a/src/Platform/Statistics.cc
+++ b/src/Platform/Statistics.cc
@@ -0,0 +1,215 @@
+#include "Statistics.h"
+#include "Colors.h"
+#include "Symbols.h"
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/distributions/normal.hpp>
+
+namespace platform {
+
+    // Copies the models, the datasets and the table of results and caches their sizes.
+    // Nothing is computed here; fit() runs on the first test requested.
+    Statistics::Statistics(vector<string>& models, vector<string>& datasets, json data, double significance)
+        : models(models)
+        , datasets(datasets)
+        , data(data)
+        , significance(significance)
+        , nModels(static_cast<int>(models.size()))
+        , nDatasets(static_cast<int>(datasets.size()))
+    {
+    }
+
+    // Computes the average ranks, selects the control model and fills the
+    // win/tie/loss table. Called lazily by friedmanTest() and postHocHolmTest().
+    // Throws runtime_error when there are fewer than 3 models or 3 datasets.
+    void Statistics::fit()
+    {
+        if (nModels < 3 || nDatasets < 3) {
+            cerr << "nModels: " << nModels << endl;
+            cerr << "nDatasets: " << nDatasets << endl;
+            throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
+        }
+        computeRanks();
+        // Set the control model as the one with the lowest average rank.
+        // ranks is a map ordered by model name, so a position inside it is not a valid
+        // index into models unless models happens to be sorted; controlIdx is used to
+        // index models, so the best model is looked up by name in that vector.
+        auto best = min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; });
+        controlIdx = distance(models.begin(), find(models.begin(), models.end(), best->first));
+        computeWTL();
+        fitted = true;
+    }
+    // Ranks the (model, value) pairs of one dataset: the highest value gets rank 1
+    // and the models with the same value share the average of the ranks they span.
+    // ranksOrder is sorted in place, from the highest to the lowest value.
+    map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder)
+    {
+        sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& lhs, const pair<string, double>& rhs) {
+            return lhs.second > rhs.second;
+            });
+        const int total = static_cast<int>(ranksOrder.size());
+        // Provisional rank: the 1-based position in the sorted vector
+        map<string, float> ranks;
+        for (int pos = 0; pos < total; ++pos) {
+            ranks[ranksOrder[pos].first] = pos + 1.0;
+        }
+        // Walk every run of equal values and replace its provisional ranks by their average
+        int first = 0;
+        while (first < total) {
+            int sumRanks = ranks[ranksOrder[first].first];
+            int next = first + 1;
+            for (; next < total && ranksOrder[first].second == ranksOrder[next].second; ++next) {
+                sumRanks += ranks[ranksOrder[next].first];
+            }
+            const int tied = next - first;
+            if (tied > 1) {
+                const float averageRank = (float)sumRanks / tied;
+                for (int k = first; k < next; ++k) {
+                    ranks[ranksOrder[k].first] = averageRank;
+                }
+            }
+            first = next;
+        }
+        return ranks;
+    }
+    // Fills ranks with the average rank of every model over all the datasets.
+    void Statistics::computeRanks()
+    {
+        for (const auto& dataset : datasets) {
+            // Value of every model in this dataset (first element of the stored array)
+            vector<pair<string, double>> values;
+            for (const auto& model : models) {
+                values.emplace_back(model, data[model].at(dataset).at(0).get<double>());
+            }
+            map<string, float> datasetRanks = assignRanks(values);
+            // Accumulate the ranks; the first dataset just initializes the totals
+            if (!ranks.empty()) {
+                for (const auto& entry : datasetRanks) {
+                    ranks[entry.first] += entry.second;
+                }
+            } else {
+                ranks = datasetRanks;
+            }
+        }
+        // Turn the accumulated ranks into averages
+        for (auto& entry : ranks) {
+            entry.second /= nDatasets;
+        }
+    }
+    // Fills wtl with the comparison of every model against the control model,
+    // dataset by dataset. The entry of the control model itself stays at zero.
+    void Statistics::computeWTL()
+    {
+        for (int idx = 0; idx < nModels; ++idx) {
+            wtl[idx] = { 0, 0, 0 };
+        }
+        // The datasets walked are the keys of the first entry stored in data
+        json origin = data.begin().value();
+        for (auto const& item : origin.items()) {
+            const auto controlModel = models.at(controlIdx);
+            const double controlValue = data[controlModel].at(item.key()).at(0).get<double>();
+            for (int idx = 0; idx < nModels; ++idx) {
+                if (idx != controlIdx) {
+                    const double value = data[models[idx]].at(item.key()).at(0).get<double>();
+                    WTL& record = wtl[idx];
+                    // win counts the datasets where the model value is below the control value
+                    if (value == controlValue) {
+                        record.tie++;
+                    } else if (value < controlValue) {
+                        record.win++;
+                    } else {
+                        record.loss++;
+                    }
+                }
+            }
+        }
+    }
+
+    // Runs the post-hoc Holm test of every model against the control model and
+    // prints a table with the adjusted p-value, average rank, win/tie/loss counts
+    // and whether H0 is accepted at the configured significance.
+    // friedmanResult only selects the color of the frame (cyan if true, yellow otherwise).
+    void Statistics::postHocHolmTest(bool friedmanResult)
+    {
+        // Ranks, control model and win/tie/loss are computed on first use
+        if (!fitted) {
+            fit();
+        }
+        // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
+        // Post-hoc Holm test
+        // Calculate the p-value for the models paired with the control model
+        map<int, double> stats; // p-value of each model paired with the control model
+        boost::math::normal dist(0.0, 1.0);
+        // Denominator of the z statistic: sqrt(k * (k + 1) / (6 * N)) with k models and N datasets
+        double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
+        for (int i = 0; i < nModels; i++) {
+            if (i == controlIdx) {
+                // The control model is stored with p-value 0 and never printed
+                stats[i] = 0.0;
+                continue;
+            }
+            double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
+            // Two-sided p-value of z under the standard normal distribution
+            double p_value = (long double)2 * (1 - cdf(dist, z));
+            stats[i] = p_value;
+        }
+        // Sort the models by p-value
+        vector<pair<int, double>> statsOrder;
+        for (const auto& stat : stats) {
+            statsOrder.push_back({ stat.first, stat.second });
+        }
+        sort(statsOrder.begin(), statsOrder.end(), [](const pair<int, double>& a, const pair<int, double>& b) {
+            return a.second < b.second;
+            });
+
+        // Holm adjustment
+        // Each p-value is multiplied by (nModels - position), capped at 1 and never allowed
+        // to fall below the previous adjusted value.
+        // NOTE(review): the control entry (p = 0) is expected to sort first, which makes the
+        // first compared model use the factor nModels - 1 -- confirm when other p-values are 0.
+        for (int i = 0; i < statsOrder.size(); ++i) {
+            auto item = statsOrder.at(i);
+            double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
+            double p_value = min((double)1.0, item.second * (nModels - i));
+            p_value = max(before, p_value);
+            statsOrder[i] = { item.first, p_value };
+        }
+        auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
+        cout << color;
+        cout << "  *************************************************************************************************************" << endl;
+        cout << "  Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
+        cout << "  Control model: " << models[controlIdx] << endl;
+        cout << "  Model        p-value      rank      win tie loss Status" << endl;
+        cout << "  ============ ============ ========= === === ==== =============" << endl;
+        // sort ranks from lowest to highest
+        vector<pair<string, float>> ranksOrder;
+        for (const auto& rank : ranks) {
+            ranksOrder.push_back({ rank.first, rank.second });
+        }
+        sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, float>& a, const pair<string, float>& b) {
+            return a.second < b.second;
+            });
+        // One row per model, in ascending average rank, skipping the control model
+        for (const auto& item : ranksOrder) {
+            if (item.first == models.at(controlIdx)) {
+                continue;
+            }
+            // Position of the model in models, which is the key used by stats and wtl
+            auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
+            double pvalue = 0.0;
+            for (const auto& stat : statsOrder) {
+                if (stat.first == idx) {
+                    pvalue = stat.second;
+                }
+            }
+            // H0 is accepted when the adjusted p-value is above the significance level
+            auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
+            auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
+            auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
+            cout << "  " << colorStatus << left << setw(12) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second;
+            cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss;
+            cout << " " << status << textStatus << endl;
+        }
+        cout << color << "  *************************************************************************************************************" << endl;
+        cout << Colors::RESET();
+    }
+    // Runs the Friedman test on the average ranks and prints the statistic, the
+    // critical chi-square value and the p-value.
+    // Returns true when H0 is rejected (p-value below the significance level).
+    bool Statistics::friedmanTest()
+    {
+        // Ranks are computed on first use
+        if (!fitted) {
+            fit();
+        }
+        cout << Colors::BLUE() << endl;
+        cout << "***************************************************************************************************************" << endl;
+        cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
+        // Sum of the squared average ranks of the models
+        double sumSquared = 0;
+        for (const auto& entry : ranks) {
+            sumSquared += pow(entry.second, 2);
+        }
+        // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
+        double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
+        cout << "Friedman statistic: " << friedmanQ << endl;
+        // The statistic is compared against a chi-square distribution with nModels - 1 degrees of freedom
+        const double degreesOfFreedom = nModels - 1.0;
+        boost::math::chi_squared chiSquared(degreesOfFreedom);
+        long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
+        double criticalValue = quantile(chiSquared, 1 - significance);
+        const bool rejected = p_value < significance;
+        std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
+            << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
+        cout << "p-value: " << scientific << p_value << " is " << (rejected ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl;
+        if (rejected) {
+            cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl;
+        } else {
+            cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl;
+        }
+        cout << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << endl;
+        return rejected;
+    }
+} // namespace platform
--- a/src/Platform/Statistics.h
+++ b/src/Platform/Statistics.h
@@ -0,0 +1,37 @@
+#ifndef STATISTICS_H
+#define STATISTICS_H
+#include <iostream>
+#include <vector>
+#include <nlohmann/json.hpp>
+
+using namespace std;
+using json = nlohmann::json;
+
+namespace platform {
+    // Per-model counters filled by Statistics::computeWTL(), one increment per dataset,
+    // comparing the value of the model with the value of the control model.
+    struct WTL {
+        int win; // datasets where the model value is lower than the control value
+        int tie; // datasets where both values are equal
+        int loss; // datasets where the model value is higher than the control value
+    };
+    // Friedman test and post-hoc Holm test over a table of results.
+    // data is indexed as data[model][dataset][0] to get the value of a model in a dataset.
+    class Statistics {
+    public:
+        // Keeps its own copies of models, datasets and data.
+        Statistics(vector<string>& models, vector<string>& datasets, json data, double significance = 0.05);
+        // Prints the test and returns true when the null hypothesis is rejected.
+        bool friedmanTest();
+        // Prints the comparison of every model against the control model;
+        // friedmanResult only selects the color of the output.
+        void postHocHolmTest(bool friedmanResult);
+    private:
+        // Computes ranks, control model and win/tie/loss; throws runtime_error
+        // with fewer than 3 models or 3 datasets.
+        void fit();
+        void computeRanks();
+        void computeWTL();
+        vector<string> models;
+        vector<string> datasets;
+        json data;
+        double significance; // p-values above it accept H0
+        bool fitted = false; // set by fit() so it runs only once
+        int nModels = 0;
+        int nDatasets = 0;
+        int controlIdx = 0; // index in models of the control model (lowest average rank)
+        map<int, WTL> wtl; // keyed by the index of the model in models
+        map<string, float> ranks; // average rank of every model, keyed by model name
+    };
+}
+#endif // !STATISTICS_H
--- a/src/Platform/Symbols.h
+++ b/src/Platform/Symbols.h
@@ -0,0 +1,18 @@
+#ifndef SYMBOLS_H
+#define SYMBOLS_H
+#include <string>
+using namespace std;
+namespace platform {
+    // Unicode symbols (given as universal character names) used to decorate the reports.
+    class Symbols {
+    public:
+        inline static const string check_mark{ "\u2714" }; // U+2714
+        inline static const string exclamation{ "\u2757" }; // U+2757
+        inline static const string black_star{ "\u2605" }; // U+2605
+        inline static const string cross{ "\u2717" }; // U+2717
+        inline static const string upward_arrow{ "\u27B6" }; // U+27B6
+        inline static const string down_arrow{ "\u27B4" }; // U+27B4
+        // Aliases: both are copies of the symbols declared above
+        inline static const string equal_best{ check_mark };
+        inline static const string better_best{ black_star };
+    };
+}
+#endif // !SYMBOLS_H
--- a/src/Platform/best.cc
+++ b/src/Platform/best.cc
@@ -0,0 +1,71 @@
+#include <iostream>
+#include <argparse/argparse.hpp>
+#include "Paths.h"
+#include "BestResults.h"
+#include "Colors.h"
+
+using namespace std;
+
+// Declares the command line options of the "best" tool, parses argv and checks
+// that both the model and the score name were supplied.
+// On any parsing or validation error the message and the usage are written to
+// cerr and the process exits with status 1; otherwise the parsed program is returned.
+argparse::ArgumentParser manageArguments(int argc, char** argv)
+{
+    argparse::ArgumentParser program("best");
+    program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model (any for all models)");
+    program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
+    program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
+    program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
+    program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
+    try {
+        program.parse_args(argc, argv);
+        // Only model and score are validated here; the flags are read by the caller
+        auto model = program.get<string>("model");
+        auto score = program.get<string>("score");
+        if (model == "" || score == "") {
+            throw runtime_error("Model and score name must be supplied");
+        }
+    }
+    catch (const exception& err) {
+        cerr << err.what() << endl;
+        cerr << program;
+        exit(1);
+    }
+    return program;
+}
+
+// Entry point of the "best" tool: validates the combination of options and then
+// builds and/or reports the best results of one model or of all of them ("any").
+int main(int argc, char** argv)
+{
+    auto program = manageArguments(argc, argv);
+    const auto model = program.get<string>("model");
+    const auto score = program.get<string>("score");
+    const bool build = program.get<bool>("build");
+    const bool report = program.get<bool>("report");
+    const bool friedman = program.get<bool>("friedman");
+    const bool allModels = model == "any";
+    // The Friedman test compares models, so it needs all of them
+    if (friedman && !allModels) {
+        cerr << "Friedman test can only be used with all models" << endl;
+        cerr << program;
+        exit(1);
+    }
+    if (!(report || build)) {
+        cerr << "Either build, report or both, have to be selected to do anything!" << endl;
+        cerr << program;
+        exit(1);
+    }
+    platform::BestResults results(platform::Paths::results(), score, model, friedman);
+    if (build) {
+        if (allModels) {
+            results.buildAll();
+        } else {
+            string fileName = results.build();
+            cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl;
+        }
+    }
+    if (report) {
+        if (allModels) {
+            results.reportAll();
+        } else {
+            results.reportSingle();
+        }
+    }
+    return 0;
+}
--- a/src/Platform/platformUtils.h
+++ b/src/Platform/platformUtils.h
@@ -8,7 +8,6 @@
 #include "ArffFiles.h"
 #include "CPPFImdlp.h"
 using namespace std;
-const string PATH = "../../data/";

 bool file_exists(const std::string& name);
 vector<string> split(const string& text, char delimiter);
Author	SHA1	Message	Date
Ricardo Montañana	926de2bebd	Add boost info to README	2023-09-28 09:44:33 +02:00
Ricardo Montañana	71704e3547	Enhance output info in Statistics	2023-09-28 01:27:18 +02:00
Ricardo Montañana	3b06534327	Remove duplicated code in BestResults	2023-09-28 00:59:34 +02:00
Ricardo Montañana	ac89a451e3	Duplicate statistics tests in class	2023-09-28 00:45:15 +02:00
Ricardo Montañana	00c6cf663b	Fix order of output in posthoc	2023-09-27 19:11:47 +02:00
Ricardo Montañana	5043c12be8	Complete posthoc with Holm adjust	2023-09-27 18:34:16 +02:00
Ricardo Montañana	11320e2cc7	Complete friedman test as in exreport	2023-09-27 12:36:03 +02:00
Ricardo Montañana	ce66483b65	Update boost version requirement for Linux	2023-09-26 14:12:53 +02:00
Ricardo Montañana	cab8e14b2d	Add friedman hyperparameter	2023-09-26 11:26:59 +02:00
Ricardo Montañana	f0d0abe891	Add boost library link to linux build	2023-09-26 01:07:50 +02:00
Ricardo Montañana	dcba146e12	Begin adding Friedman test to BestResults	2023-09-26 01:04:59 +02:00
Ricardo Montañana	3ea0285119	Fix ranks to match friedman test ranks	2023-09-25 18:38:12 +02:00
Ricardo Montañana Gómez	e3888e1503	Merge pull request 'bestResults' (#9 ) from bestResults into main Reviewed-on: https://gitea.rmontanana.es:3000/rmontanana/BayesNet/pulls/9 Add best results management, build, report, build all & report all	2023-09-25 12:02:17 +00:00
Ricardo Montañana	06de13df98	Add date/time to header of report best	2023-09-25 10:04:53 +02:00
Ricardo Montañana	de4fa6a04f	Add color to totals	2023-09-23 10:30:39 +02:00
Ricardo Montañana	3a7bf4e672	Fix ranking order mistake	2023-09-23 01:33:23 +02:00
Ricardo Montañana	cd0bc02a74	Add report/build all with totals and ranks	2023-09-23 01:14:02 +02:00
Ricardo Montañana	c8597a794e	Begin report all models	2023-09-22 18:13:32 +02:00
Ricardo Montañana	b30416364d	Fix mistake in best results file name	2023-09-22 14:14:39 +02:00
Ricardo Montañana	3a16589220	Add best config for debug in vscode	2023-09-22 01:04:36 +02:00
Ricardo Montañana	c4f9187e2a	Complete best build and report	2023-09-22 01:03:55 +02:00
Ricardo Montañana	c4d0a5b4e6	Split Result from Results	2023-09-21 23:30:17 +02:00
Ricardo Montañana	7bfafe555f	Begin BestResults build	2023-09-21 23:04:11 +02:00
Ricardo Montañana	337b6f7e79	Rename BestResult to BestScore	2023-09-21 19:30:07 +02:00
Ricardo Montañana	5fa0b957dd	Fix mistake in idx range in manage	2023-09-20 19:12:07 +02:00
Ricardo Montañana	67252fc41d	Fix CMakeLists libxlsxwriter for Linux	2023-09-20 19:02:53 +02:00
Ricardo Montañana	94ae9456a0	Fix libxslxwriter linking problem	2023-09-20 18:50:11 +02:00
Ricardo Montañana	781993e326	Resolve some warnings	2023-09-20 17:54:15 +02:00
Ricardo Montañana	8257a6ae39	Add message of not exist Best Results	2023-09-20 13:50:34 +02:00
Ricardo Montañana Gómez	fc81730dfc	Merge pull request 'Exchange OpenXLSX to libxlsxwriter' (#8 ) from libxlsxwriter into main Add multiple sheets to excel file Add format and color to sheets Add comparison with ZeroR Add comparison with Best Results Separate contextual menu from general in manage	2023-09-20 11:17:16 +00:00