Add boost info to README

Enhance output info in Statistics
Remove duplicated code in BestResults
2023-09-28 09:44:33 +02:00 · 2023-09-28 01:27:18 +02:00 · 2023-09-28 00:59:34 +02:00 · 2023-09-28 00:45:15 +02:00 · 2023-09-27 19:11:47 +02:00 · 2023-09-27 18:34:16 +02:00
15 changed files with 678 additions and 109 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -37,6 +37,20 @@
            ],
            "cwd": "/Users/rmontanana/Code/discretizbench",
        },
+        {
+            "type": "lldb",
+            "request": "launch",
+            "name": "best",
+            "program": "${workspaceFolder}/build/src/Platform/best",
+            "args": [
+                "-m",
+                "BoostAODE",
+                "-s",
+                "accuracy",
+                "--build",
+            ],
+            "cwd": "/Users/rmontanana/Code/discretizbench",
+        },
        {
            "type": "lldb",
            "request": "launch",
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,17 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 option(ENABLE_CLANG_TIDY "Enable to add clang tidy."              OFF)
 option(ENABLE_TESTING "Unit testing build"                        OFF)
 option(CODE_COVERAGE "Collect coverage from test library"         OFF)
+
+# Boost Library
+set(Boost_USE_STATIC_LIBS OFF) 
+set(Boost_USE_MULTITHREADED ON)  
+set(Boost_USE_STATIC_RUNTIME OFF) 
+find_package(Boost 1.78.0 REQUIRED) 
+if(Boost_FOUND)
+    message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
+    include_directories(${Boost_INCLUDE_DIRS}) 
+endif()
+
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
 # CMakes modules
 # --------------
--- a/README.md
+++ b/README.md
@@ -4,10 +4,14 @@ Bayesian Network Classifier with libtorch from scratch

 ## 0. Setup

-### libxlswriter
-
 Before compiling BayesNet.

+### boost library
+
+[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
+
+### libxlsxwriter
+
 ```bash
 cd lib/libxlsxwriter
 make
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -1,18 +1,37 @@
 #include <filesystem>
 #include <fstream>
 #include <iostream>
-#include "platformUtils.h"
+#include <sstream>
 #include "BestResults.h"
-#include "Results.h"
+#include "Result.h"
 #include "Colors.h"
+#include "Statistics.h"

+
+
+namespace fs = std::filesystem;
+// function ftime_to_string, Code taken from 
+// https://stackoverflow.com/a/58237530/1389271
+// Formats a time point of any clock (for instance the value returned by
+// std::filesystem::last_write_time) as a UTC "YYYY-MM-DD HH:MM" string.
+// The value is re-based onto system_clock through the offset between the two
+// clocks' now() readings, so the conversion is only approximate.
+// Returns an empty string when the time cannot be broken down into a date.
+template <typename TP>
+std::string ftime_to_string(TP tp)
+{
+    using namespace std::chrono;
+    const auto sctp = time_point_cast<system_clock::duration>(tp - TP::clock::now()
+        + system_clock::now());
+    const auto tt = system_clock::to_time_t(sctp);
+    // std::gmtime returns nullptr on failure and otherwise points at shared
+    // static storage: check the result and keep a private copy before formatting.
+    const std::tm* gmt = std::gmtime(&tt);
+    if (gmt == nullptr) {
+        return std::string();
+    }
+    const std::tm utc = *gmt;
+    std::stringstream buffer;
+    buffer << std::put_time(&utc, "%Y-%m-%d %H:%M");
+    return buffer.str();
+}
 namespace platform {

-    void BestResults::build()
+    string BestResults::build()
    {
-        auto files = loadFiles();
+        auto files = loadResultFiles();
        if (files.size() == 0) {
-            throw runtime_error("No result files were found!");
+            cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
+            exit(1);
        }
        json bests;
        for (const auto& file : files) {
@@ -21,7 +40,7 @@ namespace platform {
            for (auto const& item : data.at("results")) {
                bool update = false;
                if (bests.contains(item.at("dataset").get<string>())) {
-                    if (item.at("score").get<double>() > bests["dataset"].at(0).get<double>()) {
+                    if (item.at("score").get<double>() > bests[item.at("dataset").get<string>()].at(0).get<double>()) {
                        update = true;
                    }
                } else {
@@ -32,13 +51,15 @@ namespace platform {
                }
            }
        }
-        string bestFileName = path + "/" + bestResultFile();
-        if (file_exists(bestFileName)) {
-            cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET();
+        string bestFileName = path + bestResultFile();
+        if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
+            fclose(fileTest);
+            cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << endl;
        }
        ofstream file(bestFileName);
        file << bests;
        file.close();
+        return bestFileName;
    }

    string BestResults::bestResultFile()
@@ -46,23 +67,226 @@ namespace platform {
        return "best_results_" + score + "_" + model + ".json";
    }

-    vector<string> BestResults::loadFiles()
+    // Extracts the model and the score name from a result file name laid out as
+    // results_<score>_<model>_<rest>, for example
+    // results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
+    // Returns { model, score }. Both strings are empty when the name does not
+    // contain the two "_" separators that delimit the score field.
+    pair<string, string> getModelScore(string name)
+    {
+        const auto scoreStart = name.find("_");
+        if (scoreStart == string::npos) {
+            return { "", "" };
+        }
+        const auto modelStart = name.find("_", scoreStart + 1);
+        if (modelStart == string::npos) {
+            // Without this guard the npos arithmetic below wraps around and
+            // yields unrelated pieces of the file name.
+            return { "", "" };
+        }
+        string score = name.substr(scoreStart + 1, modelStart - scoreStart - 1);
+        // When no further "_" exists the count is huge and substr takes the
+        // remainder of the name, which is what the previous code did as well.
+        const auto modelEnd = name.find("_", modelStart + 1);
+        string model = name.substr(modelStart + 1, modelEnd - modelStart - 1);
+        return { model, score };
+    }
+
+    // Scans the directory `path` and returns the file names (without the
+    // directory part) of the result files that match this object's filters.
+    // A file qualifies when its name contains ".json", starts with "results_",
+    // its score field equals `score` and its model field equals `model`
+    // (or `model` is "any").
+    vector<string> BestResults::loadResultFiles()
    {
        vector<string> files;
        using std::filesystem::directory_iterator;
+        string fileModel, fileScore;
        for (const auto& file : directory_iterator(path)) {
            auto fileName = file.path().filename().string();
-            if (fileName.find(".json") != string::npos && fileName.find("results_") == 0
-                && fileName.find("_" + score + "_") != string::npos
-                && fileName.find("_" + model + "_") != string::npos) {
-                files.push_back(fileName);
+            if (fileName.find(".json") != string::npos && fileName.find("results_") == 0) {
+                // Compare the parsed fields exactly instead of searching for substrings
+                tie(fileModel, fileScore) = getModelScore(fileName);
+                if (score == fileScore && (model == fileModel || model == "any")) {
+                    files.push_back(fileName);
+                }
            }
        }
        return files;
    }

-    void BestResults::report()
+    json BestResults::loadFile(const string& fileName)
    {
+        // Parses the JSON document stored in fileName.
+        // Throws invalid_argument when the file cannot be opened.
+        ifstream resultData(fileName);
+        if (!resultData.is_open()) {
+            throw invalid_argument("Unable to open result file. [" + fileName + "]");
+        }
+        return json::parse(resultData);
+    }
+    // Returns the distinct model names found in the result files selected by
+    // loadResultFiles(). Prints an error and ends the process with exit code 1
+    // when no result file is available.
+    vector<string> BestResults::getModels()
+    {
+        const auto resultFiles = loadResultFiles();
+        if (resultFiles.empty()) {
+            cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
+            exit(1);
+        }
+        // The set removes duplicates and hands the names back in sorted order
+        set<string> uniqueModels;
+        for (const auto& resultFile : resultFiles) {
+            // getModelScore returns { model, score }
+            uniqueModels.insert(getModelScore(resultFile).first);
+        }
+        return vector<string>(uniqueModels.begin(), uniqueModels.end());
+    }

+    // Runs build() once for every model returned by getModels(), announcing
+    // each one on stdout, and leaves the model filter set to "any" afterwards.
+    void BestResults::buildAll()
+    {
+        for (const auto& modelName : getModels()) {
+            cout << "Building best results for model: " << modelName << endl;
+            // build() reads the member, so point it at the current model
+            model = modelName;
+            build();
+        }
+        model = "any";
+    }
+
+    // Prints the best results file of the current model and score as a table
+    // with one row per dataset: score, originating file and hyperparameters.
+    // Ends the process with exit code 1 when the best results file is missing.
+    void BestResults::reportSingle()
+    {
+        string bestFileName = path + bestResultFile();
+        // Opening the file for reading is only used as an existence check
+        FILE* fileTest = fopen(bestFileName.c_str(), "r");
+        if (fileTest == nullptr) {
+            cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
+            exit(1);
+        }
+        fclose(fileTest);
+        auto date = ftime_to_string(filesystem::last_write_time(bestFileName));
+        auto data = loadFile(bestFileName);
+        cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl;
+        cout << "--------------------------------------------------------" << endl;
+        cout << Colors::GREEN() << " #  Dataset                   Score       File                                                               Hyperparameters" << endl;
+        cout << "=== ========================= =========== ================================================================== ================================================= " << endl;
+        int row = 0;
+        for (auto const& item : data.items()) {
+            // Rows alternate between two colors, starting with blue
+            auto color = (row % 2 == 0) ? Colors::BLUE() : Colors::CYAN();
+            const auto& entry = item.value();
+            cout << color << setw(3) << fixed << right << row << " ";
+            cout << setw(25) << left << item.key() << " ";
+            cout << setw(11) << setprecision(9) << fixed << entry.at(0).get<double>() << " ";
+            cout << setw(66) << entry.at(2).get<string>() << " ";
+            cout << entry.at(1) << " ";
+            cout << endl;
+            ++row;
+        }
+    }
+    // Loads the best results file of every model in `models` and gathers them
+    // in one JSON object keyed by model name. The extra key "dateTable" holds
+    // the formatted write time of the OLDEST of those files.
+    // Side effect: the `model` member is left set to the last model processed.
+    // Ends the process with exit code 1 when a best results file is missing or
+    // when the files do not all contain the same number of datasets.
+    json BestResults::buildTableResults(vector<string> models)
+    {
+        size_t numberOfDatasets = 0;
+        bool first = true;
+        json table;
+        // Starts at the latest representable time so any real file is older
+        auto oldestDate = filesystem::file_time_type::max();
+        for (const auto& model : models) {
+            // bestResultFile() reads the member, so point it at the current model
+            this->model = model;
+            string bestFileName = path + bestResultFile();
+            if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
+                fclose(fileTest);
+            } else {
+                cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
+                exit(1);
+            }
+            auto dateWrite = filesystem::last_write_time(bestFileName);
+            if (dateWrite < oldestDate) {
+                oldestDate = dateWrite;
+            }
+            auto data = loadFile(bestFileName);
+            if (first) {
+                // The first file fixes the number of datasets every other model must match
+                first = false;
+                numberOfDatasets = data.size();
+            } else if (numberOfDatasets != data.size()) {
+                cerr << Colors::MAGENTA() << "The number of datasets in the best results files is not the same for all the models." << Colors::RESET() << endl;
+                exit(1);
+            }
+            table[model] = data;
+        }
+        table["dateTable"] = ftime_to_string(oldestDate);
+        return table;
+    }
+
+    // Prints the comparison table built by buildTableResults: one row per
+    // dataset, one column per model, the best score of every row in red, and a
+    // final row with the per-model totals (largest total in red).
+    // `table` must hold one entry per model plus the "dateTable" string.
+    void BestResults::printTableResults(vector<string> models, json table)
+    {
+        cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
+        cout << "------------------------------------------------" << endl;
+        cout << Colors::GREEN() << " #  Dataset                   ";
+        for (const auto& model : models) {
+            cout << setw(12) << left << model << " ";
+        }
+        cout << endl;
+        cout << "=== ========================= ";
+        for (const auto& model : models) {
+            cout << "============ ";
+        }
+        cout << endl;
+        if (models.empty()) {
+            // Without models there are no rows or totals to print
+            return;
+        }
+        auto i = 0;
+        bool odd = true;
+        map<string, double> totals;
+        for (const auto& model : models) {
+            totals[model] = 0.0;
+        }
+        // Take the dataset names from the first model. table.begin() is not
+        // safe for that: JSON object keys are ordered, so "dateTable" comes
+        // first whenever every model name sorts after it.
+        const json& origin = table.at(models.front());
+        for (auto const& item : origin.items()) {
+            auto color = odd ? Colors::BLUE() : Colors::CYAN();
+            cout << color << setw(3) << fixed << right << i++ << " ";
+            cout << setw(25) << left << item.key() << " ";
+            double maxValue = 0;
+            // Find out the max value for this dataset
+            for (const auto& model : models) {
+                double value = table.at(model).at(item.key()).at(0).get<double>();
+                if (value > maxValue) {
+                    maxValue = value;
+                }
+            }
+            // Print the row with red colors on max values
+            for (const auto& model : models) {
+                string efectiveColor = color;
+                double value = table.at(model).at(item.key()).at(0).get<double>();
+                if (value == maxValue) {
+                    efectiveColor = Colors::RED();
+                }
+                totals[model] += value;
+                cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
+            }
+            cout << endl;
+            odd = !odd;
+        }
+        cout << Colors::GREEN() << "=== ========================= ";
+        for (const auto& model : models) {
+            cout << "============ ";
+        }
+        cout << endl;
+        cout << Colors::GREEN() << setw(30) << "    Totals...................";
+        double max = 0.0;
+        for (const auto& total : totals) {
+            if (total.second > max) {
+                max = total.second;
+            }
+        }
+        for (const auto& model : models) {
+            string efectiveColor = Colors::GREEN();
+            if (totals[model] == max) {
+                efectiveColor = Colors::RED();
+            }
+            cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " ";
+        }
+        cout << endl;
+    }
+    // Prints the best results of every model side by side and, when the
+    // `friedman` flag is set, follows the table with the Friedman test and the
+    // post-hoc Holm test at a 0.05 significance level.
+    void BestResults::reportAll()
+    {
+        auto models = getModels();
+        // Build the table of results
+        json table = buildTableResults(models);
+        // Print the table of results
+        printTableResults(models, table);
+        // Compute the Friedman test
+        if (friedman) {
+            // Read the dataset names from the first model's entry. table.begin()
+            // may be the "dateTable" string, because JSON object keys are
+            // ordered. getModels() never returns an empty list (it exits).
+            vector<string> datasets;
+            for (const auto& dataset : table.at(models.front()).items()) {
+                datasets.push_back(dataset.key());
+            }
+            double significance = 0.05;
+            Statistics stats(models, datasets, table, significance);
+            auto result = stats.friedmanTest();
+            stats.postHocHolmTest(result);
+        }
    }
 }
--- a/src/Platform/BestResults.h
+++ b/src/Platform/BestResults.h
@@ -1,20 +1,29 @@
 #ifndef BESTRESULTS_H
 #define BESTRESULTS_H
 #include <string>
+#include <set>
+#include <nlohmann/json.hpp>
 using namespace std;
-
+using json = nlohmann::json;
 namespace platform {
    class BestResults {
+        // Builds and reports the "best results" files of the experiments stored
+        // in a directory, filtered by score name and model name.
    public:
-        explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {}
-        void build();
-        void report();
+        // path: directory holding the result files; file names are appended to
+        // it directly (NOTE(review): presumably it must end with a separator - confirm).
+        // score / model: filters applied to the result file names.
+        // friedman: whether reportAll() also runs the statistical tests.
+        explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
+        // Writes the best results file and returns its name
+        string build();
+        // Prints the best results of the current model
+        void reportSingle();
+        // Prints the best results of every model side by side
+        void reportAll();
+        // Runs build() for every model found in the result files
+        void buildAll();
    private:
-        vector<string> loadFiles();
+        vector<string> getModels();
+        vector<string> loadResultFiles();
+        json buildTableResults(vector<string> models);
+        void printTableResults(vector<string> models, json table);
        string bestResultFile();
+        json loadFile(const string& fileName);
        string path;
        string score;
        string model;
+        bool friedman;
    };
 }
 #endif //BESTRESULTS_H
--- a/src/Platform/CMakeLists.txt
+++ b/src/Platform/CMakeLists.txt
@@ -6,15 +6,15 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
 include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
 include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
 add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
-add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
+add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
 add_executable(list list.cc platformUtils Datasets.cc)
-add_executable(best best.cc BestResults.cc Results.cc ReportBase.cc ReportExcel.cc platformUtils.cc)
+add_executable(best best.cc BestResults.cc Result.cc Statistics.cc)
 target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
 if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
    target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
-    target_link_libraries(best "${TORCH_LIBRARIES}" libxlsxwriter.so stdc++fs)
+    target_link_libraries(best Boost::boost stdc++fs)
 else()
    target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
-    target_link_libraries(best "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}")
+    target_link_libraries(best Boost::boost)
 endif()
 target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")
--- a/src/Platform/ReportBase.h
+++ b/src/Platform/ReportBase.h
@@ -3,22 +3,13 @@
 #include <string>
 #include <iostream>
 #include "Paths.h"
+#include "Symbols.h"
 #include <nlohmann/json.hpp>

 using json = nlohmann::json;
 namespace platform {
    using namespace std;
-    class Symbols {
-    public:
-        inline static const string check_mark{ "\u2714" };
-        inline static const string exclamation{ "\u2757" };
-        inline static const string black_star{ "\u2605" };
-        inline static const string cross{ "\u2717" };
-        inline static const string upward_arrow{ "\u27B6" };
-        inline static const string down_arrow{ "\u27B4" };
-        inline static const string equal_best{ check_mark };
-        inline static const string better_best{ black_star };
-    };
+
    class ReportBase {
    public:
        explicit ReportBase(json data_, bool compare);
--- a/src/Platform/Result.cc
+++ b/src/Platform/Result.cc
@@ -0,0 +1,51 @@
+#include <filesystem>
+#include <fstream>
+#include <sstream>
+#include "Result.h"
+#include "Colors.h"
+#include "BestScore.h"
+namespace platform {
+    // Reads the result file path/filename and caches its summary fields.
+    // The score is the sum of the per-dataset scores; when the score name
+    // equals BestScore::scoreName() the sum is divided by BestScore::score().
+    Result::Result(const string& path, const string& filename)
+        : path(path)
+        , filename(filename)
+    {
+        auto data = load();
+        date = data["date"];
+        const auto& results = data["results"];
+        double total = 0;
+        for (const auto& entry : results) {
+            total += entry["score"].get<double>();
+        }
+        score = total;
+        scoreName = data["score_name"];
+        if (scoreName == BestScore::scoreName()) {
+            score /= BestScore::score();
+        }
+        title = data["title"];
+        duration = data["duration"];
+        model = data["model"];
+        // A result holding a single dataset is reported as partial
+        complete = results.size() > 1;
+    }
+
+    // Parses and returns the JSON content of path/filename.
+    // Throws invalid_argument when the file cannot be opened.
+    json Result::load() const
+    {
+        const string fullName = path + "/" + filename;
+        ifstream resultData(fullName);
+        if (!resultData.is_open()) {
+            throw invalid_argument("Unable to open result file. [" + fullName + "]");
+        }
+        return json::parse(resultData);
+    }
+
+    // Builds the one-line, column-aligned summary of this result: date, model,
+    // score name, score, complete (C) / partial (P) flag, duration and title.
+    string Result::to_string() const
+    {
+        const auto completeString = complete ? "C" : "P";
+        stringstream line;
+        line << date << " "
+            << setw(12) << left << model << " "
+            << setw(11) << left << scoreName << " "
+            << right << setw(11) << setprecision(7) << fixed << score << " "
+            << setw(1) << " " << completeString << "  "
+            << setw(9) << setprecision(3) << fixed << duration << " "
+            << setw(50) << left << title << " ";
+        return line.str();
+    }
+}
--- a/src/Platform/Result.h
+++ b/src/Platform/Result.h
@@ -0,0 +1,37 @@
+#ifndef RESULT_H
+#define RESULT_H
+#include <map>
+#include <vector>
+#include <string>
+#include <nlohmann/json.hpp>
+namespace platform {
+    using namespace std;
+    using json = nlohmann::json;
+
+    // Summary of one experiment result file: identification, aggregated score,
+    // duration and whether it is complete.
+    class Result {
+    public:
+        // Loads path/filename and fills every field from its content
+        Result(const string& path, const string& filename);
+        // Parses the result file again and returns its JSON content
+        json load() const;
+        // One-line, column-aligned summary of the result
+        string to_string() const;
+        // Accessors
+        string getFilename() const { return filename; }
+        string getDate() const { return date; }
+        double getScore() const { return score; }
+        string getTitle() const { return title; }
+        double getDuration() const { return duration; }
+        string getModel() const { return model; }
+        string getScoreName() const { return scoreName; }
+        bool isComplete() const { return complete; }
+    private:
+        string path;
+        string filename;
+        string date;
+        double score;
+        string title;
+        double duration;
+        string model;
+        string scoreName;
+        bool complete;
+    };
+};
+
+#endif
--- a/src/Platform/Results.cc
+++ b/src/Platform/Results.cc
@@ -6,34 +6,6 @@
 #include "BestScore.h"
 #include "Colors.h"
 namespace platform {
-    Result::Result(const string& path, const string& filename)
-        : path(path)
-        , filename(filename)
-    {
-        auto data = load();
-        date = data["date"];
-        score = 0;
-        for (const auto& result : data["results"]) {
-            score += result["score"].get<double>();
-        }
-        scoreName = data["score_name"];
-        if (scoreName == BestScore::scoreName()) {
-            score /= BestScore::score();
-        }
-        title = data["title"];
-        duration = data["duration"];
-        model = data["model"];
-        complete = data["results"].size() > 1;
-    }
-    json Result::load() const
-    {
-        ifstream resultData(path + "/" + filename);
-        if (resultData.is_open()) {
-            json data = json::parse(resultData);
-            return data;
-        }
-        throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
-    }
    void Results::load()
    {
        using std::filesystem::directory_iterator;
@@ -52,19 +24,6 @@ namespace platform {
            max = files.size();
        }
    }
-    string Result::to_string() const
-    {
-        stringstream oss;
-        oss << date << " ";
-        oss << setw(12) << left << model << " ";
-        oss << setw(11) << left << scoreName << " ";
-        oss << right << setw(11) << setprecision(7) << fixed << score << " ";
-        auto completeString = isComplete() ? "C" : "P";
-        oss << setw(1) << " " << completeString << "  ";
-        oss << setw(9) << setprecision(3) << fixed << duration << " ";
-        oss << setw(50) << left << title << " ";
-        return  oss.str();
-    }
    void Results::show() const
    {
        cout << Colors::GREEN() << "Results found: " << files.size() << endl;
--- a/src/Platform/Results.h
+++ b/src/Platform/Results.h
@@ -5,34 +5,11 @@
 #include <vector>
 #include <string>
 #include <nlohmann/json.hpp>
+#include "Result.h"
 namespace platform {
    using namespace std;
    using json = nlohmann::json;

-    class Result {
-    public:
-        Result(const string& path, const string& filename);
-        json load() const;
-        string to_string() const;
-        string getFilename() const { return filename; };
-        string getDate() const { return date; };
-        double getScore() const { return score; };
-        string getTitle() const { return title; };
-        double getDuration() const { return duration; };
-        string getModel() const { return model; };
-        string getScoreName() const { return scoreName; };
-        bool isComplete() const { return complete; };
-    private:
-        string path;
-        string filename;
-        string date;
-        double score;
-        string title;
-        double duration;
-        string model;
-        string scoreName;
-        bool complete;
-    };
    class Results {
    public:
        Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial, bool compare) :
--- a/src/Platform/Statistics.cc
+++ b/src/Platform/Statistics.cc
@@ -0,0 +1,215 @@
+#include "Statistics.h"
+#include "Colors.h"
+#include "Symbols.h"
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/distributions/normal.hpp>
+
+namespace platform {
+
+    // Stores copies of the model names, dataset names and results table
+    // together with the significance level, and caches the two counts.
+    Statistics::Statistics(vector<string>& models, vector<string>& datasets, json data, double significance)
+        : models(models)
+        , datasets(datasets)
+        , data(data)
+        , significance(significance)
+        , nModels(static_cast<int>(models.size()))
+        , nDatasets(static_cast<int>(datasets.size()))
+    {
+    }
+
+    // Computes the average ranks, selects the control model (lowest average
+    // rank) and fills the win/tie/loss table.
+    // Throws runtime_error when there are fewer than 3 models or 3 datasets.
+    void Statistics::fit()
+    {
+        if (nModels < 3 || nDatasets < 3) {
+            cerr << "nModels: " << nModels << endl;
+            cerr << "nDatasets: " << nDatasets << endl;
+            throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
+        }
+        computeRanks();
+        // Set the control model as the one with the lowest average rank
+        const auto best = min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; });
+        // `ranks` is ordered by model name while controlIdx indexes `models`,
+        // so translate the winning name into its position in that vector
+        // rather than using its position inside the map.
+        controlIdx = distance(models.begin(), find(models.begin(), models.end(), best->first));
+        computeWTL();
+        fitted = true;
+    }
+    // Ranks the (name, value) pairs from the highest value (rank 1) to the
+    // lowest; entries with equal values share the average of their positions.
+    // Note that ranksOrder is sorted in place.
+    map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder)
+    {
+        sort(ranksOrder.begin(), ranksOrder.end(), [](const auto& lhs, const auto& rhs) {
+            return lhs.second > rhs.second;
+            });
+        const int total = static_cast<int>(ranksOrder.size());
+        // First pass: every entry gets its 1-based position as rank
+        map<string, float> ranks;
+        for (int position = 0; position < total; ++position) {
+            ranks[ranksOrder[position].first] = position + 1.0;
+        }
+        // Second pass: walk the runs of equal values and average their ranks
+        for (int first = 0, next = 0; first < total; first = next) {
+            int sumRanks = ranks[ranksOrder[first].first];
+            for (next = first + 1; next < total && ranksOrder[first].second == ranksOrder[next].second; ++next) {
+                sumRanks += ranks[ranksOrder[next].first];
+            }
+            const int groupSize = next - first;
+            if (groupSize > 1) {
+                const float averageRank = static_cast<float>(sumRanks) / groupSize;
+                for (int k = first; k < next; ++k) {
+                    ranks[ranksOrder[k].first] = averageRank;
+                }
+            }
+        }
+        return ranks;
+    }
+    // Ranks the models on every dataset and stores in `ranks` the average
+    // rank of each model over all the datasets.
+    void Statistics::computeRanks()
+    {
+        for (const auto& dataset : datasets) {
+            // Score of every model on this dataset
+            vector<pair<string, double>> scores;
+            for (const auto& model : models) {
+                scores.push_back({ model, data[model].at(dataset).at(0).get<double>() });
+            }
+            const map<string, float> datasetRanks = assignRanks(scores);
+            if (ranks.empty()) {
+                ranks = datasetRanks;
+                continue;
+            }
+            // Accumulate the ranks of this dataset
+            for (const auto& [name, value] : datasetRanks) {
+                ranks[name] += value;
+            }
+        }
+        // Turn the accumulated sums into averages
+        for (auto& [name, value] : ranks) {
+            value /= nDatasets;
+        }
+    }
+    // Fills `wtl` with the per-model win/tie/loss counters obtained by
+    // comparing every model with the control model on each dataset.
+    // A dataset where the model scores below the control adds to `win`, an
+    // equal score adds to `tie` and a higher score adds to `loss`
+    // (NOTE(review): so the counters read from the control model's side - confirm).
+    void Statistics::computeWTL()
+    {
+        // Compute the WTL matrix
+        for (int i = 0; i < nModels; ++i) {
+            wtl[i] = { 0, 0, 0 };
+        }
+        const auto& controlModel = models.at(controlIdx);
+        // Iterate the `datasets` member, as computeRanks does, instead of the
+        // first entry of `data`: JSON object keys are ordered, so that entry
+        // may be the "dateTable" string when every model name sorts after it.
+        for (const auto& dataset : datasets) {
+            double controlValue = data[controlModel].at(dataset).at(0).get<double>();
+            for (int i = 0; i < nModels; ++i) {
+                if (i == controlIdx) {
+                    continue;
+                }
+                double value = data[models[i]].at(dataset).at(0).get<double>();
+                if (value < controlValue) {
+                    wtl[i].win++;
+                } else if (value == controlValue) {
+                    wtl[i].tie++;
+                } else {
+                    wtl[i].loss++;
+                }
+            }
+        }
+    }
+
+    // Runs the post-hoc Holm test of every model against the control model
+    // and prints one row per model with its adjusted p-value, average rank,
+    // win/tie/loss counters and whether H0 is accepted at `significance`.
+    // friedmanResult only selects the color of the report frame.
+    // Calls fit() first when the statistics have not been computed yet.
+    void Statistics::postHocHolmTest(bool friedmanResult)
+    {
+        if (!fitted) {
+            fit();
+        }
+        // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
+        // Post-hoc Holm test
+        // Calculate the p-value for the models paired with the control model
+        map<int, double> stats; // p-value of each model paired with the control model
+        boost::math::normal dist(0.0, 1.0);
+        // Denominator of the z statistic: sqrt(k * (k + 1) / (6 * N))
+        double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
+        for (int i = 0; i < nModels; i++) {
+            if (i == controlIdx) {
+                stats[i] = 0.0;
+                continue;
+            }
+            double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
+            // Two-sided p-value from the standard normal distribution
+            double p_value = (long double)2 * (1 - cdf(dist, z));
+            stats[i] = p_value;
+        }
+        // Sort the models by p-value
+        vector<pair<int, double>> statsOrder;
+        for (const auto& stat : stats) {
+            statsOrder.push_back({ stat.first, stat.second });
+        }
+        sort(statsOrder.begin(), statsOrder.end(), [](const pair<int, double>& a, const pair<int, double>& b) {
+            return a.second < b.second;
+            });

+        // Holm adjustment
+        // Each p-value is multiplied by (nModels - position), capped at 1 and
+        // never allowed to fall below the previous adjusted value
+        for (int i = 0; i < statsOrder.size(); ++i) {
+            auto item = statsOrder.at(i);
+            double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
+            double p_value = min((double)1.0, item.second * (nModels - i));
+            p_value = max(before, p_value);
+            statsOrder[i] = { item.first, p_value };
+        }
+        auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
+        cout << color;
+        cout << "  *************************************************************************************************************" << endl;
+        cout << "  Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
+        cout << "  Control model: " << models[controlIdx] << endl;
+        cout << "  Model        p-value      rank      win tie loss Status" << endl;
+        cout << "  ============ ============ ========= === === ==== =============" << endl;
+        // sort ranks from lowest to highest
+        vector<pair<string, float>> ranksOrder;
+        for (const auto& rank : ranks) {
+            ranksOrder.push_back({ rank.first, rank.second });
+        }
+        sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, float>& a, const pair<string, float>& b) {
+            return a.second < b.second;
+            });
+        for (const auto& item : ranksOrder) {
+            // The control model is not compared with itself
+            if (item.first == models.at(controlIdx)) {
+                continue;
+            }
+            // Position of this model in `models`, the key used by stats and wtl
+            auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
+            double pvalue = 0.0;
+            for (const auto& stat : statsOrder) {
+                if (stat.first == idx) {
+                    pvalue = stat.second;
+                }
+            }
+            auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
+            auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
+            auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
+            cout << "  " << colorStatus << left << setw(12) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second;
+            cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss;
+            cout << " " << status << textStatus << endl;
+        }
+        cout << color << "  *************************************************************************************************************" << endl;
+        cout << Colors::RESET();
+    }
+    // Runs the Friedman test on the average ranks and prints the statistic,
+    // the critical chi-square value and the p-value.
+    // Returns true when H0 is rejected (p-value below `significance`) and
+    // false otherwise. Calls fit() first when needed.
+    bool Statistics::friedmanTest()
+    {
+        if (!fitted) {
+            fit();
+        }
+        // Friedman test
+        // Calculate the Friedman statistic
+        cout << Colors::BLUE() << endl;
+        cout << "***************************************************************************************************************" << endl;
+        cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
+        double degreesOfFreedom = nModels - 1.0;
+        // Sum of the squared average ranks of the models
+        double sumSquared = 0;
+        for (const auto& rank : ranks) {
+            sumSquared += pow(rank.second, 2);
+        }
+        // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
+        double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
+        cout << "Friedman statistic: " << friedmanQ << endl;
+        // Calculate the critical value
+        boost::math::chi_squared chiSquared(degreesOfFreedom);
+        // Upper-tail probability of the statistic under the chi-square distribution
+        long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
+        double criticalValue = quantile(chiSquared, 1 - significance);
+        std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
+            << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
+        cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl;
+        bool result;
+        if (p_value < significance) {
+            cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl;
+            result = true;
+        } else {
+            cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl;
+            result = false;
+        }
+        cout << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << endl;
+        return result;
+    }
+} // namespace platform
--- a/src/Platform/Statistics.h
+++ b/src/Platform/Statistics.h
@@ -0,0 +1,37 @@
+#ifndef STATISTICS_H
+#define STATISTICS_H
+#include <iostream>
+#include <vector>
+#include <nlohmann/json.hpp>
+
+using namespace std;
+using json = nlohmann::json;
+
+namespace platform {
+    struct WTL { // Plain aggregate of three integer counters; Statistics stores these as the values of its `wtl` map
+        int win; // presumably the number of wins — TODO confirm against computeWTL() in Statistics.cc
+        int tie; // presumably the number of ties — TODO confirm
+        int loss; // presumably the number of losses — TODO confirm
+    };
+    class Statistics { // Statistical comparison of several models over several datasets: exposes a Friedman test and a post-hoc Holm test
+    public:
+        Statistics(vector<string>& models, vector<string>& datasets, json data, double significance = 0.05); // models/datasets: name lists; data: JSON payload taken by value (schema not visible here — see Statistics.cc); significance: alpha level, defaults to 0.05
+        bool friedmanTest(); // NOTE(review): the implementation shown in this diff appears to return true when H0 is rejected (p-value < significance) and false otherwise — confirm
+        void postHocHolmTest(bool friedmanResult); // receives the outcome of friedmanTest(); how the flag is used is defined in Statistics.cc
+    private:
+        void fit(); // presumably one-time preparation guarded by `fitted` — TODO confirm
+        void computeRanks(); // presumably fills `ranks` — TODO confirm
+        void computeWTL(); // presumably fills `wtl` — TODO confirm
+        vector<string> models; // model names, held by value
+        vector<string> datasets; // dataset names, held by value
+        json data; // JSON payload, held by value
+        double significance; // alpha level; the Friedman code in this diff compares the p-value against it
+        bool fitted = false; // starts false; presumably set once fit() has run — TODO confirm
+        int nModels = 0; // starts at 0; the Friedman code uses nModels - 1 as degrees of freedom
+        int nDatasets = 0; // starts at 0; enters the Friedman statistic as the dataset count
+        int controlIdx = 0; // starts at 0; presumably the index of the control model for the post-hoc test — TODO confirm
+        map<int, WTL> wtl; // WTL counters keyed by int (presumably a model index) — TODO confirm
+        map<string, float> ranks; // one rank value per string key (presumably a model name); the Friedman code sums the squares of these values
+    };
+}
+#endif // !STATISTICS_H
--- a/src/Platform/Symbols.h
+++ b/src/Platform/Symbols.h
@@ -0,0 +1,18 @@
+#ifndef SYMBOLS_H
+#define SYMBOLS_H
+#include <string>
+using namespace std;
+namespace platform {
+    class Symbols { // Named string constants, each holding a single Unicode symbol written as a \u escape
+    public:
+        inline static const string check_mark{ "\u2714" }; // U+2714 HEAVY CHECK MARK
+        inline static const string exclamation{ "\u2757" }; // U+2757 HEAVY EXCLAMATION MARK SYMBOL
+        inline static const string black_star{ "\u2605" }; // U+2605 BLACK STAR
+        inline static const string cross{ "\u2717" }; // U+2717 BALLOT X
+        inline static const string upward_arrow{ "\u27B6" }; // U+27B6 BLACK-FEATHERED NORTH EAST ARROW
+        inline static const string down_arrow{ "\u27B4" }; // U+27B4 BLACK-FEATHERED SOUTH EAST ARROW
+        inline static const string equal_best{ check_mark }; // copy of check_mark; must stay declared after it so the source is already initialized
+        inline static const string better_best{ black_star }; // copy of black_star; must stay declared after it for the same reason
+    };
+}
+#endif // !SYMBOLS_H
--- a/src/Platform/best.cc
+++ b/src/Platform/best.cc
@@ -2,22 +2,28 @@
 #include <argparse/argparse.hpp>
 #include "Paths.h"
 #include "BestResults.h"
+#include "Colors.h"

 using namespace std;

 argparse::ArgumentParser manageArguments(int argc, char** argv)
 {
    argparse::ArgumentParser program("best");
-    program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)");
-    program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
+    program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model) (any for all models)");
+    program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
    program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
    program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
+    program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
    try {
        program.parse_args(argc, argv);
        auto model = program.get<string>("model");
        auto score = program.get<string>("score");
        auto build = program.get<bool>("build");
        auto report = program.get<bool>("report");
+        auto friedman = program.get<bool>("friedman");
+        if (model == "" || score == "") {
+            throw runtime_error("Model and score name must be supplied");
+        }
    }
    catch (const exception& err) {
        cerr << err.what() << endl;
@@ -34,16 +40,32 @@ int main(int argc, char** argv)
    auto score = program.get<string>("score");
    auto build = program.get<bool>("build");
    auto report = program.get<bool>("report");
-    if (!report && !build) {
-        cout << "Either build, report or both, have to be selected to do anything!" << endl;
+    auto friedman = program.get<bool>("friedman");
+    if (friedman && model != "any") {
+        cerr << "Friedman test can only be used with all models" << endl;
+        cerr << program;
        exit(1);
    }
-    auto results = platform::BestResults(platform::Paths::results(), model, score);
+    if (!report && !build) {
+        cerr << "Either build, report or both, have to be selected to do anything!" << endl;
+        cerr << program;
+        exit(1);
+    }
+    auto results = platform::BestResults(platform::Paths::results(), score, model, friedman);
    if (build) {
-        results.build();
+        if (model == "any") {
+            results.buildAll();
+        } else {
+            string fileName = results.build();
+            cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl;
+        }
    }
    if (report) {
-        results.report();
+        if (model == "any") {
+            results.reportAll();
+        } else {
+            results.reportSingle();
+        }
    }
    return 0;
 }
Author	SHA1	Message	Date
Ricardo Montañana	926de2bebd	Add boost info to README	2023-09-28 09:44:33 +02:00
Ricardo Montañana	71704e3547	Enhance output info in Statistics	2023-09-28 01:27:18 +02:00
Ricardo Montañana	3b06534327	Remove duplicated code in BestResults	2023-09-28 00:59:34 +02:00
Ricardo Montañana	ac89a451e3	Duplicate statistics tests in class	2023-09-28 00:45:15 +02:00
Ricardo Montañana	00c6cf663b	Fix order of output in posthoc	2023-09-27 19:11:47 +02:00
Ricardo Montañana	5043c12be8	Complete posthoc with Holm adjust	2023-09-27 18:34:16 +02:00
Ricardo Montañana	11320e2cc7	Complete friedman test as in exreport	2023-09-27 12:36:03 +02:00
Ricardo Montañana	ce66483b65	Update boost version requirement for Linux	2023-09-26 14:12:53 +02:00
Ricardo Montañana	cab8e14b2d	Add friedman hyperparameter	2023-09-26 11:26:59 +02:00
Ricardo Montañana	f0d0abe891	Add boost library link to linux build	2023-09-26 01:07:50 +02:00
Ricardo Montañana	dcba146e12	Begin adding Friedman test to BestResults	2023-09-26 01:04:59 +02:00
Ricardo Montañana	3ea0285119	Fix ranks to match friedman test ranks	2023-09-25 18:38:12 +02:00
Ricardo Montañana Gómez	e3888e1503	Merge pull request 'bestResults' (#9 ) from bestResults into main Reviewed-on: https://gitea.rmontanana.es:3000/rmontanana/BayesNet/pulls/9 Add best results management, build, report, build all & report all	2023-09-25 12:02:17 +00:00
Ricardo Montañana	06de13df98	Add date/time to header of report best	2023-09-25 10:04:53 +02:00
Ricardo Montañana	de4fa6a04f	Add color to totals	2023-09-23 10:30:39 +02:00
Ricardo Montañana	3a7bf4e672	Fix ranking order mistake	2023-09-23 01:33:23 +02:00
Ricardo Montañana	cd0bc02a74	Add report/build all with totals and ranks	2023-09-23 01:14:02 +02:00
Ricardo Montañana	c8597a794e	Begin report all models	2023-09-22 18:13:32 +02:00
Ricardo Montañana	b30416364d	Fix mistake in best results file name	2023-09-22 14:14:39 +02:00
Ricardo Montañana	3a16589220	Add best config for debug in vscode	2023-09-22 01:04:36 +02:00
Ricardo Montañana	c4f9187e2a	Complete best build and report	2023-09-22 01:03:55 +02:00
Ricardo Montañana	c4d0a5b4e6	Split Result from Results	2023-09-21 23:30:17 +02:00