From dcba146e12d3ff61e3d324a9251d02666bca35f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Tue, 26 Sep 2023 01:04:59 +0200
Subject: [PATCH 01/11] Begin adding Friedman test to BestResults

---
 CMakeLists.txt              | 11 ++++++
 src/Platform/BestResults.cc | 68 ++++++++++++++++++++++++++++++++++---
 src/Platform/CMakeLists.txt |  1 +
 3 files changed, 75 insertions(+), 5 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3481dec..9eafe6e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,17 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 option(ENABLE_CLANG_TIDY "Enable to add clang tidy."              OFF)
 option(ENABLE_TESTING "Unit testing build"                        OFF)
 option(CODE_COVERAGE "Collect coverage from test library"         OFF)
+
+# Boost Library
+set(Boost_USE_STATIC_LIBS OFF) 
+set(Boost_USE_MULTITHREADED ON)  
+set(Boost_USE_STATIC_RUNTIME OFF) 
+find_package(Boost 1.81.0 REQUIRED) 
+if(Boost_FOUND)
+    message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
+    include_directories(${Boost_INCLUDE_DIRS}) 
+endif()
+
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
 # CMakes modules
 # --------------
diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc
index 6d8505e..232bce8 100644
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -6,7 +6,7 @@
 #include "BestResults.h"
 #include "Result.h"
 #include "Colors.h"
-
+#include <boost/math/distributions/chi_squared.hpp>
 
 
 namespace fs = std::filesystem;
@@ -228,6 +228,45 @@ namespace platform {
         }
         return ranks;
     }
+    void friedmanTest(int nModels, int nDatasets, map<string, float> ranks, double significance = 0.05)
+    {
+        // Friedman test
+        // Calculate the Friedman statistic
+        double sum = 0.0;
+        if (nModels < 3 || nDatasets < 3) {
+            cout << "Can't make the Friedman test with less than 3 models and/or less than 3 datasets." << endl;
+            return;
+        }
+        cout << Colors::BLUE() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << endl;
+        cout << "N datasets: " << nDatasets << endl;
+        cout << "N models: " << nModels << endl;
+        cout << "Significance: " << significance << endl;
+        cout << "Nº Ranks: " << ranks.size() << endl;
+        for (const auto& rank : ranks) {
+            sum += rank.second;
+        }
+        double degreesOfFreedom = nModels - 1.0;
+        double sumSquared = 0;
+        for (const auto& rank : ranks) {
+            sumSquared += rank.second * rank.second;
+        }
+        cout << "Sum Squared: " << sumSquared << endl;
+        cout << "Degrees of freedom: " << degreesOfFreedom << endl;
+        double friedman = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1);
+        cout << "Friedman statistic: " << friedman << endl;
+        // Calculate the critical value
+        boost::math::chi_squared chiSquared(degreesOfFreedom);
+        long double p_value = (long double)1.0 - cdf(chiSquared, friedman);
+        double criticalValue = quantile(chiSquared, 1 - significance);
+        std::cout << "Critical Chi-Square Value for df=" << degreesOfFreedom
+            << " and alpha=" << significance << ": " << criticalValue << std::endl;
+        cout << "p-value: " << scientific << p_value << endl;
+        if (friedman > criticalValue) {
+            cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl;
+        } else {
+            cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl;
+        }
+    }
     void BestResults::printTableResults(set<string> models, json table)
     {
         cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
@@ -245,6 +284,8 @@ namespace platform {
         auto i = 0;
         bool odd = true;
         map<string, double> totals;
+        map<string, float> ranks;
+        map<string, float> ranksTotal;
         for (const auto& model : models) {
             totals[model] = 0.0;
         }
@@ -264,7 +305,14 @@ namespace platform {
                 ranksOrder.push_back({ model, value });
             }
             // Assign the ranks
-            auto ranks = assignRanks(ranksOrder);
+            ranks = assignRanks(ranksOrder);
+            if (ranksTotal.size() == 0) {
+                ranksTotal = ranks;
+            } else {
+                for (const auto& rank : ranks) {
+                    ranksTotal[rank.first] += rank.second;
+                }
+            }
             // Print the row with red colors on max values
             for (const auto& model : models) {
                 string efectiveColor = color;
@@ -300,20 +348,30 @@ namespace platform {
         // Output the averaged ranks
         cout << endl;
         int min = 1;
-        for (const auto& rank : ranks) {
+        for (const auto& rank : ranksTotal) {
             if (rank.second < min) {
                 min = rank.second;
             }
         }
+        cout << Colors::BLUE() << setw(30) << "    Ranks....................";
+        for (const auto& model : models) {
+            string efectiveColor = Colors::BLUE();
+            if (ranksTotal[model] == min) {
+                efectiveColor = Colors::RED();
+            }
+            cout << efectiveColor << setw(12) << setprecision(4) << fixed << (double)ranksTotal[model] << " ";
+        }
+        cout << endl;
         cout << Colors::GREEN() << setw(30) << "    Averaged ranks...........";
         for (const auto& model : models) {
             string efectiveColor = Colors::GREEN();
-            if (ranks[model] == min) {
+            if (ranksTotal[model] == min) {
                 efectiveColor = Colors::RED();
             }
-            cout << efectiveColor << setw(12) << setprecision(10) << fixed << (double)ranks[model] / (double)origin.size() << " ";
+            cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] / (double)origin.size() << " ";
         }
         cout << endl;
+        friedmanTest(models.size(), table.begin().value().size(), ranksTotal, 0.05);
     }
     void BestResults::reportAll()
     {
diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt
index b40a311..eca6b72 100644
--- a/src/Platform/CMakeLists.txt
+++ b/src/Platform/CMakeLists.txt
@@ -15,5 +15,6 @@ if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
     target_link_libraries(best stdc++fs)
 else()
     target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
+    target_link_libraries(best Boost::boost)
 endif()
 target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")
\ No newline at end of file
-- 
2.45.2


From f0d0abe8919c548a80c10458e3feaca67f82dcdc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Tue, 26 Sep 2023 01:07:50 +0200
Subject: [PATCH 02/11] Add Boost library link to Linux build

---
 src/Platform/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt
index eca6b72..db32852 100644
--- a/src/Platform/CMakeLists.txt
+++ b/src/Platform/CMakeLists.txt
@@ -12,7 +12,7 @@ add_executable(best best.cc BestResults.cc Result.cc)
 target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
 if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
     target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
-    target_link_libraries(best stdc++fs)
+    target_link_libraries(best Boost::boost stdc++fs)
 else()
     target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
     target_link_libraries(best Boost::boost)
-- 
2.45.2


From cab8e14b2d09c6e06f2e03063ec43c49b8b3d4d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Tue, 26 Sep 2023 11:26:59 +0200
Subject: [PATCH 03/11] Add --friedman command-line option

---
 src/Platform/BestResults.cc | 25 ++++++++++++-------------
 src/Platform/BestResults.h  |  3 ++-
 src/Platform/best.cc        | 10 +++++++++-
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc
index 232bce8..94ca802 100644
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -237,11 +237,9 @@ namespace platform {
             cout << "Can't make the Friedman test with less than 3 models and/or less than 3 datasets." << endl;
             return;
         }
-        cout << Colors::BLUE() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << endl;
-        cout << "N datasets: " << nDatasets << endl;
-        cout << "N models: " << nModels << endl;
-        cout << "Significance: " << significance << endl;
-        cout << "Nº Ranks: " << ranks.size() << endl;
+        cout << Colors::BLUE() << endl;
+        cout << "*************************************************************************************" << endl;
+        cout << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << endl;
         for (const auto& rank : ranks) {
             sum += rank.second;
         }
@@ -250,22 +248,21 @@ namespace platform {
         for (const auto& rank : ranks) {
             sumSquared += rank.second * rank.second;
         }
-        cout << "Sum Squared: " << sumSquared << endl;
-        cout << "Degrees of freedom: " << degreesOfFreedom << endl;
-        double friedman = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1);
-        cout << "Friedman statistic: " << friedman << endl;
+        double friedmanQ = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1);
+        cout << "Friedman statistic: " << friedmanQ << endl;
         // Calculate the critical value
         boost::math::chi_squared chiSquared(degreesOfFreedom);
-        long double p_value = (long double)1.0 - cdf(chiSquared, friedman);
+        long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
         double criticalValue = quantile(chiSquared, 1 - significance);
-        std::cout << "Critical Chi-Square Value for df=" << degreesOfFreedom
+        std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
             << " and alpha=" << significance << ": " << criticalValue << std::endl;
         cout << "p-value: " << scientific << p_value << endl;
-        if (friedman > criticalValue) {
+        if (friedmanQ > criticalValue) {
             cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl;
         } else {
             cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl;
         }
+        cout << Colors::BLUE() << "*************************************************************************************" << endl;
     }
     void BestResults::printTableResults(set<string> models, json table)
     {
@@ -371,7 +368,9 @@ namespace platform {
             cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] / (double)origin.size() << " ";
         }
         cout << endl;
-        friedmanTest(models.size(), table.begin().value().size(), ranksTotal, 0.05);
+        if (friedman) {
+            friedmanTest(models.size(), table.begin().value().size(), ranksTotal, 0.05);
+        }
     }
     void BestResults::reportAll()
     {
diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h
index 3ba6b9d..5495222 100644
--- a/src/Platform/BestResults.h
+++ b/src/Platform/BestResults.h
@@ -8,7 +8,7 @@ using json = nlohmann::json;
 namespace platform {
     class BestResults {
     public:
-        explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {}
+        explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
         string build();
         void reportSingle();
         void reportAll();
@@ -23,6 +23,7 @@ namespace platform {
         string path;
         string score;
         string model;
+        bool friedman;
     };
 }
 #endif //BESTRESULTS_H
\ No newline at end of file
diff --git a/src/Platform/best.cc b/src/Platform/best.cc
index 6e6d432..dcd8d9b 100644
--- a/src/Platform/best.cc
+++ b/src/Platform/best.cc
@@ -13,12 +13,14 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
     program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
     program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
     program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
+    program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
     try {
         program.parse_args(argc, argv);
         auto model = program.get<string>("model");
         auto score = program.get<string>("score");
         auto build = program.get<bool>("build");
         auto report = program.get<bool>("report");
+        auto friedman = program.get<bool>("friedman");
         if (model == "" || score == "") {
             throw runtime_error("Model and score name must be supplied");
         }
@@ -38,12 +40,18 @@ int main(int argc, char** argv)
     auto score = program.get<string>("score");
     auto build = program.get<bool>("build");
     auto report = program.get<bool>("report");
+    auto friedman = program.get<bool>("friedman");
+    if (friedman && model != "any") {
+        cerr << "Friedman test can only be used with all models" << endl;
+        cerr << program;
+        exit(1);
+    }
     if (!report && !build) {
         cerr << "Either build, report or both, have to be selected to do anything!" << endl;
         cerr << program;
         exit(1);
     }
-    auto results = platform::BestResults(platform::Paths::results(), score, model);
+    auto results = platform::BestResults(platform::Paths::results(), score, model, friedman);
     if (build) {
         if (model == "any") {
             results.buildAll();
-- 
2.45.2


From ce66483b652a4fd36c7e7d40e5579beb8aa6d036 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Tue, 26 Sep 2023 14:12:53 +0200
Subject: [PATCH 04/11] Lower Boost version requirement to 1.78.0 for Linux

---
 CMakeLists.txt              | 2 +-
 src/Platform/BestResults.cc | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9eafe6e..eb77a77 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,7 +35,7 @@ option(CODE_COVERAGE "Collect coverage from test library"         OFF)
 set(Boost_USE_STATIC_LIBS OFF) 
 set(Boost_USE_MULTITHREADED ON)  
 set(Boost_USE_STATIC_RUNTIME OFF) 
-find_package(Boost 1.81.0 REQUIRED) 
+find_package(Boost 1.78.0 REQUIRED) 
 if(Boost_FOUND)
     message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
     include_directories(${Boost_INCLUDE_DIRS}) 
diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc
index 94ca802..673378c 100644
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -239,7 +239,7 @@ namespace platform {
         }
         cout << Colors::BLUE() << endl;
         cout << "*************************************************************************************" << endl;
-        cout << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << endl;
+        cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
         for (const auto& rank : ranks) {
             sum += rank.second;
         }
@@ -255,7 +255,7 @@ namespace platform {
         long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
         double criticalValue = quantile(chiSquared, 1 - significance);
         std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
-            << " and alpha=" << significance << ": " << criticalValue << std::endl;
+            << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
         cout << "p-value: " << scientific << p_value << endl;
         if (friedmanQ > criticalValue) {
             cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl;
-- 
2.45.2


From 11320e2cc74d274b21e9b39fcd9f33ed6fcf4b4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Wed, 27 Sep 2023 12:36:03 +0200
Subject: [PATCH 05/11] Complete Friedman test as in exreport

---
 src/Platform/BestResults.cc | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc
index 673378c..b153085 100644
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -245,10 +245,18 @@ namespace platform {
         }
         double degreesOfFreedom = nModels - 1.0;
         double sumSquared = 0;
+        // For original Friedman test
+        // for (const auto& rank : ranks) {
+        //     sumSquared += rank.second * rank.second;
+        // }
         for (const auto& rank : ranks) {
-            sumSquared += rank.second * rank.second;
+            sumSquared += pow(rank.second / nDatasets, 2);
         }
-        double friedmanQ = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1);
+        cout << "Sum of ranks: " << sum << endl;
+        cout << "Sum of squared ranks: " << sumSquared << endl;
+        // (original) double friedmanQ = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1);
+        // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
+        double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
         cout << "Friedman statistic: " << friedmanQ << endl;
         // Calculate the critical value
         boost::math::chi_squared chiSquared(degreesOfFreedom);
@@ -257,7 +265,8 @@ namespace platform {
         std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
             << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
         cout << "p-value: " << scientific << p_value << endl;
-        if (friedmanQ > criticalValue) {
+        //if (friedmanQ > criticalValue) { (original)
+        if (p_value < significance) {
             cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl;
         } else {
             cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl;
@@ -318,7 +327,8 @@ namespace platform {
                     efectiveColor = Colors::RED();
                 }
                 totals[model] += value;
-                cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
+                // cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
+                cout << efectiveColor << setw(12) << setprecision(10) << fixed << ranks[model] << " ";
             }
             cout << endl;
             odd = !odd;
-- 
2.45.2


From 5043c12be86183918aa5f6bfe9990458e9d46190 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Wed, 27 Sep 2023 18:34:16 +0200
Subject: [PATCH 06/11] Complete post-hoc test with Holm adjustment

---
 src/Platform/BestResults.cc | 139 ++++++++++++++++++++++++++++++------
 1 file changed, 118 insertions(+), 21 deletions(-)

diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc
index b153085..80f5136 100644
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -7,6 +7,8 @@
 #include "Result.h"
 #include "Colors.h"
 #include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/distributions/normal.hpp>
+
 
 
 namespace fs = std::filesystem;
@@ -24,6 +26,11 @@ std::string ftime_to_string(TP tp)
     buffer << std::put_time(gmt, "%Y-%m-%d %H:%M");
     return buffer.str();
 }
+struct WTL {
+    int win;
+    int tie;
+    int loss;
+};
 
 namespace platform {
 
@@ -228,33 +235,110 @@ namespace platform {
         }
         return ranks;
     }
-    void friedmanTest(int nModels, int nDatasets, map<string, float> ranks, double significance = 0.05)
+
+    map<int, WTL> computeWTL(int controlIdx, vector<string> models, json table)
+    {
+        // Compute the WTL matrix
+        map<int, WTL> wtl;
+        int nModels = models.size();
+        for (int i = 0; i < nModels; ++i) {
+            wtl[i] = { 0, 0, 0 };
+        }
+        json origin = table.begin().value();
+        for (auto const& item : origin.items()) {
+            auto controlModel = models.at(controlIdx);
+            double controlValue = table[controlModel].at(item.key()).at(0).get<double>();
+            for (int i = 0; i < nModels; ++i) {
+                if (i == controlIdx) {
+                    continue;
+                }
+                double value = table[models[i]].at(item.key()).at(0).get<double>();
+                if (value < controlValue) {
+                    wtl[i].win++;
+                } else if (value == controlValue) {
+                    wtl[i].tie++;
+                } else {
+                    wtl[i].loss++;
+                }
+            }
+        }
+        return wtl;
+    }
+
+    void postHocHolm(int controlIdx, vector<string> models, int nDatasets, map<string, float> ranks, double significance, map<int, WTL> wtl)
+    {
+        // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
+        // Post-hoc Holm test
+        // Calculate the p-value for the models paired with the control model
+        int nModels = models.size();
+        map<int, double> stats; // p-value of each model paired with the control model
+        boost::math::normal dist(0.0, 1.0);
+        double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
+        for (int i = 0; i < nModels; i++) {
+            if (i == controlIdx) {
+                stats[i] = 0.0;
+                continue;
+            }
+            double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
+            double p_value = (long double)2 * (1 - cdf(dist, z));
+            stats[i] = p_value;
+        }
+        // Sort the models by p-value
+        vector<pair<int, double>> statsOrder;
+        for (const auto& stat : stats) {
+            statsOrder.push_back({ stat.first, stat.second });
+        }
+        sort(statsOrder.begin(), statsOrder.end(), [](const pair<int, double>& a, const pair<int, double>& b) {
+            return a.second < b.second;
+            });
+
+        // Holm adjustment
+        for (int i = 0; i < statsOrder.size(); ++i) {
+            auto item = statsOrder.at(i);
+            double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
+            double p_value = min((double)1.0, item.second * (nModels - i));
+            p_value = max(before, p_value);
+            statsOrder[i] = { item.first, p_value };
+        }
+        cout << Colors::CYAN();
+        cout << "  *************************************************************************************************************" << endl;
+        cout << "  Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
+        cout << "  Control model: " << models[controlIdx] << endl;
+        cout << "  Model        p-value      rank      win tie loss" << endl;
+        cout << "  ============ ============ ========= === === ====" << endl;
+        for (const auto& item : ranks) {
+            if (item.first == models.at(controlIdx)) {
+                continue;
+            }
+            auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
+            double pvalue = 0.0;
+            for (const auto& stat : statsOrder) {
+                if (stat.first == idx) {
+                    pvalue = stat.second;
+                }
+            }
+            cout << "  " << left << setw(12) << item.first << " " << setprecision(10) << fixed << pvalue << setprecision(7) << " " << item.second;
+            cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss << endl;
+        }
+        cout << "  *************************************************************************************************************" << endl;
+        cout << Colors::RESET();
+    }
+    bool friedmanTest(vector<string> models, int nDatasets, map<string, float> ranks, double significance = 0.05)
     {
         // Friedman test
         // Calculate the Friedman statistic
-        double sum = 0.0;
+        int nModels = models.size();
         if (nModels < 3 || nDatasets < 3) {
-            cout << "Can't make the Friedman test with less than 3 models and/or less than 3 datasets." << endl;
-            return;
+            throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
         }
         cout << Colors::BLUE() << endl;
-        cout << "*************************************************************************************" << endl;
+        cout << "***************************************************************************************************************" << endl;
         cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
-        for (const auto& rank : ranks) {
-            sum += rank.second;
-        }
         double degreesOfFreedom = nModels - 1.0;
         double sumSquared = 0;
-        // For original Friedman test
-        // for (const auto& rank : ranks) {
-        //     sumSquared += rank.second * rank.second;
-        // }
         for (const auto& rank : ranks) {
-            sumSquared += pow(rank.second / nDatasets, 2);
+            sumSquared += pow(rank.second, 2);
         }
-        cout << "Sum of ranks: " << sum << endl;
-        cout << "Sum of squared ranks: " << sumSquared << endl;
-        // (original) double friedmanQ = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1);
         // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
         double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
         cout << "Friedman statistic: " << friedmanQ << endl;
@@ -264,14 +348,18 @@ namespace platform {
         double criticalValue = quantile(chiSquared, 1 - significance);
         std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
             << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
-        cout << "p-value: " << scientific << p_value << endl;
+        cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl;
         //if (friedmanQ > criticalValue) { (original)
+        bool result;
         if (p_value < significance) {
             cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl;
+            result = true;
         } else {
             cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl;
+            result = false;
         }
-        cout << Colors::BLUE() << "*************************************************************************************" << endl;
+        cout << Colors::BLUE() << "***************************************************************************************************************" << endl;
+        return result;
     }
     void BestResults::printTableResults(set<string> models, json table)
     {
@@ -292,6 +380,7 @@ namespace platform {
         map<string, double> totals;
         map<string, float> ranks;
         map<string, float> ranksTotal;
+        int nDatasets = table.begin().value().size();
         for (const auto& model : models) {
             totals[model] = 0.0;
         }
@@ -355,10 +444,11 @@ namespace platform {
         // Output the averaged ranks
         cout << endl;
         int min = 1;
-        for (const auto& rank : ranksTotal) {
+        for (auto& rank : ranksTotal) {
             if (rank.second < min) {
                 min = rank.second;
             }
+            rank.second /= nDatasets;
         }
         cout << Colors::BLUE() << setw(30) << "    Ranks....................";
         for (const auto& model : models) {
@@ -375,11 +465,18 @@ namespace platform {
             if (ranksTotal[model] == min) {
                 efectiveColor = Colors::RED();
             }
-            cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] / (double)origin.size() << " ";
+            cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] << " ";
         }
         cout << endl;
         if (friedman) {
-            friedmanTest(models.size(), table.begin().value().size(), ranksTotal, 0.05);
+            double significance = 0.05;
+            vector<string> vModels(models.begin(), models.end());
+            if (friedmanTest(vModels, nDatasets, ranksTotal, significance)) {
+                // Stablish the control model as the one with the lowest averaged rank
+                int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
+                auto wtl = computeWTL(controlIdx, vModels, table);
+                postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl);
+            }
         }
     }
     void BestResults::reportAll()
-- 
2.45.2


From 00c6cf663be8b7130448c23f00ed11a2354e8d13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Wed, 27 Sep 2023 19:11:47 +0200
Subject: [PATCH 07/11] Fix order of output in post-hoc test

---
 src/Platform/BestResults.cc | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc
index 80f5136..1ed6de6 100644
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -306,7 +306,15 @@ namespace platform {
         cout << "  Control model: " << models[controlIdx] << endl;
         cout << "  Model        p-value      rank      win tie loss" << endl;
         cout << "  ============ ============ ========= === === ====" << endl;
-        for (const auto& item : ranks) {
+        // sort ranks from lowest to highest
+        vector<pair<string, float>> ranksOrder;
+        for (const auto& rank : ranks) {
+            ranksOrder.push_back({ rank.first, rank.second });
+        }
+        sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, float>& a, const pair<string, float>& b) {
+            return a.second < b.second;
+            });
+        for (const auto& item : ranksOrder) {
             if (item.first == models.at(controlIdx)) {
                 continue;
             }
@@ -349,13 +357,12 @@ namespace platform {
         std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
             << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
         cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl;
-        //if (friedmanQ > criticalValue) { (original)
         bool result;
         if (p_value < significance) {
-            cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl;
+            cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl;
             result = true;
         } else {
-            cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl;
+            cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl;
             result = false;
         }
         cout << Colors::BLUE() << "***************************************************************************************************************" << endl;
@@ -416,8 +423,8 @@ namespace platform {
                     efectiveColor = Colors::RED();
                 }
                 totals[model] += value;
-                // cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
-                cout << efectiveColor << setw(12) << setprecision(10) << fixed << ranks[model] << " ";
+                cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
+                // cout << efectiveColor << setw(12) << setprecision(10) << fixed << ranks[model] << " ";
             }
             cout << endl;
             odd = !odd;
@@ -471,12 +478,11 @@ namespace platform {
         if (friedman) {
             double significance = 0.05;
             vector<string> vModels(models.begin(), models.end());
-            if (friedmanTest(vModels, nDatasets, ranksTotal, significance)) {
-                // Stablish the control model as the one with the lowest averaged rank
-                int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
-                auto wtl = computeWTL(controlIdx, vModels, table);
-                postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl);
-            }
+            friedmanTest(vModels, nDatasets, ranksTotal, significance);
+            // Stablish the control model as the one with the lowest averaged rank
+            int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
+            auto wtl = computeWTL(controlIdx, vModels, table);
+            postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl);
         }
     }
     void BestResults::reportAll()
-- 
2.45.2


From ac89a451e3fcddba681ef115565f51f5b67e0d35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Thu, 28 Sep 2023 00:45:15 +0200
Subject: [PATCH 08/11] Duplicate statistics tests in class

---
 src/Platform/BestResults.cc |  13 ++-
 src/Platform/CMakeLists.txt |   2 +-
 src/Platform/Statistics.cc  | 209 ++++++++++++++++++++++++++++++++++++
 src/Platform/Statistics.h   |  37 +++++++
 4 files changed, 258 insertions(+), 3 deletions(-)
 create mode 100644 src/Platform/Statistics.cc
 create mode 100644 src/Platform/Statistics.h

diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc
index 1ed6de6..7aec335 100644
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -6,6 +6,7 @@
 #include "BestResults.h"
 #include "Result.h"
 #include "Colors.h"
+#include "Statistics.h"
 #include <boost/math/distributions/chi_squared.hpp>
 #include <boost/math/distributions/normal.hpp>
 
@@ -475,15 +476,23 @@ namespace platform {
             cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] << " ";
         }
         cout << endl;
+        vector<string> vModels(models.begin(), models.end());
+        vector<string> datasets;
+        for (const auto& dataset : table.begin().value().items()) {
+            datasets.push_back(dataset.key());
+        }
+        double significance = 0.05;
         if (friedman) {
-            double significance = 0.05;
-            vector<string> vModels(models.begin(), models.end());
             friedmanTest(vModels, nDatasets, ranksTotal, significance);
             // Stablish the control model as the one with the lowest averaged rank
             int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
             auto wtl = computeWTL(controlIdx, vModels, table);
             postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl);
         }
+
+        Statistics stats(vModels, datasets, table, significance);
+        stats.friedmanTest();
+        stats.postHocHolmTest();
     }
     void BestResults::reportAll()
     {
diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt
index db32852..edf014a 100644
--- a/src/Platform/CMakeLists.txt
+++ b/src/Platform/CMakeLists.txt
@@ -8,7 +8,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
 add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
 add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
 add_executable(list list.cc platformUtils Datasets.cc)
-add_executable(best best.cc BestResults.cc Result.cc)
+add_executable(best best.cc BestResults.cc Result.cc Statistics.cc)
 target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
 if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
     target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
diff --git a/src/Platform/Statistics.cc b/src/Platform/Statistics.cc
new file mode 100644
index 0000000..33b2f57
--- /dev/null
+++ b/src/Platform/Statistics.cc
@@ -0,0 +1,209 @@
+#include "Statistics.h"
+#include "Colors.h"
+#include <boost/math/distributions/chi_squared.hpp>
+#include <boost/math/distributions/normal.hpp>
+
+namespace platform {
+
+    Statistics::Statistics(vector<string>& models, vector<string>& datasets, json data, double significance) : models(models), datasets(datasets), data(data), significance(significance)
+    {
+        nModels = models.size();
+        nDatasets = datasets.size();
+    };
+
+    void Statistics::fit()
+    {
+        if (nModels < 3 || nDatasets < 3) {
+            cerr << "nModels: " << nModels << endl;
+            cerr << "nDatasets: " << nDatasets << endl;
+            throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
+        }
+        computeRanks();
+        // Set the control model as the one with the lowest average rank
+        controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
+        computeWTL();
+        fitted = true;
+    }
+    map<string, float> assignRanks2(vector<pair<string, double>>& ranksOrder)
+    {
+        // sort the ranksOrder vector by value
+        sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& a, const pair<string, double>& b) {
+            return a.second > b.second;
+            });
+        //Assign ranks to  values and if they are the same they share the same averaged rank
+        map<string, float> ranks;
+        for (int i = 0; i < ranksOrder.size(); i++) {
+            ranks[ranksOrder[i].first] = i + 1.0;
+        }
+        int i = 0;
+        while (i < static_cast<int>(ranksOrder.size())) {
+            int j = i + 1;
+            int sumRanks = ranks[ranksOrder[i].first];
+            while (j < static_cast<int>(ranksOrder.size()) && ranksOrder[i].second == ranksOrder[j].second) {
+                sumRanks += ranks[ranksOrder[j++].first];
+            }
+            if (j > i + 1) {
+                float averageRank = (float)sumRanks / (j - i);
+                for (int k = i; k < j; k++) {
+                    ranks[ranksOrder[k].first] = averageRank;
+                }
+            }
+            i = j;
+        }
+        return ranks;
+    }
+    void Statistics::computeRanks()
+    {
+        map<string, float> ranksLine;
+        for (const auto& dataset : datasets) {
+            vector<pair<string, double>> ranksOrder;
+            for (const auto& model : models) {
+                double value = data[model].at(dataset).at(0).get<double>();
+                ranksOrder.push_back({ model, value });
+            }
+            // Assign the ranks
+            ranksLine = assignRanks2(ranksOrder);
+            if (ranks.size() == 0) {
+                ranks = ranksLine;
+            } else {
+                for (const auto& rank : ranksLine) {
+                    ranks[rank.first] += rank.second;
+                }
+            }
+        }
+        // Average the ranks
+        for (const auto& rank : ranks) {
+            ranks[rank.first] /= nDatasets;
+        }
+    }
+    void Statistics::computeWTL()
+    {
+        // Compute the WTL matrix
+        for (int i = 0; i < nModels; ++i) {
+            wtl[i] = { 0, 0, 0 };
+        }
+        json origin = data.begin().value();
+        for (auto const& item : origin.items()) {
+            auto controlModel = models.at(controlIdx);
+            double controlValue = data[controlModel].at(item.key()).at(0).get<double>();
+            for (int i = 0; i < nModels; ++i) {
+                if (i == controlIdx) {
+                    continue;
+                }
+                double value = data[models[i]].at(item.key()).at(0).get<double>();
+                if (value < controlValue) {
+                    wtl[i].win++;
+                } else if (value == controlValue) {
+                    wtl[i].tie++;
+                } else {
+                    wtl[i].loss++;
+                }
+            }
+        }
+    }
+
+    void Statistics::postHocHolmTest()
+    {
+        if (!fitted) {
+            fit();
+        }
+        // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
+        // Post-hoc Holm test
+        // Calculate the p-value for the models paired with the control model
+        map<int, double> stats; // p-value of each model paired with the control model
+        boost::math::normal dist(0.0, 1.0);
+        double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
+        for (int i = 0; i < nModels; i++) {
+            if (i == controlIdx) {
+                stats[i] = 0.0;
+                continue;
+            }
+            double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
+            double p_value = (long double)2 * (1 - cdf(dist, z));
+            stats[i] = p_value;
+        }
+        // Sort the models by p-value
+        vector<pair<int, double>> statsOrder;
+        for (const auto& stat : stats) {
+            statsOrder.push_back({ stat.first, stat.second });
+        }
+        sort(statsOrder.begin(), statsOrder.end(), [](const pair<int, double>& a, const pair<int, double>& b) {
+            return a.second < b.second;
+            });
+
+        // Holm adjustment
+        for (int i = 0; i < statsOrder.size(); ++i) {
+            auto item = statsOrder.at(i);
+            double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
+            double p_value = min((double)1.0, item.second * (nModels - i));
+            p_value = max(before, p_value);
+            statsOrder[i] = { item.first, p_value };
+        }
+        cout << Colors::MAGENTA();
+        cout << "  *************************************************************************************************************" << endl;
+        cout << "  Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
+        cout << "  Control model: " << models[controlIdx] << endl;
+        cout << "  Model        p-value      rank      win tie loss" << endl;
+        cout << "  ============ ============ ========= === === ====" << endl;
+        // sort ranks from lowest to highest
+        vector<pair<string, float>> ranksOrder;
+        for (const auto& rank : ranks) {
+            ranksOrder.push_back({ rank.first, rank.second });
+        }
+        sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, float>& a, const pair<string, float>& b) {
+            return a.second < b.second;
+            });
+        for (const auto& item : ranksOrder) {
+            if (item.first == models.at(controlIdx)) {
+                continue;
+            }
+            auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
+            double pvalue = 0.0;
+            for (const auto& stat : statsOrder) {
+                if (stat.first == idx) {
+                    pvalue = stat.second;
+                }
+            }
+            cout << "  " << left << setw(12) << item.first << " " << setprecision(10) << fixed << pvalue << setprecision(7) << " " << item.second;
+            cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss << endl;
+        }
+        cout << "  *************************************************************************************************************" << endl;
+        cout << Colors::RESET();
+    }
+    bool Statistics::friedmanTest()
+    {
+        if (!fitted) {
+            fit();
+        }
+        // Friedman test
+        // Calculate the Friedman statistic
+        cout << Colors::BLUE() << endl;
+        cout << "***************************************************************************************************************" << endl;
+        cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
+        double degreesOfFreedom = nModels - 1.0;
+        double sumSquared = 0;
+        for (const auto& rank : ranks) {
+            sumSquared += pow(rank.second, 2);
+        }
+        // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
+        double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
+        cout << "Friedman statistic: " << friedmanQ << endl;
+        // Calculate the critical value
+        boost::math::chi_squared chiSquared(degreesOfFreedom);
+        long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
+        double criticalValue = quantile(chiSquared, 1 - significance);
+        std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
+            << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
+        cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl;
+        bool result;
+        if (p_value < significance) {
+            cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl;
+            result = true;
+        } else {
+            cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl;
+            result = false;
+        }
+        cout << Colors::BLUE() << "***************************************************************************************************************" << endl;
+        return result;
+    }
+} // namespace platform
diff --git a/src/Platform/Statistics.h b/src/Platform/Statistics.h
new file mode 100644
index 0000000..92c8a2a
--- /dev/null
+++ b/src/Platform/Statistics.h
@@ -0,0 +1,37 @@
+#ifndef STATISTICS_H
+#define STATISTICS_H
+#include <iostream>
+#include <vector>
+#include <nlohmann/json.hpp>
+
+using namespace std;
+using json = nlohmann::json;
+
+namespace platform {
+    struct WTL {
+        int win;
+        int tie;
+        int loss;
+    };
+    class Statistics {
+    public:
+        Statistics(vector<string>& models, vector<string>& datasets, json data, double significance = 0.05);
+        bool friedmanTest();
+        void postHocHolmTest();
+    private:
+        void fit();
+        void computeRanks();
+        void computeWTL();
+        vector<string> models;
+        vector<string> datasets;
+        json data;
+        double significance;
+        bool fitted = false;
+        int nModels = 0;
+        int nDatasets = 0;
+        int controlIdx = 0;
+        map<int, WTL> wtl;
+        map<string, float> ranks;
+    };
+}
+#endif // !STATISTICS_H
\ No newline at end of file
-- 
2.45.2


From 3b0653432714e8fbe9edfb076847101c8630bbf5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Thu, 28 Sep 2023 00:59:34 +0200
Subject: [PATCH 09/11] Remove duplicated code in BestResults

---
 src/Platform/BestResults.cc | 246 +++---------------------------------
 src/Platform/BestResults.h  |   6 +-
 src/Platform/Statistics.cc  |   7 +-
 src/Platform/Statistics.h   |   2 +-
 4 files changed, 25 insertions(+), 236 deletions(-)

diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc
index 7aec335..5c41d27 100644
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -7,8 +7,6 @@
 #include "Result.h"
 #include "Colors.h"
 #include "Statistics.h"
-#include <boost/math/distributions/chi_squared.hpp>
-#include <boost/math/distributions/normal.hpp>
 
 
 
@@ -27,12 +25,6 @@ std::string ftime_to_string(TP tp)
     buffer << std::put_time(gmt, "%Y-%m-%d %H:%M");
     return buffer.str();
 }
-struct WTL {
-    int win;
-    int tie;
-    int loss;
-};
-
 namespace platform {
 
     string BestResults::build()
@@ -114,9 +106,10 @@ namespace platform {
         }
         throw invalid_argument("Unable to open result file. [" + fileName + "]");
     }
-    set<string> BestResults::getModels()
+    vector<string> BestResults::getModels()
     {
         set<string> models;
+        vector<string> result;
         auto files = loadResultFiles();
         if (files.size() == 0) {
             cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
@@ -129,7 +122,8 @@ namespace platform {
             // add the model to the vector of models
             models.insert(fileModel);
         }
-        return models;
+        result = vector<string>(models.begin(), models.end());
+        return result;
     }
 
     void BestResults::buildAll()
@@ -171,7 +165,7 @@ namespace platform {
             odd = !odd;
         }
     }
-    json BestResults::buildTableResults(set<string> models)
+    json BestResults::buildTableResults(vector<string> models)
     {
         int numberOfDatasets = 0;
         bool first = true;
@@ -208,168 +202,8 @@ namespace platform {
         table["dateTable"] = ftime_to_string(maxDate);
         return table;
     }
-    map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder)
-    {
-        // sort the ranksOrder vector by value
-        sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& a, const pair<string, double>& b) {
-            return a.second > b.second;
-            });
-        //Assign ranks to  values and if they are the same they share the same averaged rank
-        map<string, float> ranks;
-        for (int i = 0; i < ranksOrder.size(); i++) {
-            ranks[ranksOrder[i].first] = i + 1.0;
-        }
-        int i = 0;
-        while (i < static_cast<int>(ranksOrder.size())) {
-            int j = i + 1;
-            int sumRanks = ranks[ranksOrder[i].first];
-            while (j < static_cast<int>(ranksOrder.size()) && ranksOrder[i].second == ranksOrder[j].second) {
-                sumRanks += ranks[ranksOrder[j++].first];
-            }
-            if (j > i + 1) {
-                float averageRank = (float)sumRanks / (j - i);
-                for (int k = i; k < j; k++) {
-                    ranks[ranksOrder[k].first] = averageRank;
-                }
-            }
-            i = j;
-        }
-        return ranks;
-    }
 
-    map<int, WTL> computeWTL(int controlIdx, vector<string> models, json table)
-    {
-        // Compute the WTL matrix
-        map<int, WTL> wtl;
-        int nModels = models.size();
-        for (int i = 0; i < nModels; ++i) {
-            wtl[i] = { 0, 0, 0 };
-        }
-        json origin = table.begin().value();
-        for (auto const& item : origin.items()) {
-            auto controlModel = models.at(controlIdx);
-            double controlValue = table[controlModel].at(item.key()).at(0).get<double>();
-            for (int i = 0; i < nModels; ++i) {
-                if (i == controlIdx) {
-                    continue;
-                }
-                double value = table[models[i]].at(item.key()).at(0).get<double>();
-                if (value < controlValue) {
-                    wtl[i].win++;
-                } else if (value == controlValue) {
-                    wtl[i].tie++;
-                } else {
-                    wtl[i].loss++;
-                }
-            }
-        }
-        return wtl;
-    }
-
-    void postHocHolm(int controlIdx, vector<string> models, int nDatasets, map<string, float> ranks, double significance, map<int, WTL> wtl)
-    {
-        // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
-        // Post-hoc Holm test
-        // Calculate the p-value for the models paired with the control model
-        int nModels = models.size();
-        map<int, double> stats; // p-value of each model paired with the control model
-        boost::math::normal dist(0.0, 1.0);
-        double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
-        for (int i = 0; i < nModels; i++) {
-            if (i == controlIdx) {
-                stats[i] = 0.0;
-                continue;
-            }
-            double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
-            double p_value = (long double)2 * (1 - cdf(dist, z));
-            stats[i] = p_value;
-        }
-        // Sort the models by p-value
-        vector<pair<int, double>> statsOrder;
-        for (const auto& stat : stats) {
-            statsOrder.push_back({ stat.first, stat.second });
-        }
-        sort(statsOrder.begin(), statsOrder.end(), [](const pair<int, double>& a, const pair<int, double>& b) {
-            return a.second < b.second;
-            });
-
-        // Holm adjustment
-        for (int i = 0; i < statsOrder.size(); ++i) {
-            auto item = statsOrder.at(i);
-            double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
-            double p_value = min((double)1.0, item.second * (nModels - i));
-            p_value = max(before, p_value);
-            statsOrder[i] = { item.first, p_value };
-        }
-        cout << Colors::CYAN();
-        cout << "  *************************************************************************************************************" << endl;
-        cout << "  Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
-        cout << "  Control model: " << models[controlIdx] << endl;
-        cout << "  Model        p-value      rank      win tie loss" << endl;
-        cout << "  ============ ============ ========= === === ====" << endl;
-        // sort ranks from lowest to highest
-        vector<pair<string, float>> ranksOrder;
-        for (const auto& rank : ranks) {
-            ranksOrder.push_back({ rank.first, rank.second });
-        }
-        sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, float>& a, const pair<string, float>& b) {
-            return a.second < b.second;
-            });
-        for (const auto& item : ranksOrder) {
-            if (item.first == models.at(controlIdx)) {
-                continue;
-            }
-            auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
-            double pvalue = 0.0;
-            for (const auto& stat : statsOrder) {
-                if (stat.first == idx) {
-                    pvalue = stat.second;
-                }
-            }
-            cout << "  " << left << setw(12) << item.first << " " << setprecision(10) << fixed << pvalue << setprecision(7) << " " << item.second;
-            cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss << endl;
-        }
-        cout << "  *************************************************************************************************************" << endl;
-        cout << Colors::RESET();
-    }
-    bool friedmanTest(vector<string> models, int nDatasets, map<string, float> ranks, double significance = 0.05)
-    {
-        // Friedman test
-        // Calculate the Friedman statistic
-        int nModels = models.size();
-        if (nModels < 3 || nDatasets < 3) {
-            throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
-        }
-        cout << Colors::BLUE() << endl;
-        cout << "***************************************************************************************************************" << endl;
-        cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
-        double degreesOfFreedom = nModels - 1.0;
-        double sumSquared = 0;
-        for (const auto& rank : ranks) {
-            sumSquared += pow(rank.second, 2);
-        }
-        // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
-        double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
-        cout << "Friedman statistic: " << friedmanQ << endl;
-        // Calculate the critical value
-        boost::math::chi_squared chiSquared(degreesOfFreedom);
-        long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
-        double criticalValue = quantile(chiSquared, 1 - significance);
-        std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
-            << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
-        cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl;
-        bool result;
-        if (p_value < significance) {
-            cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl;
-            result = true;
-        } else {
-            cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl;
-            result = false;
-        }
-        cout << Colors::BLUE() << "***************************************************************************************************************" << endl;
-        return result;
-    }
-    void BestResults::printTableResults(set<string> models, json table)
+    void BestResults::printTableResults(vector<string> models, json table)
     {
         cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
         cout << "------------------------------------------------" << endl;
@@ -386,8 +220,6 @@ namespace platform {
         auto i = 0;
         bool odd = true;
         map<string, double> totals;
-        map<string, float> ranks;
-        map<string, float> ranksTotal;
         int nDatasets = table.begin().value().size();
         for (const auto& model : models) {
             totals[model] = 0.0;
@@ -398,23 +230,12 @@ namespace platform {
             cout << color << setw(3) << fixed << right << i++ << " ";
             cout << setw(25) << left << item.key() << " ";
             double maxValue = 0;
-            vector<pair<string, double>> ranksOrder;
             // Find out the max value for this dataset
             for (const auto& model : models) {
                 double value = table[model].at(item.key()).at(0).get<double>();
                 if (value > maxValue) {
                     maxValue = value;
                 }
-                ranksOrder.push_back({ model, value });
-            }
-            // Assign the ranks
-            ranks = assignRanks(ranksOrder);
-            if (ranksTotal.size() == 0) {
-                ranksTotal = ranks;
-            } else {
-                for (const auto& rank : ranks) {
-                    ranksTotal[rank.first] += rank.second;
-                }
             }
             // Print the row with red colors on max values
             for (const auto& model : models) {
@@ -425,7 +246,6 @@ namespace platform {
                 }
                 totals[model] += value;
                 cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
-                // cout << efectiveColor << setw(12) << setprecision(10) << fixed << ranks[model] << " ";
             }
             cout << endl;
             odd = !odd;
@@ -449,50 +269,7 @@ namespace platform {
             }
             cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " ";
         }
-        // Output the averaged ranks
         cout << endl;
-        int min = 1;
-        for (auto& rank : ranksTotal) {
-            if (rank.second < min) {
-                min = rank.second;
-            }
-            rank.second /= nDatasets;
-        }
-        cout << Colors::BLUE() << setw(30) << "    Ranks....................";
-        for (const auto& model : models) {
-            string efectiveColor = Colors::BLUE();
-            if (ranksTotal[model] == min) {
-                efectiveColor = Colors::RED();
-            }
-            cout << efectiveColor << setw(12) << setprecision(4) << fixed << (double)ranksTotal[model] << " ";
-        }
-        cout << endl;
-        cout << Colors::GREEN() << setw(30) << "    Averaged ranks...........";
-        for (const auto& model : models) {
-            string efectiveColor = Colors::GREEN();
-            if (ranksTotal[model] == min) {
-                efectiveColor = Colors::RED();
-            }
-            cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] << " ";
-        }
-        cout << endl;
-        vector<string> vModels(models.begin(), models.end());
-        vector<string> datasets;
-        for (const auto& dataset : table.begin().value().items()) {
-            datasets.push_back(dataset.key());
-        }
-        double significance = 0.05;
-        if (friedman) {
-            friedmanTest(vModels, nDatasets, ranksTotal, significance);
-            // Stablish the control model as the one with the lowest averaged rank
-            int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
-            auto wtl = computeWTL(controlIdx, vModels, table);
-            postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl);
-        }
-
-        Statistics stats(vModels, datasets, table, significance);
-        stats.friedmanTest();
-        stats.postHocHolmTest();
     }
     void BestResults::reportAll()
     {
@@ -501,5 +278,16 @@ namespace platform {
         json table = buildTableResults(models);
         // Print the table of results
         printTableResults(models, table);
+        // Compute the Friedman test
+        if (friedman) {
+            vector<string> datasets;
+            for (const auto& dataset : table.begin().value().items()) {
+                datasets.push_back(dataset.key());
+            }
+            double significance = 0.05;
+            Statistics stats(models, datasets, table, significance);
+            auto result = stats.friedmanTest();
+            stats.postHocHolmTest(result);
+        }
     }
 }
\ No newline at end of file
diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h
index 5495222..8ad0f8f 100644
--- a/src/Platform/BestResults.h
+++ b/src/Platform/BestResults.h
@@ -14,10 +14,10 @@ namespace platform {
         void reportAll();
         void buildAll();
     private:
-        set<string> getModels();
+        vector<string> getModels();
         vector<string> loadResultFiles();
-        json buildTableResults(set<string> models);
-        void printTableResults(set<string> models, json table);
+        json buildTableResults(vector<string> models);
+        void printTableResults(vector<string> models, json table);
         string bestResultFile();
         json loadFile(const string& fileName);
         string path;
diff --git a/src/Platform/Statistics.cc b/src/Platform/Statistics.cc
index 33b2f57..f4d72f2 100644
--- a/src/Platform/Statistics.cc
+++ b/src/Platform/Statistics.cc
@@ -102,7 +102,7 @@ namespace platform {
         }
     }
 
-    void Statistics::postHocHolmTest()
+    void Statistics::postHocHolmTest(bool friedmanResult)
     {
         if (!fitted) {
             fit();
@@ -139,7 +139,8 @@ namespace platform {
             p_value = max(before, p_value);
             statsOrder[i] = { item.first, p_value };
         }
-        cout << Colors::MAGENTA();
+        auto color = friedmanResult ? Colors::GREEN() : Colors::YELLOW();
+        cout << color;
         cout << "  *************************************************************************************************************" << endl;
         cout << "  Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
         cout << "  Control model: " << models[controlIdx] << endl;
@@ -203,7 +204,7 @@ namespace platform {
             cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl;
             result = false;
         }
-        cout << Colors::BLUE() << "***************************************************************************************************************" << endl;
+        cout << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << endl;
         return result;
     }
 } // namespace platform
diff --git a/src/Platform/Statistics.h b/src/Platform/Statistics.h
index 92c8a2a..bae91fa 100644
--- a/src/Platform/Statistics.h
+++ b/src/Platform/Statistics.h
@@ -17,7 +17,7 @@ namespace platform {
     public:
         Statistics(vector<string>& models, vector<string>& datasets, json data, double significance = 0.05);
         bool friedmanTest();
-        void postHocHolmTest();
+        void postHocHolmTest(bool friedmanResult);
     private:
         void fit();
         void computeRanks();
-- 
2.45.2


From 71704e3547f5dfbf8e3918d801338a9afa5f6128 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Thu, 28 Sep 2023 01:27:18 +0200
Subject: [PATCH 10/11] Enhance output info in Statistics

---
 src/Platform/BestResults.cc |  1 -
 src/Platform/ReportBase.h   | 13 ++-----------
 src/Platform/Statistics.cc  | 21 +++++++++++++--------
 src/Platform/Symbols.h      | 18 ++++++++++++++++++
 4 files changed, 33 insertions(+), 20 deletions(-)
 create mode 100644 src/Platform/Symbols.h

diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc
index 5c41d27..cc6f60d 100644
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@@ -2,7 +2,6 @@
 #include <fstream>
 #include <iostream>
 #include <sstream>
-#include <set>
 #include "BestResults.h"
 #include "Result.h"
 #include "Colors.h"
diff --git a/src/Platform/ReportBase.h b/src/Platform/ReportBase.h
index c8400cf..5797b1b 100644
--- a/src/Platform/ReportBase.h
+++ b/src/Platform/ReportBase.h
@@ -3,22 +3,13 @@
 #include <string>
 #include <iostream>
 #include "Paths.h"
+#include "Symbols.h"
 #include <nlohmann/json.hpp>
 
 using json = nlohmann::json;
 namespace platform {
     using namespace std;
-    class Symbols {
-    public:
-        inline static const string check_mark{ "\u2714" };
-        inline static const string exclamation{ "\u2757" };
-        inline static const string black_star{ "\u2605" };
-        inline static const string cross{ "\u2717" };
-        inline static const string upward_arrow{ "\u27B6" };
-        inline static const string down_arrow{ "\u27B4" };
-        inline static const string equal_best{ check_mark };
-        inline static const string better_best{ black_star };
-    };
+
     class ReportBase {
     public:
         explicit ReportBase(json data_, bool compare);
diff --git a/src/Platform/Statistics.cc b/src/Platform/Statistics.cc
index f4d72f2..9a4a34f 100644
--- a/src/Platform/Statistics.cc
+++ b/src/Platform/Statistics.cc
@@ -1,5 +1,6 @@
 #include "Statistics.h"
 #include "Colors.h"
+#include "Symbols.h"
 #include <boost/math/distributions/chi_squared.hpp>
 #include <boost/math/distributions/normal.hpp>
 
@@ -24,7 +25,7 @@ namespace platform {
         computeWTL();
         fitted = true;
     }
-    map<string, float> assignRanks2(vector<pair<string, double>>& ranksOrder)
+    map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder)
     {
         // sort the ranksOrder vector by value
         sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& a, const pair<string, double>& b) {
@@ -62,7 +63,7 @@ namespace platform {
                 ranksOrder.push_back({ model, value });
             }
             // Assign the ranks
-            ranksLine = assignRanks2(ranksOrder);
+            ranksLine = assignRanks(ranksOrder);
             if (ranks.size() == 0) {
                 ranks = ranksLine;
             } else {
@@ -139,13 +140,13 @@ namespace platform {
             p_value = max(before, p_value);
             statsOrder[i] = { item.first, p_value };
         }
-        auto color = friedmanResult ? Colors::GREEN() : Colors::YELLOW();
+        auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
         cout << color;
         cout << "  *************************************************************************************************************" << endl;
         cout << "  Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
         cout << "  Control model: " << models[controlIdx] << endl;
-        cout << "  Model        p-value      rank      win tie loss" << endl;
-        cout << "  ============ ============ ========= === === ====" << endl;
+        cout << "  Model        p-value      rank      win tie loss Status" << endl;
+        cout << "  ============ ============ ========= === === ==== =============" << endl;
         // sort ranks from lowest to highest
         vector<pair<string, float>> ranksOrder;
         for (const auto& rank : ranks) {
@@ -165,10 +166,14 @@ namespace platform {
                     pvalue = stat.second;
                 }
             }
-            cout << "  " << left << setw(12) << item.first << " " << setprecision(10) << fixed << pvalue << setprecision(7) << " " << item.second;
-            cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss << endl;
+            auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
+            auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
+            auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
+            cout << "  " << colorStatus << left << setw(12) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second;
+            cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss;
+            cout << " " << status << textStatus << endl;
         }
-        cout << "  *************************************************************************************************************" << endl;
+        cout << color << "  *************************************************************************************************************" << endl;
         cout << Colors::RESET();
     }
     bool Statistics::friedmanTest()
diff --git a/src/Platform/Symbols.h b/src/Platform/Symbols.h
new file mode 100644
index 0000000..a9fa1e7
--- /dev/null
+++ b/src/Platform/Symbols.h
@@ -0,0 +1,18 @@
+#ifndef SYMBOLS_H
+#define SYMBOLS_H
+#include <string>
+using namespace std;
+namespace platform {
+    class Symbols { // Named Unicode glyph constants shared by the report and statistics output code
+    public:
+        inline static const string check_mark{ "\u2714" }; // U+2714 HEAVY CHECK MARK
+        inline static const string exclamation{ "\u2757" }; // U+2757 HEAVY EXCLAMATION MARK SYMBOL
+        inline static const string black_star{ "\u2605" }; // U+2605 BLACK STAR
+        inline static const string cross{ "\u2717" }; // U+2717 BALLOT X
+        inline static const string upward_arrow{ "\u27B6" }; // U+27B6 BLACK-FEATHERED NORTH EAST ARROW
+        inline static const string down_arrow{ "\u27B4" }; // U+27B4 BLACK-FEATHERED SOUTH EAST ARROW
+        inline static const string equal_best{ check_mark }; // copy of check_mark (same glyph)
+        inline static const string better_best{ black_star }; // copy of black_star (same glyph)
+    };
+}
+#endif // !SYMBOLS_H
\ No newline at end of file
-- 
2.45.2


From 926de2bebd116f4cbc634a8eaaf95e2277c9f4b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Thu, 28 Sep 2023 09:44:33 +0200
Subject: [PATCH 11/11] Add boost info to README

---
 README.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 266bb8a..d9849bf 100644
--- a/README.md
+++ b/README.md
@@ -4,10 +4,14 @@ Bayesian Network Classifier with libtorch from scratch
 
 ## 0. Setup
 
-### libxlswriter
-
 Before compiling BayesNet.
 
+### boost library
+
+[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
+
+### libxlsxwriter
+
 ```bash
 cd lib/libxlsxwriter
 make
-- 
2.45.2