From a56ec98ef9f1894e50b7a8380a2b698484a12fb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Wed, 21 May 2025 11:51:04 +0200 Subject: [PATCH] Add Wilcoxon Test --- src/CMakeLists.txt | 2 +- src/best/BestResults.cpp | 29 +--- src/best/BestResultsExcel.cpp | 10 +- src/best/BestResultsExcel.h | 3 +- src/best/BestResultsTex.cpp | 6 +- src/best/BestResultsTex.h | 4 +- src/best/DeLong.cpp | 45 ------ src/best/DeLong.h | 24 --- src/best/Statistics.cpp | 140 ++++++++--------- src/best/Statistics.h | 13 +- src/best/WilcoxonTest.hpp | 250 +++++++++++++++++++++++++++++++ src/commands/b_grid.cpp | 2 +- src/main/ArgumentsExperiment.cpp | 27 +++- src/main/Experiment.cpp | 4 +- 14 files changed, 369 insertions(+), 190 deletions(-) delete mode 100644 src/best/DeLong.cpp delete mode 100644 src/best/DeLong.h create mode 100644 src/best/WilcoxonTest.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index be63bee..b89cebc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,7 +13,7 @@ include_directories( # b_best add_executable( b_best commands/b_best.cpp best/Statistics.cpp - best/BestResultsExcel.cpp best/BestResultsTex.cpp best/BestResultsMd.cpp best/BestResults.cpp best/DeLong.cpp + best/BestResultsExcel.cpp best/BestResultsTex.cpp best/BestResultsMd.cpp best/BestResults.cpp common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp main/Models.cpp main/Scores.cpp reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp diff --git a/src/best/BestResults.cpp b/src/best/BestResults.cpp index 21da49e..5e7c349 100644 --- a/src/best/BestResults.cpp +++ b/src/best/BestResults.cpp @@ -321,7 +321,7 @@ namespace platform { // Build the table of results json table = buildTableResults(models); std::vector datasets = getDatasets(table.begin().value()); - BestResultsExcel excel_report(score, datasets); + BestResultsExcel excel_report(path, score, datasets); excel_report.reportSingle(model, path + Paths::bestResultsFile(score, model)); messageOutputFile("Excel", excel_report.getFileName()); } @@ -337,10 +337,10 @@ namespace platform { // Compute the Friedman test std::map> ranksModels; if (friedman) { - Statistics stats(models, datasets, table, significance); + Statistics stats(score, models, datasets, table, significance); auto result = stats.friedmanTest(); - stats.postHocHolmTest(); - stats.postHocTestReport("Holm", score, result, tex); + stats.postHocTest(); + stats.postHocTestReport(result, tex); ranksModels = stats.getRanks(); } if (tex) { @@ -352,24 +352,11 @@ namespace platform { } } if (excel) { - BestResultsExcel excel(score, datasets); + BestResultsExcel excel(path, score, datasets); excel.reportAll(models, table, ranksModels, friedman, significance); if (friedman) { - int idx = -1; - double min = 2000; - // Find out the control model - auto totals = std::vector(models.size(), 0.0); - for (const auto& dataset_ : datasets) { - for (int i = 0; i < models.size(); ++i) { - totals[i] += ranksModels[dataset_][models[i]]; - } - } - for (int i = 0; i < models.size(); ++i) { - if (totals[i] < min) { - min = totals[i]; - idx = i; - } - } + Statistics stats(score, models, datasets, table, significance); + int idx = stats.getControlIdx(); model = models.at(idx); excel.reportSingle(model, path + Paths::bestResultsFile(score, model)); } @@ -378,7 +365,7 @@ namespace platform { } void BestResults::messageOutputFile(const std::string& title, const std::string& fileName) { - std::cout << Colors::YELLOW() << "** " << std::setw(5) << std::left << 
title + std::cout << Colors::YELLOW() << "** " << std::setw(8) << std::left << title << " file generated: " << fileName << Colors::RESET() << std::endl; } } \ No newline at end of file diff --git a/src/best/BestResultsExcel.cpp b/src/best/BestResultsExcel.cpp index fb7b864..36cfcb3 100644 --- a/src/best/BestResultsExcel.cpp +++ b/src/best/BestResultsExcel.cpp @@ -30,7 +30,7 @@ namespace platform { } return columnName; } - BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector& datasets) : score(score), datasets(datasets) + BestResultsExcel::BestResultsExcel(const std::string& path, const std::string& score, const std::vector& datasets) : path(path), score(score), datasets(datasets) { file_name = Paths::bestResultsExcel(score); workbook = workbook_new(getFileName().c_str()); @@ -92,7 +92,7 @@ namespace platform { catch (const std::out_of_range& oor) { auto tabName = "table_" + std::to_string(i); auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str()); - json data = loadResultData(Paths::results() + fileName); + json data = loadResultData(path + fileName); auto report = ReportExcel(data, false, workbook, worksheetNew); report.show(); hyperlink = "#table_" + std::to_string(i); @@ -241,10 +241,10 @@ namespace platform { } worksheet_merge_range(worksheet, 0, 0, 0, 7, "Friedman Test", styles["headerFirst"]); row = 2; - Statistics stats(models, datasets, table, significance, false); + Statistics stats(score, models, datasets, table, significance, false); // No output auto result = stats.friedmanTest(); - stats.postHocHolmTest(); - // stats.postHocTestReport("Holm", result, false); + stats.postHocTest(); + stats.postHocTestReport(result, false); // No tex output auto friedmanResult = stats.getFriedmanResult(); auto postHocResult = stats.getPostHocResult(); worksheet_merge_range(worksheet, row, 0, row, 7, "Null hypothesis: H0 'There is no significant differences between all the classifiers.'", styles["headerSmall"]); diff --git a/src/best/BestResultsExcel.h b/src/best/BestResultsExcel.h index 6c70a49..bd8bf94 100644 --- a/src/best/BestResultsExcel.h +++ b/src/best/BestResultsExcel.h @@ -10,7 +10,7 @@ namespace platform { using json = nlohmann::ordered_json; class BestResultsExcel : public ExcelFile { public: - BestResultsExcel(const std::string& score, const std::vector& datasets); + BestResultsExcel(const std::string& path, const std::string& score, const std::vector& datasets); ~BestResultsExcel(); void reportAll(const std::vector& models, const json& table, const std::map>& ranks, bool friedman, double significance); void reportSingle(const std::string& model, const std::string& fileName); @@ -22,6 +22,7 @@ namespace platform { void formatColumns(); void doFriedman(); void addConditionalFormat(std::string formula); + std::string path; std::string score; std::vector models; std::vector datasets; diff --git a/src/best/BestResultsTex.cpp b/src/best/BestResultsTex.cpp index 39e17d9..cf4827f 100644 --- a/src/best/BestResultsTex.cpp +++ b/src/best/BestResultsTex.cpp @@ -27,10 +27,10 @@ namespace platform { handler << "\\tiny " << std::endl; handler << "\\renewcommand{\\arraystretch }{1.2} " << std::endl; handler << "\\renewcommand{\\tabcolsep }{0.07cm} " << std::endl; - auto umetric = metric; + auto umetric = score; umetric[0] = toupper(umetric[0]); handler << "\\caption{" << umetric << " results(mean $\\pm$ std) for all the algorithms and datasets} " << std::endl; - handler << "\\label{tab:results_" << metric << "}" << std::endl; + handler << 
"\\label{tab:results_" << score << "}" << std::endl; std::string header_dataset_name = index ? "r" : "l"; handler << "\\begin{tabular} {{" << header_dataset_name << std::string(models.size(), 'c').c_str() << "}}" << std::endl; handler << "\\hline " << std::endl; @@ -100,7 +100,7 @@ namespace platform { handler << "%%" << std::endl; handler << "\\begin{table}[htbp]" << std::endl; handler << "\\centering" << std::endl; - handler << "\\caption{Results of the post-hoc " << kind << " test for the mean " << metric << " of the algorithms.}\\label{ tab:tests }" << std::endl; + handler << "\\caption{Results of the post-hoc " << kind << " test for the mean " << score << " of the algorithms.}\\label{ tab:tests }" << std::endl; handler << "\\begin{tabular}{lrrrrr}" << std::endl; handler << "\\hline" << std::endl; handler << "classifier & pvalue & rank & win & tie & loss\\\\" << std::endl; diff --git a/src/best/BestResultsTex.h b/src/best/BestResultsTex.h index e587dec..e0a82e0 100644 --- a/src/best/BestResultsTex.h +++ b/src/best/BestResultsTex.h @@ -9,14 +9,14 @@ namespace platform { using json = nlohmann::ordered_json; class BestResultsTex { public: - BestResultsTex(const std::string metric_, bool dataset_name = true) : metric{ metric_ }, dataset_name{ dataset_name } {}; + BestResultsTex(const std::string score, bool dataset_name = true) : score{ score }, dataset_name{ dataset_name } {}; ~BestResultsTex() = default; void results_header(const std::vector& models, const std::string& date, bool index); void results_body(const std::vector& datasets, json& table, bool index); void results_footer(const std::map>& totals, const std::string& best_model); void postHoc_test(struct PostHocResult& postHocResult, const std::string& kind, const std::string& date); private: - std::string metric; + std::string score; bool dataset_name; void openTexFile(const std::string& name); std::ofstream handler; diff --git a/src/best/DeLong.cpp b/src/best/DeLong.cpp deleted file mode 100644 index dbcc920..0000000 --- a/src/best/DeLong.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// DeLong.cpp -// Integración del test de DeLong con la clase RocAuc y Statistics -// Basado en: X. Sun and W. 
Xu, "Fast Implementation of DeLong’s Algorithm for Comparing the Areas Under Correlated Receiver Operating Characteristic Curves," (2014), y algoritmos inspirados en sklearn/pROC - -#include "DeLong.h" -#include -#include -#include -#include -#include -#include - -namespace platform { - - DeLong::DeLongResult DeLong::compare(const std::vector& aucs_model1, - const std::vector& aucs_model2) - { - if (aucs_model1.size() != aucs_model2.size()) { - throw std::invalid_argument("AUC lists must have the same size"); - } - - size_t N = aucs_model1.size(); - if (N < 2) { - throw std::invalid_argument("At least two AUC values are required"); - } - - std::vector diffs(N); - for (size_t i = 0; i < N; ++i) { - diffs[i] = aucs_model1[i] - aucs_model2[i]; - } - - double mean_diff = std::accumulate(diffs.begin(), diffs.end(), 0.0) / N; - double var = 0.0; - for (size_t i = 0; i < N; ++i) { - var += (diffs[i] - mean_diff) * (diffs[i] - mean_diff); - } - var /= (N * (N - 1)); - if (var <= 0.0) var = 1e-10; - - double z = mean_diff / std::sqrt(var); - double p = 2.0 * (1.0 - std::erfc(std::abs(z) / std::sqrt(2.0)) / 2.0); - return { mean_diff, z, p }; - } - -} diff --git a/src/best/DeLong.h b/src/best/DeLong.h deleted file mode 100644 index 07e3cf3..0000000 --- a/src/best/DeLong.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef DELONG_H -#define DELONG_H -/* ******************************************************************************************************************** -/* Integración del test de DeLong con la clase RocAuc y Statistics -/* Basado en: X. Sun and W. Xu, "Fast Implementation of DeLong’s Algorithm for Comparing the Areas Under Correlated -/* Receiver Operating Characteristic Curves," (2014), y algoritmos inspirados en sklearn/pROC -/* ********************************************************************************************************************/ -#include - -namespace platform { - class DeLong { - public: - struct DeLongResult { - double auc_diff; - double z_stat; - double p_value; - }; - // Compara dos vectores de AUCs por dataset y devuelve diferencia media, - // estadístico z y p-valor usando un test de rangos (DeLong simplificado) - static DeLongResult compare(const std::vector& aucs_model1, - const std::vector& aucs_model2); - }; -} -#endif // DELONG_H \ No newline at end of file diff --git a/src/best/Statistics.cpp b/src/best/Statistics.cpp index cd40cc1..0c27cef 100644 --- a/src/best/Statistics.cpp +++ b/src/best/Statistics.cpp @@ -7,19 +7,25 @@ #include "BestResultsTex.h" #include "BestResultsMd.h" #include "Statistics.h" -#include "DeLong.h" +#include "WilcoxonTest.hpp" namespace platform { - Statistics::Statistics(const std::vector& models, const std::vector& datasets, const json& data, double significance, bool output) : - models(models), datasets(datasets), data(data), significance(significance), output(output) + Statistics::Statistics(const std::string& score, const std::vector& models, const std::vector& datasets, const json& data, double significance, bool output) : + score(score), models(models), datasets(datasets), data(data), significance(significance), output(output) { + if (score == "accuracy") { + postHocType = "Holm"; + hlen = 85; + } else { + postHocType = "Wilcoxon"; + hlen = 88; + } nModels = models.size(); nDatasets = datasets.size(); auto temp = ConfigLocale(); } - void Statistics::fit() { if (nModels < 3 || nDatasets < 3) { @@ -28,9 +34,11 @@ namespace platform { throw std::runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 
datasets."); } ranksModels.clear(); - computeRanks(); + computeRanks(); // compute greaterAverage and ranks // Set the control model as the one with the lowest average rank - controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); + controlIdx = score == "accuracy" ? + distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })) + : greaterAverage; // The model with the greater average score computeWTL(); maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); maxDatasetName = (*std::max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); @@ -67,11 +75,16 @@ namespace platform { void Statistics::computeRanks() { std::map ranksLine; + std::map averages; + for (const auto& model : models) { + averages[model] = 0; + } for (const auto& dataset : datasets) { std::vector> ranksOrder; for (const auto& model : models) { double value = data[model].at(dataset).at(0).get(); ranksOrder.push_back({ model, value }); + averages[model] += value; } // Assign the ranks ranksLine = assignRanks(ranksOrder); @@ -89,6 +102,12 @@ namespace platform { for (const auto& rank : ranks) { ranks[rank.first] /= nDatasets; } + // Average the scores + for (const auto& average : averages) { + averages[average.first] /= nDatasets; + } + // Get the model with the greater average score + greaterAverage = distance(averages.begin(), max_element(averages.begin(), averages.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); } void Statistics::computeWTL() { @@ -115,12 +134,36 @@ namespace platform { } } } + int Statistics::getControlIdx() + { + if (!fitted) { + fit(); + } + return controlIdx; + } + void Statistics::postHocTest() + { + // if (score == "accuracy") { + postHocHolmTest(); + // } else { + // postHocWilcoxonTest(); + // } + } + void Statistics::postHocWilcoxonTest() + { + if (!fitted) { + fit(); + } + // Reference: Wilcoxon, F. (1945). “Individual Comparisons by Ranking Methods”. Biometrics Bulletin, 1(6), 80-83. + auto wilcoxon = WilcoxonTest(models, datasets, data, significance); + controlIdx = wilcoxon.getControlIdx(); + postHocResult = wilcoxon.getPostHocResult(); + } void Statistics::postHocHolmTest() { if (!fitted) { fit(); } - std::stringstream oss; // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8 // Post-hoc Holm test // Calculate the p-value for the models paired with the control model @@ -155,15 +198,15 @@ namespace platform { postHocResult.model = models.at(controlIdx); } - void Statistics::postHocTestReport(const std::string& kind, const std::string& metric, bool friedmanResult, bool tex) + void Statistics::postHocTestReport(bool friedmanResult, bool tex) { std::stringstream oss; postHocResult.model = models.at(controlIdx); auto color = friedmanResult ? 
Colors::CYAN() : Colors::YELLOW(); oss << color; - oss << " *************************************************************************************************************" << std::endl; - oss << " Post-hoc " << kind << " test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl; + oss << " " << std::string(hlen + 25, '*') << std::endl; + oss << " Post-hoc " << postHocType << " test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl; oss << " Control model: " << models.at(controlIdx) << std::endl; oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value rank win tie loss Status" << std::endl; oss << " " << std::string(maxModelName, '=') << " ============ ========= === === ==== =============" << std::endl; @@ -198,83 +241,18 @@ namespace platform { oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss; oss << " " << status << textStatus << std::endl; } - oss << color << " *************************************************************************************************************" << std::endl; + oss << color << " " << std::string(hlen + 25, '*') << std::endl; oss << Colors::RESET(); if (output) { std::cout << oss.str(); } if (tex) { - BestResultsTex bestResultsTex(metric); + BestResultsTex bestResultsTex(score); BestResultsMd bestResultsMd; - bestResultsTex.postHoc_test(postHocResult, kind, get_date() + " " + get_time()); - bestResultsMd.postHoc_test(postHocResult, kind, get_date() + " " + get_time()); + bestResultsTex.postHoc_test(postHocResult, postHocType, get_date() + " " + get_time()); + bestResultsMd.postHoc_test(postHocResult, postHocType, get_date() + " " + get_time()); } } - // void Statistics::postHocDeLongTest(const std::vector>& y_trues, - // const std::vector>>& y_probas, - // bool tex) - // { - // std::map pvalues; - // postHocResult.model = models.at(controlIdx); - // postHocResult.postHocLines.clear(); - - // for (size_t i = 0; i < models.size(); ++i) { - // if ((int)i == controlIdx) continue; - // double acc_p = 0.0; - // int valid = 0; - // for (size_t d = 0; d < y_trues.size(); ++d) { - // try { - // auto result = compareModelsWithDeLong(y_probas[controlIdx][d], y_probas[i][d], y_trues[d]); - // acc_p += result.p_value; - // ++valid; - // } - // catch (...) 
{} - // } - // if (valid > 0) { - // pvalues[i] = acc_p / valid; - // } - // } - - // std::vector> sorted_pvalues(pvalues.begin(), pvalues.end()); - // std::sort(sorted_pvalues.begin(), sorted_pvalues.end(), [](const auto& a, const auto& b) { - // return a.second < b.second; - // }); - - // std::stringstream oss; - // oss << "\n*************************************************************************************************************\n"; - // oss << " Post-hoc DeLong-Holm test: H0: 'No significant differences in AUC with control model.'\n"; - // oss << " Control model: " << models[controlIdx] << "\n"; - // oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value Adjusted Result\n"; - // oss << " " << std::string(maxModelName, '=') << " ============ ========== =============\n"; - - // double prev = 0.0; - // for (size_t i = 0; i < sorted_pvalues.size(); ++i) { - // int idx = sorted_pvalues[i].first; - // double raw = sorted_pvalues[i].second; - // double adj = std::min(1.0, raw * (models.size() - i - 1)); - // adj = std::max(prev, adj); - // prev = adj; - // bool reject = adj < significance; - - // postHocResult.postHocLines.push_back({ models[idx], adj, 0.0f, {}, reject }); - - // auto color = reject ? Colors::MAGENTA() : Colors::GREEN(); - // auto status = reject ? Symbols::cross : Symbols::check_mark; - // auto textStatus = reject ? " rejected H0" : " accepted H0"; - // oss << " " << color << std::left << std::setw(maxModelName) << models[idx] << " "; - // oss << std::setprecision(6) << std::scientific << raw << " "; - // oss << std::setprecision(6) << std::scientific << adj << " " << status << textStatus << "\n"; - // } - // oss << Colors::CYAN() << " *************************************************************************************************************\n"; - // oss << Colors::RESET(); - // if (output) std::cout << oss.str(); - // if (tex) { - // BestResultsTex bestResultsTex; - // BestResultsMd bestResultsMd; - // bestResultsTex.holm_test(postHocResult, get_date() + " " + get_time()); - // bestResultsMd.holm_test(postHocResult, get_date() + " " + get_time()); - // } - // } bool Statistics::friedmanTest() { if (!fitted) { @@ -284,7 +262,7 @@ namespace platform { // Friedman test // Calculate the Friedman statistic oss << Colors::BLUE() << std::endl; - oss << "***************************************************************************************************************" << std::endl; + oss << std::string(hlen, '*') << std::endl; oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << std::endl; double degreesOfFreedom = nModels - 1.0; double sumSquared = 0; @@ -309,7 +287,7 @@ namespace platform { oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." 
<< std::endl; result = false; } - oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << std::endl; + oss << Colors::BLUE() << std::string(hlen, '*') << Colors::RESET() << std::endl; if (output) { std::cout << oss.str(); } diff --git a/src/best/Statistics.h b/src/best/Statistics.h index 285f34c..765ed1d 100644 --- a/src/best/Statistics.h +++ b/src/best/Statistics.h @@ -32,17 +32,22 @@ namespace platform { }; class Statistics { public: - Statistics(const std::vector& models, const std::vector& datasets, const json& data, double significance = 0.05, bool output = true); + Statistics(const std::string& score, const std::vector& models, const std::vector& datasets, const json& data, double significance = 0.05, bool output = true); bool friedmanTest(); - void postHocHolmTest(); - void postHocTestReport(const std::string& kind, const std::string& metric, bool friedmanResult, bool tex); + void postHocTest(); + void postHocTestReport(bool friedmanResult, bool tex); + int getControlIdx(); FriedmanResult& getFriedmanResult(); PostHocResult& getPostHocResult(); std::map>& getRanks(); private: void fit(); + void postHocHolmTest(); + void postHocWilcoxonTest(); void computeRanks(); void computeWTL(); + const std::string& score; + std::string postHocType; const std::vector& models; const std::vector& datasets; const json& data; @@ -52,11 +57,13 @@ namespace platform { int nModels = 0; int nDatasets = 0; int controlIdx = 0; + int greaterAverage = -1; // The model with the greater average score std::map wtl; std::map ranks; std::vector> postHocData; int maxModelName = 0; int maxDatasetName = 0; + int hlen; // length of the line FriedmanResult friedmanResult; PostHocResult postHocResult; std::map> ranksModels; diff --git a/src/best/WilcoxonTest.hpp b/src/best/WilcoxonTest.hpp new file mode 100644 index 0000000..34c2969 --- /dev/null +++ b/src/best/WilcoxonTest.hpp @@ -0,0 +1,250 @@ +#ifndef BEST_WILCOXON_TEST_HPP +#define BEST_WILCOXON_TEST_HPP +// WilcoxonTest.hpp +// Stand‑alone class for paired Wilcoxon signed‑rank post‑hoc analysis +// ------------------------------------------------------------------ +// * Constructor takes the *already‑loaded* nlohmann::json object plus the +// vectors of model and dataset names. +// * Internally selects a control model (highest average AUC) and builds all +// statistics (ranks, W/T/L counts, Wilcoxon p‑values). 
+// * Public API: +// int getControlIdx() const; +// PostHocResult getPostHocResult() const; +// +#include +#include +#include +#include +#include +#include +#include +#include "Statistics.h" + +namespace platform { + class WilcoxonTest { + public: + WilcoxonTest(const std::vector& models, + const std::vector& datasets, + const json& data, + double alpha = 0.05) + : models_(models), datasets_(datasets), data_(data), alpha_(alpha) + { + buildAUCTable(); // extracts all AUCs into a dense matrix + computeAverageAUCs(); // per‑model mean (→ control selection) + computeAverageRanks(); // Friedman‑style ranks per model + selectControlModel(); // sets control_idx_ + buildPostHocResult(); // fills postHocResult_ + } + + //---------------------------------------------------- public API ---- + int getControlIdx() const noexcept { return control_idx_; } + + const PostHocResult& getPostHocResult() const noexcept { return postHocResult_; } + + private: + //-------------------------------------------------- helper structs ---- + // When a value is missing we keep NaN so that ordinary arithmetic still + // works (NaN simply propagates and we can test with std::isnan). + using Matrix = std::vector>; // [model][dataset] + + //------------------------------------------------- implementation ---- + void buildAUCTable() + { + const std::size_t M = models_.size(); + const std::size_t D = datasets_.size(); + auc_.assign(M, std::vector(D, std::numeric_limits::quiet_NaN())); + + for (std::size_t i = 0; i < M; ++i) { + const auto& model = models_[i]; + for (std::size_t j = 0; j < D; ++j) { + const auto& ds = datasets_[j]; + try { + auc_[i][j] = data_.at(model).at(ds).at(0).get(); + } + catch (...) { + // leave as NaN when value missing + } + } + } + } + + void computeAverageAUCs() + { + const std::size_t M = models_.size(); + avg_auc_.resize(M, std::numeric_limits::quiet_NaN()); + + for (std::size_t i = 0; i < M; ++i) { + double sum = 0.0; + std::size_t cnt = 0; + for (double v : auc_[i]) { + if (!std::isnan(v)) { sum += v; ++cnt; } + } + avg_auc_[i] = cnt ? sum / cnt : std::numeric_limits::quiet_NaN(); + } + } + + // Average rank across datasets (1 = best). + void computeAverageRanks() + { + const std::size_t M = models_.size(); + const std::size_t D = datasets_.size(); + rank_sum_.assign(M, 0.0); + rank_cnt_.assign(M, 0); + + const double EPS = 1e-10; + + for (std::size_t j = 0; j < D; ++j) { + // Collect present values for this dataset + std::vector> vals; // (auc, model_idx) + vals.reserve(M); + for (std::size_t i = 0; i < M; ++i) { + if (!std::isnan(auc_[i][j])) + vals.emplace_back(auc_[i][j], i); + } + if (vals.empty()) continue; // no info for this dataset + + // Sort descending (higher AUC better) + std::sort(vals.begin(), vals.end(), [](auto a, auto b) { + return a.first > b.first; + }); + + // Assign ranks with average for ties + std::size_t k = 0; + while (k < vals.size()) { + std::size_t l = k + 1; + while (l < vals.size() && std::fabs(vals[l].first - vals[k].first) < EPS) ++l; + const double avg_rank = (k + 1 + l) * 0.5; // average of ranks (1‑based) + for (std::size_t m = k; m < l; ++m) { + const auto idx = vals[m].second; + rank_sum_[idx] += avg_rank; + ++rank_cnt_[idx]; + } + k = l; + } + } + + // Final average + avg_rank_.resize(M, std::numeric_limits::quiet_NaN()); + for (std::size_t i = 0; i < M; ++i) { + avg_rank_[i] = rank_cnt_[i] ? 
rank_sum_[i] / rank_cnt_[i] + : std::numeric_limits::quiet_NaN(); + } + } + + void selectControlModel() + { + // pick model with highest average AUC (ties → first) + control_idx_ = 0; + for (std::size_t i = 1; i < avg_auc_.size(); ++i) { + if (avg_auc_[i] > avg_auc_[control_idx_]) control_idx_ = static_cast(i); + } + } + + void buildPostHocResult() + { + const std::size_t M = models_.size(); + const std::size_t D = datasets_.size(); + const std::string& control_name = models_[control_idx_]; + + postHocResult_.model = control_name; + + const double practical_threshold = 0.0005; // same heuristic as original code + + for (std::size_t i = 0; i < M; ++i) { + if (static_cast(i) == control_idx_) continue; + + PostHocLine line; + line.model = models_[i]; + line.rank = avg_rank_[i]; + + WTL wtl; + std::vector differences; + differences.reserve(D); + + for (std::size_t j = 0; j < D; ++j) { + double auc_control = auc_[control_idx_][j]; + double auc_other = auc_[i][j]; + if (std::isnan(auc_control) || std::isnan(auc_other)) continue; + + double diff = auc_control - auc_other; // control − comparison + if (std::fabs(diff) <= practical_threshold) { + ++wtl.tie; + } else if (diff < 0) { + ++wtl.win; // comparison wins + } else { + ++wtl.loss; // control wins + } + differences.push_back(diff); + } + + line.wtl = wtl; + line.pvalue = differences.empty() ? 1.0L : static_cast(wilcoxonSignedRankTest(differences)); + line.reject = (line.pvalue < alpha_); + + postHocResult_.postHocLines.push_back(std::move(line)); + } + } + + // ------------------------------------------------ Wilcoxon (private) -- + static double wilcoxonSignedRankTest(const std::vector& diffs) + { + if (diffs.empty()) return 1.0; + + // Build |diff| + sign vector (exclude zeros) + struct Node { double absval; int sign; }; + std::vector v; + v.reserve(diffs.size()); + for (double d : diffs) { + if (d != 0.0) v.push_back({ std::fabs(d), d > 0 ? 
1 : -1 }); + } + if (v.empty()) return 1.0; + + // Sort by absolute value + std::sort(v.begin(), v.end(), [](const Node& a, const Node& b) { return a.absval < b.absval; }); + + const double EPS = 1e-10; + const std::size_t n = v.size(); + std::vector ranks(n, 0.0); + + std::size_t i = 0; + while (i < n) { + std::size_t j = i + 1; + while (j < n && std::fabs(v[j].absval - v[i].absval) < EPS) ++j; + double avg_rank = (i + 1 + j) * 0.5; // 1‑based ranks + for (std::size_t k = i; k < j; ++k) ranks[k] = avg_rank; + i = j; + } + + double w_plus = 0.0, w_minus = 0.0; + for (std::size_t k = 0; k < n; ++k) { + if (v[k].sign > 0) w_plus += ranks[k]; + else w_minus += ranks[k]; + } + double w = std::min(w_plus, w_minus); + double mean_w = n * (n + 1) / 4.0; + double sd_w = std::sqrt(n * (n + 1) * (2 * n + 1) / 24.0); + if (sd_w == 0.0) return 1.0; // degenerate (all diffs identical) + + double z = (w - mean_w) / sd_w; + double p_two = std::erfc(std::fabs(z) / std::sqrt(2.0)); // 2‑sided tail + return p_two; + } + + //-------------------------------------------------------- data ---- + std::vector models_; + std::vector datasets_; + json data_; + double alpha_; + + Matrix auc_; // [model][dataset] + std::vector avg_auc_; // mean AUC per model + std::vector avg_rank_; // mean rank per model + std::vector rank_sum_; // helper for ranks + std::vector rank_cnt_; // datasets counted per model + + int control_idx_ = -1; + PostHocResult postHocResult_; + }; + +} // namespace stats +#endif // BEST_WILCOXON_TEST_HPP \ No newline at end of file diff --git a/src/commands/b_grid.cpp b/src/commands/b_grid.cpp index 7e246a5..b1c6244 100644 --- a/src/commands/b_grid.cpp +++ b/src/commands/b_grid.cpp @@ -231,8 +231,8 @@ void experiment(argparse::ArgumentParser& program) { struct platform::ConfigGrid config; auto arguments = platform::ArgumentsExperiment(program, platform::experiment_t::GRID); - auto path_results = arguments.getPathResults(); arguments.parse(); + auto path_results = arguments.getPathResults(); auto grid_experiment = platform::GridExperiment(arguments, config); platform::Timer timer; timer.start(); diff --git a/src/main/ArgumentsExperiment.cpp b/src/main/ArgumentsExperiment.cpp index 8d778ba..58bf990 100644 --- a/src/main/ArgumentsExperiment.cpp +++ b/src/main/ArgumentsExperiment.cpp @@ -215,10 +215,35 @@ namespace platform { test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json); } } + std::string getGppVersion() + { + std::string result; + std::array buffer; + + // Run g++ --version and capture the output + std::unique_ptr pipe(popen("g++ --version", "r"), pclose); + + if (!pipe) { + return "Error executing g++ --version command"; + } + + // Read the first line of output (which contains the version info) + if (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + result = buffer.data(); + // Remove trailing newline if present + if (!result.empty() && result[result.length() - 1] == '\n') { + result.erase(result.length() - 1); + } + } else { + return "No output from g++ --version command"; + } + + return result; + } Experiment& ArgumentsExperiment::initializedExperiment() { auto env = platform::DotEnv(); - experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1"); + experiment.setTitle(title).setLanguage("c++").setLanguageVersion(getGppVersion()); experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); 
 experiment.setStratified(stratified).setNFolds(n_folds).setScoreName(score);
diff --git a/src/main/Experiment.cpp b/src/main/Experiment.cpp
index 438735d..e240e7c 100644
--- a/src/main/Experiment.cpp
+++ b/src/main/Experiment.cpp
@@ -245,8 +245,6 @@ namespace platform {
             // Train model
             //
             clf->fit(X_train, y_train, features, className, states, smooth_type);
-            if (!quiet)
-                showProgress(nfold + 1, getColor(clf->getStatus()), "b");
             auto clf_notes = clf->getNotes();
             std::transform(clf_notes.begin(), clf_notes.end(), std::back_inserter(notes), [nfold](const std::string& note)
                 { return "Fold " + std::to_string(nfold) + ": " + note; });
@@ -259,6 +257,8 @@ namespace platform {
             // Score train
             //
             if (!no_train_score) {
+                if (!quiet)
+                    showProgress(nfold + 1, getColor(clf->getStatus()), "b");
                 auto y_proba_train = clf->predict_proba(X_train);
                 Scores scores(y_train, y_proba_train, num_classes, labels);
                 score_train_value = score == score_t::ACCURACY ? scores.accuracy() : scores.auc();
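
A minimal usage sketch of the new WilcoxonTest class added by this patch (illustration only, not part of the diff): the model and dataset names and the scores below are made up, the include path is assumed, and the json layout mirrors what buildAUCTable() reads, i.e. data[model][dataset] is an array whose first element is the score. The printed fields (model, pvalue, rank, wtl, reject) are the PostHocLine members filled in buildPostHocResult(); the PostHocResult/PostHocLine/WTL definitions are assumed to come from Statistics.h as in the rest of the patch.

#include <iostream>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "WilcoxonTest.hpp"   // assumed include path within src/best

int main() {
    using json = nlohmann::ordered_json;   // same alias the platform headers use
    std::vector<std::string> models = { "ModelA", "ModelB" };
    std::vector<std::string> datasets = { "iris", "wine", "glass" };
    // Toy results table: data[model][dataset][0] holds the score (e.g. AUC).
    json data;
    data["ModelA"]["iris"]  = json::array({ 0.95 });
    data["ModelA"]["wine"]  = json::array({ 0.91 });
    data["ModelA"]["glass"] = json::array({ 0.88 });
    data["ModelB"]["iris"]  = json::array({ 0.93 });
    data["ModelB"]["wine"]  = json::array({ 0.92 });
    data["ModelB"]["glass"] = json::array({ 0.85 });

    platform::WilcoxonTest test(models, datasets, data, 0.05);
    const auto& result = test.getPostHocResult();
    std::cout << "Control model: " << result.model
              << " (index " << test.getControlIdx() << ")" << std::endl;
    for (const auto& line : result.postHocLines) {
        std::cout << line.model
                  << "  p-value=" << line.pvalue
                  << "  rank=" << line.rank
                  << "  W/T/L=" << line.wtl.win << "/" << line.wtl.tie << "/" << line.wtl.loss
                  << (line.reject ? "  H0 rejected" : "  H0 not rejected") << std::endl;
    }
    return 0;
}

The p-values in this report come from the normal approximation to the Wilcoxon signed-rank statistic implemented in wilcoxonSignedRankTest() (two-sided tail via std::erfc(|z|/sqrt(2))), and selectControlModel() picks the model with the highest average score, matching the greaterAverage choice Statistics::fit() makes when the score is not accuracy.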