From a56ec98ef9f1894e50b7a8380a2b698484a12fb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Wed, 21 May 2025 11:51:04 +0200 Subject: [PATCH] Add Wilcoxon Test --- src/CMakeLists.txt | 2 +- src/best/BestResults.cpp | 29 +--- src/best/BestResultsExcel.cpp | 10 +- src/best/BestResultsExcel.h | 3 +- src/best/BestResultsTex.cpp | 6 +- src/best/BestResultsTex.h | 4 +- src/best/DeLong.cpp | 45 ------ src/best/DeLong.h | 24 --- src/best/Statistics.cpp | 140 ++++++++--------- src/best/Statistics.h | 13 +- src/best/WilcoxonTest.hpp | 250 +++++++++++++++++++++++++++++++ src/commands/b_grid.cpp | 2 +- src/main/ArgumentsExperiment.cpp | 27 +++- src/main/Experiment.cpp | 4 +- 14 files changed, 369 insertions(+), 190 deletions(-) delete mode 100644 src/best/DeLong.cpp delete mode 100644 src/best/DeLong.h create mode 100644 src/best/WilcoxonTest.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index be63bee..b89cebc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,7 +13,7 @@ include_directories( # b_best add_executable( b_best commands/b_best.cpp best/Statistics.cpp - best/BestResultsExcel.cpp best/BestResultsTex.cpp best/BestResultsMd.cpp best/BestResults.cpp best/DeLong.cpp + best/BestResultsExcel.cpp best/BestResultsTex.cpp best/BestResultsMd.cpp best/BestResults.cpp common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp main/Models.cpp main/Scores.cpp reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp diff --git a/src/best/BestResults.cpp b/src/best/BestResults.cpp index 21da49e..5e7c349 100644 --- a/src/best/BestResults.cpp +++ b/src/best/BestResults.cpp @@ -321,7 +321,7 @@ namespace platform { // Build the table of results json table = buildTableResults(models); std::vector datasets = getDatasets(table.begin().value()); - BestResultsExcel excel_report(score, datasets); + BestResultsExcel excel_report(path, score, datasets); excel_report.reportSingle(model, path + Paths::bestResultsFile(score, model)); messageOutputFile("Excel", excel_report.getFileName()); } @@ -337,10 +337,10 @@ namespace platform { // Compute the Friedman test std::map> ranksModels; if (friedman) { - Statistics stats(models, datasets, table, significance); + Statistics stats(score, models, datasets, table, significance); auto result = stats.friedmanTest(); - stats.postHocHolmTest(); - stats.postHocTestReport("Holm", score, result, tex); + stats.postHocTest(); + stats.postHocTestReport(result, tex); ranksModels = stats.getRanks(); } if (tex) { @@ -352,24 +352,11 @@ namespace platform { } } if (excel) { - BestResultsExcel excel(score, datasets); + BestResultsExcel excel(path, score, datasets); excel.reportAll(models, table, ranksModels, friedman, significance); if (friedman) { - int idx = -1; - double min = 2000; - // Find out the control model - auto totals = std::vector(models.size(), 0.0); - for (const auto& dataset_ : datasets) { - for (int i = 0; i < models.size(); ++i) { - totals[i] += ranksModels[dataset_][models[i]]; - } - } - for (int i = 0; i < models.size(); ++i) { - if (totals[i] < min) { - min = totals[i]; - idx = i; - } - } + Statistics stats(score, models, datasets, table, significance); + int idx = stats.getControlIdx(); model = models.at(idx); excel.reportSingle(model, path + Paths::bestResultsFile(score, model)); } @@ -378,7 +365,7 @@ namespace platform { } void BestResults::messageOutputFile(const std::string& title, const std::string& fileName) { - std::cout << Colors::YELLOW() << "** " << std::setw(5) << std::left << 
title + std::cout << Colors::YELLOW() << "** " << std::setw(8) << std::left << title << " file generated: " << fileName << Colors::RESET() << std::endl; } } \ No newline at end of file diff --git a/src/best/BestResultsExcel.cpp b/src/best/BestResultsExcel.cpp index fb7b864..36cfcb3 100644 --- a/src/best/BestResultsExcel.cpp +++ b/src/best/BestResultsExcel.cpp @@ -30,7 +30,7 @@ namespace platform { } return columnName; } - BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector& datasets) : score(score), datasets(datasets) + BestResultsExcel::BestResultsExcel(const std::string& path, const std::string& score, const std::vector& datasets) : path(path), score(score), datasets(datasets) { file_name = Paths::bestResultsExcel(score); workbook = workbook_new(getFileName().c_str()); @@ -92,7 +92,7 @@ namespace platform { catch (const std::out_of_range& oor) { auto tabName = "table_" + std::to_string(i); auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str()); - json data = loadResultData(Paths::results() + fileName); + json data = loadResultData(path + fileName); auto report = ReportExcel(data, false, workbook, worksheetNew); report.show(); hyperlink = "#table_" + std::to_string(i); @@ -241,10 +241,10 @@ namespace platform { } worksheet_merge_range(worksheet, 0, 0, 0, 7, "Friedman Test", styles["headerFirst"]); row = 2; - Statistics stats(models, datasets, table, significance, false); + Statistics stats(score, models, datasets, table, significance, false); // No output auto result = stats.friedmanTest(); - stats.postHocHolmTest(); - // stats.postHocTestReport("Holm", result, false); + stats.postHocTest(); + stats.postHocTestReport(result, false); // No tex output auto friedmanResult = stats.getFriedmanResult(); auto postHocResult = stats.getPostHocResult(); worksheet_merge_range(worksheet, row, 0, row, 7, "Null hypothesis: H0 'There is no significant differences between all the classifiers.'", styles["headerSmall"]); diff --git a/src/best/BestResultsExcel.h b/src/best/BestResultsExcel.h index 6c70a49..bd8bf94 100644 --- a/src/best/BestResultsExcel.h +++ b/src/best/BestResultsExcel.h @@ -10,7 +10,7 @@ namespace platform { using json = nlohmann::ordered_json; class BestResultsExcel : public ExcelFile { public: - BestResultsExcel(const std::string& score, const std::vector& datasets); + BestResultsExcel(const std::string& path, const std::string& score, const std::vector& datasets); ~BestResultsExcel(); void reportAll(const std::vector& models, const json& table, const std::map>& ranks, bool friedman, double significance); void reportSingle(const std::string& model, const std::string& fileName); @@ -22,6 +22,7 @@ namespace platform { void formatColumns(); void doFriedman(); void addConditionalFormat(std::string formula); + std::string path; std::string score; std::vector models; std::vector datasets; diff --git a/src/best/BestResultsTex.cpp b/src/best/BestResultsTex.cpp index 39e17d9..cf4827f 100644 --- a/src/best/BestResultsTex.cpp +++ b/src/best/BestResultsTex.cpp @@ -27,10 +27,10 @@ namespace platform { handler << "\\tiny " << std::endl; handler << "\\renewcommand{\\arraystretch }{1.2} " << std::endl; handler << "\\renewcommand{\\tabcolsep }{0.07cm} " << std::endl; - auto umetric = metric; + auto umetric = score; umetric[0] = toupper(umetric[0]); handler << "\\caption{" << umetric << " results(mean $\\pm$ std) for all the algorithms and datasets} " << std::endl; - handler << "\\label{tab:results_" << metric << "}" << std::endl; + handler << 
"\\label{tab:results_" << score << "}" << std::endl; std::string header_dataset_name = index ? "r" : "l"; handler << "\\begin{tabular} {{" << header_dataset_name << std::string(models.size(), 'c').c_str() << "}}" << std::endl; handler << "\\hline " << std::endl; @@ -100,7 +100,7 @@ namespace platform { handler << "%%" << std::endl; handler << "\\begin{table}[htbp]" << std::endl; handler << "\\centering" << std::endl; - handler << "\\caption{Results of the post-hoc " << kind << " test for the mean " << metric << " of the algorithms.}\\label{ tab:tests }" << std::endl; + handler << "\\caption{Results of the post-hoc " << kind << " test for the mean " << score << " of the algorithms.}\\label{ tab:tests }" << std::endl; handler << "\\begin{tabular}{lrrrrr}" << std::endl; handler << "\\hline" << std::endl; handler << "classifier & pvalue & rank & win & tie & loss\\\\" << std::endl; diff --git a/src/best/BestResultsTex.h b/src/best/BestResultsTex.h index e587dec..e0a82e0 100644 --- a/src/best/BestResultsTex.h +++ b/src/best/BestResultsTex.h @@ -9,14 +9,14 @@ namespace platform { using json = nlohmann::ordered_json; class BestResultsTex { public: - BestResultsTex(const std::string metric_, bool dataset_name = true) : metric{ metric_ }, dataset_name{ dataset_name } {}; + BestResultsTex(const std::string score, bool dataset_name = true) : score{ score }, dataset_name{ dataset_name } {}; ~BestResultsTex() = default; void results_header(const std::vector& models, const std::string& date, bool index); void results_body(const std::vector& datasets, json& table, bool index); void results_footer(const std::map>& totals, const std::string& best_model); void postHoc_test(struct PostHocResult& postHocResult, const std::string& kind, const std::string& date); private: - std::string metric; + std::string score; bool dataset_name; void openTexFile(const std::string& name); std::ofstream handler; diff --git a/src/best/DeLong.cpp b/src/best/DeLong.cpp deleted file mode 100644 index dbcc920..0000000 --- a/src/best/DeLong.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// DeLong.cpp -// Integración del test de DeLong con la clase RocAuc y Statistics -// Basado en: X. Sun and W. 
Xu, "Fast Implementation of DeLong’s Algorithm for Comparing the Areas Under Correlated Receiver Operating Characteristic Curves," (2014), y algoritmos inspirados en sklearn/pROC - -#include "DeLong.h" -#include -#include -#include -#include -#include -#include - -namespace platform { - - DeLong::DeLongResult DeLong::compare(const std::vector& aucs_model1, - const std::vector& aucs_model2) - { - if (aucs_model1.size() != aucs_model2.size()) { - throw std::invalid_argument("AUC lists must have the same size"); - } - - size_t N = aucs_model1.size(); - if (N < 2) { - throw std::invalid_argument("At least two AUC values are required"); - } - - std::vector diffs(N); - for (size_t i = 0; i < N; ++i) { - diffs[i] = aucs_model1[i] - aucs_model2[i]; - } - - double mean_diff = std::accumulate(diffs.begin(), diffs.end(), 0.0) / N; - double var = 0.0; - for (size_t i = 0; i < N; ++i) { - var += (diffs[i] - mean_diff) * (diffs[i] - mean_diff); - } - var /= (N * (N - 1)); - if (var <= 0.0) var = 1e-10; - - double z = mean_diff / std::sqrt(var); - double p = 2.0 * (1.0 - std::erfc(std::abs(z) / std::sqrt(2.0)) / 2.0); - return { mean_diff, z, p }; - } - -} diff --git a/src/best/DeLong.h b/src/best/DeLong.h deleted file mode 100644 index 07e3cf3..0000000 --- a/src/best/DeLong.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef DELONG_H -#define DELONG_H -/* ******************************************************************************************************************** -/* Integración del test de DeLong con la clase RocAuc y Statistics -/* Basado en: X. Sun and W. Xu, "Fast Implementation of DeLong’s Algorithm for Comparing the Areas Under Correlated -/* Receiver Operating Characteristic Curves," (2014), y algoritmos inspirados en sklearn/pROC -/* ********************************************************************************************************************/ -#include - -namespace platform { - class DeLong { - public: - struct DeLongResult { - double auc_diff; - double z_stat; - double p_value; - }; - // Compara dos vectores de AUCs por dataset y devuelve diferencia media, - // estadístico z y p-valor usando un test de rangos (DeLong simplificado) - static DeLongResult compare(const std::vector& aucs_model1, - const std::vector& aucs_model2); - }; -} -#endif // DELONG_H \ No newline at end of file diff --git a/src/best/Statistics.cpp b/src/best/Statistics.cpp index cd40cc1..0c27cef 100644 --- a/src/best/Statistics.cpp +++ b/src/best/Statistics.cpp @@ -7,19 +7,25 @@ #include "BestResultsTex.h" #include "BestResultsMd.h" #include "Statistics.h" -#include "DeLong.h" +#include "WilcoxonTest.hpp" namespace platform { - Statistics::Statistics(const std::vector& models, const std::vector& datasets, const json& data, double significance, bool output) : - models(models), datasets(datasets), data(data), significance(significance), output(output) + Statistics::Statistics(const std::string& score, const std::vector& models, const std::vector& datasets, const json& data, double significance, bool output) : + score(score), models(models), datasets(datasets), data(data), significance(significance), output(output) { + if (score == "accuracy") { + postHocType = "Holm"; + hlen = 85; + } else { + postHocType = "Wilcoxon"; + hlen = 88; + } nModels = models.size(); nDatasets = datasets.size(); auto temp = ConfigLocale(); } - void Statistics::fit() { if (nModels < 3 || nDatasets < 3) { @@ -28,9 +34,11 @@ namespace platform { throw std::runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 
datasets."); } ranksModels.clear(); - computeRanks(); + computeRanks(); // compute greaterAverage and ranks // Set the control model as the one with the lowest average rank - controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); + controlIdx = score == "accuracy" ? + distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })) + : greaterAverage; // The model with the greater average score computeWTL(); maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); maxDatasetName = (*std::max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); @@ -67,11 +75,16 @@ namespace platform { void Statistics::computeRanks() { std::map ranksLine; + std::map averages; + for (const auto& model : models) { + averages[model] = 0; + } for (const auto& dataset : datasets) { std::vector> ranksOrder; for (const auto& model : models) { double value = data[model].at(dataset).at(0).get(); ranksOrder.push_back({ model, value }); + averages[model] += value; } // Assign the ranks ranksLine = assignRanks(ranksOrder); @@ -89,6 +102,12 @@ namespace platform { for (const auto& rank : ranks) { ranks[rank.first] /= nDatasets; } + // Average the scores + for (const auto& average : averages) { + averages[average.first] /= nDatasets; + } + // Get the model with the greater average score + greaterAverage = distance(averages.begin(), max_element(averages.begin(), averages.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); } void Statistics::computeWTL() { @@ -115,12 +134,36 @@ namespace platform { } } } + int Statistics::getControlIdx() + { + if (!fitted) { + fit(); + } + return controlIdx; + } + void Statistics::postHocTest() + { + // if (score == "accuracy") { + postHocHolmTest(); + // } else { + // postHocWilcoxonTest(); + // } + } + void Statistics::postHocWilcoxonTest() + { + if (!fitted) { + fit(); + } + // Reference: Wilcoxon, F. (1945). “Individual Comparisons by Ranking Methods”. Biometrics Bulletin, 1(6), 80-83. + auto wilcoxon = WilcoxonTest(models, datasets, data, significance); + controlIdx = wilcoxon.getControlIdx(); + postHocResult = wilcoxon.getPostHocResult(); + } void Statistics::postHocHolmTest() { if (!fitted) { fit(); } - std::stringstream oss; // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8 // Post-hoc Holm test // Calculate the p-value for the models paired with the control model @@ -155,15 +198,15 @@ namespace platform { postHocResult.model = models.at(controlIdx); } - void Statistics::postHocTestReport(const std::string& kind, const std::string& metric, bool friedmanResult, bool tex) + void Statistics::postHocTestReport(bool friedmanResult, bool tex) { std::stringstream oss; postHocResult.model = models.at(controlIdx); auto color = friedmanResult ? 
Colors::CYAN() : Colors::YELLOW(); oss << color; - oss << " *************************************************************************************************************" << std::endl; - oss << " Post-hoc " << kind << " test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl; + oss << " " << std::string(hlen + 25, '*') << std::endl; + oss << " Post-hoc " << postHocType << " test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl; oss << " Control model: " << models.at(controlIdx) << std::endl; oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value rank win tie loss Status" << std::endl; oss << " " << std::string(maxModelName, '=') << " ============ ========= === === ==== =============" << std::endl; @@ -198,83 +241,18 @@ namespace platform { oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss; oss << " " << status << textStatus << std::endl; } - oss << color << " *************************************************************************************************************" << std::endl; + oss << color << " " << std::string(hlen + 25, '*') << std::endl; oss << Colors::RESET(); if (output) { std::cout << oss.str(); } if (tex) { - BestResultsTex bestResultsTex(metric); + BestResultsTex bestResultsTex(score); BestResultsMd bestResultsMd; - bestResultsTex.postHoc_test(postHocResult, kind, get_date() + " " + get_time()); - bestResultsMd.postHoc_test(postHocResult, kind, get_date() + " " + get_time()); + bestResultsTex.postHoc_test(postHocResult, postHocType, get_date() + " " + get_time()); + bestResultsMd.postHoc_test(postHocResult, postHocType, get_date() + " " + get_time()); } } - // void Statistics::postHocDeLongTest(const std::vector>& y_trues, - // const std::vector>>& y_probas, - // bool tex) - // { - // std::map pvalues; - // postHocResult.model = models.at(controlIdx); - // postHocResult.postHocLines.clear(); - - // for (size_t i = 0; i < models.size(); ++i) { - // if ((int)i == controlIdx) continue; - // double acc_p = 0.0; - // int valid = 0; - // for (size_t d = 0; d < y_trues.size(); ++d) { - // try { - // auto result = compareModelsWithDeLong(y_probas[controlIdx][d], y_probas[i][d], y_trues[d]); - // acc_p += result.p_value; - // ++valid; - // } - // catch (...) 
{} - // } - // if (valid > 0) { - // pvalues[i] = acc_p / valid; - // } - // } - - // std::vector> sorted_pvalues(pvalues.begin(), pvalues.end()); - // std::sort(sorted_pvalues.begin(), sorted_pvalues.end(), [](const auto& a, const auto& b) { - // return a.second < b.second; - // }); - - // std::stringstream oss; - // oss << "\n*************************************************************************************************************\n"; - // oss << " Post-hoc DeLong-Holm test: H0: 'No significant differences in AUC with control model.'\n"; - // oss << " Control model: " << models[controlIdx] << "\n"; - // oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value Adjusted Result\n"; - // oss << " " << std::string(maxModelName, '=') << " ============ ========== =============\n"; - - // double prev = 0.0; - // for (size_t i = 0; i < sorted_pvalues.size(); ++i) { - // int idx = sorted_pvalues[i].first; - // double raw = sorted_pvalues[i].second; - // double adj = std::min(1.0, raw * (models.size() - i - 1)); - // adj = std::max(prev, adj); - // prev = adj; - // bool reject = adj < significance; - - // postHocResult.postHocLines.push_back({ models[idx], adj, 0.0f, {}, reject }); - - // auto color = reject ? Colors::MAGENTA() : Colors::GREEN(); - // auto status = reject ? Symbols::cross : Symbols::check_mark; - // auto textStatus = reject ? " rejected H0" : " accepted H0"; - // oss << " " << color << std::left << std::setw(maxModelName) << models[idx] << " "; - // oss << std::setprecision(6) << std::scientific << raw << " "; - // oss << std::setprecision(6) << std::scientific << adj << " " << status << textStatus << "\n"; - // } - // oss << Colors::CYAN() << " *************************************************************************************************************\n"; - // oss << Colors::RESET(); - // if (output) std::cout << oss.str(); - // if (tex) { - // BestResultsTex bestResultsTex; - // BestResultsMd bestResultsMd; - // bestResultsTex.holm_test(postHocResult, get_date() + " " + get_time()); - // bestResultsMd.holm_test(postHocResult, get_date() + " " + get_time()); - // } - // } bool Statistics::friedmanTest() { if (!fitted) { @@ -284,7 +262,7 @@ namespace platform { // Friedman test // Calculate the Friedman statistic oss << Colors::BLUE() << std::endl; - oss << "***************************************************************************************************************" << std::endl; + oss << std::string(hlen, '*') << std::endl; oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << std::endl; double degreesOfFreedom = nModels - 1.0; double sumSquared = 0; @@ -309,7 +287,7 @@ namespace platform { oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." 
<< std::endl; result = false; } - oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << std::endl; + oss << Colors::BLUE() << std::string(hlen, '*') << Colors::RESET() << std::endl; if (output) { std::cout << oss.str(); } diff --git a/src/best/Statistics.h b/src/best/Statistics.h index 285f34c..765ed1d 100644 --- a/src/best/Statistics.h +++ b/src/best/Statistics.h @@ -32,17 +32,22 @@ namespace platform { }; class Statistics { public: - Statistics(const std::vector& models, const std::vector& datasets, const json& data, double significance = 0.05, bool output = true); + Statistics(const std::string& score, const std::vector& models, const std::vector& datasets, const json& data, double significance = 0.05, bool output = true); bool friedmanTest(); - void postHocHolmTest(); - void postHocTestReport(const std::string& kind, const std::string& metric, bool friedmanResult, bool tex); + void postHocTest(); + void postHocTestReport(bool friedmanResult, bool tex); + int getControlIdx(); FriedmanResult& getFriedmanResult(); PostHocResult& getPostHocResult(); std::map>& getRanks(); private: void fit(); + void postHocHolmTest(); + void postHocWilcoxonTest(); void computeRanks(); void computeWTL(); + const std::string& score; + std::string postHocType; const std::vector& models; const std::vector& datasets; const json& data; @@ -52,11 +57,13 @@ namespace platform { int nModels = 0; int nDatasets = 0; int controlIdx = 0; + int greaterAverage = -1; // The model with the greater average score std::map wtl; std::map ranks; std::vector> postHocData; int maxModelName = 0; int maxDatasetName = 0; + int hlen; // length of the line FriedmanResult friedmanResult; PostHocResult postHocResult; std::map> ranksModels; diff --git a/src/best/WilcoxonTest.hpp b/src/best/WilcoxonTest.hpp new file mode 100644 index 0000000..34c2969 --- /dev/null +++ b/src/best/WilcoxonTest.hpp @@ -0,0 +1,250 @@ +#ifndef BEST_WILCOXON_TEST_HPP +#define BEST_WILCOXON_TEST_HPP +// WilcoxonTest.hpp +// Stand‑alone class for paired Wilcoxon signed‑rank post‑hoc analysis +// ------------------------------------------------------------------ +// * Constructor takes the *already‑loaded* nlohmann::json object plus the +// vectors of model and dataset names. +// * Internally selects a control model (highest average AUC) and builds all +// statistics (ranks, W/T/L counts, Wilcoxon p‑values). 
+// * Public API: +// int getControlIdx() const; +// PostHocResult getPostHocResult() const; +// +#include +#include +#include +#include +#include +#include +#include +#include "Statistics.h" + +namespace platform { + class WilcoxonTest { + public: + WilcoxonTest(const std::vector& models, + const std::vector& datasets, + const json& data, + double alpha = 0.05) + : models_(models), datasets_(datasets), data_(data), alpha_(alpha) + { + buildAUCTable(); // extracts all AUCs into a dense matrix + computeAverageAUCs(); // per‑model mean (→ control selection) + computeAverageRanks(); // Friedman‑style ranks per model + selectControlModel(); // sets control_idx_ + buildPostHocResult(); // fills postHocResult_ + } + + //---------------------------------------------------- public API ---- + int getControlIdx() const noexcept { return control_idx_; } + + const PostHocResult& getPostHocResult() const noexcept { return postHocResult_; } + + private: + //-------------------------------------------------- helper structs ---- + // When a value is missing we keep NaN so that ordinary arithmetic still + // works (NaN simply propagates and we can test with std::isnan). + using Matrix = std::vector>; // [model][dataset] + + //------------------------------------------------- implementation ---- + void buildAUCTable() + { + const std::size_t M = models_.size(); + const std::size_t D = datasets_.size(); + auc_.assign(M, std::vector(D, std::numeric_limits::quiet_NaN())); + + for (std::size_t i = 0; i < M; ++i) { + const auto& model = models_[i]; + for (std::size_t j = 0; j < D; ++j) { + const auto& ds = datasets_[j]; + try { + auc_[i][j] = data_.at(model).at(ds).at(0).get(); + } + catch (...) { + // leave as NaN when value missing + } + } + } + } + + void computeAverageAUCs() + { + const std::size_t M = models_.size(); + avg_auc_.resize(M, std::numeric_limits::quiet_NaN()); + + for (std::size_t i = 0; i < M; ++i) { + double sum = 0.0; + std::size_t cnt = 0; + for (double v : auc_[i]) { + if (!std::isnan(v)) { sum += v; ++cnt; } + } + avg_auc_[i] = cnt ? sum / cnt : std::numeric_limits::quiet_NaN(); + } + } + + // Average rank across datasets (1 = best). + void computeAverageRanks() + { + const std::size_t M = models_.size(); + const std::size_t D = datasets_.size(); + rank_sum_.assign(M, 0.0); + rank_cnt_.assign(M, 0); + + const double EPS = 1e-10; + + for (std::size_t j = 0; j < D; ++j) { + // Collect present values for this dataset + std::vector> vals; // (auc, model_idx) + vals.reserve(M); + for (std::size_t i = 0; i < M; ++i) { + if (!std::isnan(auc_[i][j])) + vals.emplace_back(auc_[i][j], i); + } + if (vals.empty()) continue; // no info for this dataset + + // Sort descending (higher AUC better) + std::sort(vals.begin(), vals.end(), [](auto a, auto b) { + return a.first > b.first; + }); + + // Assign ranks with average for ties + std::size_t k = 0; + while (k < vals.size()) { + std::size_t l = k + 1; + while (l < vals.size() && std::fabs(vals[l].first - vals[k].first) < EPS) ++l; + const double avg_rank = (k + 1 + l) * 0.5; // average of ranks (1‑based) + for (std::size_t m = k; m < l; ++m) { + const auto idx = vals[m].second; + rank_sum_[idx] += avg_rank; + ++rank_cnt_[idx]; + } + k = l; + } + } + + // Final average + avg_rank_.resize(M, std::numeric_limits::quiet_NaN()); + for (std::size_t i = 0; i < M; ++i) { + avg_rank_[i] = rank_cnt_[i] ? 
rank_sum_[i] / rank_cnt_[i] + : std::numeric_limits::quiet_NaN(); + } + } + + void selectControlModel() + { + // pick model with highest average AUC (ties → first) + control_idx_ = 0; + for (std::size_t i = 1; i < avg_auc_.size(); ++i) { + if (avg_auc_[i] > avg_auc_[control_idx_]) control_idx_ = static_cast(i); + } + } + + void buildPostHocResult() + { + const std::size_t M = models_.size(); + const std::size_t D = datasets_.size(); + const std::string& control_name = models_[control_idx_]; + + postHocResult_.model = control_name; + + const double practical_threshold = 0.0005; // same heuristic as original code + + for (std::size_t i = 0; i < M; ++i) { + if (static_cast(i) == control_idx_) continue; + + PostHocLine line; + line.model = models_[i]; + line.rank = avg_rank_[i]; + + WTL wtl; + std::vector differences; + differences.reserve(D); + + for (std::size_t j = 0; j < D; ++j) { + double auc_control = auc_[control_idx_][j]; + double auc_other = auc_[i][j]; + if (std::isnan(auc_control) || std::isnan(auc_other)) continue; + + double diff = auc_control - auc_other; // control − comparison + if (std::fabs(diff) <= practical_threshold) { + ++wtl.tie; + } else if (diff < 0) { + ++wtl.win; // comparison wins + } else { + ++wtl.loss; // control wins + } + differences.push_back(diff); + } + + line.wtl = wtl; + line.pvalue = differences.empty() ? 1.0L : static_cast(wilcoxonSignedRankTest(differences)); + line.reject = (line.pvalue < alpha_); + + postHocResult_.postHocLines.push_back(std::move(line)); + } + } + + // ------------------------------------------------ Wilcoxon (private) -- + static double wilcoxonSignedRankTest(const std::vector& diffs) + { + if (diffs.empty()) return 1.0; + + // Build |diff| + sign vector (exclude zeros) + struct Node { double absval; int sign; }; + std::vector v; + v.reserve(diffs.size()); + for (double d : diffs) { + if (d != 0.0) v.push_back({ std::fabs(d), d > 0 ? 
1 : -1 }); + } + if (v.empty()) return 1.0; + + // Sort by absolute value + std::sort(v.begin(), v.end(), [](const Node& a, const Node& b) { return a.absval < b.absval; }); + + const double EPS = 1e-10; + const std::size_t n = v.size(); + std::vector ranks(n, 0.0); + + std::size_t i = 0; + while (i < n) { + std::size_t j = i + 1; + while (j < n && std::fabs(v[j].absval - v[i].absval) < EPS) ++j; + double avg_rank = (i + 1 + j) * 0.5; // 1‑based ranks + for (std::size_t k = i; k < j; ++k) ranks[k] = avg_rank; + i = j; + } + + double w_plus = 0.0, w_minus = 0.0; + for (std::size_t k = 0; k < n; ++k) { + if (v[k].sign > 0) w_plus += ranks[k]; + else w_minus += ranks[k]; + } + double w = std::min(w_plus, w_minus); + double mean_w = n * (n + 1) / 4.0; + double sd_w = std::sqrt(n * (n + 1) * (2 * n + 1) / 24.0); + if (sd_w == 0.0) return 1.0; // degenerate (all diffs identical) + + double z = (w - mean_w) / sd_w; + double p_two = std::erfc(std::fabs(z) / std::sqrt(2.0)); // 2‑sided tail + return p_two; + } + + //-------------------------------------------------------- data ---- + std::vector models_; + std::vector datasets_; + json data_; + double alpha_; + + Matrix auc_; // [model][dataset] + std::vector avg_auc_; // mean AUC per model + std::vector avg_rank_; // mean rank per model + std::vector rank_sum_; // helper for ranks + std::vector rank_cnt_; // datasets counted per model + + int control_idx_ = -1; + PostHocResult postHocResult_; + }; + +} // namespace stats +#endif // BEST_WILCOXON_TEST_HPP \ No newline at end of file diff --git a/src/commands/b_grid.cpp b/src/commands/b_grid.cpp index 7e246a5..b1c6244 100644 --- a/src/commands/b_grid.cpp +++ b/src/commands/b_grid.cpp @@ -231,8 +231,8 @@ void experiment(argparse::ArgumentParser& program) { struct platform::ConfigGrid config; auto arguments = platform::ArgumentsExperiment(program, platform::experiment_t::GRID); - auto path_results = arguments.getPathResults(); arguments.parse(); + auto path_results = arguments.getPathResults(); auto grid_experiment = platform::GridExperiment(arguments, config); platform::Timer timer; timer.start(); diff --git a/src/main/ArgumentsExperiment.cpp b/src/main/ArgumentsExperiment.cpp index 8d778ba..58bf990 100644 --- a/src/main/ArgumentsExperiment.cpp +++ b/src/main/ArgumentsExperiment.cpp @@ -215,10 +215,35 @@ namespace platform { test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json); } } + std::string getGppVersion() + { + std::string result; + std::array buffer; + + // Run g++ --version and capture the output + std::unique_ptr pipe(popen("g++ --version", "r"), pclose); + + if (!pipe) { + return "Error executing g++ --version command"; + } + + // Read the first line of output (which contains the version info) + if (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + result = buffer.data(); + // Remove trailing newline if present + if (!result.empty() && result[result.length() - 1] == '\n') { + result.erase(result.length() - 1); + } + } else { + return "No output from g++ --version command"; + } + + return result; + } Experiment& ArgumentsExperiment::initializedExperiment() { auto env = platform::DotEnv(); - experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1"); + experiment.setTitle(title).setLanguage("c++").setLanguageVersion(getGppVersion()); experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); 
 experiment.setStratified(stratified).setNFolds(n_folds).setScoreName(score);
diff --git a/src/main/Experiment.cpp b/src/main/Experiment.cpp
index 438735d..e240e7c 100644
--- a/src/main/Experiment.cpp
+++ b/src/main/Experiment.cpp
@@ -245,8 +245,6 @@ namespace platform {
             // Train model
             //
             clf->fit(X_train, y_train, features, className, states, smooth_type);
-            if (!quiet)
-                showProgress(nfold + 1, getColor(clf->getStatus()), "b");
             auto clf_notes = clf->getNotes();
             std::transform(clf_notes.begin(), clf_notes.end(), std::back_inserter(notes), [nfold](const std::string& note)
                 { return "Fold " + std::to_string(nfold) + ": " + note; });
@@ -259,6 +257,8 @@ namespace platform {
             // Score train
             //
             if (!no_train_score) {
+                if (!quiet)
+                    showProgress(nfold + 1, getColor(clf->getStatus()), "b");
                 auto y_proba_train = clf->predict_proba(X_train);
                 Scores scores(y_train, y_proba_train, num_classes, labels);
                 score_train_value = score == score_t::ACCURACY ? scores.accuracy() : scores.auc();
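
A minimal usage sketch of the new WilcoxonTest class added by this patch (illustration only, not part of the diff): the model and dataset names and the scores below are made up, the include path is assumed, and the json layout mirrors what buildAUCTable() reads, i.e. data[model][dataset] is an array whose first element is the score. The printed fields (model, pvalue, rank, wtl, reject) are the PostHocLine members filled in buildPostHocResult(); the PostHocResult/PostHocLine/WTL definitions are assumed to come from Statistics.h as in the rest of the patch.

#include <iostream>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "WilcoxonTest.hpp"   // assumed include path within src/best

int main() {
    using json = nlohmann::ordered_json;   // same alias the platform headers use
    std::vector<std::string> models = { "ModelA", "ModelB" };
    std::vector<std::string> datasets = { "iris", "wine", "glass" };
    // Toy results table: data[model][dataset][0] holds the score (e.g. AUC).
    json data;
    data["ModelA"]["iris"]  = json::array({ 0.95 });
    data["ModelA"]["wine"]  = json::array({ 0.91 });
    data["ModelA"]["glass"] = json::array({ 0.88 });
    data["ModelB"]["iris"]  = json::array({ 0.93 });
    data["ModelB"]["wine"]  = json::array({ 0.92 });
    data["ModelB"]["glass"] = json::array({ 0.85 });

    platform::WilcoxonTest test(models, datasets, data, 0.05);
    const auto& result = test.getPostHocResult();
    std::cout << "Control model: " << result.model
              << " (index " << test.getControlIdx() << ")" << std::endl;
    for (const auto& line : result.postHocLines) {
        std::cout << line.model
                  << "  p-value=" << line.pvalue
                  << "  rank=" << line.rank
                  << "  W/T/L=" << line.wtl.win << "/" << line.wtl.tie << "/" << line.wtl.loss
                  << (line.reject ? "  H0 rejected" : "  H0 not rejected") << std::endl;
    }
    return 0;
}

The p-values in this report come from the normal approximation to the Wilcoxon signed-rank statistic implemented in wilcoxonSignedRankTest() (two-sided tail via std::erfc(|z|/sqrt(2))), and selectControlModel() picks the model with the highest average score, matching the greaterAverage choice Statistics::fit() makes when the score is not accuracy.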