From 473d194ddeb4bbc3231b12841b22df0e0033a868 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sat, 24 May 2025 12:59:28 +0200 Subject: [PATCH] Complete integration of Wilcoxon test --- src/best/BestResultsExcel.cpp | 22 ++--- src/best/BestResultsMd.cpp | 8 +- src/best/BestResultsMd.h | 2 +- src/best/BestResultsTex.cpp | 8 +- src/best/BestResultsTex.h | 2 +- src/best/Statistics.cpp | 139 +++++++++++++++++--------------- src/best/Statistics.h | 24 +++--- src/best/WilcoxonTest.hpp | 29 +++---- src/reports/DatasetsConsole.cpp | 11 ++- 9 files changed, 128 insertions(+), 117 deletions(-) diff --git a/src/best/BestResultsExcel.cpp b/src/best/BestResultsExcel.cpp index 36cfcb3..71e6cc3 100644 --- a/src/best/BestResultsExcel.cpp +++ b/src/best/BestResultsExcel.cpp @@ -164,13 +164,15 @@ namespace platform { addConditionalFormat("max"); footer(false); if (friedman) { - // Create Sheet with ranks - worksheet = workbook_add_worksheet(workbook, "Ranks"); - formatColumns(); - header(true); - body(true); - addConditionalFormat("min"); - footer(true); + if (score == "accuracy") { + // Create Sheet with ranks + worksheet = workbook_add_worksheet(workbook, "Ranks"); + formatColumns(); + header(true); + body(true); + addConditionalFormat("min"); + footer(true); + } // Create Sheet with Friedman Test doFriedman(); } @@ -246,7 +248,7 @@ namespace platform { stats.postHocTest(); stats.postHocTestReport(result, false); // No tex output auto friedmanResult = stats.getFriedmanResult(); - auto postHocResult = stats.getPostHocResult(); + auto postHocResults = stats.getPostHocResults(); worksheet_merge_range(worksheet, row, 0, row, 7, "Null hypothesis: H0 'There is no significant differences between all the classifiers.'", styles["headerSmall"]); row += 2; writeString(row, 1, "Friedman Q", "bodyHeader"); @@ -265,7 +267,7 @@ namespace platform { row += 2; worksheet_merge_range(worksheet, row, 0, row, 7, "Null hypothesis: H0 'There is no significant differences between the control model and the other models.'", styles["headerSmall"]); row += 2; - std::string controlModel = "Control Model: " + postHocResult.model; + std::string controlModel = "Control Model: " + postHocResults.at(0).model; worksheet_merge_range(worksheet, row, 1, row, 7, controlModel.c_str(), styles["bodyHeader_odd"]); row++; writeString(row, 1, "Model", "bodyHeader"); @@ -277,7 +279,7 @@ namespace platform { writeString(row, 7, "Reject H0", "bodyHeader"); row++; bool first = true; - for (const auto& item : postHocResult.postHocLines) { + for (const auto& item : postHocResults) { writeString(row, 1, item.model, "text"); if (first) { // Control model info diff --git a/src/best/BestResultsMd.cpp b/src/best/BestResultsMd.cpp index 3c70901..195d3f6 100644 --- a/src/best/BestResultsMd.cpp +++ b/src/best/BestResultsMd.cpp @@ -75,7 +75,7 @@ namespace platform { handler.close(); } - void BestResultsMd::postHoc_test(struct PostHocResult& postHocResult, const std::string& kind, const std::string& date) + void BestResultsMd::postHoc_test(std::vector& postHocResults, const std::string& kind, const std::string& date) { auto file_name = Paths::tex() + Paths::md_post_hoc(); openMdFile(file_name); @@ -87,10 +87,12 @@ namespace platform { handler << "Post-hoc " << kind << " test: H0: There is no significant differences between the control model and the other models." << std::endl << std::endl; handler << "| classifier | pvalue | rank | win | tie | loss | H0 |" << std::endl; handler << "| :-- | --: | --: | --:| --: | --: | :--: |" << std::endl; - for (auto const& line : postHocResult.postHocLines) { + bool first = true; + for (auto const& line : postHocResults) { auto textStatus = !line.reject ? "**" : " "; - if (line.model == postHocResult.model) { + if (first) { handler << "| " << line.model << " | - | " << std::fixed << std::setprecision(2) << line.rank << " | - | - | - |" << std::endl; + first = false; } else { handler << "| " << line.model << " | " << textStatus << std::scientific << std::setprecision(4) << line.pvalue << textStatus << " |"; handler << std::fixed << std::setprecision(2) << line.rank << " | " << line.wtl.win << " | " << line.wtl.tie << " | " << line.wtl.loss << " |"; diff --git a/src/best/BestResultsMd.h b/src/best/BestResultsMd.h index 894ae80..8ff2612 100644 --- a/src/best/BestResultsMd.h +++ b/src/best/BestResultsMd.h @@ -14,7 +14,7 @@ namespace platform { void results_header(const std::vector& models, const std::string& date); void results_body(const std::vector& datasets, json& table); void results_footer(const std::map>& totals, const std::string& best_model); - void postHoc_test(struct PostHocResult& postHocResult, const std::string& kind, const std::string& date); + void postHoc_test(std::vector& postHocResults, const std::string& kind, const std::string& date); private: void openMdFile(const std::string& name); std::ofstream handler; diff --git a/src/best/BestResultsTex.cpp b/src/best/BestResultsTex.cpp index cf4827f..afe19ad 100644 --- a/src/best/BestResultsTex.cpp +++ b/src/best/BestResultsTex.cpp @@ -89,7 +89,7 @@ namespace platform { handler << "\\end{table}" << std::endl; handler.close(); } - void BestResultsTex::postHoc_test(struct PostHocResult& postHocResult, const std::string& kind, const std::string& date) + void BestResultsTex::postHoc_test(std::vector& postHocResults, const std::string& kind, const std::string& date) { auto file_name = Paths::tex() + Paths::tex_post_hoc(); openTexFile(file_name); @@ -105,10 +105,12 @@ namespace platform { handler << "\\hline" << std::endl; handler << "classifier & pvalue & rank & win & tie & loss\\\\" << std::endl; handler << "\\hline" << std::endl; - for (auto const& line : postHocResult.postHocLines) { + bool first = true; + for (auto const& line : postHocResults) { auto textStatus = !line.reject ? "\\bf " : " "; - if (line.model == postHocResult.model) { + if (first) { handler << line.model << " & - & " << std::fixed << std::setprecision(2) << line.rank << " & - & - & - \\\\" << std::endl; + first = false; } else { handler << line.model << " & " << textStatus << std::scientific << std::setprecision(4) << line.pvalue << " & "; handler << std::fixed << std::setprecision(2) << line.rank << " & " << line.wtl.win << " & " << line.wtl.tie << " & " << line.wtl.loss << "\\\\" << std::endl; diff --git a/src/best/BestResultsTex.h b/src/best/BestResultsTex.h index e0a82e0..7392d7c 100644 --- a/src/best/BestResultsTex.h +++ b/src/best/BestResultsTex.h @@ -14,7 +14,7 @@ namespace platform { void results_header(const std::vector& models, const std::string& date, bool index); void results_body(const std::vector& datasets, json& table, bool index); void results_footer(const std::map>& totals, const std::string& best_model); - void postHoc_test(struct PostHocResult& postHocResult, const std::string& kind, const std::string& date); + void postHoc_test(std::vector& postHocResults, const std::string& kind, const std::string& date); private: std::string score; bool dataset_name; diff --git a/src/best/Statistics.cpp b/src/best/Statistics.cpp index 0c27cef..04418ce 100644 --- a/src/best/Statistics.cpp +++ b/src/best/Statistics.cpp @@ -111,6 +111,7 @@ namespace platform { } void Statistics::computeWTL() { + const double practical_threshold = 0.0005; // Compute the WTL matrix (Win Tie Loss) for (int i = 0; i < nModels; ++i) { wtl[i] = { 0, 0, 0 }; @@ -124,10 +125,11 @@ namespace platform { continue; } double value = data[models[i]].at(item.key()).at(0).get(); - if (value < controlValue) { - wtl[i].win++; - } else if (value == controlValue) { + double diff = controlValue - value; // control − comparison + if (std::fabs(diff) <= practical_threshold) { wtl[i].tie++; + } else if (diff < 0) { + wtl[i].win++; } else { wtl[i].loss++; } @@ -143,11 +145,11 @@ namespace platform { } void Statistics::postHocTest() { - // if (score == "accuracy") { - postHocHolmTest(); - // } else { - // postHocWilcoxonTest(); - // } + if (score == "accuracy") { + postHocHolmTest(); + } else { + postHocWilcoxonTest(); + } } void Statistics::postHocWilcoxonTest() { @@ -157,7 +159,42 @@ namespace platform { // Reference: Wilcoxon, F. (1945). “Individual Comparisons by Ranking Methods”. Biometrics Bulletin, 1(6), 80-83. auto wilcoxon = WilcoxonTest(models, datasets, data, significance); controlIdx = wilcoxon.getControlIdx(); - postHocResult = wilcoxon.getPostHocResult(); + postHocResults = wilcoxon.getPostHocResults(); + std::cout << std::string(80, '=') << std::endl; + setResultsOrder(); + Holm_Bonferroni(); + restoreResultsOrder(); + } + void Statistics::Holm_Bonferroni() + { + // The algorithm need the p-values sorted from the lowest to the highest + // Sort the models by p-value + std::sort(postHocResults.begin(), postHocResults.end(), [](const PostHocLine& a, const PostHocLine& b) { + return a.pvalue < b.pvalue; + }); + // Holm adjustment + for (int i = 0; i < postHocResults.size(); ++i) { + auto item = postHocResults.at(i); + double before = i == 0 ? 0.0 : postHocResults.at(i - 1).pvalue; + double p_value = std::min((long double)1.0, item.pvalue * (nModels - i)); + p_value = std::max(before, p_value); + postHocResults[i].pvalue = p_value; + } + } + void Statistics::setResultsOrder() + { + int c = 0; + for (auto& item : postHocResults) { + item.idx = c++; + } + + } + void Statistics::restoreResultsOrder() + { + // Restore the order of the results + std::sort(postHocResults.begin(), postHocResults.end(), [](const PostHocLine& a, const PostHocLine& b) { + return a.idx < b.idx; + }); } void Statistics::postHocHolmTest() { @@ -171,38 +208,32 @@ namespace platform { boost::math::normal dist(0.0, 1.0); double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets)); for (int i = 0; i < nModels; i++) { + PostHocLine line; + line.model = models[i]; + line.rank = ranks.at(models[i]); + line.wtl = wtl.at(i); + line.reject = false; if (i == controlIdx) { - stats[i] = 0.0; + postHocResults.push_back(line); continue; } double z = std::abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff; - double p_value = (long double)2 * (1 - cdf(dist, z)); - stats[i] = p_value; + line.pvalue = (long double)2 * (1 - cdf(dist, z)); + line.reject = (line.pvalue < significance); + postHocResults.push_back(line); } - // Sort the models by p-value - for (const auto& stat : stats) { - postHocData.push_back({ stat.first, stat.second }); - } - std::sort(postHocData.begin(), postHocData.end(), [](const std::pair& a, const std::pair& b) { - return a.second < b.second; + std::sort(postHocResults.begin(), postHocResults.end(), [](const PostHocLine& a, const PostHocLine& b) { + return a.rank < b.rank; }); - - // Holm adjustment - for (int i = 0; i < postHocData.size(); ++i) { - auto item = postHocData.at(i); - double before = i == 0 ? 0.0 : postHocData.at(i - 1).second; - double p_value = std::min((double)1.0, item.second * (nModels - i)); - p_value = std::max(before, p_value); - postHocData[i] = { item.first, p_value }; - } - postHocResult.model = models.at(controlIdx); + setResultsOrder(); + Holm_Bonferroni(); + restoreResultsOrder(); } void Statistics::postHocTestReport(bool friedmanResult, bool tex) { std::stringstream oss; - postHocResult.model = models.at(controlIdx); auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW(); oss << color; oss << " " << std::string(hlen + 25, '*') << std::endl; @@ -210,35 +241,21 @@ namespace platform { oss << " Control model: " << models.at(controlIdx) << std::endl; oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value rank win tie loss Status" << std::endl; oss << " " << std::string(maxModelName, '=') << " ============ ========= === === ==== =============" << std::endl; - // sort ranks from lowest to highest - std::vector> ranksOrder; - for (const auto& rank : ranks) { - ranksOrder.push_back({ rank.first, rank.second }); - } - std::sort(ranksOrder.begin(), ranksOrder.end(), [](const std::pair& a, const std::pair& b) { - return a.second < b.second; - }); - // Show the control model info. - oss << " " << Colors::BLUE() << std::left << std::setw(maxModelName) << ranksOrder.at(0).first << " "; - oss << std::setw(12) << " " << std::setprecision(7) << std::fixed << " " << ranksOrder.at(0).second << std::endl; - for (const auto& item : ranksOrder) { - auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first)); - double pvalue = 0.0; - for (const auto& stat : postHocData) { - if (stat.first == idx) { - pvalue = stat.second; - } - } - postHocResult.postHocLines.push_back({ item.first, pvalue, item.second, wtl.at(idx), pvalue < significance }); - if (item.first == models.at(controlIdx)) { + bool first = true; + for (const auto& item : postHocResults) { + if (first) { + oss << " " << Colors::BLUE() << std::left << std::setw(maxModelName) << item.model << " "; + oss << std::setw(12) << " " << std::setprecision(7) << std::fixed << " " << item.rank << std::endl; + first = false; continue; } + auto pvalue = item.pvalue; auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA(); auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross; auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0"; - oss << " " << colorStatus << std::left << std::setw(maxModelName) << item.first << " "; - oss << std::setprecision(6) << std::scientific << pvalue << std::setprecision(7) << std::fixed << " " << item.second; - oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss; + oss << " " << colorStatus << std::left << std::setw(maxModelName) << item.model << " "; + oss << std::setprecision(6) << std::scientific << pvalue << std::setprecision(7) << std::fixed << " " << item.rank; + oss << " " << std::right << std::setw(3) << item.wtl.win << " " << std::setw(3) << item.wtl.tie << " " << std::setw(4) << item.wtl.loss; oss << " " << status << textStatus << std::endl; } oss << color << " " << std::string(hlen + 25, '*') << std::endl; @@ -249,8 +266,8 @@ namespace platform { if (tex) { BestResultsTex bestResultsTex(score); BestResultsMd bestResultsMd; - bestResultsTex.postHoc_test(postHocResult, postHocType, get_date() + " " + get_time()); - bestResultsMd.postHoc_test(postHocResult, postHocType, get_date() + " " + get_time()); + bestResultsTex.postHoc_test(postHocResults, postHocType, get_date() + " " + get_time()); + bestResultsMd.postHoc_test(postHocResults, postHocType, get_date() + " " + get_time()); } } bool Statistics::friedmanTest() @@ -294,16 +311,4 @@ namespace platform { friedmanResult = { friedmanQ, criticalValue, p_value, result }; return result; } - FriedmanResult& Statistics::getFriedmanResult() - { - return friedmanResult; - } - PostHocResult& Statistics::getPostHocResult() - { - return postHocResult; - } - std::map>& Statistics::getRanks() - { - return ranksModels; - } } // namespace platform diff --git a/src/best/Statistics.h b/src/best/Statistics.h index 765ed1d..a6b5c4a 100644 --- a/src/best/Statistics.h +++ b/src/best/Statistics.h @@ -9,9 +9,9 @@ namespace platform { using json = nlohmann::ordered_json; struct WTL { - int win; - int tie; - int loss; + uint win; + uint tie; + uint loss; }; struct FriedmanResult { double statistic; @@ -20,16 +20,14 @@ namespace platform { bool reject; }; struct PostHocLine { + uint idx; //index of the main order std::string model; long double pvalue; double rank; WTL wtl; bool reject; }; - struct PostHocResult { - std::string model; - std::vector postHocLines; - }; + class Statistics { public: Statistics(const std::string& score, const std::vector& models, const std::vector& datasets, const json& data, double significance = 0.05, bool output = true); @@ -37,15 +35,18 @@ namespace platform { void postHocTest(); void postHocTestReport(bool friedmanResult, bool tex); int getControlIdx(); - FriedmanResult& getFriedmanResult(); - PostHocResult& getPostHocResult(); - std::map>& getRanks(); + FriedmanResult& getFriedmanResult() { return friedmanResult; } + std::vector& getPostHocResults() { return postHocResults; } + std::map>& getRanks() { return ranksModels; } // ranks of the models per dataset private: void fit(); void postHocHolmTest(); void postHocWilcoxonTest(); void computeRanks(); void computeWTL(); + void Holm_Bonferroni(); + void setResultsOrder(); // Set the order of the results based on the statistic analysis needed + void restoreResultsOrder(); // Restore the order of the results after the Holm-Bonferroni adjustment const std::string& score; std::string postHocType; const std::vector& models; @@ -60,12 +61,11 @@ namespace platform { int greaterAverage = -1; // The model with the greater average score std::map wtl; std::map ranks; - std::vector> postHocData; int maxModelName = 0; int maxDatasetName = 0; int hlen; // length of the line FriedmanResult friedmanResult; - PostHocResult postHocResult; + std::vector postHocResults; std::map> ranksModels; }; } diff --git a/src/best/WilcoxonTest.hpp b/src/best/WilcoxonTest.hpp index 34c2969..dbf1c0c 100644 --- a/src/best/WilcoxonTest.hpp +++ b/src/best/WilcoxonTest.hpp @@ -23,11 +23,8 @@ namespace platform { class WilcoxonTest { public: - WilcoxonTest(const std::vector& models, - const std::vector& datasets, - const json& data, - double alpha = 0.05) - : models_(models), datasets_(datasets), data_(data), alpha_(alpha) + WilcoxonTest(const std::vector& models, const std::vector& datasets, + const json& data, double alpha = 0.05) : models_(models), datasets_(datasets), data_(data), alpha_(alpha) { buildAUCTable(); // extracts all AUCs into a dense matrix computeAverageAUCs(); // per‑model mean (→ control selection) @@ -36,10 +33,8 @@ namespace platform { buildPostHocResult(); // fills postHocResult_ } - //---------------------------------------------------- public API ---- int getControlIdx() const noexcept { return control_idx_; } - - const PostHocResult& getPostHocResult() const noexcept { return postHocResult_; } + const std::vector& getPostHocResults() const noexcept { return postHocResults_; } private: //-------------------------------------------------- helper structs ---- @@ -146,18 +141,14 @@ namespace platform { const std::size_t D = datasets_.size(); const std::string& control_name = models_[control_idx_]; - postHocResult_.model = control_name; - const double practical_threshold = 0.0005; // same heuristic as original code for (std::size_t i = 0; i < M; ++i) { - if (static_cast(i) == control_idx_) continue; - PostHocLine line; line.model = models_[i]; - line.rank = avg_rank_[i]; + line.rank = avg_auc_[i]; - WTL wtl; + WTL wtl = { 0, 0, 0 }; // win, tie, loss std::vector differences; differences.reserve(D); @@ -181,8 +172,12 @@ namespace platform { line.pvalue = differences.empty() ? 1.0L : static_cast(wilcoxonSignedRankTest(differences)); line.reject = (line.pvalue < alpha_); - postHocResult_.postHocLines.push_back(std::move(line)); + postHocResults_.push_back(std::move(line)); } + // Sort results by rank (descending) + std::sort(postHocResults_.begin(), postHocResults_.end(), [](const PostHocLine& a, const PostHocLine& b) { + return a.rank > b.rank; + }); } // ------------------------------------------------ Wilcoxon (private) -- @@ -243,8 +238,8 @@ namespace platform { std::vector rank_cnt_; // datasets counted per model int control_idx_ = -1; - PostHocResult postHocResult_; + std::vector postHocResults_; }; -} // namespace stats +} // namespace platform #endif // BEST_WILCOXON_TEST_HPP \ No newline at end of file diff --git a/src/reports/DatasetsConsole.cpp b/src/reports/DatasetsConsole.cpp index d402a25..82dab4f 100644 --- a/src/reports/DatasetsConsole.cpp +++ b/src/reports/DatasetsConsole.cpp @@ -26,6 +26,7 @@ namespace platform { auto datasets = platform::Datasets(false, platform::Paths::datasets()); std::stringstream sheader; auto datasets_names = datasets.getNames(); + std::cout << Colors::GREEN() << "Datasets available in the platform: " << datasets_names.size() << std::endl; int maxName = std::max(size_t(7), (*max_element(datasets_names.begin(), datasets_names.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size()); std::vector header_labels = { " #", "Dataset", "Sampl.", "Feat.", "#Num.", "Cls", "Balance" }; std::vector header_lengths = { 3, maxName, 6, 6, 6, 3, DatasetsConsole::BALANCE_LENGTH }; @@ -61,9 +62,13 @@ namespace platform { line << setw(header_lengths[5]) << right << nClasses << " "; std::string sep = ""; oss.str(""); - for (auto number : dataset.getClassesCounts()) { - oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; - sep = " / "; + if (nSamples == 0) { + oss << "No samples"; + } else { + for (auto number : dataset.getClassesCounts()) { + oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; + sep = " / "; + } } split_lines(maxName, line.str(), oss.str()); // Store data for Excel report