Add Wilcoxon Test

2025-05-21 11:51:04 +02:00
parent 70d8022926
commit a56ec98ef9
14 changed files with 369 additions and 190 deletions

View File

@@ -13,7 +13,7 @@ include_directories(
 # b_best
 add_executable(
   b_best commands/b_best.cpp best/Statistics.cpp
-  best/BestResultsExcel.cpp best/BestResultsTex.cpp best/BestResultsMd.cpp best/BestResults.cpp best/DeLong.cpp
+  best/BestResultsExcel.cpp best/BestResultsTex.cpp best/BestResultsMd.cpp best/BestResults.cpp
   common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
   main/Models.cpp main/Scores.cpp
   reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp

View File

@@ -321,7 +321,7 @@ namespace platform {
         // Build the table of results
         json table = buildTableResults(models);
         std::vector<std::string> datasets = getDatasets(table.begin().value());
-        BestResultsExcel excel_report(score, datasets);
+        BestResultsExcel excel_report(path, score, datasets);
         excel_report.reportSingle(model, path + Paths::bestResultsFile(score, model));
         messageOutputFile("Excel", excel_report.getFileName());
     }
@@ -337,10 +337,10 @@ namespace platform {
         // Compute the Friedman test
         std::map<std::string, std::map<std::string, float>> ranksModels;
         if (friedman) {
-            Statistics stats(models, datasets, table, significance);
+            Statistics stats(score, models, datasets, table, significance);
             auto result = stats.friedmanTest();
-            stats.postHocHolmTest();
-            stats.postHocTestReport("Holm", score, result, tex);
+            stats.postHocTest();
+            stats.postHocTestReport(result, tex);
             ranksModels = stats.getRanks();
         }
         if (tex) {
@@ -352,24 +352,11 @@ namespace platform {
             }
         }
         if (excel) {
-            BestResultsExcel excel(score, datasets);
+            BestResultsExcel excel(path, score, datasets);
             excel.reportAll(models, table, ranksModels, friedman, significance);
             if (friedman) {
-                int idx = -1;
-                double min = 2000;
-                // Find out the control model
-                auto totals = std::vector<double>(models.size(), 0.0);
-                for (const auto& dataset_ : datasets) {
-                    for (int i = 0; i < models.size(); ++i) {
-                        totals[i] += ranksModels[dataset_][models[i]];
-                    }
-                }
-                for (int i = 0; i < models.size(); ++i) {
-                    if (totals[i] < min) {
-                        min = totals[i];
-                        idx = i;
-                    }
-                }
+                Statistics stats(score, models, datasets, table, significance);
+                int idx = stats.getControlIdx();
                 model = models.at(idx);
                 excel.reportSingle(model, path + Paths::bestResultsFile(score, model));
             }
@@ -378,7 +365,7 @@ namespace platform {
     }
     void BestResults::messageOutputFile(const std::string& title, const std::string& fileName)
     {
-        std::cout << Colors::YELLOW() << "** " << std::setw(5) << std::left << title
+        std::cout << Colors::YELLOW() << "** " << std::setw(8) << std::left << title
             << " file generated: " << fileName << Colors::RESET() << std::endl;
     }
 }

View File

@@ -30,7 +30,7 @@ namespace platform {
         }
         return columnName;
     }
-    BestResultsExcel::BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets) : score(score), datasets(datasets)
+    BestResultsExcel::BestResultsExcel(const std::string& path, const std::string& score, const std::vector<std::string>& datasets) : path(path), score(score), datasets(datasets)
     {
         file_name = Paths::bestResultsExcel(score);
         workbook = workbook_new(getFileName().c_str());
@@ -92,7 +92,7 @@ namespace platform {
     catch (const std::out_of_range& oor) {
         auto tabName = "table_" + std::to_string(i);
         auto worksheetNew = workbook_add_worksheet(workbook, tabName.c_str());
-        json data = loadResultData(Paths::results() + fileName);
+        json data = loadResultData(path + fileName);
         auto report = ReportExcel(data, false, workbook, worksheetNew);
         report.show();
         hyperlink = "#table_" + std::to_string(i);
@@ -241,10 +241,10 @@ namespace platform {
     }
     worksheet_merge_range(worksheet, 0, 0, 0, 7, "Friedman Test", styles["headerFirst"]);
     row = 2;
-    Statistics stats(models, datasets, table, significance, false);
+    Statistics stats(score, models, datasets, table, significance, false); // No output
     auto result = stats.friedmanTest();
-    stats.postHocHolmTest();
-    // stats.postHocTestReport("Holm", result, false);
+    stats.postHocTest();
+    stats.postHocTestReport(result, false); // No tex output
     auto friedmanResult = stats.getFriedmanResult();
     auto postHocResult = stats.getPostHocResult();
     worksheet_merge_range(worksheet, row, 0, row, 7, "Null hypothesis: H0 'There is no significant differences between all the classifiers.'", styles["headerSmall"]);

View File

@@ -10,7 +10,7 @@ namespace platform {
     using json = nlohmann::ordered_json;
     class BestResultsExcel : public ExcelFile {
     public:
-        BestResultsExcel(const std::string& score, const std::vector<std::string>& datasets);
+        BestResultsExcel(const std::string& path, const std::string& score, const std::vector<std::string>& datasets);
         ~BestResultsExcel();
         void reportAll(const std::vector<std::string>& models, const json& table, const std::map<std::string, std::map<std::string, float>>& ranks, bool friedman, double significance);
         void reportSingle(const std::string& model, const std::string& fileName);
@@ -22,6 +22,7 @@ namespace platform {
         void formatColumns();
         void doFriedman();
         void addConditionalFormat(std::string formula);
+        std::string path;
         std::string score;
         std::vector<std::string> models;
         std::vector<std::string> datasets;

View File

@@ -27,10 +27,10 @@ namespace platform {
handler << "\\tiny " << std::endl; handler << "\\tiny " << std::endl;
handler << "\\renewcommand{\\arraystretch }{1.2} " << std::endl; handler << "\\renewcommand{\\arraystretch }{1.2} " << std::endl;
handler << "\\renewcommand{\\tabcolsep }{0.07cm} " << std::endl; handler << "\\renewcommand{\\tabcolsep }{0.07cm} " << std::endl;
auto umetric = metric; auto umetric = score;
umetric[0] = toupper(umetric[0]); umetric[0] = toupper(umetric[0]);
handler << "\\caption{" << umetric << " results(mean $\\pm$ std) for all the algorithms and datasets} " << std::endl; handler << "\\caption{" << umetric << " results(mean $\\pm$ std) for all the algorithms and datasets} " << std::endl;
handler << "\\label{tab:results_" << metric << "}" << std::endl; handler << "\\label{tab:results_" << score << "}" << std::endl;
std::string header_dataset_name = index ? "r" : "l"; std::string header_dataset_name = index ? "r" : "l";
handler << "\\begin{tabular} {{" << header_dataset_name << std::string(models.size(), 'c').c_str() << "}}" << std::endl; handler << "\\begin{tabular} {{" << header_dataset_name << std::string(models.size(), 'c').c_str() << "}}" << std::endl;
handler << "\\hline " << std::endl; handler << "\\hline " << std::endl;
@@ -100,7 +100,7 @@ namespace platform {
handler << "%%" << std::endl; handler << "%%" << std::endl;
handler << "\\begin{table}[htbp]" << std::endl; handler << "\\begin{table}[htbp]" << std::endl;
handler << "\\centering" << std::endl; handler << "\\centering" << std::endl;
handler << "\\caption{Results of the post-hoc " << kind << " test for the mean " << metric << " of the algorithms.}\\label{ tab:tests }" << std::endl; handler << "\\caption{Results of the post-hoc " << kind << " test for the mean " << score << " of the algorithms.}\\label{ tab:tests }" << std::endl;
handler << "\\begin{tabular}{lrrrrr}" << std::endl; handler << "\\begin{tabular}{lrrrrr}" << std::endl;
handler << "\\hline" << std::endl; handler << "\\hline" << std::endl;
handler << "classifier & pvalue & rank & win & tie & loss\\\\" << std::endl; handler << "classifier & pvalue & rank & win & tie & loss\\\\" << std::endl;

View File

@@ -9,14 +9,14 @@ namespace platform {
     using json = nlohmann::ordered_json;
     class BestResultsTex {
     public:
-        BestResultsTex(const std::string metric_, bool dataset_name = true) : metric{ metric_ }, dataset_name{ dataset_name } {};
+        BestResultsTex(const std::string score, bool dataset_name = true) : score{ score }, dataset_name{ dataset_name } {};
         ~BestResultsTex() = default;
         void results_header(const std::vector<std::string>& models, const std::string& date, bool index);
         void results_body(const std::vector<std::string>& datasets, json& table, bool index);
         void results_footer(const std::map<std::string, std::vector<double>>& totals, const std::string& best_model);
         void postHoc_test(struct PostHocResult& postHocResult, const std::string& kind, const std::string& date);
     private:
-        std::string metric;
+        std::string score;
         bool dataset_name;
         void openTexFile(const std::string& name);
         std::ofstream handler;

View File

@@ -1,45 +0,0 @@
// DeLong.cpp
// Integration of the DeLong test with the RocAuc and Statistics classes
// Based on: X. Sun and W. Xu, "Fast Implementation of DeLong's Algorithm for Comparing the Areas Under Correlated Receiver Operating Characteristic Curves" (2014), and algorithms inspired by sklearn/pROC
#include "DeLong.h"
#include <vector>
#include <cmath>
#include <algorithm>
#include <numeric>
#include <stdexcept>
#include <cassert>
namespace platform {
    DeLong::DeLongResult DeLong::compare(const std::vector<double>& aucs_model1,
        const std::vector<double>& aucs_model2)
    {
        if (aucs_model1.size() != aucs_model2.size()) {
            throw std::invalid_argument("AUC lists must have the same size");
        }
        size_t N = aucs_model1.size();
        if (N < 2) {
            throw std::invalid_argument("At least two AUC values are required");
        }
        std::vector<double> diffs(N);
        for (size_t i = 0; i < N; ++i) {
            diffs[i] = aucs_model1[i] - aucs_model2[i];
        }
        double mean_diff = std::accumulate(diffs.begin(), diffs.end(), 0.0) / N;
        double var = 0.0;
        for (size_t i = 0; i < N; ++i) {
            var += (diffs[i] - mean_diff) * (diffs[i] - mean_diff);
        }
        var /= (N * (N - 1));
        if (var <= 0.0) var = 1e-10;
        double z = mean_diff / std::sqrt(var);
        double p = 2.0 * (1.0 - std::erfc(std::abs(z) / std::sqrt(2.0)) / 2.0);
        return { mean_diff, z, p };
    }
}
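A side note on the deleted code's last computation: 2.0 * (1.0 - std::erfc(std::abs(z) / std::sqrt(2.0)) / 2.0) simplifies to 1 + erf(|z|/sqrt(2)), which is always >= 1, so this test could never reject H0. The standard two-sided p-value under the normal approximation, and the form the new WilcoxonTest.hpp below does use, is

    p = 2\,\bigl(1 - \Phi(|z|)\bigr) = \operatorname{erfc}\!\left(\frac{|z|}{\sqrt{2}}\right)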

View File

@@ -1,24 +0,0 @@
#ifndef DELONG_H
#define DELONG_H
/* ********************************************************************************************************************
/* Integration of the DeLong test with the RocAuc and Statistics classes
/* Based on: X. Sun and W. Xu, "Fast Implementation of DeLong's Algorithm for Comparing the Areas Under Correlated
/* Receiver Operating Characteristic Curves" (2014), and algorithms inspired by sklearn/pROC
/* ********************************************************************************************************************/
#include <vector>
namespace platform {
    class DeLong {
    public:
        struct DeLongResult {
            double auc_diff;
            double z_stat;
            double p_value;
        };
        // Compares two vectors of per-dataset AUCs and returns the mean difference,
        // z statistic and p-value using a rank test (simplified DeLong)
        static DeLongResult compare(const std::vector<double>& aucs_model1,
            const std::vector<double>& aucs_model2);
    };
}
#endif // DELONG_H

View File

@@ -7,19 +7,25 @@
#include "BestResultsTex.h" #include "BestResultsTex.h"
#include "BestResultsMd.h" #include "BestResultsMd.h"
#include "Statistics.h" #include "Statistics.h"
#include "DeLong.h" #include "WilcoxonTest.hpp"
namespace platform { namespace platform {
Statistics::Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance, bool output) : Statistics::Statistics(const std::string& score, const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance, bool output) :
models(models), datasets(datasets), data(data), significance(significance), output(output) score(score), models(models), datasets(datasets), data(data), significance(significance), output(output)
{ {
if (score == "accuracy") {
postHocType = "Holm";
hlen = 85;
} else {
postHocType = "Wilcoxon";
hlen = 88;
}
nModels = models.size(); nModels = models.size();
nDatasets = datasets.size(); nDatasets = datasets.size();
auto temp = ConfigLocale(); auto temp = ConfigLocale();
} }
void Statistics::fit() void Statistics::fit()
{ {
if (nModels < 3 || nDatasets < 3) { if (nModels < 3 || nDatasets < 3) {
@@ -28,9 +34,11 @@ namespace platform {
             throw std::runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
         }
         ranksModels.clear();
-        computeRanks();
+        computeRanks(); // compute greaterAverage and ranks
         // Set the control model as the one with the lowest average rank
-        controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
+        controlIdx = score == "accuracy" ?
+            distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }))
+            : greaterAverage; // The model with the greater average score
         computeWTL();
         maxModelName = (*std::max_element(models.begin(), models.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
         maxDatasetName = (*std::max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size();
@@ -67,11 +75,16 @@ namespace platform {
     void Statistics::computeRanks()
     {
         std::map<std::string, float> ranksLine;
+        std::map<std::string, float> averages;
+        for (const auto& model : models) {
+            averages[model] = 0;
+        }
         for (const auto& dataset : datasets) {
             std::vector<std::pair<std::string, double>> ranksOrder;
             for (const auto& model : models) {
                 double value = data[model].at(dataset).at(0).get<double>();
                 ranksOrder.push_back({ model, value });
+                averages[model] += value;
             }
             // Assign the ranks
             ranksLine = assignRanks(ranksOrder);
@@ -89,6 +102,12 @@ namespace platform {
         for (const auto& rank : ranks) {
             ranks[rank.first] /= nDatasets;
         }
+        // Average the scores
+        for (const auto& average : averages) {
+            averages[average.first] /= nDatasets;
+        }
+        // Get the model with the greater average score
+        greaterAverage = distance(averages.begin(), max_element(averages.begin(), averages.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
     }
     void Statistics::computeWTL()
     {
@@ -115,12 +134,36 @@ namespace platform {
             }
         }
     }
+    int Statistics::getControlIdx()
+    {
+        if (!fitted) {
+            fit();
+        }
+        return controlIdx;
+    }
+    void Statistics::postHocTest()
+    {
+        // if (score == "accuracy") {
+        postHocHolmTest();
+        // } else {
+        //     postHocWilcoxonTest();
+        // }
+    }
+    void Statistics::postHocWilcoxonTest()
+    {
+        if (!fitted) {
+            fit();
+        }
+        // Reference: Wilcoxon, F. (1945). "Individual Comparisons by Ranking Methods". Biometrics Bulletin, 1(6), 80-83.
+        auto wilcoxon = WilcoxonTest(models, datasets, data, significance);
+        controlIdx = wilcoxon.getControlIdx();
+        postHocResult = wilcoxon.getPostHocResult();
+    }
     void Statistics::postHocHolmTest()
     {
         if (!fitted) {
             fit();
         }
-        std::stringstream oss;
         // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
         // Post-hoc Holm test
         // Calculate the p-value for the models paired with the control model
@@ -155,15 +198,15 @@ namespace platform {
         postHocResult.model = models.at(controlIdx);
     }
-    void Statistics::postHocTestReport(const std::string& kind, const std::string& metric, bool friedmanResult, bool tex)
+    void Statistics::postHocTestReport(bool friedmanResult, bool tex)
     {
         std::stringstream oss;
         postHocResult.model = models.at(controlIdx);
         auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
         oss << color;
-        oss << " *************************************************************************************************************" << std::endl;
-        oss << " Post-hoc " << kind << " test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl;
+        oss << " " << std::string(hlen + 25, '*') << std::endl;
+        oss << " Post-hoc " << postHocType << " test: H0: 'There is no significant differences between the control model and the other models.'" << std::endl;
         oss << " Control model: " << models.at(controlIdx) << std::endl;
         oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value rank win tie loss Status" << std::endl;
         oss << " " << std::string(maxModelName, '=') << " ============ ========= === === ==== =============" << std::endl;
@@ -198,83 +241,18 @@ namespace platform {
oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss; oss << " " << std::right << std::setw(3) << wtl.at(idx).win << " " << std::setw(3) << wtl.at(idx).tie << " " << std::setw(4) << wtl.at(idx).loss;
oss << " " << status << textStatus << std::endl; oss << " " << status << textStatus << std::endl;
} }
oss << color << " *************************************************************************************************************" << std::endl; oss << color << " " << std::string(hlen + 25, '*') << std::endl;
oss << Colors::RESET(); oss << Colors::RESET();
if (output) { if (output) {
std::cout << oss.str(); std::cout << oss.str();
} }
if (tex) { if (tex) {
BestResultsTex bestResultsTex(metric); BestResultsTex bestResultsTex(score);
BestResultsMd bestResultsMd; BestResultsMd bestResultsMd;
bestResultsTex.postHoc_test(postHocResult, kind, get_date() + " " + get_time()); bestResultsTex.postHoc_test(postHocResult, postHocType, get_date() + " " + get_time());
bestResultsMd.postHoc_test(postHocResult, kind, get_date() + " " + get_time()); bestResultsMd.postHoc_test(postHocResult, postHocType, get_date() + " " + get_time());
} }
} }
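The Holm adjustment applied inside postHocHolmTest sits outside this hunk; as background (the standard formulation, not code from this diff), with the m = nModels - 1 pairwise p-values sorted ascending, the step-down adjusted values are

    \tilde{p}_{(i)} = \max_{j \le i} \min\bigl(1,\ (m - j + 1)\, p_{(j)}\bigr), \qquad i = 1, \dots, m

and H0 is rejected for every model whose adjusted p-value falls below the significance level.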
-    // void Statistics::postHocDeLongTest(const std::vector<std::vector<int>>& y_trues,
-    //     const std::vector<std::vector<std::vector<double>>>& y_probas,
-    //     bool tex)
-    // {
-    //     std::map<int, double> pvalues;
-    //     postHocResult.model = models.at(controlIdx);
-    //     postHocResult.postHocLines.clear();
-    //     for (size_t i = 0; i < models.size(); ++i) {
-    //         if ((int)i == controlIdx) continue;
-    //         double acc_p = 0.0;
-    //         int valid = 0;
-    //         for (size_t d = 0; d < y_trues.size(); ++d) {
-    //             try {
-    //                 auto result = compareModelsWithDeLong(y_probas[controlIdx][d], y_probas[i][d], y_trues[d]);
-    //                 acc_p += result.p_value;
-    //                 ++valid;
-    //             }
-    //             catch (...) {}
-    //         }
-    //         if (valid > 0) {
-    //             pvalues[i] = acc_p / valid;
-    //         }
-    //     }
-    //     std::vector<std::pair<int, double>> sorted_pvalues(pvalues.begin(), pvalues.end());
-    //     std::sort(sorted_pvalues.begin(), sorted_pvalues.end(), [](const auto& a, const auto& b) {
-    //         return a.second < b.second;
-    //         });
-    //     std::stringstream oss;
-    //     oss << "\n*************************************************************************************************************\n";
-    //     oss << " Post-hoc DeLong-Holm test: H0: 'No significant differences in AUC with control model.'\n";
-    //     oss << " Control model: " << models[controlIdx] << "\n";
-    //     oss << " " << std::left << std::setw(maxModelName) << std::string("Model") << " p-value Adjusted Result\n";
-    //     oss << " " << std::string(maxModelName, '=') << " ============ ========== =============\n";
-    //     double prev = 0.0;
-    //     for (size_t i = 0; i < sorted_pvalues.size(); ++i) {
-    //         int idx = sorted_pvalues[i].first;
-    //         double raw = sorted_pvalues[i].second;
-    //         double adj = std::min(1.0, raw * (models.size() - i - 1));
-    //         adj = std::max(prev, adj);
-    //         prev = adj;
-    //         bool reject = adj < significance;
-    //         postHocResult.postHocLines.push_back({ models[idx], adj, 0.0f, {}, reject });
-    //         auto color = reject ? Colors::MAGENTA() : Colors::GREEN();
-    //         auto status = reject ? Symbols::cross : Symbols::check_mark;
-    //         auto textStatus = reject ? " rejected H0" : " accepted H0";
-    //         oss << " " << color << std::left << std::setw(maxModelName) << models[idx] << " ";
-    //         oss << std::setprecision(6) << std::scientific << raw << " ";
-    //         oss << std::setprecision(6) << std::scientific << adj << " " << status << textStatus << "\n";
-    //     }
-    //     oss << Colors::CYAN() << " *************************************************************************************************************\n";
-    //     oss << Colors::RESET();
-    //     if (output) std::cout << oss.str();
-    //     if (tex) {
-    //         BestResultsTex bestResultsTex;
-    //         BestResultsMd bestResultsMd;
-    //         bestResultsTex.holm_test(postHocResult, get_date() + " " + get_time());
-    //         bestResultsMd.holm_test(postHocResult, get_date() + " " + get_time());
-    //     }
-    // }
     bool Statistics::friedmanTest()
     {
         if (!fitted) {
@@ -284,7 +262,7 @@ namespace platform {
         // Friedman test
         // Calculate the Friedman statistic
         oss << Colors::BLUE() << std::endl;
-        oss << "***************************************************************************************************************" << std::endl;
+        oss << std::string(hlen, '*') << std::endl;
         oss << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << std::endl;
         double degreesOfFreedom = nModels - 1.0;
         double sumSquared = 0;
@@ -309,7 +287,7 @@ namespace platform {
             oss << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << std::endl;
             result = false;
         }
-        oss << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << std::endl;
+        oss << Colors::BLUE() << std::string(hlen, '*') << Colors::RESET() << std::endl;
         if (output) {
             std::cout << oss.str();
         }
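Only the banner lines change in friedmanTest; for reference, the statistic it computes from the average ranks of M models over D datasets is the usual Friedman chi-square (background, not code shown in this diff):

    \chi^2_F = \frac{12\,D}{M(M+1)} \sum_{i=1}^{M} \left(\bar{R}_i - \frac{M+1}{2}\right)^{2}

which is compared against a chi-square distribution with M - 1 degrees of freedom (degreesOfFreedom = nModels - 1.0 above).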

View File

@@ -32,17 +32,22 @@ namespace platform {
     };
     class Statistics {
     public:
-        Statistics(const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance = 0.05, bool output = true);
+        Statistics(const std::string& score, const std::vector<std::string>& models, const std::vector<std::string>& datasets, const json& data, double significance = 0.05, bool output = true);
         bool friedmanTest();
-        void postHocHolmTest();
-        void postHocTestReport(const std::string& kind, const std::string& metric, bool friedmanResult, bool tex);
+        void postHocTest();
+        void postHocTestReport(bool friedmanResult, bool tex);
+        int getControlIdx();
         FriedmanResult& getFriedmanResult();
         PostHocResult& getPostHocResult();
         std::map<std::string, std::map<std::string, float>>& getRanks();
     private:
         void fit();
+        void postHocHolmTest();
+        void postHocWilcoxonTest();
         void computeRanks();
         void computeWTL();
+        const std::string& score;
+        std::string postHocType;
         const std::vector<std::string>& models;
         const std::vector<std::string>& datasets;
         const json& data;
@@ -52,11 +57,13 @@ namespace platform {
         int nModels = 0;
         int nDatasets = 0;
         int controlIdx = 0;
+        int greaterAverage = -1; // The model with the greater average score
         std::map<int, WTL> wtl;
         std::map<std::string, float> ranks;
         std::vector<std::pair<int, double>> postHocData;
         int maxModelName = 0;
         int maxDatasetName = 0;
+        int hlen; // length of the line
         FriedmanResult friedmanResult;
         PostHocResult postHocResult;
         std::map<std::string, std::map<std::string, float>> ranksModels;
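Taken together, the reworked API reads as follows in use. A minimal sketch of the new call sequence, using only names introduced in this commit and assuming the json alias is nlohmann::ordered_json with the layout data[model][dataset][0] = score seen in computeRanks:

    #include <string>
    #include <vector>
    #include <nlohmann/json.hpp>
    #include "Statistics.h"

    // Hedged sketch, not project code: drives the new public API end to end.
    void runTests(const std::string& score,
                  const std::vector<std::string>& models,
                  const std::vector<std::string>& datasets,
                  const nlohmann::ordered_json& table)
    {
        platform::Statistics stats(score, models, datasets, table, 0.05);
        bool significant = stats.friedmanTest();     // prints the banner, returns whether H0 was rejected
        stats.postHocTest();                         // dispatches to the Holm test for now
        stats.postHocTestReport(significant, false); // console report only, no tex
        int control = stats.getControlIdx();         // new accessor replacing the inline search in BestResults.cpp
        const std::string& best = models.at(control);
        (void)best;
    }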

src/best/WilcoxonTest.hpp (new file, 250 additions)
View File

@@ -0,0 +1,250 @@
#ifndef BEST_WILCOXON_TEST_HPP
#define BEST_WILCOXON_TEST_HPP
// WilcoxonTest.hpp
// Standalone class for paired Wilcoxon signed-rank post-hoc analysis
// ------------------------------------------------------------------
// * Constructor takes the *already-loaded* nlohmann::json object plus the
//   vectors of model and dataset names.
// * Internally selects a control model (highest average AUC) and builds all
//   statistics (ranks, W/T/L counts, Wilcoxon p-values).
// * Public API:
//       int getControlIdx() const;
//       PostHocResult getPostHocResult() const;
//
#include <vector>
#include <string>
#include <cmath>
#include <algorithm>
#include <numeric>
#include <limits>
#include <nlohmann/json.hpp>
#include "Statistics.h"
namespace platform {
    class WilcoxonTest {
    public:
        WilcoxonTest(const std::vector<std::string>& models,
            const std::vector<std::string>& datasets,
            const json& data,
            double alpha = 0.05)
            : models_(models), datasets_(datasets), data_(data), alpha_(alpha)
        {
            buildAUCTable();       // extracts all AUCs into a dense matrix
            computeAverageAUCs();  // per-model mean (→ control selection)
            computeAverageRanks(); // Friedman-style ranks per model
            selectControlModel();  // sets control_idx_
            buildPostHocResult();  // fills postHocResult_
        }
        //---------------------------------------------------- public API ----
        int getControlIdx() const noexcept { return control_idx_; }
        const PostHocResult& getPostHocResult() const noexcept { return postHocResult_; }
    private:
        //-------------------------------------------------- helper structs ----
        // When a value is missing we keep NaN so that ordinary arithmetic still
        // works (NaN simply propagates and we can test with std::isnan).
        using Matrix = std::vector<std::vector<double>>; // [model][dataset]
        //------------------------------------------------- implementation ----
        void buildAUCTable()
        {
            const std::size_t M = models_.size();
            const std::size_t D = datasets_.size();
            auc_.assign(M, std::vector<double>(D, std::numeric_limits<double>::quiet_NaN()));
            for (std::size_t i = 0; i < M; ++i) {
                const auto& model = models_[i];
                for (std::size_t j = 0; j < D; ++j) {
                    const auto& ds = datasets_[j];
                    try {
                        auc_[i][j] = data_.at(model).at(ds).at(0).get<double>();
                    }
                    catch (...) {
                        // leave as NaN when value missing
                    }
                }
            }
        }
        void computeAverageAUCs()
        {
            const std::size_t M = models_.size();
            avg_auc_.resize(M, std::numeric_limits<double>::quiet_NaN());
            for (std::size_t i = 0; i < M; ++i) {
                double sum = 0.0;
                std::size_t cnt = 0;
                for (double v : auc_[i]) {
                    if (!std::isnan(v)) { sum += v; ++cnt; }
                }
                avg_auc_[i] = cnt ? sum / cnt : std::numeric_limits<double>::quiet_NaN();
            }
        }
        // Average rank across datasets (1 = best).
        void computeAverageRanks()
        {
            const std::size_t M = models_.size();
            const std::size_t D = datasets_.size();
            rank_sum_.assign(M, 0.0);
            rank_cnt_.assign(M, 0);
            const double EPS = 1e-10;
            for (std::size_t j = 0; j < D; ++j) {
                // Collect present values for this dataset
                std::vector<std::pair<double, std::size_t>> vals; // (auc, model_idx)
                vals.reserve(M);
                for (std::size_t i = 0; i < M; ++i) {
                    if (!std::isnan(auc_[i][j]))
                        vals.emplace_back(auc_[i][j], i);
                }
                if (vals.empty()) continue; // no info for this dataset
                // Sort descending (higher AUC better)
                std::sort(vals.begin(), vals.end(), [](auto a, auto b) {
                    return a.first > b.first;
                    });
                // Assign ranks with average for ties
                std::size_t k = 0;
                while (k < vals.size()) {
                    std::size_t l = k + 1;
                    while (l < vals.size() && std::fabs(vals[l].first - vals[k].first) < EPS) ++l;
                    const double avg_rank = (k + 1 + l) * 0.5; // average of ranks (1-based)
                    for (std::size_t m = k; m < l; ++m) {
                        const auto idx = vals[m].second;
                        rank_sum_[idx] += avg_rank;
                        ++rank_cnt_[idx];
                    }
                    k = l;
                }
            }
            // Final average
            avg_rank_.resize(M, std::numeric_limits<double>::quiet_NaN());
            for (std::size_t i = 0; i < M; ++i) {
                avg_rank_[i] = rank_cnt_[i] ? rank_sum_[i] / rank_cnt_[i]
                    : std::numeric_limits<double>::quiet_NaN();
            }
        }
        void selectControlModel()
        {
            // pick model with highest average AUC (ties → first)
            control_idx_ = 0;
            for (std::size_t i = 1; i < avg_auc_.size(); ++i) {
                if (avg_auc_[i] > avg_auc_[control_idx_]) control_idx_ = static_cast<int>(i);
            }
        }
        void buildPostHocResult()
        {
            const std::size_t M = models_.size();
            const std::size_t D = datasets_.size();
            const std::string& control_name = models_[control_idx_];
            postHocResult_.model = control_name;
            const double practical_threshold = 0.0005; // same heuristic as original code
            for (std::size_t i = 0; i < M; ++i) {
                if (static_cast<int>(i) == control_idx_) continue;
                PostHocLine line;
                line.model = models_[i];
                line.rank = avg_rank_[i];
                WTL wtl;
                std::vector<double> differences;
                differences.reserve(D);
                for (std::size_t j = 0; j < D; ++j) {
                    double auc_control = auc_[control_idx_][j];
                    double auc_other = auc_[i][j];
                    if (std::isnan(auc_control) || std::isnan(auc_other)) continue;
                    double diff = auc_control - auc_other; // control - comparison
                    if (std::fabs(diff) <= practical_threshold) {
                        ++wtl.tie;
                    } else if (diff < 0) {
                        ++wtl.win; // comparison wins
                    } else {
                        ++wtl.loss; // control wins
                    }
                    differences.push_back(diff);
                }
                line.wtl = wtl;
                line.pvalue = differences.empty() ? 1.0L : static_cast<long double>(wilcoxonSignedRankTest(differences));
                line.reject = (line.pvalue < alpha_);
                postHocResult_.postHocLines.push_back(std::move(line));
            }
        }
        // ------------------------------------------------ Wilcoxon (private) --
        static double wilcoxonSignedRankTest(const std::vector<double>& diffs)
        {
            if (diffs.empty()) return 1.0;
            // Build |diff| + sign vector (exclude zeros)
            struct Node { double absval; int sign; };
            std::vector<Node> v;
            v.reserve(diffs.size());
            for (double d : diffs) {
                if (d != 0.0) v.push_back({ std::fabs(d), d > 0 ? 1 : -1 });
            }
            if (v.empty()) return 1.0;
            // Sort by absolute value
            std::sort(v.begin(), v.end(), [](const Node& a, const Node& b) { return a.absval < b.absval; });
            const double EPS = 1e-10;
            const std::size_t n = v.size();
            std::vector<double> ranks(n, 0.0);
            std::size_t i = 0;
            while (i < n) {
                std::size_t j = i + 1;
                while (j < n && std::fabs(v[j].absval - v[i].absval) < EPS) ++j;
                double avg_rank = (i + 1 + j) * 0.5; // 1-based ranks
                for (std::size_t k = i; k < j; ++k) ranks[k] = avg_rank;
                i = j;
            }
            double w_plus = 0.0, w_minus = 0.0;
            for (std::size_t k = 0; k < n; ++k) {
                if (v[k].sign > 0) w_plus += ranks[k];
                else w_minus += ranks[k];
            }
            double w = std::min(w_plus, w_minus);
            double mean_w = n * (n + 1) / 4.0;
            double sd_w = std::sqrt(n * (n + 1) * (2 * n + 1) / 24.0);
            if (sd_w == 0.0) return 1.0; // degenerate (all diffs identical)
            double z = (w - mean_w) / sd_w;
            double p_two = std::erfc(std::fabs(z) / std::sqrt(2.0)); // 2-sided tail
            return p_two;
        }
        //-------------------------------------------------------- data ----
        std::vector<std::string> models_;
        std::vector<std::string> datasets_;
        json data_;
        double alpha_;
        Matrix auc_;                   // [model][dataset]
        std::vector<double> avg_auc_;  // mean AUC per model
        std::vector<double> avg_rank_; // mean rank per model
        std::vector<double> rank_sum_; // helper for ranks
        std::vector<int> rank_cnt_;    // datasets counted per model
        int control_idx_ = -1;
        PostHocResult postHocResult_;
    };
} // namespace platform
#endif // BEST_WILCOXON_TEST_HPP
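A small self-contained driver for the new header; a sketch assuming Statistics.h aliases json to nlohmann::ordered_json inside namespace platform and defines PostHocResult/PostHocLine with the fields used above:

    #include <iostream>
    #include <string>
    #include <vector>
    #include "WilcoxonTest.hpp"

    int main()
    {
        std::vector<std::string> models = { "A", "B" };
        std::vector<std::string> datasets = { "d1", "d2", "d3" };
        platform::json data; // alias assumed to come from Statistics.h
        data["A"]["d1"] = { 0.91 }; data["A"]["d2"] = { 0.84 }; data["A"]["d3"] = { 0.77 };
        data["B"]["d1"] = { 0.89 }; data["B"]["d2"] = { 0.80 }; data["B"]["d3"] = { 0.79 };
        platform::WilcoxonTest test(models, datasets, data, 0.05);
        std::cout << "control: " << models.at(test.getControlIdx()) << "\n";
        for (const auto& line : test.getPostHocResult().postHocLines) {
            std::cout << line.model << " p=" << static_cast<double>(line.pvalue)
                      << (line.reject ? " rejects H0" : " keeps H0") << "\n";
        }
        return 0;
    }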

View File

@@ -231,8 +231,8 @@ void experiment(argparse::ArgumentParser& program)
 {
     struct platform::ConfigGrid config;
     auto arguments = platform::ArgumentsExperiment(program, platform::experiment_t::GRID);
-    auto path_results = arguments.getPathResults();
     arguments.parse();
+    auto path_results = arguments.getPathResults();
     auto grid_experiment = platform::GridExperiment(arguments, config);
     platform::Timer timer;
     timer.start();

View File

@@ -215,10 +215,35 @@ namespace platform {
             test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
         }
     }
+    std::string getGppVersion()
+    {
+        std::string result;
+        std::array<char, 128> buffer;
+        // Run g++ --version and capture the output
+        std::unique_ptr<FILE, decltype(&pclose)> pipe(popen("g++ --version", "r"), pclose);
+        if (!pipe) {
+            return "Error executing g++ --version command";
+        }
+        // Read the first line of output (which contains the version info)
+        if (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
+            result = buffer.data();
+            // Remove trailing newline if present
+            if (!result.empty() && result[result.length() - 1] == '\n') {
+                result.erase(result.length() - 1);
+            }
+        } else {
+            return "No output from g++ --version command";
+        }
+        return result;
+    }
     Experiment& ArgumentsExperiment::initializedExperiment()
     {
         auto env = platform::DotEnv();
-        experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1");
+        experiment.setTitle(title).setLanguage("c++").setLanguageVersion(getGppVersion());
         experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat);
         experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
         experiment.setStratified(stratified).setNFolds(n_folds).setScoreName(score);
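The new helper shells out through POSIX popen; a stripped-down standalone version of the same pattern (Linux/macOS only, illustrative rather than the project's exact code):

    #include <array>
    #include <cstdio>
    #include <iostream>
    #include <memory>
    #include <string>

    int main()
    {
        std::array<char, 128> buffer;
        // pclose is the deleter, so the pipe is closed when `pipe` goes out of scope
        std::unique_ptr<FILE, decltype(&pclose)> pipe(popen("g++ --version", "r"), pclose);
        if (pipe && fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
            std::cout << std::string(buffer.data()); // first line, e.g. "g++ (GCC) 14.1.1 ..."
        }
        return 0;
    }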

View File

@@ -245,8 +245,6 @@ namespace platform {
             // Train model
             //
             clf->fit(X_train, y_train, features, className, states, smooth_type);
-            if (!quiet)
-                showProgress(nfold + 1, getColor(clf->getStatus()), "b");
             auto clf_notes = clf->getNotes();
             std::transform(clf_notes.begin(), clf_notes.end(), std::back_inserter(notes), [nfold](const std::string& note)
                 { return "Fold " + std::to_string(nfold) + ": " + note; });
@@ -259,6 +257,8 @@ namespace platform {
             // Score train
             //
             if (!no_train_score) {
+                if (!quiet)
+                    showProgress(nfold + 1, getColor(clf->getStatus()), "b");
                 auto y_proba_train = clf->predict_proba(X_train);
                 Scores scores(y_train, y_proba_train, num_classes, labels);
                 score_train_value = score == score_t::ACCURACY ? scores.accuracy() : scores.auc();