#ifndef BEST_WILCOXON_TEST_HPP #define BEST_WILCOXON_TEST_HPP // WilcoxonTest.hpp // Stand‑alone class for paired Wilcoxon signed‑rank post‑hoc analysis // ------------------------------------------------------------------ // * Constructor takes the *already‑loaded* nlohmann::json object plus the // vectors of model and dataset names. // * Internally selects a control model (highest average AUC) and builds all // statistics (ranks, W/T/L counts, Wilcoxon p‑values). // * Public API: // int getControlIdx() const; // PostHocResult getPostHocResult() const; // #include #include #include #include #include #include #include #include "Statistics.h" namespace platform { class WilcoxonTest { public: WilcoxonTest(const std::vector& models, const std::vector& datasets, const json& data, double alpha = 0.05) : models_(models), datasets_(datasets), data_(data), alpha_(alpha) { buildAUCTable(); // extracts all AUCs into a dense matrix computeAverageAUCs(); // per‑model mean (→ control selection) computeAverageRanks(); // Friedman‑style ranks per model selectControlModel(); // sets control_idx_ buildPostHocResult(); // fills postHocResult_ } int getControlIdx() const noexcept { return control_idx_; } const std::vector& getPostHocResults() const noexcept { return postHocResults_; } private: //-------------------------------------------------- helper structs ---- // When a value is missing we keep NaN so that ordinary arithmetic still // works (NaN simply propagates and we can test with std::isnan). using Matrix = std::vector>; // [model][dataset] //------------------------------------------------- implementation ---- void buildAUCTable() { const std::size_t M = models_.size(); const std::size_t D = datasets_.size(); auc_.assign(M, std::vector(D, std::numeric_limits::quiet_NaN())); for (std::size_t i = 0; i < M; ++i) { const auto& model = models_[i]; for (std::size_t j = 0; j < D; ++j) { const auto& ds = datasets_[j]; try { auc_[i][j] = data_.at(model).at(ds).at(0).get(); } catch (...) { // leave as NaN when value missing } } } } void computeAverageAUCs() { const std::size_t M = models_.size(); avg_auc_.resize(M, std::numeric_limits::quiet_NaN()); for (std::size_t i = 0; i < M; ++i) { double sum = 0.0; std::size_t cnt = 0; for (double v : auc_[i]) { if (!std::isnan(v)) { sum += v; ++cnt; } } avg_auc_[i] = cnt ? sum / cnt : std::numeric_limits::quiet_NaN(); } } // Average rank across datasets (1 = best). void computeAverageRanks() { const std::size_t M = models_.size(); const std::size_t D = datasets_.size(); rank_sum_.assign(M, 0.0); rank_cnt_.assign(M, 0); const double EPS = 1e-10; for (std::size_t j = 0; j < D; ++j) { // Collect present values for this dataset std::vector> vals; // (auc, model_idx) vals.reserve(M); for (std::size_t i = 0; i < M; ++i) { if (!std::isnan(auc_[i][j])) vals.emplace_back(auc_[i][j], i); } if (vals.empty()) continue; // no info for this dataset // Sort descending (higher AUC better) std::sort(vals.begin(), vals.end(), [](auto a, auto b) { return a.first > b.first; }); // Assign ranks with average for ties std::size_t k = 0; while (k < vals.size()) { std::size_t l = k + 1; while (l < vals.size() && std::fabs(vals[l].first - vals[k].first) < EPS) ++l; const double avg_rank = (k + 1 + l) * 0.5; // average of ranks (1‑based) for (std::size_t m = k; m < l; ++m) { const auto idx = vals[m].second; rank_sum_[idx] += avg_rank; ++rank_cnt_[idx]; } k = l; } } // Final average avg_rank_.resize(M, std::numeric_limits::quiet_NaN()); for (std::size_t i = 0; i < M; ++i) { avg_rank_[i] = rank_cnt_[i] ? rank_sum_[i] / rank_cnt_[i] : std::numeric_limits::quiet_NaN(); } } void selectControlModel() { // pick model with highest average AUC (ties → first) control_idx_ = 0; for (std::size_t i = 1; i < avg_auc_.size(); ++i) { if (avg_auc_[i] > avg_auc_[control_idx_]) control_idx_ = static_cast(i); } } void buildPostHocResult() { const std::size_t M = models_.size(); const std::size_t D = datasets_.size(); const std::string& control_name = models_[control_idx_]; const double practical_threshold = 0.0005; // same heuristic as original code for (std::size_t i = 0; i < M; ++i) { PostHocLine line; line.model = models_[i]; line.rank = avg_auc_[i]; WTL wtl = { 0, 0, 0 }; // win, tie, loss std::vector differences; differences.reserve(D); for (std::size_t j = 0; j < D; ++j) { double auc_control = auc_[control_idx_][j]; double auc_other = auc_[i][j]; if (std::isnan(auc_control) || std::isnan(auc_other)) continue; double diff = auc_control - auc_other; // control − comparison if (std::fabs(diff) <= practical_threshold) { ++wtl.tie; } else if (diff < 0) { ++wtl.win; // comparison wins } else { ++wtl.loss; // control wins } differences.push_back(diff); } line.wtl = wtl; line.pvalue = differences.empty() ? 1.0L : static_cast(wilcoxonSignedRankTest(differences)); line.reject = (line.pvalue < alpha_); postHocResults_.push_back(std::move(line)); } // Sort results by rank (descending) std::sort(postHocResults_.begin(), postHocResults_.end(), [](const PostHocLine& a, const PostHocLine& b) { return a.rank > b.rank; }); } // ------------------------------------------------ Wilcoxon (private) -- static double wilcoxonSignedRankTest(const std::vector& diffs) { if (diffs.empty()) return 1.0; // Build |diff| + sign vector (exclude zeros) struct Node { double absval; int sign; }; std::vector v; v.reserve(diffs.size()); for (double d : diffs) { if (d != 0.0) v.push_back({ std::fabs(d), d > 0 ? 1 : -1 }); } if (v.empty()) return 1.0; // Sort by absolute value std::sort(v.begin(), v.end(), [](const Node& a, const Node& b) { return a.absval < b.absval; }); const double EPS = 1e-10; const std::size_t n = v.size(); std::vector ranks(n, 0.0); std::size_t i = 0; while (i < n) { std::size_t j = i + 1; while (j < n && std::fabs(v[j].absval - v[i].absval) < EPS) ++j; double avg_rank = (i + 1 + j) * 0.5; // 1‑based ranks for (std::size_t k = i; k < j; ++k) ranks[k] = avg_rank; i = j; } double w_plus = 0.0, w_minus = 0.0; for (std::size_t k = 0; k < n; ++k) { if (v[k].sign > 0) w_plus += ranks[k]; else w_minus += ranks[k]; } double w = std::min(w_plus, w_minus); double mean_w = n * (n + 1) / 4.0; double sd_w = std::sqrt(n * (n + 1) * (2 * n + 1) / 24.0); if (sd_w == 0.0) return 1.0; // degenerate (all diffs identical) double z = (w - mean_w) / sd_w; double p_two = std::erfc(std::fabs(z) / std::sqrt(2.0)); // 2‑sided tail return p_two; } //-------------------------------------------------------- data ---- std::vector models_; std::vector datasets_; json data_; double alpha_; Matrix auc_; // [model][dataset] std::vector avg_auc_; // mean AUC per model std::vector avg_rank_; // mean rank per model std::vector rank_sum_; // helper for ranks std::vector rank_cnt_; // datasets counted per model int control_idx_ = -1; std::vector postHocResults_; }; } // namespace platform #endif // BEST_WILCOXON_TEST_HPP