Files
Platform/src/best/WilcoxonTest.hpp

245 lines
9.8 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#ifndef BEST_WILCOXON_TEST_HPP
#define BEST_WILCOXON_TEST_HPP
// WilcoxonTest.hpp
// Standalone class for paired Wilcoxon signedrank posthoc analysis
// ------------------------------------------------------------------
// * Constructor takes the *alreadyloaded* nlohmann::json object plus the
// vectors of model and dataset names.
// * Internally selects a control model (highest average AUC) and builds all
// statistics (ranks, W/T/L counts, Wilcoxon pvalues).
// * Public API:
// int getControlIdx() const;
// PostHocResult getPostHocResult() const;
//
#include <vector>
#include <string>
#include <cmath>
#include <algorithm>
#include <numeric>
#include <limits>
#include <nlohmann/json.hpp>
#include "Statistics.h"
namespace platform {
class WilcoxonTest {
public:
WilcoxonTest(const std::vector<std::string>& models, const std::vector<std::string>& datasets,
const json& data, double alpha = 0.05) : models_(models), datasets_(datasets), data_(data), alpha_(alpha)
{
buildAUCTable(); // extracts all AUCs into a dense matrix
computeAverageAUCs(); // permodel mean (→ control selection)
computeAverageRanks(); // Friedmanstyle ranks per model
selectControlModel(); // sets control_idx_
buildPostHocResult(); // fills postHocResult_
}
int getControlIdx() const noexcept { return control_idx_; }
const std::vector<PostHocLine>& getPostHocResults() const noexcept { return postHocResults_; }
private:
//-------------------------------------------------- helper structs ----
// When a value is missing we keep NaN so that ordinary arithmetic still
// works (NaN simply propagates and we can test with std::isnan).
using Matrix = std::vector<std::vector<double>>; // [model][dataset]
//------------------------------------------------- implementation ----
void buildAUCTable()
{
const std::size_t M = models_.size();
const std::size_t D = datasets_.size();
auc_.assign(M, std::vector<double>(D, std::numeric_limits<double>::quiet_NaN()));
for (std::size_t i = 0; i < M; ++i) {
const auto& model = models_[i];
for (std::size_t j = 0; j < D; ++j) {
const auto& ds = datasets_[j];
try {
auc_[i][j] = data_.at(model).at(ds).at(0).get<double>();
}
catch (...) {
// leave as NaN when value missing
}
}
}
}
void computeAverageAUCs()
{
const std::size_t M = models_.size();
avg_auc_.resize(M, std::numeric_limits<double>::quiet_NaN());
for (std::size_t i = 0; i < M; ++i) {
double sum = 0.0;
std::size_t cnt = 0;
for (double v : auc_[i]) {
if (!std::isnan(v)) { sum += v; ++cnt; }
}
avg_auc_[i] = cnt ? sum / cnt : std::numeric_limits<double>::quiet_NaN();
}
}
// Average rank across datasets (1 = best).
void computeAverageRanks()
{
const std::size_t M = models_.size();
const std::size_t D = datasets_.size();
rank_sum_.assign(M, 0.0);
rank_cnt_.assign(M, 0);
const double EPS = 1e-10;
for (std::size_t j = 0; j < D; ++j) {
// Collect present values for this dataset
std::vector<std::pair<double, std::size_t>> vals; // (auc, model_idx)
vals.reserve(M);
for (std::size_t i = 0; i < M; ++i) {
if (!std::isnan(auc_[i][j]))
vals.emplace_back(auc_[i][j], i);
}
if (vals.empty()) continue; // no info for this dataset
// Sort descending (higher AUC better)
std::sort(vals.begin(), vals.end(), [](auto a, auto b) {
return a.first > b.first;
});
// Assign ranks with average for ties
std::size_t k = 0;
while (k < vals.size()) {
std::size_t l = k + 1;
while (l < vals.size() && std::fabs(vals[l].first - vals[k].first) < EPS) ++l;
const double avg_rank = (k + 1 + l) * 0.5; // average of ranks (1based)
for (std::size_t m = k; m < l; ++m) {
const auto idx = vals[m].second;
rank_sum_[idx] += avg_rank;
++rank_cnt_[idx];
}
k = l;
}
}
// Final average
avg_rank_.resize(M, std::numeric_limits<double>::quiet_NaN());
for (std::size_t i = 0; i < M; ++i) {
avg_rank_[i] = rank_cnt_[i] ? rank_sum_[i] / rank_cnt_[i]
: std::numeric_limits<double>::quiet_NaN();
}
}
void selectControlModel()
{
// pick model with highest average AUC (ties → first)
control_idx_ = 0;
for (std::size_t i = 1; i < avg_auc_.size(); ++i) {
if (avg_auc_[i] > avg_auc_[control_idx_]) control_idx_ = static_cast<int>(i);
}
}
void buildPostHocResult()
{
const std::size_t M = models_.size();
const std::size_t D = datasets_.size();
const std::string& control_name = models_[control_idx_];
const double practical_threshold = 0.0005; // same heuristic as original code
for (std::size_t i = 0; i < M; ++i) {
PostHocLine line;
line.model = models_[i];
line.rank = avg_auc_[i];
WTL wtl = { 0, 0, 0 }; // win, tie, loss
std::vector<double> differences;
differences.reserve(D);
for (std::size_t j = 0; j < D; ++j) {
double auc_control = auc_[control_idx_][j];
double auc_other = auc_[i][j];
if (std::isnan(auc_control) || std::isnan(auc_other)) continue;
double diff = auc_control - auc_other; // control comparison
if (std::fabs(diff) <= practical_threshold) {
++wtl.tie;
} else if (diff < 0) {
++wtl.win; // comparison wins
} else {
++wtl.loss; // control wins
}
differences.push_back(diff);
}
line.wtl = wtl;
line.pvalue = differences.empty() ? 1.0L : static_cast<long double>(wilcoxonSignedRankTest(differences));
line.reject = (line.pvalue < alpha_);
postHocResults_.push_back(std::move(line));
}
// Sort results by rank (descending)
std::sort(postHocResults_.begin(), postHocResults_.end(), [](const PostHocLine& a, const PostHocLine& b) {
return a.rank > b.rank;
});
}
// ------------------------------------------------ Wilcoxon (private) --
static double wilcoxonSignedRankTest(const std::vector<double>& diffs)
{
if (diffs.empty()) return 1.0;
// Build |diff| + sign vector (exclude zeros)
struct Node { double absval; int sign; };
std::vector<Node> v;
v.reserve(diffs.size());
for (double d : diffs) {
if (d != 0.0) v.push_back({ std::fabs(d), d > 0 ? 1 : -1 });
}
if (v.empty()) return 1.0;
// Sort by absolute value
std::sort(v.begin(), v.end(), [](const Node& a, const Node& b) { return a.absval < b.absval; });
const double EPS = 1e-10;
const std::size_t n = v.size();
std::vector<double> ranks(n, 0.0);
std::size_t i = 0;
while (i < n) {
std::size_t j = i + 1;
while (j < n && std::fabs(v[j].absval - v[i].absval) < EPS) ++j;
double avg_rank = (i + 1 + j) * 0.5; // 1based ranks
for (std::size_t k = i; k < j; ++k) ranks[k] = avg_rank;
i = j;
}
double w_plus = 0.0, w_minus = 0.0;
for (std::size_t k = 0; k < n; ++k) {
if (v[k].sign > 0) w_plus += ranks[k];
else w_minus += ranks[k];
}
double w = std::min(w_plus, w_minus);
double mean_w = n * (n + 1) / 4.0;
double sd_w = std::sqrt(n * (n + 1) * (2 * n + 1) / 24.0);
if (sd_w == 0.0) return 1.0; // degenerate (all diffs identical)
double z = (w - mean_w) / sd_w;
double p_two = std::erfc(std::fabs(z) / std::sqrt(2.0)); // 2sided tail
return p_two;
}
//-------------------------------------------------------- data ----
std::vector<std::string> models_;
std::vector<std::string> datasets_;
json data_;
double alpha_;
Matrix auc_; // [model][dataset]
std::vector<double> avg_auc_; // mean AUC per model
std::vector<double> avg_rank_; // mean rank per model
std::vector<double> rank_sum_; // helper for ranks
std::vector<int> rank_cnt_; // datasets counted per model
int control_idx_ = -1;
std::vector<PostHocLine> postHocResults_;
};
} // namespace platform
#endif // BEST_WILCOXON_TEST_HPP