From ac89a451e3fcddba681ef115565f51f5b67e0d35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Thu, 28 Sep 2023 00:45:15 +0200 Subject: [PATCH] Duplicate statistics tests in class --- src/Platform/BestResults.cc | 13 ++- src/Platform/CMakeLists.txt | 2 +- src/Platform/Statistics.cc | 209 ++++++++++++++++++++++++++++++++++++ src/Platform/Statistics.h | 37 +++++++ 4 files changed, 258 insertions(+), 3 deletions(-) create mode 100644 src/Platform/Statistics.cc create mode 100644 src/Platform/Statistics.h diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 1ed6de6..7aec335 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -6,6 +6,7 @@ #include "BestResults.h" #include "Result.h" #include "Colors.h" +#include "Statistics.h" #include #include @@ -475,15 +476,23 @@ namespace platform { cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] << " "; } cout << endl; + vector vModels(models.begin(), models.end()); + vector datasets; + for (const auto& dataset : table.begin().value().items()) { + datasets.push_back(dataset.key()); + } + double significance = 0.05; if (friedman) { - double significance = 0.05; - vector vModels(models.begin(), models.end()); friedmanTest(vModels, nDatasets, ranksTotal, significance); // Stablish the control model as the one with the lowest averaged rank int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); auto wtl = computeWTL(controlIdx, vModels, table); postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl); } + + Statistics stats(vModels, datasets, table, significance); + stats.friedmanTest(); + stats.postHocHolmTest(); } void BestResults::reportAll() { diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index db32852..edf014a 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -8,7 +8,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc) add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc) add_executable(list list.cc platformUtils Datasets.cc) -add_executable(best best.cc BestResults.cc Result.cc) +add_executable(best best.cc BestResults.cc Result.cc Statistics.cc) target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs) diff --git a/src/Platform/Statistics.cc b/src/Platform/Statistics.cc new file mode 100644 index 0000000..33b2f57 --- /dev/null +++ b/src/Platform/Statistics.cc @@ -0,0 +1,209 @@ +#include "Statistics.h" +#include "Colors.h" +#include +#include + +namespace platform { + + Statistics::Statistics(vector& models, vector& datasets, json data, double significance) : models(models), datasets(datasets), data(data), significance(significance) + { + nModels = models.size(); + nDatasets = datasets.size(); + }; + + void Statistics::fit() + { + if (nModels < 3 || nDatasets < 3) { + cerr << "nModels: " << nModels << endl; + cerr << "nDatasets: " << nDatasets << endl; + throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets."); + } + computeRanks(); + // Set the control model as the one with the lowest average rank + controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); + computeWTL(); + fitted = true; + } + map assignRanks2(vector>& ranksOrder) + { + // sort the ranksOrder vector by value + sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { + return a.second > b.second; + }); + //Assign ranks to values and if they are the same they share the same averaged rank + map ranks; + for (int i = 0; i < ranksOrder.size(); i++) { + ranks[ranksOrder[i].first] = i + 1.0; + } + int i = 0; + while (i < static_cast(ranksOrder.size())) { + int j = i + 1; + int sumRanks = ranks[ranksOrder[i].first]; + while (j < static_cast(ranksOrder.size()) && ranksOrder[i].second == ranksOrder[j].second) { + sumRanks += ranks[ranksOrder[j++].first]; + } + if (j > i + 1) { + float averageRank = (float)sumRanks / (j - i); + for (int k = i; k < j; k++) { + ranks[ranksOrder[k].first] = averageRank; + } + } + i = j; + } + return ranks; + } + void Statistics::computeRanks() + { + map ranksLine; + for (const auto& dataset : datasets) { + vector> ranksOrder; + for (const auto& model : models) { + double value = data[model].at(dataset).at(0).get(); + ranksOrder.push_back({ model, value }); + } + // Assign the ranks + ranksLine = assignRanks2(ranksOrder); + if (ranks.size() == 0) { + ranks = ranksLine; + } else { + for (const auto& rank : ranksLine) { + ranks[rank.first] += rank.second; + } + } + } + // Average the ranks + for (const auto& rank : ranks) { + ranks[rank.first] /= nDatasets; + } + } + void Statistics::computeWTL() + { + // Compute the WTL matrix + for (int i = 0; i < nModels; ++i) { + wtl[i] = { 0, 0, 0 }; + } + json origin = data.begin().value(); + for (auto const& item : origin.items()) { + auto controlModel = models.at(controlIdx); + double controlValue = data[controlModel].at(item.key()).at(0).get(); + for (int i = 0; i < nModels; ++i) { + if (i == controlIdx) { + continue; + } + double value = data[models[i]].at(item.key()).at(0).get(); + if (value < controlValue) { + wtl[i].win++; + } else if (value == controlValue) { + wtl[i].tie++; + } else { + wtl[i].loss++; + } + } + } + } + + void Statistics::postHocHolmTest() + { + if (!fitted) { + fit(); + } + // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8 + // Post-hoc Holm test + // Calculate the p-value for the models paired with the control model + map stats; // p-value of each model paired with the control model + boost::math::normal dist(0.0, 1.0); + double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets)); + for (int i = 0; i < nModels; i++) { + if (i == controlIdx) { + stats[i] = 0.0; + continue; + } + double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff; + double p_value = (long double)2 * (1 - cdf(dist, z)); + stats[i] = p_value; + } + // Sort the models by p-value + vector> statsOrder; + for (const auto& stat : stats) { + statsOrder.push_back({ stat.first, stat.second }); + } + sort(statsOrder.begin(), statsOrder.end(), [](const pair& a, const pair& b) { + return a.second < b.second; + }); + + // Holm adjustment + for (int i = 0; i < statsOrder.size(); ++i) { + auto item = statsOrder.at(i); + double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second; + double p_value = min((double)1.0, item.second * (nModels - i)); + p_value = max(before, p_value); + statsOrder[i] = { item.first, p_value }; + } + cout << Colors::MAGENTA(); + cout << " *************************************************************************************************************" << endl; + cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl; + cout << " Control model: " << models[controlIdx] << endl; + cout << " Model p-value rank win tie loss" << endl; + cout << " ============ ============ ========= === === ====" << endl; + // sort ranks from lowest to highest + vector> ranksOrder; + for (const auto& rank : ranks) { + ranksOrder.push_back({ rank.first, rank.second }); + } + sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { + return a.second < b.second; + }); + for (const auto& item : ranksOrder) { + if (item.first == models.at(controlIdx)) { + continue; + } + auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first)); + double pvalue = 0.0; + for (const auto& stat : statsOrder) { + if (stat.first == idx) { + pvalue = stat.second; + } + } + cout << " " << left << setw(12) << item.first << " " << setprecision(10) << fixed << pvalue << setprecision(7) << " " << item.second; + cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss << endl; + } + cout << " *************************************************************************************************************" << endl; + cout << Colors::RESET(); + } + bool Statistics::friedmanTest() + { + if (!fitted) { + fit(); + } + // Friedman test + // Calculate the Friedman statistic + cout << Colors::BLUE() << endl; + cout << "***************************************************************************************************************" << endl; + cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl; + double degreesOfFreedom = nModels - 1.0; + double sumSquared = 0; + for (const auto& rank : ranks) { + sumSquared += pow(rank.second, 2); + } + // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8 + double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4); + cout << "Friedman statistic: " << friedmanQ << endl; + // Calculate the critical value + boost::math::chi_squared chiSquared(degreesOfFreedom); + long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ); + double criticalValue = quantile(chiSquared, 1 - significance); + std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom + << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl; + cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl; + bool result; + if (p_value < significance) { + cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl; + result = true; + } else { + cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl; + result = false; + } + cout << Colors::BLUE() << "***************************************************************************************************************" << endl; + return result; + } +} // namespace platform diff --git a/src/Platform/Statistics.h b/src/Platform/Statistics.h new file mode 100644 index 0000000..92c8a2a --- /dev/null +++ b/src/Platform/Statistics.h @@ -0,0 +1,37 @@ +#ifndef STATISTICS_H +#define STATISTICS_H +#include +#include +#include + +using namespace std; +using json = nlohmann::json; + +namespace platform { + struct WTL { + int win; + int tie; + int loss; + }; + class Statistics { + public: + Statistics(vector& models, vector& datasets, json data, double significance = 0.05); + bool friedmanTest(); + void postHocHolmTest(); + private: + void fit(); + void computeRanks(); + void computeWTL(); + vector models; + vector datasets; + json data; + double significance; + bool fitted = false; + int nModels = 0; + int nDatasets = 0; + int controlIdx = 0; + map wtl; + map ranks; + }; +} +#endif // !STATISTICS_H \ No newline at end of file