From dcba146e12d3ff61e3d324a9251d02666bca35f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 26 Sep 2023 01:04:59 +0200 Subject: [PATCH 01/11] Begin adding Friedman test to BestResults --- CMakeLists.txt | 11 ++++++ src/Platform/BestResults.cc | 68 ++++++++++++++++++++++++++++++++++--- src/Platform/CMakeLists.txt | 1 + 3 files changed, 75 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3481dec..9eafe6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,17 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF) option(ENABLE_TESTING "Unit testing build" OFF) option(CODE_COVERAGE "Collect coverage from test library" OFF) + +# Boost Library +set(Boost_USE_STATIC_LIBS OFF) +set(Boost_USE_MULTITHREADED ON) +set(Boost_USE_STATIC_RUNTIME OFF) +find_package(Boost 1.81.0 REQUIRED) +if(Boost_FOUND) + message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}") + include_directories(${Boost_INCLUDE_DIRS}) +endif() + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # CMakes modules # -------------- diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 6d8505e..232bce8 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -6,7 +6,7 @@ #include "BestResults.h" #include "Result.h" #include "Colors.h" - +#include namespace fs = std::filesystem; @@ -228,6 +228,45 @@ namespace platform { } return ranks; } + void friedmanTest(int nModels, int nDatasets, map ranks, double significance = 0.05) + { + // Friedman test + // Calculate the Friedman statistic + double sum = 0.0; + if (nModels < 3 || nDatasets < 3) { + cout << "Can't make the Friedman test with less than 3 models and/or less than 3 datasets." << endl; + return; + } + cout << Colors::BLUE() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << endl; + cout << "N datasets: " << nDatasets << endl; + cout << "N models: " << nModels << endl; + cout << "Significance: " << significance << endl; + cout << "Nº Ranks: " << ranks.size() << endl; + for (const auto& rank : ranks) { + sum += rank.second; + } + double degreesOfFreedom = nModels - 1.0; + double sumSquared = 0; + for (const auto& rank : ranks) { + sumSquared += rank.second * rank.second; + } + cout << "Sum Squared: " << sumSquared << endl; + cout << "Degrees of freedom: " << degreesOfFreedom << endl; + double friedman = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1); + cout << "Friedman statistic: " << friedman << endl; + // Calculate the critical value + boost::math::chi_squared chiSquared(degreesOfFreedom); + long double p_value = (long double)1.0 - cdf(chiSquared, friedman); + double criticalValue = quantile(chiSquared, 1 - significance); + std::cout << "Critical Chi-Square Value for df=" << degreesOfFreedom + << " and alpha=" << significance << ": " << criticalValue << std::endl; + cout << "p-value: " << scientific << p_value << endl; + if (friedman > criticalValue) { + cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl; + } else { + cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl; + } + } void BestResults::printTableResults(set models, json table) { cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get() << endl; @@ -245,6 +284,8 @@ namespace platform { auto i = 0; bool odd = true; map totals; + map ranks; + map ranksTotal; for (const auto& model : models) { totals[model] = 0.0; } @@ -264,7 +305,14 @@ namespace platform { ranksOrder.push_back({ model, value }); } // Assign the ranks - auto ranks = assignRanks(ranksOrder); + ranks = assignRanks(ranksOrder); + if (ranksTotal.size() == 0) { + ranksTotal = ranks; + } else { + for (const auto& rank : ranks) { + ranksTotal[rank.first] += rank.second; + } + } // Print the row with red colors on max values for (const auto& model : models) { string efectiveColor = color; @@ -300,20 +348,30 @@ namespace platform { // Output the averaged ranks cout << endl; int min = 1; - for (const auto& rank : ranks) { + for (const auto& rank : ranksTotal) { if (rank.second < min) { min = rank.second; } } + cout << Colors::BLUE() << setw(30) << " Ranks...................."; + for (const auto& model : models) { + string efectiveColor = Colors::BLUE(); + if (ranksTotal[model] == min) { + efectiveColor = Colors::RED(); + } + cout << efectiveColor << setw(12) << setprecision(4) << fixed << (double)ranksTotal[model] << " "; + } + cout << endl; cout << Colors::GREEN() << setw(30) << " Averaged ranks..........."; for (const auto& model : models) { string efectiveColor = Colors::GREEN(); - if (ranks[model] == min) { + if (ranksTotal[model] == min) { efectiveColor = Colors::RED(); } - cout << efectiveColor << setw(12) << setprecision(10) << fixed << (double)ranks[model] / (double)origin.size() << " "; + cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] / (double)origin.size() << " "; } cout << endl; + friedmanTest(models.size(), table.begin().value().size(), ranksTotal, 0.05); } void BestResults::reportAll() { diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index b40a311..eca6b72 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -15,5 +15,6 @@ if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") target_link_libraries(best stdc++fs) else() target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) + target_link_libraries(best Boost::boost) endif() target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file -- 2.45.2 From f0d0abe8919c548a80c10458e3feaca67f82dcdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 26 Sep 2023 01:07:50 +0200 Subject: [PATCH 02/11] Add boost library link to linux build --- src/Platform/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index eca6b72..db32852 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -12,7 +12,7 @@ add_executable(best best.cc BestResults.cc Result.cc) target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs) - target_link_libraries(best stdc++fs) + target_link_libraries(best Boost::boost stdc++fs) else() target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) target_link_libraries(best Boost::boost) -- 2.45.2 From cab8e14b2d09c6e06f2e03063ec43c49b8b3d4d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 26 Sep 2023 11:26:59 +0200 Subject: [PATCH 03/11] Add friedman hyperparameter --- src/Platform/BestResults.cc | 25 ++++++++++++------------- src/Platform/BestResults.h | 3 ++- src/Platform/best.cc | 10 +++++++++- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 232bce8..94ca802 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -237,11 +237,9 @@ namespace platform { cout << "Can't make the Friedman test with less than 3 models and/or less than 3 datasets." << endl; return; } - cout << Colors::BLUE() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << endl; - cout << "N datasets: " << nDatasets << endl; - cout << "N models: " << nModels << endl; - cout << "Significance: " << significance << endl; - cout << "Nº Ranks: " << ranks.size() << endl; + cout << Colors::BLUE() << endl; + cout << "*************************************************************************************" << endl; + cout << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << endl; for (const auto& rank : ranks) { sum += rank.second; } @@ -250,22 +248,21 @@ namespace platform { for (const auto& rank : ranks) { sumSquared += rank.second * rank.second; } - cout << "Sum Squared: " << sumSquared << endl; - cout << "Degrees of freedom: " << degreesOfFreedom << endl; - double friedman = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1); - cout << "Friedman statistic: " << friedman << endl; + double friedmanQ = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1); + cout << "Friedman statistic: " << friedmanQ << endl; // Calculate the critical value boost::math::chi_squared chiSquared(degreesOfFreedom); - long double p_value = (long double)1.0 - cdf(chiSquared, friedman); + long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ); double criticalValue = quantile(chiSquared, 1 - significance); - std::cout << "Critical Chi-Square Value for df=" << degreesOfFreedom + std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom << " and alpha=" << significance << ": " << criticalValue << std::endl; cout << "p-value: " << scientific << p_value << endl; - if (friedman > criticalValue) { + if (friedmanQ > criticalValue) { cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl; } else { cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl; } + cout << Colors::BLUE() << "*************************************************************************************" << endl; } void BestResults::printTableResults(set models, json table) { @@ -371,7 +368,9 @@ namespace platform { cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] / (double)origin.size() << " "; } cout << endl; - friedmanTest(models.size(), table.begin().value().size(), ranksTotal, 0.05); + if (friedman) { + friedmanTest(models.size(), table.begin().value().size(), ranksTotal, 0.05); + } } void BestResults::reportAll() { diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h index 3ba6b9d..5495222 100644 --- a/src/Platform/BestResults.h +++ b/src/Platform/BestResults.h @@ -8,7 +8,7 @@ using json = nlohmann::json; namespace platform { class BestResults { public: - explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {} + explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {} string build(); void reportSingle(); void reportAll(); @@ -23,6 +23,7 @@ namespace platform { string path; string score; string model; + bool friedman; }; } #endif //BESTRESULTS_H \ No newline at end of file diff --git a/src/Platform/best.cc b/src/Platform/best.cc index 6e6d432..dcd8d9b 100644 --- a/src/Platform/best.cc +++ b/src/Platform/best.cc @@ -13,12 +13,14 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied"); program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true); program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true); + program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true); try { program.parse_args(argc, argv); auto model = program.get("model"); auto score = program.get("score"); auto build = program.get("build"); auto report = program.get("report"); + auto friedman = program.get("friedman"); if (model == "" || score == "") { throw runtime_error("Model and score name must be supplied"); } @@ -38,12 +40,18 @@ int main(int argc, char** argv) auto score = program.get("score"); auto build = program.get("build"); auto report = program.get("report"); + auto friedman = program.get("friedman"); + if (friedman && model != "any") { + cerr << "Friedman test can only be used with all models" << endl; + cerr << program; + exit(1); + } if (!report && !build) { cerr << "Either build, report or both, have to be selected to do anything!" << endl; cerr << program; exit(1); } - auto results = platform::BestResults(platform::Paths::results(), score, model); + auto results = platform::BestResults(platform::Paths::results(), score, model, friedman); if (build) { if (model == "any") { results.buildAll(); -- 2.45.2 From ce66483b652a4fd36c7e7d40e5579beb8aa6d036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 26 Sep 2023 14:12:53 +0200 Subject: [PATCH 04/11] Update boost version requirement for Linux --- CMakeLists.txt | 2 +- src/Platform/BestResults.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9eafe6e..eb77a77 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,7 +35,7 @@ option(CODE_COVERAGE "Collect coverage from test library" OFF) set(Boost_USE_STATIC_LIBS OFF) set(Boost_USE_MULTITHREADED ON) set(Boost_USE_STATIC_RUNTIME OFF) -find_package(Boost 1.81.0 REQUIRED) +find_package(Boost 1.78.0 REQUIRED) if(Boost_FOUND) message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}") include_directories(${Boost_INCLUDE_DIRS}) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 94ca802..673378c 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -239,7 +239,7 @@ namespace platform { } cout << Colors::BLUE() << endl; cout << "*************************************************************************************" << endl; - cout << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << endl; + cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl; for (const auto& rank : ranks) { sum += rank.second; } @@ -255,7 +255,7 @@ namespace platform { long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ); double criticalValue = quantile(chiSquared, 1 - significance); std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom - << " and alpha=" << significance << ": " << criticalValue << std::endl; + << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl; cout << "p-value: " << scientific << p_value << endl; if (friedmanQ > criticalValue) { cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl; -- 2.45.2 From 11320e2cc74d274b21e9b39fcd9f33ed6fcf4b4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Wed, 27 Sep 2023 12:36:03 +0200 Subject: [PATCH 05/11] Complete friedman test as in exreport --- src/Platform/BestResults.cc | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 673378c..b153085 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -245,10 +245,18 @@ namespace platform { } double degreesOfFreedom = nModels - 1.0; double sumSquared = 0; + // For original Friedman test + // for (const auto& rank : ranks) { + // sumSquared += rank.second * rank.second; + // } for (const auto& rank : ranks) { - sumSquared += rank.second * rank.second; + sumSquared += pow(rank.second / nDatasets, 2); } - double friedmanQ = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1); + cout << "Sum of ranks: " << sum << endl; + cout << "Sum of squared ranks: " << sumSquared << endl; + // (original) double friedmanQ = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1); + // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8 + double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4); cout << "Friedman statistic: " << friedmanQ << endl; // Calculate the critical value boost::math::chi_squared chiSquared(degreesOfFreedom); @@ -257,7 +265,8 @@ namespace platform { std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl; cout << "p-value: " << scientific << p_value << endl; - if (friedmanQ > criticalValue) { + //if (friedmanQ > criticalValue) { (original) + if (p_value < significance) { cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl; } else { cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl; @@ -318,7 +327,8 @@ namespace platform { efectiveColor = Colors::RED(); } totals[model] += value; - cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " "; + // cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " "; + cout << efectiveColor << setw(12) << setprecision(10) << fixed << ranks[model] << " "; } cout << endl; odd = !odd; -- 2.45.2 From 5043c12be86183918aa5f6bfe9990458e9d46190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Wed, 27 Sep 2023 18:34:16 +0200 Subject: [PATCH 06/11] Complete posthoc with Holm adjust --- src/Platform/BestResults.cc | 139 ++++++++++++++++++++++++++++++------ 1 file changed, 118 insertions(+), 21 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index b153085..80f5136 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -7,6 +7,8 @@ #include "Result.h" #include "Colors.h" #include +#include + namespace fs = std::filesystem; @@ -24,6 +26,11 @@ std::string ftime_to_string(TP tp) buffer << std::put_time(gmt, "%Y-%m-%d %H:%M"); return buffer.str(); } +struct WTL { + int win; + int tie; + int loss; +}; namespace platform { @@ -228,33 +235,110 @@ namespace platform { } return ranks; } - void friedmanTest(int nModels, int nDatasets, map ranks, double significance = 0.05) + + map computeWTL(int controlIdx, vector models, json table) + { + // Compute the WTL matrix + map wtl; + int nModels = models.size(); + for (int i = 0; i < nModels; ++i) { + wtl[i] = { 0, 0, 0 }; + } + json origin = table.begin().value(); + for (auto const& item : origin.items()) { + auto controlModel = models.at(controlIdx); + double controlValue = table[controlModel].at(item.key()).at(0).get(); + for (int i = 0; i < nModels; ++i) { + if (i == controlIdx) { + continue; + } + double value = table[models[i]].at(item.key()).at(0).get(); + if (value < controlValue) { + wtl[i].win++; + } else if (value == controlValue) { + wtl[i].tie++; + } else { + wtl[i].loss++; + } + } + } + return wtl; + } + + void postHocHolm(int controlIdx, vector models, int nDatasets, map ranks, double significance, map wtl) + { + // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8 + // Post-hoc Holm test + // Calculate the p-value for the models paired with the control model + int nModels = models.size(); + map stats; // p-value of each model paired with the control model + boost::math::normal dist(0.0, 1.0); + double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets)); + for (int i = 0; i < nModels; i++) { + if (i == controlIdx) { + stats[i] = 0.0; + continue; + } + double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff; + double p_value = (long double)2 * (1 - cdf(dist, z)); + stats[i] = p_value; + } + // Sort the models by p-value + vector> statsOrder; + for (const auto& stat : stats) { + statsOrder.push_back({ stat.first, stat.second }); + } + sort(statsOrder.begin(), statsOrder.end(), [](const pair& a, const pair& b) { + return a.second < b.second; + }); + + // Holm adjustment + for (int i = 0; i < statsOrder.size(); ++i) { + auto item = statsOrder.at(i); + double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second; + double p_value = min((double)1.0, item.second * (nModels - i)); + p_value = max(before, p_value); + statsOrder[i] = { item.first, p_value }; + } + cout << Colors::CYAN(); + cout << " *************************************************************************************************************" << endl; + cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl; + cout << " Control model: " << models[controlIdx] << endl; + cout << " Model p-value rank win tie loss" << endl; + cout << " ============ ============ ========= === === ====" << endl; + for (const auto& item : ranks) { + if (item.first == models.at(controlIdx)) { + continue; + } + auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first)); + double pvalue = 0.0; + for (const auto& stat : statsOrder) { + if (stat.first == idx) { + pvalue = stat.second; + } + } + cout << " " << left << setw(12) << item.first << " " << setprecision(10) << fixed << pvalue << setprecision(7) << " " << item.second; + cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss << endl; + } + cout << " *************************************************************************************************************" << endl; + cout << Colors::RESET(); + } + bool friedmanTest(vector models, int nDatasets, map ranks, double significance = 0.05) { // Friedman test // Calculate the Friedman statistic - double sum = 0.0; + int nModels = models.size(); if (nModels < 3 || nDatasets < 3) { - cout << "Can't make the Friedman test with less than 3 models and/or less than 3 datasets." << endl; - return; + throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets."); } cout << Colors::BLUE() << endl; - cout << "*************************************************************************************" << endl; + cout << "***************************************************************************************************************" << endl; cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl; - for (const auto& rank : ranks) { - sum += rank.second; - } double degreesOfFreedom = nModels - 1.0; double sumSquared = 0; - // For original Friedman test - // for (const auto& rank : ranks) { - // sumSquared += rank.second * rank.second; - // } for (const auto& rank : ranks) { - sumSquared += pow(rank.second / nDatasets, 2); + sumSquared += pow(rank.second, 2); } - cout << "Sum of ranks: " << sum << endl; - cout << "Sum of squared ranks: " << sumSquared << endl; - // (original) double friedmanQ = 12.0 / (nModels * nDatasets * (nModels + 1)) * sumSquared - 3 * nDatasets * (nModels + 1); // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8 double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4); cout << "Friedman statistic: " << friedmanQ << endl; @@ -264,14 +348,18 @@ namespace platform { double criticalValue = quantile(chiSquared, 1 - significance); std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl; - cout << "p-value: " << scientific << p_value << endl; + cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl; //if (friedmanQ > criticalValue) { (original) + bool result; if (p_value < significance) { cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl; + result = true; } else { cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl; + result = false; } - cout << Colors::BLUE() << "*************************************************************************************" << endl; + cout << Colors::BLUE() << "***************************************************************************************************************" << endl; + return result; } void BestResults::printTableResults(set models, json table) { @@ -292,6 +380,7 @@ namespace platform { map totals; map ranks; map ranksTotal; + int nDatasets = table.begin().value().size(); for (const auto& model : models) { totals[model] = 0.0; } @@ -355,10 +444,11 @@ namespace platform { // Output the averaged ranks cout << endl; int min = 1; - for (const auto& rank : ranksTotal) { + for (auto& rank : ranksTotal) { if (rank.second < min) { min = rank.second; } + rank.second /= nDatasets; } cout << Colors::BLUE() << setw(30) << " Ranks...................."; for (const auto& model : models) { @@ -375,11 +465,18 @@ namespace platform { if (ranksTotal[model] == min) { efectiveColor = Colors::RED(); } - cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] / (double)origin.size() << " "; + cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] << " "; } cout << endl; if (friedman) { - friedmanTest(models.size(), table.begin().value().size(), ranksTotal, 0.05); + double significance = 0.05; + vector vModels(models.begin(), models.end()); + if (friedmanTest(vModels, nDatasets, ranksTotal, significance)) { + // Stablish the control model as the one with the lowest averaged rank + int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); + auto wtl = computeWTL(controlIdx, vModels, table); + postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl); + } } } void BestResults::reportAll() -- 2.45.2 From 00c6cf663be8b7130448c23f00ed11a2354e8d13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Wed, 27 Sep 2023 19:11:47 +0200 Subject: [PATCH 07/11] Fix order of output in posthoc --- src/Platform/BestResults.cc | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 80f5136..1ed6de6 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -306,7 +306,15 @@ namespace platform { cout << " Control model: " << models[controlIdx] << endl; cout << " Model p-value rank win tie loss" << endl; cout << " ============ ============ ========= === === ====" << endl; - for (const auto& item : ranks) { + // sort ranks from lowest to highest + vector> ranksOrder; + for (const auto& rank : ranks) { + ranksOrder.push_back({ rank.first, rank.second }); + } + sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { + return a.second < b.second; + }); + for (const auto& item : ranksOrder) { if (item.first == models.at(controlIdx)) { continue; } @@ -349,13 +357,12 @@ namespace platform { std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl; cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl; - //if (friedmanQ > criticalValue) { (original) bool result; if (p_value < significance) { - cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl; + cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl; result = true; } else { - cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl; + cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl; result = false; } cout << Colors::BLUE() << "***************************************************************************************************************" << endl; @@ -416,8 +423,8 @@ namespace platform { efectiveColor = Colors::RED(); } totals[model] += value; - // cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " "; - cout << efectiveColor << setw(12) << setprecision(10) << fixed << ranks[model] << " "; + cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " "; + // cout << efectiveColor << setw(12) << setprecision(10) << fixed << ranks[model] << " "; } cout << endl; odd = !odd; @@ -471,12 +478,11 @@ namespace platform { if (friedman) { double significance = 0.05; vector vModels(models.begin(), models.end()); - if (friedmanTest(vModels, nDatasets, ranksTotal, significance)) { - // Stablish the control model as the one with the lowest averaged rank - int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); - auto wtl = computeWTL(controlIdx, vModels, table); - postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl); - } + friedmanTest(vModels, nDatasets, ranksTotal, significance); + // Stablish the control model as the one with the lowest averaged rank + int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); + auto wtl = computeWTL(controlIdx, vModels, table); + postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl); } } void BestResults::reportAll() -- 2.45.2 From ac89a451e3fcddba681ef115565f51f5b67e0d35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Thu, 28 Sep 2023 00:45:15 +0200 Subject: [PATCH 08/11] Duplicate statistics tests in class --- src/Platform/BestResults.cc | 13 ++- src/Platform/CMakeLists.txt | 2 +- src/Platform/Statistics.cc | 209 ++++++++++++++++++++++++++++++++++++ src/Platform/Statistics.h | 37 +++++++ 4 files changed, 258 insertions(+), 3 deletions(-) create mode 100644 src/Platform/Statistics.cc create mode 100644 src/Platform/Statistics.h diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 1ed6de6..7aec335 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -6,6 +6,7 @@ #include "BestResults.h" #include "Result.h" #include "Colors.h" +#include "Statistics.h" #include #include @@ -475,15 +476,23 @@ namespace platform { cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] << " "; } cout << endl; + vector vModels(models.begin(), models.end()); + vector datasets; + for (const auto& dataset : table.begin().value().items()) { + datasets.push_back(dataset.key()); + } + double significance = 0.05; if (friedman) { - double significance = 0.05; - vector vModels(models.begin(), models.end()); friedmanTest(vModels, nDatasets, ranksTotal, significance); // Stablish the control model as the one with the lowest averaged rank int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); auto wtl = computeWTL(controlIdx, vModels, table); postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl); } + + Statistics stats(vModels, datasets, table, significance); + stats.friedmanTest(); + stats.postHocHolmTest(); } void BestResults::reportAll() { diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index db32852..edf014a 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -8,7 +8,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc) add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc) add_executable(list list.cc platformUtils Datasets.cc) -add_executable(best best.cc BestResults.cc Result.cc) +add_executable(best best.cc BestResults.cc Result.cc Statistics.cc) target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs) diff --git a/src/Platform/Statistics.cc b/src/Platform/Statistics.cc new file mode 100644 index 0000000..33b2f57 --- /dev/null +++ b/src/Platform/Statistics.cc @@ -0,0 +1,209 @@ +#include "Statistics.h" +#include "Colors.h" +#include +#include + +namespace platform { + + Statistics::Statistics(vector& models, vector& datasets, json data, double significance) : models(models), datasets(datasets), data(data), significance(significance) + { + nModels = models.size(); + nDatasets = datasets.size(); + }; + + void Statistics::fit() + { + if (nModels < 3 || nDatasets < 3) { + cerr << "nModels: " << nModels << endl; + cerr << "nDatasets: " << nDatasets << endl; + throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets."); + } + computeRanks(); + // Set the control model as the one with the lowest average rank + controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); + computeWTL(); + fitted = true; + } + map assignRanks2(vector>& ranksOrder) + { + // sort the ranksOrder vector by value + sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { + return a.second > b.second; + }); + //Assign ranks to values and if they are the same they share the same averaged rank + map ranks; + for (int i = 0; i < ranksOrder.size(); i++) { + ranks[ranksOrder[i].first] = i + 1.0; + } + int i = 0; + while (i < static_cast(ranksOrder.size())) { + int j = i + 1; + int sumRanks = ranks[ranksOrder[i].first]; + while (j < static_cast(ranksOrder.size()) && ranksOrder[i].second == ranksOrder[j].second) { + sumRanks += ranks[ranksOrder[j++].first]; + } + if (j > i + 1) { + float averageRank = (float)sumRanks / (j - i); + for (int k = i; k < j; k++) { + ranks[ranksOrder[k].first] = averageRank; + } + } + i = j; + } + return ranks; + } + void Statistics::computeRanks() + { + map ranksLine; + for (const auto& dataset : datasets) { + vector> ranksOrder; + for (const auto& model : models) { + double value = data[model].at(dataset).at(0).get(); + ranksOrder.push_back({ model, value }); + } + // Assign the ranks + ranksLine = assignRanks2(ranksOrder); + if (ranks.size() == 0) { + ranks = ranksLine; + } else { + for (const auto& rank : ranksLine) { + ranks[rank.first] += rank.second; + } + } + } + // Average the ranks + for (const auto& rank : ranks) { + ranks[rank.first] /= nDatasets; + } + } + void Statistics::computeWTL() + { + // Compute the WTL matrix + for (int i = 0; i < nModels; ++i) { + wtl[i] = { 0, 0, 0 }; + } + json origin = data.begin().value(); + for (auto const& item : origin.items()) { + auto controlModel = models.at(controlIdx); + double controlValue = data[controlModel].at(item.key()).at(0).get(); + for (int i = 0; i < nModels; ++i) { + if (i == controlIdx) { + continue; + } + double value = data[models[i]].at(item.key()).at(0).get(); + if (value < controlValue) { + wtl[i].win++; + } else if (value == controlValue) { + wtl[i].tie++; + } else { + wtl[i].loss++; + } + } + } + } + + void Statistics::postHocHolmTest() + { + if (!fitted) { + fit(); + } + // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8 + // Post-hoc Holm test + // Calculate the p-value for the models paired with the control model + map stats; // p-value of each model paired with the control model + boost::math::normal dist(0.0, 1.0); + double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets)); + for (int i = 0; i < nModels; i++) { + if (i == controlIdx) { + stats[i] = 0.0; + continue; + } + double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff; + double p_value = (long double)2 * (1 - cdf(dist, z)); + stats[i] = p_value; + } + // Sort the models by p-value + vector> statsOrder; + for (const auto& stat : stats) { + statsOrder.push_back({ stat.first, stat.second }); + } + sort(statsOrder.begin(), statsOrder.end(), [](const pair& a, const pair& b) { + return a.second < b.second; + }); + + // Holm adjustment + for (int i = 0; i < statsOrder.size(); ++i) { + auto item = statsOrder.at(i); + double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second; + double p_value = min((double)1.0, item.second * (nModels - i)); + p_value = max(before, p_value); + statsOrder[i] = { item.first, p_value }; + } + cout << Colors::MAGENTA(); + cout << " *************************************************************************************************************" << endl; + cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl; + cout << " Control model: " << models[controlIdx] << endl; + cout << " Model p-value rank win tie loss" << endl; + cout << " ============ ============ ========= === === ====" << endl; + // sort ranks from lowest to highest + vector> ranksOrder; + for (const auto& rank : ranks) { + ranksOrder.push_back({ rank.first, rank.second }); + } + sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { + return a.second < b.second; + }); + for (const auto& item : ranksOrder) { + if (item.first == models.at(controlIdx)) { + continue; + } + auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first)); + double pvalue = 0.0; + for (const auto& stat : statsOrder) { + if (stat.first == idx) { + pvalue = stat.second; + } + } + cout << " " << left << setw(12) << item.first << " " << setprecision(10) << fixed << pvalue << setprecision(7) << " " << item.second; + cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss << endl; + } + cout << " *************************************************************************************************************" << endl; + cout << Colors::RESET(); + } + bool Statistics::friedmanTest() + { + if (!fitted) { + fit(); + } + // Friedman test + // Calculate the Friedman statistic + cout << Colors::BLUE() << endl; + cout << "***************************************************************************************************************" << endl; + cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl; + double degreesOfFreedom = nModels - 1.0; + double sumSquared = 0; + for (const auto& rank : ranks) { + sumSquared += pow(rank.second, 2); + } + // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8 + double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4); + cout << "Friedman statistic: " << friedmanQ << endl; + // Calculate the critical value + boost::math::chi_squared chiSquared(degreesOfFreedom); + long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ); + double criticalValue = quantile(chiSquared, 1 - significance); + std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom + << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl; + cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl; + bool result; + if (p_value < significance) { + cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl; + result = true; + } else { + cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl; + result = false; + } + cout << Colors::BLUE() << "***************************************************************************************************************" << endl; + return result; + } +} // namespace platform diff --git a/src/Platform/Statistics.h b/src/Platform/Statistics.h new file mode 100644 index 0000000..92c8a2a --- /dev/null +++ b/src/Platform/Statistics.h @@ -0,0 +1,37 @@ +#ifndef STATISTICS_H +#define STATISTICS_H +#include +#include +#include + +using namespace std; +using json = nlohmann::json; + +namespace platform { + struct WTL { + int win; + int tie; + int loss; + }; + class Statistics { + public: + Statistics(vector& models, vector& datasets, json data, double significance = 0.05); + bool friedmanTest(); + void postHocHolmTest(); + private: + void fit(); + void computeRanks(); + void computeWTL(); + vector models; + vector datasets; + json data; + double significance; + bool fitted = false; + int nModels = 0; + int nDatasets = 0; + int controlIdx = 0; + map wtl; + map ranks; + }; +} +#endif // !STATISTICS_H \ No newline at end of file -- 2.45.2 From 3b0653432714e8fbe9edfb076847101c8630bbf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Thu, 28 Sep 2023 00:59:34 +0200 Subject: [PATCH 09/11] Remove duplicated code in BestResults --- src/Platform/BestResults.cc | 246 +++--------------------------------- src/Platform/BestResults.h | 6 +- src/Platform/Statistics.cc | 7 +- src/Platform/Statistics.h | 2 +- 4 files changed, 25 insertions(+), 236 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 7aec335..5c41d27 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -7,8 +7,6 @@ #include "Result.h" #include "Colors.h" #include "Statistics.h" -#include -#include @@ -27,12 +25,6 @@ std::string ftime_to_string(TP tp) buffer << std::put_time(gmt, "%Y-%m-%d %H:%M"); return buffer.str(); } -struct WTL { - int win; - int tie; - int loss; -}; - namespace platform { string BestResults::build() @@ -114,9 +106,10 @@ namespace platform { } throw invalid_argument("Unable to open result file. [" + fileName + "]"); } - set BestResults::getModels() + vector BestResults::getModels() { set models; + vector result; auto files = loadResultFiles(); if (files.size() == 0) { cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl; @@ -129,7 +122,8 @@ namespace platform { // add the model to the vector of models models.insert(fileModel); } - return models; + result = vector(models.begin(), models.end()); + return result; } void BestResults::buildAll() @@ -171,7 +165,7 @@ namespace platform { odd = !odd; } } - json BestResults::buildTableResults(set models) + json BestResults::buildTableResults(vector models) { int numberOfDatasets = 0; bool first = true; @@ -208,168 +202,8 @@ namespace platform { table["dateTable"] = ftime_to_string(maxDate); return table; } - map assignRanks(vector>& ranksOrder) - { - // sort the ranksOrder vector by value - sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { - return a.second > b.second; - }); - //Assign ranks to values and if they are the same they share the same averaged rank - map ranks; - for (int i = 0; i < ranksOrder.size(); i++) { - ranks[ranksOrder[i].first] = i + 1.0; - } - int i = 0; - while (i < static_cast(ranksOrder.size())) { - int j = i + 1; - int sumRanks = ranks[ranksOrder[i].first]; - while (j < static_cast(ranksOrder.size()) && ranksOrder[i].second == ranksOrder[j].second) { - sumRanks += ranks[ranksOrder[j++].first]; - } - if (j > i + 1) { - float averageRank = (float)sumRanks / (j - i); - for (int k = i; k < j; k++) { - ranks[ranksOrder[k].first] = averageRank; - } - } - i = j; - } - return ranks; - } - map computeWTL(int controlIdx, vector models, json table) - { - // Compute the WTL matrix - map wtl; - int nModels = models.size(); - for (int i = 0; i < nModels; ++i) { - wtl[i] = { 0, 0, 0 }; - } - json origin = table.begin().value(); - for (auto const& item : origin.items()) { - auto controlModel = models.at(controlIdx); - double controlValue = table[controlModel].at(item.key()).at(0).get(); - for (int i = 0; i < nModels; ++i) { - if (i == controlIdx) { - continue; - } - double value = table[models[i]].at(item.key()).at(0).get(); - if (value < controlValue) { - wtl[i].win++; - } else if (value == controlValue) { - wtl[i].tie++; - } else { - wtl[i].loss++; - } - } - } - return wtl; - } - - void postHocHolm(int controlIdx, vector models, int nDatasets, map ranks, double significance, map wtl) - { - // Reference https://link.springer.com/article/10.1007/s44196-022-00083-8 - // Post-hoc Holm test - // Calculate the p-value for the models paired with the control model - int nModels = models.size(); - map stats; // p-value of each model paired with the control model - boost::math::normal dist(0.0, 1.0); - double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets)); - for (int i = 0; i < nModels; i++) { - if (i == controlIdx) { - stats[i] = 0.0; - continue; - } - double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff; - double p_value = (long double)2 * (1 - cdf(dist, z)); - stats[i] = p_value; - } - // Sort the models by p-value - vector> statsOrder; - for (const auto& stat : stats) { - statsOrder.push_back({ stat.first, stat.second }); - } - sort(statsOrder.begin(), statsOrder.end(), [](const pair& a, const pair& b) { - return a.second < b.second; - }); - - // Holm adjustment - for (int i = 0; i < statsOrder.size(); ++i) { - auto item = statsOrder.at(i); - double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second; - double p_value = min((double)1.0, item.second * (nModels - i)); - p_value = max(before, p_value); - statsOrder[i] = { item.first, p_value }; - } - cout << Colors::CYAN(); - cout << " *************************************************************************************************************" << endl; - cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl; - cout << " Control model: " << models[controlIdx] << endl; - cout << " Model p-value rank win tie loss" << endl; - cout << " ============ ============ ========= === === ====" << endl; - // sort ranks from lowest to highest - vector> ranksOrder; - for (const auto& rank : ranks) { - ranksOrder.push_back({ rank.first, rank.second }); - } - sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { - return a.second < b.second; - }); - for (const auto& item : ranksOrder) { - if (item.first == models.at(controlIdx)) { - continue; - } - auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first)); - double pvalue = 0.0; - for (const auto& stat : statsOrder) { - if (stat.first == idx) { - pvalue = stat.second; - } - } - cout << " " << left << setw(12) << item.first << " " << setprecision(10) << fixed << pvalue << setprecision(7) << " " << item.second; - cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss << endl; - } - cout << " *************************************************************************************************************" << endl; - cout << Colors::RESET(); - } - bool friedmanTest(vector models, int nDatasets, map ranks, double significance = 0.05) - { - // Friedman test - // Calculate the Friedman statistic - int nModels = models.size(); - if (nModels < 3 || nDatasets < 3) { - throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets."); - } - cout << Colors::BLUE() << endl; - cout << "***************************************************************************************************************" << endl; - cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl; - double degreesOfFreedom = nModels - 1.0; - double sumSquared = 0; - for (const auto& rank : ranks) { - sumSquared += pow(rank.second, 2); - } - // Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8 - double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4); - cout << "Friedman statistic: " << friedmanQ << endl; - // Calculate the critical value - boost::math::chi_squared chiSquared(degreesOfFreedom); - long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ); - double criticalValue = quantile(chiSquared, 1 - significance); - std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom - << " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl; - cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl; - bool result; - if (p_value < significance) { - cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl; - result = true; - } else { - cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl; - result = false; - } - cout << Colors::BLUE() << "***************************************************************************************************************" << endl; - return result; - } - void BestResults::printTableResults(set models, json table) + void BestResults::printTableResults(vector models, json table) { cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get() << endl; cout << "------------------------------------------------" << endl; @@ -386,8 +220,6 @@ namespace platform { auto i = 0; bool odd = true; map totals; - map ranks; - map ranksTotal; int nDatasets = table.begin().value().size(); for (const auto& model : models) { totals[model] = 0.0; @@ -398,23 +230,12 @@ namespace platform { cout << color << setw(3) << fixed << right << i++ << " "; cout << setw(25) << left << item.key() << " "; double maxValue = 0; - vector> ranksOrder; // Find out the max value for this dataset for (const auto& model : models) { double value = table[model].at(item.key()).at(0).get(); if (value > maxValue) { maxValue = value; } - ranksOrder.push_back({ model, value }); - } - // Assign the ranks - ranks = assignRanks(ranksOrder); - if (ranksTotal.size() == 0) { - ranksTotal = ranks; - } else { - for (const auto& rank : ranks) { - ranksTotal[rank.first] += rank.second; - } } // Print the row with red colors on max values for (const auto& model : models) { @@ -425,7 +246,6 @@ namespace platform { } totals[model] += value; cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " "; - // cout << efectiveColor << setw(12) << setprecision(10) << fixed << ranks[model] << " "; } cout << endl; odd = !odd; @@ -449,50 +269,7 @@ namespace platform { } cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " "; } - // Output the averaged ranks cout << endl; - int min = 1; - for (auto& rank : ranksTotal) { - if (rank.second < min) { - min = rank.second; - } - rank.second /= nDatasets; - } - cout << Colors::BLUE() << setw(30) << " Ranks...................."; - for (const auto& model : models) { - string efectiveColor = Colors::BLUE(); - if (ranksTotal[model] == min) { - efectiveColor = Colors::RED(); - } - cout << efectiveColor << setw(12) << setprecision(4) << fixed << (double)ranksTotal[model] << " "; - } - cout << endl; - cout << Colors::GREEN() << setw(30) << " Averaged ranks..........."; - for (const auto& model : models) { - string efectiveColor = Colors::GREEN(); - if (ranksTotal[model] == min) { - efectiveColor = Colors::RED(); - } - cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] << " "; - } - cout << endl; - vector vModels(models.begin(), models.end()); - vector datasets; - for (const auto& dataset : table.begin().value().items()) { - datasets.push_back(dataset.key()); - } - double significance = 0.05; - if (friedman) { - friedmanTest(vModels, nDatasets, ranksTotal, significance); - // Stablish the control model as the one with the lowest averaged rank - int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; })); - auto wtl = computeWTL(controlIdx, vModels, table); - postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl); - } - - Statistics stats(vModels, datasets, table, significance); - stats.friedmanTest(); - stats.postHocHolmTest(); } void BestResults::reportAll() { @@ -501,5 +278,16 @@ namespace platform { json table = buildTableResults(models); // Print the table of results printTableResults(models, table); + // Compute the Friedman test + if (friedman) { + vector datasets; + for (const auto& dataset : table.begin().value().items()) { + datasets.push_back(dataset.key()); + } + double significance = 0.05; + Statistics stats(models, datasets, table, significance); + auto result = stats.friedmanTest(); + stats.postHocHolmTest(result); + } } } \ No newline at end of file diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h index 5495222..8ad0f8f 100644 --- a/src/Platform/BestResults.h +++ b/src/Platform/BestResults.h @@ -14,10 +14,10 @@ namespace platform { void reportAll(); void buildAll(); private: - set getModels(); + vector getModels(); vector loadResultFiles(); - json buildTableResults(set models); - void printTableResults(set models, json table); + json buildTableResults(vector models); + void printTableResults(vector models, json table); string bestResultFile(); json loadFile(const string& fileName); string path; diff --git a/src/Platform/Statistics.cc b/src/Platform/Statistics.cc index 33b2f57..f4d72f2 100644 --- a/src/Platform/Statistics.cc +++ b/src/Platform/Statistics.cc @@ -102,7 +102,7 @@ namespace platform { } } - void Statistics::postHocHolmTest() + void Statistics::postHocHolmTest(bool friedmanResult) { if (!fitted) { fit(); @@ -139,7 +139,8 @@ namespace platform { p_value = max(before, p_value); statsOrder[i] = { item.first, p_value }; } - cout << Colors::MAGENTA(); + auto color = friedmanResult ? Colors::GREEN() : Colors::YELLOW(); + cout << color; cout << " *************************************************************************************************************" << endl; cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl; cout << " Control model: " << models[controlIdx] << endl; @@ -203,7 +204,7 @@ namespace platform { cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl; result = false; } - cout << Colors::BLUE() << "***************************************************************************************************************" << endl; + cout << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << endl; return result; } } // namespace platform diff --git a/src/Platform/Statistics.h b/src/Platform/Statistics.h index 92c8a2a..bae91fa 100644 --- a/src/Platform/Statistics.h +++ b/src/Platform/Statistics.h @@ -17,7 +17,7 @@ namespace platform { public: Statistics(vector& models, vector& datasets, json data, double significance = 0.05); bool friedmanTest(); - void postHocHolmTest(); + void postHocHolmTest(bool friedmanResult); private: void fit(); void computeRanks(); -- 2.45.2 From 71704e3547f5dfbf8e3918d801338a9afa5f6128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Thu, 28 Sep 2023 01:27:18 +0200 Subject: [PATCH 10/11] Enhance output info in Statistics --- src/Platform/BestResults.cc | 1 - src/Platform/ReportBase.h | 13 ++----------- src/Platform/Statistics.cc | 21 +++++++++++++-------- src/Platform/Symbols.h | 18 ++++++++++++++++++ 4 files changed, 33 insertions(+), 20 deletions(-) create mode 100644 src/Platform/Symbols.h diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 5c41d27..cc6f60d 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -2,7 +2,6 @@ #include #include #include -#include #include "BestResults.h" #include "Result.h" #include "Colors.h" diff --git a/src/Platform/ReportBase.h b/src/Platform/ReportBase.h index c8400cf..5797b1b 100644 --- a/src/Platform/ReportBase.h +++ b/src/Platform/ReportBase.h @@ -3,22 +3,13 @@ #include #include #include "Paths.h" +#include "Symbols.h" #include using json = nlohmann::json; namespace platform { using namespace std; - class Symbols { - public: - inline static const string check_mark{ "\u2714" }; - inline static const string exclamation{ "\u2757" }; - inline static const string black_star{ "\u2605" }; - inline static const string cross{ "\u2717" }; - inline static const string upward_arrow{ "\u27B6" }; - inline static const string down_arrow{ "\u27B4" }; - inline static const string equal_best{ check_mark }; - inline static const string better_best{ black_star }; - }; + class ReportBase { public: explicit ReportBase(json data_, bool compare); diff --git a/src/Platform/Statistics.cc b/src/Platform/Statistics.cc index f4d72f2..9a4a34f 100644 --- a/src/Platform/Statistics.cc +++ b/src/Platform/Statistics.cc @@ -1,5 +1,6 @@ #include "Statistics.h" #include "Colors.h" +#include "Symbols.h" #include #include @@ -24,7 +25,7 @@ namespace platform { computeWTL(); fitted = true; } - map assignRanks2(vector>& ranksOrder) + map assignRanks(vector>& ranksOrder) { // sort the ranksOrder vector by value sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { @@ -62,7 +63,7 @@ namespace platform { ranksOrder.push_back({ model, value }); } // Assign the ranks - ranksLine = assignRanks2(ranksOrder); + ranksLine = assignRanks(ranksOrder); if (ranks.size() == 0) { ranks = ranksLine; } else { @@ -139,13 +140,13 @@ namespace platform { p_value = max(before, p_value); statsOrder[i] = { item.first, p_value }; } - auto color = friedmanResult ? Colors::GREEN() : Colors::YELLOW(); + auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW(); cout << color; cout << " *************************************************************************************************************" << endl; cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl; cout << " Control model: " << models[controlIdx] << endl; - cout << " Model p-value rank win tie loss" << endl; - cout << " ============ ============ ========= === === ====" << endl; + cout << " Model p-value rank win tie loss Status" << endl; + cout << " ============ ============ ========= === === ==== =============" << endl; // sort ranks from lowest to highest vector> ranksOrder; for (const auto& rank : ranks) { @@ -165,10 +166,14 @@ namespace platform { pvalue = stat.second; } } - cout << " " << left << setw(12) << item.first << " " << setprecision(10) << fixed << pvalue << setprecision(7) << " " << item.second; - cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss << endl; + auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA(); + auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross; + auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0"; + cout << " " << colorStatus << left << setw(12) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second; + cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss; + cout << " " << status << textStatus << endl; } - cout << " *************************************************************************************************************" << endl; + cout << color << " *************************************************************************************************************" << endl; cout << Colors::RESET(); } bool Statistics::friedmanTest() diff --git a/src/Platform/Symbols.h b/src/Platform/Symbols.h new file mode 100644 index 0000000..a9fa1e7 --- /dev/null +++ b/src/Platform/Symbols.h @@ -0,0 +1,18 @@ +#ifndef SYMBOLS_H +#define SYMBOLS_H +#include +using namespace std; +namespace platform { + class Symbols { + public: + inline static const string check_mark{ "\u2714" }; + inline static const string exclamation{ "\u2757" }; + inline static const string black_star{ "\u2605" }; + inline static const string cross{ "\u2717" }; + inline static const string upward_arrow{ "\u27B6" }; + inline static const string down_arrow{ "\u27B4" }; + inline static const string equal_best{ check_mark }; + inline static const string better_best{ black_star }; + }; +} +#endif // !SYMBOLS_H \ No newline at end of file -- 2.45.2 From 926de2bebd116f4cbc634a8eaaf95e2277c9f4b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Thu, 28 Sep 2023 09:44:33 +0200 Subject: [PATCH 11/11] Add boost info to README --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 266bb8a..d9849bf 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,14 @@ Bayesian Network Classifier with libtorch from scratch ## 0. Setup -### libxlswriter - Before compiling BayesNet. +### boost library + +[Getting Started]() + +### libxlswriter + ```bash cd lib/libxlsxwriter make -- 2.45.2