Compare commits
8 Commits
bestResult
...
5043c12be8
Author | SHA1 | Date | |
---|---|---|---|
5043c12be8
|
|||
11320e2cc7
|
|||
ce66483b65
|
|||
cab8e14b2d
|
|||
f0d0abe891
|
|||
dcba146e12
|
|||
3ea0285119
|
|||
e3888e1503 |
@@ -30,6 +30,17 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
|
|||||||
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
|
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
|
||||||
option(ENABLE_TESTING "Unit testing build" OFF)
|
option(ENABLE_TESTING "Unit testing build" OFF)
|
||||||
option(CODE_COVERAGE "Collect coverage from test library" OFF)
|
option(CODE_COVERAGE "Collect coverage from test library" OFF)
|
||||||
|
|
||||||
|
# Boost Library
|
||||||
|
set(Boost_USE_STATIC_LIBS OFF)
|
||||||
|
set(Boost_USE_MULTITHREADED ON)
|
||||||
|
set(Boost_USE_STATIC_RUNTIME OFF)
|
||||||
|
find_package(Boost 1.78.0 REQUIRED)
|
||||||
|
if(Boost_FOUND)
|
||||||
|
message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
|
||||||
|
include_directories(${Boost_INCLUDE_DIRS})
|
||||||
|
endif()
|
||||||
|
|
||||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||||
# CMakes modules
|
# CMakes modules
|
||||||
# --------------
|
# --------------
|
||||||
|
@@ -6,6 +6,8 @@
|
|||||||
#include "BestResults.h"
|
#include "BestResults.h"
|
||||||
#include "Result.h"
|
#include "Result.h"
|
||||||
#include "Colors.h"
|
#include "Colors.h"
|
||||||
|
#include <boost/math/distributions/chi_squared.hpp>
|
||||||
|
#include <boost/math/distributions/normal.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -24,6 +26,11 @@ std::string ftime_to_string(TP tp)
|
|||||||
buffer << std::put_time(gmt, "%Y-%m-%d %H:%M");
|
buffer << std::put_time(gmt, "%Y-%m-%d %H:%M");
|
||||||
return buffer.str();
|
return buffer.str();
|
||||||
}
|
}
|
||||||
|
/**
 * Win / tie / loss tally for one model compared against a control model.
 *
 * Every counter defaults to zero so that a default-initialized WTL is a valid
 * empty tally. Brace initialization such as `{ 0, 0, 0 }` still works
 * (aggregate with default member initializers, C++14 and later).
 */
struct WTL {
    int win = 0;   ///< Number of comparisons counted as a win.
    int tie = 0;   ///< Number of comparisons counted as a tie.
    int loss = 0;  ///< Number of comparisons counted as a loss.
};
|
||||||
|
|
||||||
namespace platform {
|
namespace platform {
|
||||||
|
|
||||||
@@ -200,6 +207,160 @@ namespace platform {
|
|||||||
table["dateTable"] = ftime_to_string(maxDate);
|
table["dateTable"] = ftime_to_string(maxDate);
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
/**
 * Rank a list of (name, score) pairs, highest score first.
 *
 * The best score receives rank 1. Entries whose scores compare equal share
 * the arithmetic mean of the positions they occupy (e.g. two entries tied for
 * positions 1 and 2 both get rank 1.5).
 *
 * @param ranksOrder (name, score) pairs; sorted in place by descending score.
 * @return map from name to its (possibly fractional) rank; empty when the
 *         input is empty.
 */
std::map<std::string, float> assignRanks(std::vector<std::pair<std::string, double>>& ranksOrder)
{
    // Highest score first; the caller's vector is reordered.
    std::sort(ranksOrder.begin(), ranksOrder.end(),
        [](const std::pair<std::string, double>& a, const std::pair<std::string, double>& b) {
            return a.second > b.second;
        });
    std::map<std::string, float> ranks;
    const std::size_t total = ranksOrder.size();
    std::size_t groupBegin = 0;
    while (groupBegin < total) {
        // Extend the group over every following entry with the same score.
        std::size_t groupEnd = groupBegin + 1;
        while (groupEnd < total && ranksOrder[groupEnd].second == ranksOrder[groupBegin].second) {
            ++groupEnd;
        }
        // The group occupies 1-based positions groupBegin+1 .. groupEnd, whose
        // mean is (first + last) / 2. A group of one yields its own position.
        const float sharedRank = static_cast<float>(groupBegin + 1 + groupEnd) / 2.0f;
        for (std::size_t k = groupBegin; k < groupEnd; ++k) {
            ranks[ranksOrder[k].first] = sharedRank;
        }
        groupBegin = groupEnd;
    }
    return ranks;
}
|
||||||
|
|
||||||
|
map<int, WTL> computeWTL(int controlIdx, vector<string> models, json table)
|
||||||
|
{
|
||||||
|
// Compute the WTL matrix
|
||||||
|
map<int, WTL> wtl;
|
||||||
|
int nModels = models.size();
|
||||||
|
for (int i = 0; i < nModels; ++i) {
|
||||||
|
wtl[i] = { 0, 0, 0 };
|
||||||
|
}
|
||||||
|
json origin = table.begin().value();
|
||||||
|
for (auto const& item : origin.items()) {
|
||||||
|
auto controlModel = models.at(controlIdx);
|
||||||
|
double controlValue = table[controlModel].at(item.key()).at(0).get<double>();
|
||||||
|
for (int i = 0; i < nModels; ++i) {
|
||||||
|
if (i == controlIdx) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
double value = table[models[i]].at(item.key()).at(0).get<double>();
|
||||||
|
if (value < controlValue) {
|
||||||
|
wtl[i].win++;
|
||||||
|
} else if (value == controlValue) {
|
||||||
|
wtl[i].tie++;
|
||||||
|
} else {
|
||||||
|
wtl[i].loss++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return wtl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void postHocHolm(int controlIdx, vector<string> models, int nDatasets, map<string, float> ranks, double significance, map<int, WTL> wtl)
|
||||||
|
{
|
||||||
|
// Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
|
||||||
|
// Post-hoc Holm test
|
||||||
|
// Calculate the p-value for the models paired with the control model
|
||||||
|
int nModels = models.size();
|
||||||
|
map<int, double> stats; // p-value of each model paired with the control model
|
||||||
|
boost::math::normal dist(0.0, 1.0);
|
||||||
|
double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
|
||||||
|
for (int i = 0; i < nModels; i++) {
|
||||||
|
if (i == controlIdx) {
|
||||||
|
stats[i] = 0.0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
|
||||||
|
double p_value = (long double)2 * (1 - cdf(dist, z));
|
||||||
|
stats[i] = p_value;
|
||||||
|
}
|
||||||
|
// Sort the models by p-value
|
||||||
|
vector<pair<int, double>> statsOrder;
|
||||||
|
for (const auto& stat : stats) {
|
||||||
|
statsOrder.push_back({ stat.first, stat.second });
|
||||||
|
}
|
||||||
|
sort(statsOrder.begin(), statsOrder.end(), [](const pair<int, double>& a, const pair<int, double>& b) {
|
||||||
|
return a.second < b.second;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Holm adjustment
|
||||||
|
for (int i = 0; i < statsOrder.size(); ++i) {
|
||||||
|
auto item = statsOrder.at(i);
|
||||||
|
double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
|
||||||
|
double p_value = min((double)1.0, item.second * (nModels - i));
|
||||||
|
p_value = max(before, p_value);
|
||||||
|
statsOrder[i] = { item.first, p_value };
|
||||||
|
}
|
||||||
|
cout << Colors::CYAN();
|
||||||
|
cout << " *************************************************************************************************************" << endl;
|
||||||
|
cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
|
||||||
|
cout << " Control model: " << models[controlIdx] << endl;
|
||||||
|
cout << " Model p-value rank win tie loss" << endl;
|
||||||
|
cout << " ============ ============ ========= === === ====" << endl;
|
||||||
|
for (const auto& item : ranks) {
|
||||||
|
if (item.first == models.at(controlIdx)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
|
||||||
|
double pvalue = 0.0;
|
||||||
|
for (const auto& stat : statsOrder) {
|
||||||
|
if (stat.first == idx) {
|
||||||
|
pvalue = stat.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cout << " " << left << setw(12) << item.first << " " << setprecision(10) << fixed << pvalue << setprecision(7) << " " << item.second;
|
||||||
|
cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss << endl;
|
||||||
|
}
|
||||||
|
cout << " *************************************************************************************************************" << endl;
|
||||||
|
cout << Colors::RESET();
|
||||||
|
}
|
||||||
|
bool friedmanTest(vector<string> models, int nDatasets, map<string, float> ranks, double significance = 0.05)
|
||||||
|
{
|
||||||
|
// Friedman test
|
||||||
|
// Calculate the Friedman statistic
|
||||||
|
int nModels = models.size();
|
||||||
|
if (nModels < 3 || nDatasets < 3) {
|
||||||
|
throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
|
||||||
|
}
|
||||||
|
cout << Colors::BLUE() << endl;
|
||||||
|
cout << "***************************************************************************************************************" << endl;
|
||||||
|
cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
|
||||||
|
double degreesOfFreedom = nModels - 1.0;
|
||||||
|
double sumSquared = 0;
|
||||||
|
for (const auto& rank : ranks) {
|
||||||
|
sumSquared += pow(rank.second, 2);
|
||||||
|
}
|
||||||
|
// Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
|
||||||
|
double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
|
||||||
|
cout << "Friedman statistic: " << friedmanQ << endl;
|
||||||
|
// Calculate the critical value
|
||||||
|
boost::math::chi_squared chiSquared(degreesOfFreedom);
|
||||||
|
long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
|
||||||
|
double criticalValue = quantile(chiSquared, 1 - significance);
|
||||||
|
std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
|
||||||
|
<< " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
|
||||||
|
cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl;
|
||||||
|
//if (friedmanQ > criticalValue) { (original)
|
||||||
|
bool result;
|
||||||
|
if (p_value < significance) {
|
||||||
|
cout << Colors::MAGENTA() << "The null hypothesis H0 is rejected." << endl;
|
||||||
|
result = true;
|
||||||
|
} else {
|
||||||
|
cout << Colors::GREEN() << "The null hypothesis H0 is accepted." << endl;
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
cout << Colors::BLUE() << "***************************************************************************************************************" << endl;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
void BestResults::printTableResults(set<string> models, json table)
|
void BestResults::printTableResults(set<string> models, json table)
|
||||||
{
|
{
|
||||||
cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
|
cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
|
||||||
@@ -217,7 +378,9 @@ namespace platform {
|
|||||||
auto i = 0;
|
auto i = 0;
|
||||||
bool odd = true;
|
bool odd = true;
|
||||||
map<string, double> totals;
|
map<string, double> totals;
|
||||||
map<string, int> ranks;
|
map<string, float> ranks;
|
||||||
|
map<string, float> ranksTotal;
|
||||||
|
int nDatasets = table.begin().value().size();
|
||||||
for (const auto& model : models) {
|
for (const auto& model : models) {
|
||||||
totals[model] = 0.0;
|
totals[model] = 0.0;
|
||||||
}
|
}
|
||||||
@@ -236,13 +399,14 @@ namespace platform {
|
|||||||
}
|
}
|
||||||
ranksOrder.push_back({ model, value });
|
ranksOrder.push_back({ model, value });
|
||||||
}
|
}
|
||||||
// sort the ranksOrder vector by value
|
|
||||||
sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& a, const pair<string, double>& b) {
|
|
||||||
return a.second > b.second;
|
|
||||||
});
|
|
||||||
// Assign the ranks
|
// Assign the ranks
|
||||||
for (int i = 0; i < ranksOrder.size(); i++) {
|
ranks = assignRanks(ranksOrder);
|
||||||
ranks[ranksOrder[i].first] = i + 1;
|
if (ranksTotal.size() == 0) {
|
||||||
|
ranksTotal = ranks;
|
||||||
|
} else {
|
||||||
|
for (const auto& rank : ranks) {
|
||||||
|
ranksTotal[rank.first] += rank.second;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Print the row with red colors on max values
|
// Print the row with red colors on max values
|
||||||
for (const auto& model : models) {
|
for (const auto& model : models) {
|
||||||
@@ -252,7 +416,8 @@ namespace platform {
|
|||||||
efectiveColor = Colors::RED();
|
efectiveColor = Colors::RED();
|
||||||
}
|
}
|
||||||
totals[model] += value;
|
totals[model] += value;
|
||||||
cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
|
// cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
|
||||||
|
cout << efectiveColor << setw(12) << setprecision(10) << fixed << ranks[model] << " ";
|
||||||
}
|
}
|
||||||
cout << endl;
|
cout << endl;
|
||||||
odd = !odd;
|
odd = !odd;
|
||||||
@@ -279,20 +444,40 @@ namespace platform {
|
|||||||
// Output the averaged ranks
|
// Output the averaged ranks
|
||||||
cout << endl;
|
cout << endl;
|
||||||
int min = 1;
|
int min = 1;
|
||||||
for (const auto& rank : ranks) {
|
for (auto& rank : ranksTotal) {
|
||||||
if (rank.second < min) {
|
if (rank.second < min) {
|
||||||
min = rank.second;
|
min = rank.second;
|
||||||
}
|
}
|
||||||
|
rank.second /= nDatasets;
|
||||||
}
|
}
|
||||||
|
cout << Colors::BLUE() << setw(30) << " Ranks....................";
|
||||||
|
for (const auto& model : models) {
|
||||||
|
string efectiveColor = Colors::BLUE();
|
||||||
|
if (ranksTotal[model] == min) {
|
||||||
|
efectiveColor = Colors::RED();
|
||||||
|
}
|
||||||
|
cout << efectiveColor << setw(12) << setprecision(4) << fixed << (double)ranksTotal[model] << " ";
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
cout << Colors::GREEN() << setw(30) << " Averaged ranks...........";
|
cout << Colors::GREEN() << setw(30) << " Averaged ranks...........";
|
||||||
for (const auto& model : models) {
|
for (const auto& model : models) {
|
||||||
string efectiveColor = Colors::GREEN();
|
string efectiveColor = Colors::GREEN();
|
||||||
if (ranks[model] == min) {
|
if (ranksTotal[model] == min) {
|
||||||
efectiveColor = Colors::RED();
|
efectiveColor = Colors::RED();
|
||||||
}
|
}
|
||||||
cout << efectiveColor << setw(12) << setprecision(10) << fixed << (double)ranks[model] / (double)origin.size() << " ";
|
cout << efectiveColor << setw(12) << setprecision(9) << fixed << (double)ranksTotal[model] << " ";
|
||||||
}
|
}
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
if (friedman) {
|
||||||
|
double significance = 0.05;
|
||||||
|
vector<string> vModels(models.begin(), models.end());
|
||||||
|
if (friedmanTest(vModels, nDatasets, ranksTotal, significance)) {
|
||||||
|
// Establish the control model as the one with the lowest averaged rank
|
||||||
|
int controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
|
||||||
|
auto wtl = computeWTL(controlIdx, vModels, table);
|
||||||
|
postHocHolm(controlIdx, vModels, nDatasets, ranksTotal, significance, wtl);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
void BestResults::reportAll()
|
void BestResults::reportAll()
|
||||||
{
|
{
|
||||||
|
@@ -8,7 +8,7 @@ using json = nlohmann::json;
|
|||||||
namespace platform {
|
namespace platform {
|
||||||
class BestResults {
|
class BestResults {
|
||||||
public:
|
public:
|
||||||
explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {}
|
explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
|
||||||
string build();
|
string build();
|
||||||
void reportSingle();
|
void reportSingle();
|
||||||
void reportAll();
|
void reportAll();
|
||||||
@@ -23,6 +23,7 @@ namespace platform {
|
|||||||
string path;
|
string path;
|
||||||
string score;
|
string score;
|
||||||
string model;
|
string model;
|
||||||
|
bool friedman;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif //BESTRESULTS_H
|
#endif //BESTRESULTS_H
|
@@ -12,8 +12,9 @@ add_executable(best best.cc BestResults.cc Result.cc)
|
|||||||
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
|
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
|
||||||
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
|
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
|
||||||
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
|
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
|
||||||
target_link_libraries(best stdc++fs)
|
target_link_libraries(best Boost::boost stdc++fs)
|
||||||
else()
|
else()
|
||||||
target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
|
target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
|
||||||
|
target_link_libraries(best Boost::boost)
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")
|
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")
|
@@ -13,12 +13,14 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
|
|||||||
program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
|
program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
|
||||||
program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
|
program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
|
||||||
program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
|
program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
|
||||||
|
program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
|
||||||
try {
|
try {
|
||||||
program.parse_args(argc, argv);
|
program.parse_args(argc, argv);
|
||||||
auto model = program.get<string>("model");
|
auto model = program.get<string>("model");
|
||||||
auto score = program.get<string>("score");
|
auto score = program.get<string>("score");
|
||||||
auto build = program.get<bool>("build");
|
auto build = program.get<bool>("build");
|
||||||
auto report = program.get<bool>("report");
|
auto report = program.get<bool>("report");
|
||||||
|
auto friedman = program.get<bool>("friedman");
|
||||||
if (model == "" || score == "") {
|
if (model == "" || score == "") {
|
||||||
throw runtime_error("Model and score name must be supplied");
|
throw runtime_error("Model and score name must be supplied");
|
||||||
}
|
}
|
||||||
@@ -38,12 +40,18 @@ int main(int argc, char** argv)
|
|||||||
auto score = program.get<string>("score");
|
auto score = program.get<string>("score");
|
||||||
auto build = program.get<bool>("build");
|
auto build = program.get<bool>("build");
|
||||||
auto report = program.get<bool>("report");
|
auto report = program.get<bool>("report");
|
||||||
|
auto friedman = program.get<bool>("friedman");
|
||||||
|
if (friedman && model != "any") {
|
||||||
|
cerr << "Friedman test can only be used with all models" << endl;
|
||||||
|
cerr << program;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
if (!report && !build) {
|
if (!report && !build) {
|
||||||
cerr << "Either build, report or both, have to be selected to do anything!" << endl;
|
cerr << "Either build, report or both, have to be selected to do anything!" << endl;
|
||||||
cerr << program;
|
cerr << program;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
auto results = platform::BestResults(platform::Paths::results(), score, model);
|
auto results = platform::BestResults(platform::Paths::results(), score, model, friedman);
|
||||||
if (build) {
|
if (build) {
|
||||||
if (model == "any") {
|
if (model == "any") {
|
||||||
results.buildAll();
|
results.buildAll();
|
||||||
|
Reference in New Issue
Block a user