Compare commits

...

22 Commits

Author SHA1 Message Date
926de2bebd Add boost info to README 2023-09-28 09:44:33 +02:00
71704e3547 Enhance output info in Statistics 2023-09-28 01:27:18 +02:00
3b06534327 Remove duplicated code in BestResults 2023-09-28 00:59:34 +02:00
ac89a451e3 Duplicate statistics tests in class 2023-09-28 00:45:15 +02:00
00c6cf663b Fix order of output in posthoc 2023-09-27 19:11:47 +02:00
5043c12be8 Complete posthoc with Holm adjust 2023-09-27 18:34:16 +02:00
11320e2cc7 Complete friedman test as in exreport 2023-09-27 12:36:03 +02:00
ce66483b65 Update boost version requirement for Linux 2023-09-26 14:12:53 +02:00
cab8e14b2d Add friedman hyperparameter 2023-09-26 11:26:59 +02:00
f0d0abe891 Add boost library link to linux build 2023-09-26 01:07:50 +02:00
dcba146e12 Begin adding Friedman test to BestResults 2023-09-26 01:04:59 +02:00
3ea0285119 Fix ranks to match friedman test ranks 2023-09-25 18:38:12 +02:00
e3888e1503 Merge pull request 'bestResults' (#9) from bestResults into main
Reviewed-on: https://gitea.rmontanana.es:3000/rmontanana/BayesNet/pulls/9

Add best results management, build, report, build all & report all
2023-09-25 12:02:17 +00:00
06de13df98 Add date/time to header of report best 2023-09-25 10:04:53 +02:00
de4fa6a04f Add color to totals 2023-09-23 10:30:39 +02:00
3a7bf4e672 Fix ranking order mistake 2023-09-23 01:33:23 +02:00
cd0bc02a74 Add report/build all with totals and ranks 2023-09-23 01:14:02 +02:00
c8597a794e Begin report all models 2023-09-22 18:13:32 +02:00
b30416364d Fix mistake in best results file name 2023-09-22 14:14:39 +02:00
3a16589220 Add best config for debug in vscode 2023-09-22 01:04:36 +02:00
c4f9187e2a Complete best build and report 2023-09-22 01:03:55 +02:00
c4d0a5b4e6 Split Result from Results 2023-09-21 23:30:17 +02:00
15 changed files with 678 additions and 109 deletions

14
.vscode/launch.json vendored
View File

@@ -37,6 +37,20 @@
],
"cwd": "/Users/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "best",
"program": "${workspaceFolder}/build/src/Platform/best",
"args": [
"-m",
"BoostAODE",
"-s",
"accuracy",
"--build",
],
"cwd": "/Users/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",

View File

@@ -30,6 +30,17 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
option(ENABLE_TESTING "Unit testing build" OFF)
option(CODE_COVERAGE "Collect coverage from test library" OFF)
# Boost Library
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost 1.78.0 REQUIRED)
if(Boost_FOUND)
message("Boost_INCLUDE_DIRS=${Boost_INCLUDE_DIRS}")
include_directories(${Boost_INCLUDE_DIRS})
endif()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# CMakes modules
# --------------

View File

@@ -4,10 +4,14 @@ Bayesian Network Classifier with libtorch from scratch
## 0. Setup
### libxlswriter
Before compiling BayesNet.
### boost library
[Getting Started](<https://www.boost.org/doc/libs/1_83_0/more/getting_started/index.html>)
### libxlsxwriter
```bash
cd lib/libxlsxwriter
make

View File

@@ -1,18 +1,37 @@
#include <filesystem>
#include <fstream>
#include <iostream>
#include "platformUtils.h"
#include <sstream>
#include "BestResults.h"
#include "Results.h"
#include "Result.h"
#include "Colors.h"
#include "Statistics.h"
namespace fs = std::filesystem;
// function ftime_to_string, Code taken from
// https://stackoverflow.com/a/58237530/1389271
/// Formats a time point of any clock as "YYYY-MM-DD HH:MM" in UTC.
///
/// The time point is re-based onto std::chrono::system_clock by measuring its
/// distance from "now" on its own clock and applying that distance to "now"
/// on the system clock; the result is then broken down with std::gmtime.
template <typename TP>
std::string ftime_to_string(TP tp)
{
    namespace chr = std::chrono;
    const auto offsetFromNow = tp - TP::clock::now();
    const auto onSystemClock = chr::time_point_cast<chr::system_clock::duration>(offsetFromNow + chr::system_clock::now());
    const auto seconds = chr::system_clock::to_time_t(onSystemClock);
    std::stringstream formatted;
    formatted << std::put_time(std::gmtime(&seconds), "%Y-%m-%d %H:%M");
    return formatted.str();
}
namespace platform {
void BestResults::build()
string BestResults::build()
{
auto files = loadFiles();
auto files = loadResultFiles();
if (files.size() == 0) {
throw runtime_error("No result files were found!");
cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
exit(1);
}
json bests;
for (const auto& file : files) {
@@ -21,7 +40,7 @@ namespace platform {
for (auto const& item : data.at("results")) {
bool update = false;
if (bests.contains(item.at("dataset").get<string>())) {
if (item.at("score").get<double>() > bests["dataset"].at(0).get<double>()) {
if (item.at("score").get<double>() > bests[item.at("dataset").get<string>()].at(0).get<double>()) {
update = true;
}
} else {
@@ -32,13 +51,15 @@ namespace platform {
}
}
}
string bestFileName = path + "/" + bestResultFile();
if (file_exists(bestFileName)) {
cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET();
string bestFileName = path + bestResultFile();
if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
fclose(fileTest);
cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << endl;
}
ofstream file(bestFileName);
file << bests;
file.close();
return bestFileName;
}
string BestResults::bestResultFile()
@@ -46,23 +67,226 @@ namespace platform {
return "best_results_" + score + "_" + model + ".json";
}
vector<string> BestResults::loadFiles()
/// Extracts the model and score names from a result file name.
///
/// Expected layout (fields separated by '_'):
///   results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json
///   -> score = "accuracy", model = "BoostAODE"
///
/// @param name File name (no directory part).
/// @return {model, score}. Both strings are empty when the name does not
///         contain at least the two separators that delimit the score field.
///         If there is no separator after the model field, the model is the
///         remainder of the name.
pair<string, string> getModelScore(string name)
{
    const auto scoreStart = name.find('_');
    if (scoreStart == string::npos) {
        return { "", "" };
    }
    const auto modelStart = name.find('_', scoreStart + 1);
    if (modelStart == string::npos) {
        return { "", "" };
    }
    const auto modelEnd = name.find('_', modelStart + 1);
    string score = name.substr(scoreStart + 1, modelStart - scoreStart - 1);
    string model = modelEnd == string::npos
        ? name.substr(modelStart + 1)
        : name.substr(modelStart + 1, modelEnd - modelStart - 1);
    return { model, score };
}
vector<string> BestResults::loadResultFiles()
{
vector<string> files;
using std::filesystem::directory_iterator;
string fileModel, fileScore;
for (const auto& file : directory_iterator(path)) {
auto fileName = file.path().filename().string();
if (fileName.find(".json") != string::npos && fileName.find("results_") == 0
&& fileName.find("_" + score + "_") != string::npos
&& fileName.find("_" + model + "_") != string::npos) {
files.push_back(fileName);
if (fileName.find(".json") != string::npos && fileName.find("results_") == 0) {
tie(fileModel, fileScore) = getModelScore(fileName);
if (score == fileScore && (model == fileModel || model == "any")) {
files.push_back(fileName);
}
}
}
return files;
}
void BestResults::report()
/// Reads and parses a JSON file.
///
/// @param fileName Path of the file to read.
/// @return The parsed JSON document.
/// @throws invalid_argument if the file cannot be opened.
json BestResults::loadFile(const string& fileName)
{
    ifstream resultData(fileName);
    if (!resultData.is_open()) {
        throw invalid_argument("Unable to open result file. [" + fileName + "]");
    }
    return json::parse(resultData);
}
/// Returns the distinct model names found in the matching result files,
/// in ascending order (they are collected through a std::set).
///
/// Prints an error and terminates the process with exit code 1 when
/// loadResultFiles() finds nothing.
vector<string> BestResults::getModels()
{
    const auto files = loadResultFiles();
    if (files.empty()) {
        cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl;
        exit(1);
    }
    set<string> uniqueModels;
    for (const auto& file : files) {
        // The model is the first element of the {model, score} pair
        uniqueModels.insert(getModelScore(file).first);
    }
    return vector<string>(uniqueModels.begin(), uniqueModels.end());
}
void BestResults::buildAll()
{
auto models = getModels();
for (const auto& model : models) {
cout << "Building best results for model: " << model << endl;
this->model = model;
build();
}
model = "any";
}
/// Prints the best-results file of the current model/score to stdout.
///
/// One row per dataset: row number, dataset name, element 0 of the stored
/// entry (printed as the score), element 2 (printed under "File") and
/// element 1 (printed under "Hyperparameters"). Rows alternate between two
/// colors. Terminates the process with exit code 1 if the file is missing.
void BestResults::reportSingle()
{
    string bestFileName = path + bestResultFile();
    // Probe for existence by trying to open the file for reading
    FILE* probe = fopen(bestFileName.c_str(), "r");
    if (probe == nullptr) {
        cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
        exit(1);
    }
    fclose(probe);
    auto date = ftime_to_string(filesystem::last_write_time(bestFileName));
    auto data = loadFile(bestFileName);
    cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl;
    cout << "--------------------------------------------------------" << endl;
    cout << Colors::GREEN() << " # Dataset Score File Hyperparameters" << endl;
    cout << "=== ========================= =========== ================================================================== ================================================= " << endl;
    int row = 0;
    for (auto const& item : data.items()) {
        const auto& entry = item.value();
        // Even rows are blue, odd rows cyan
        auto color = (row % 2 == 0) ? Colors::BLUE() : Colors::CYAN();
        cout << color << setw(3) << fixed << right << row << " ";
        cout << setw(25) << left << item.key() << " ";
        cout << setw(11) << setprecision(9) << fixed << entry.at(0).get<double>() << " ";
        cout << setw(66) << entry.at(2).get<string>() << " ";
        cout << entry.at(1) << " " << endl;
        ++row;
    }
}
/// Loads the best-results file of every model into a single JSON table.
///
/// The returned object has one key per model (holding that model's
/// best-results document) plus a "dateTable" key with a formatted timestamp.
///
/// Side effect: the `model` member is overwritten with each model in turn
/// and is left pointing at the last one.
///
/// Terminates the process with exit code 1 when a model's file is missing or
/// when the files do not all contain the same number of datasets.
///
/// @param models Names of the models to load.
/// @return The combined table.
json BestResults::buildTableResults(vector<string> models)
{
    size_t numberOfDatasets = 0;
    bool first = true;
    json table;
    // NOTE(review): the loop keeps the *oldest* write time of all the files
    // (the original variable was called maxDate) - confirm this is the date
    // the report header is meant to show.
    auto oldestDate = filesystem::file_time_type::max();
    for (const auto& model : models) {
        this->model = model;
        string bestFileName = path + bestResultFile();
        if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) {
            fclose(fileTest);
        } else {
            cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl;
            exit(1);
        }
        auto dateWrite = filesystem::last_write_time(bestFileName);
        if (dateWrite < oldestDate) {
            oldestDate = dateWrite;
        }
        auto data = loadFile(bestFileName);
        if (first) {
            // The first file fixes the number of datasets every other file must match
            first = false;
            numberOfDatasets = data.size();
        } else if (numberOfDatasets != data.size()) {
            cerr << Colors::MAGENTA() << "The number of datasets in the best results files is not the same for all the models." << Colors::RESET() << endl;
            exit(1);
        }
        table[model] = data;
    }
    table["dateTable"] = ftime_to_string(oldestDate);
    return table;
}
/// Prints the datasets x models score table followed by a totals row.
///
/// For every dataset the highest score(s) are printed in red; in the totals
/// row the highest total(s) are printed in red.
///
/// @param models Model names, one column each, in print order.
/// @param table  Object with one entry per model (dataset -> array whose
///               element 0 is the score) plus a "dateTable" string entry.
void BestResults::printTableResults(vector<string> models, json table)
{
    cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
    cout << "------------------------------------------------" << endl;
    cout << Colors::GREEN() << " # Dataset ";
    for (const auto& model : models) {
        cout << setw(12) << left << model << " ";
    }
    cout << endl;
    cout << "=== ========================= ";
    for (const auto& model : models) {
        cout << "============ ";
    }
    cout << endl;
    auto i = 0;
    bool odd = true;
    map<string, double> totals;
    for (const auto& model : models) {
        totals[model] = 0.0;
    }
    // Take the dataset list from a real model entry. The table also holds the
    // "dateTable" string, and JSON objects iterate in key order, so
    // table.begin() is only a model entry while some model name sorts before
    // "dateTable".
    const json noDatasets = json::object();
    const json* origin = &noDatasets;
    if (!models.empty()) {
        origin = &table.at(models.front());
    }
    for (auto const& item : origin->items()) {
        auto color = odd ? Colors::BLUE() : Colors::CYAN();
        cout << color << setw(3) << fixed << right << i++ << " ";
        cout << setw(25) << left << item.key() << " ";
        double maxValue = 0;
        // Find out the max value for this dataset
        for (const auto& model : models) {
            double value = table[model].at(item.key()).at(0).get<double>();
            if (value > maxValue) {
                maxValue = value;
            }
        }
        // Print the row with red colors on max values
        for (const auto& model : models) {
            string efectiveColor = color;
            double value = table[model].at(item.key()).at(0).get<double>();
            if (value == maxValue) {
                efectiveColor = Colors::RED();
            }
            totals[model] += value;
            cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
        }
        cout << endl;
        odd = !odd;
    }
    cout << Colors::GREEN() << "=== ========================= ";
    for (const auto& model : models) {
        cout << "============ ";
    }
    cout << endl;
    cout << Colors::GREEN() << setw(30) << " Totals...................";
    double max = 0.0;
    for (const auto& total : totals) {
        if (total.second > max) {
            max = total.second;
        }
    }
    for (const auto& model : models) {
        string efectiveColor = Colors::GREEN();
        if (totals[model] == max) {
            efectiveColor = Colors::RED();
        }
        cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " ";
    }
    cout << endl;
}
/// Prints the comparison table of all models and, when the `friedman` flag
/// is set, runs the Friedman test followed by the post-hoc Holm test.
void BestResults::reportAll()
{
    auto models = getModels();
    // Build the table of results
    json table = buildTableResults(models);
    // Print the table of results
    printTableResults(models, table);
    if (!friedman || models.empty()) {
        return;
    }
    // Compute the Friedman test.
    // Dataset names come from a real model entry: the table also carries the
    // "dateTable" string, so table.begin() is not guaranteed to be a model.
    vector<string> datasets;
    for (const auto& dataset : table.at(models.front()).items()) {
        datasets.push_back(dataset.key());
    }
    double significance = 0.05;
    Statistics stats(models, datasets, table, significance);
    auto result = stats.friedmanTest();
    stats.postHocHolmTest(result);
}
}

View File

@@ -1,20 +1,29 @@
#ifndef BESTRESULTS_H
#define BESTRESULTS_H
#include <string>
#include <set>
#include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json;
namespace platform {
class BestResults {
public:
explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {}
void build();
void report();
explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
string build();
void reportSingle();
void reportAll();
void buildAll();
private:
vector<string> loadFiles();
vector<string> getModels();
vector<string> loadResultFiles();
json buildTableResults(vector<string> models);
void printTableResults(vector<string> models, json table);
string bestResultFile();
json loadFile(const string& fileName);
string path;
string score;
string model;
bool friedman;
};
}
#endif //BESTRESULTS_H

View File

@@ -6,15 +6,15 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
add_executable(list list.cc platformUtils Datasets.cc)
add_executable(best best.cc BestResults.cc Results.cc ReportBase.cc ReportExcel.cc platformUtils.cc)
add_executable(best best.cc BestResults.cc Result.cc Statistics.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
target_link_libraries(best "${TORCH_LIBRARIES}" libxlsxwriter.so stdc++fs)
target_link_libraries(best Boost::boost stdc++fs)
else()
target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp)
target_link_libraries(best "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}")
target_link_libraries(best Boost::boost)
endif()
target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}")

View File

@@ -3,22 +3,13 @@
#include <string>
#include <iostream>
#include "Paths.h"
#include "Symbols.h"
#include <nlohmann/json.hpp>
using json = nlohmann::json;
namespace platform {
using namespace std;
class Symbols {
public:
inline static const string check_mark{ "\u2714" };
inline static const string exclamation{ "\u2757" };
inline static const string black_star{ "\u2605" };
inline static const string cross{ "\u2717" };
inline static const string upward_arrow{ "\u27B6" };
inline static const string down_arrow{ "\u27B4" };
inline static const string equal_best{ check_mark };
inline static const string better_best{ black_star };
};
class ReportBase {
public:
explicit ReportBase(json data_, bool compare);

51
src/Platform/Result.cc Normal file
View File

@@ -0,0 +1,51 @@
#include <filesystem>
#include <fstream>
#include <sstream>
#include "Result.h"
#include "Colors.h"
#include "BestScore.h"
namespace platform {
/// Loads the result file `path`/`filename` and caches its summary fields.
///
/// The score is the sum of the "score" of every entry in "results"; when the
/// file's "score_name" equals BestScore::scoreName() the sum is divided by
/// BestScore::score(). A result is flagged complete when it holds more than
/// one entry in "results".
Result::Result(const string& path, const string& filename)
    : path(path)
    , filename(filename)
{
    json data = load();
    date = data["date"].get<string>();
    double total = 0;
    for (const auto& entry : data["results"]) {
        total += entry["score"].get<double>();
    }
    scoreName = data["score_name"].get<string>();
    score = (scoreName == BestScore::scoreName()) ? total / BestScore::score() : total;
    title = data["title"].get<string>();
    duration = data["duration"].get<double>();
    model = data["model"].get<string>();
    complete = data["results"].size() > 1;
}
/// Parses the result file located at `path`/`filename`.
///
/// @return The parsed JSON document.
/// @throws invalid_argument if the file cannot be opened.
json Result::load() const
{
    const string fullName = path + "/" + filename;
    ifstream resultData(fullName);
    if (!resultData.is_open()) {
        throw invalid_argument("Unable to open result file. [" + fullName + "]");
    }
    return json::parse(resultData);
}
/// Renders the result as one fixed-width text line: date, model, score name,
/// score (7 decimals), completeness flag ("C" complete / "P" partial),
/// duration (3 decimals) and title.
string Result::to_string() const
{
    const char* completionFlag = isComplete() ? "C" : "P";
    stringstream line;
    line << date << " "
        << setw(12) << left << model << " "
        << setw(11) << left << scoreName << " "
        << right << setw(11) << setprecision(7) << fixed << score << " "
        << setw(1) << " " << completionFlag << " "
        << setw(9) << setprecision(3) << fixed << duration << " "
        << setw(50) << left << title << " ";
    return line.str();
}
}

37
src/Platform/Result.h Normal file
View File

@@ -0,0 +1,37 @@
#ifndef RESULT_H
#define RESULT_H
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
namespace platform {
using namespace std;
using json = nlohmann::json;
/// Summary of a single result file: the fields are read once by the
/// constructor and exposed through read-only accessors.
class Result {
public:
    /// Loads `path`/`filename` and caches its summary fields.
    Result(const string& path, const string& filename);

    /// Parses the underlying file again and returns the whole document.
    json load() const;
    /// One-line, fixed-width textual summary.
    string to_string() const;

    // Accessors
    string getFilename() const { return filename; }
    string getDate() const { return date; }
    double getScore() const { return score; }
    string getTitle() const { return title; }
    double getDuration() const { return duration; }
    string getModel() const { return model; }
    string getScoreName() const { return scoreName; }
    bool isComplete() const { return complete; }

private:
    string path;      // directory holding the result file
    string filename;  // file name inside `path`
    string date;
    double score;
    string title;
    double duration;
    string model;
    string scoreName;
    bool complete;
};
};
#endif

View File

@@ -6,34 +6,6 @@
#include "BestScore.h"
#include "Colors.h"
namespace platform {
Result::Result(const string& path, const string& filename)
: path(path)
, filename(filename)
{
auto data = load();
date = data["date"];
score = 0;
for (const auto& result : data["results"]) {
score += result["score"].get<double>();
}
scoreName = data["score_name"];
if (scoreName == BestScore::scoreName()) {
score /= BestScore::score();
}
title = data["title"];
duration = data["duration"];
model = data["model"];
complete = data["results"].size() > 1;
}
json Result::load() const
{
ifstream resultData(path + "/" + filename);
if (resultData.is_open()) {
json data = json::parse(resultData);
return data;
}
throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]");
}
void Results::load()
{
using std::filesystem::directory_iterator;
@@ -52,19 +24,6 @@ namespace platform {
max = files.size();
}
}
string Result::to_string() const
{
stringstream oss;
oss << date << " ";
oss << setw(12) << left << model << " ";
oss << setw(11) << left << scoreName << " ";
oss << right << setw(11) << setprecision(7) << fixed << score << " ";
auto completeString = isComplete() ? "C" : "P";
oss << setw(1) << " " << completeString << " ";
oss << setw(9) << setprecision(3) << fixed << duration << " ";
oss << setw(50) << left << title << " ";
return oss.str();
}
void Results::show() const
{
cout << Colors::GREEN() << "Results found: " << files.size() << endl;

View File

@@ -5,34 +5,11 @@
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
#include "Result.h"
namespace platform {
using namespace std;
using json = nlohmann::json;
class Result {
public:
Result(const string& path, const string& filename);
json load() const;
string to_string() const;
string getFilename() const { return filename; };
string getDate() const { return date; };
double getScore() const { return score; };
string getTitle() const { return title; };
double getDuration() const { return duration; };
string getModel() const { return model; };
string getScoreName() const { return scoreName; };
bool isComplete() const { return complete; };
private:
string path;
string filename;
string date;
double score;
string title;
double duration;
string model;
string scoreName;
bool complete;
};
class Results {
public:
Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial, bool compare) :

215
src/Platform/Statistics.cc Normal file
View File

@@ -0,0 +1,215 @@
#include "Statistics.h"
#include "Colors.h"
#include "Symbols.h"
#include <boost/math/distributions/chi_squared.hpp>
#include <boost/math/distributions/normal.hpp>
namespace platform {
/// Stores copies of the inputs and caches the number of models and datasets.
///
/// @param models       Model names.
/// @param datasets     Dataset names.
/// @param data         Indexed as data[model][dataset][0] to obtain the score.
/// @param significance Significance level used by the tests.
Statistics::Statistics(vector<string>& models, vector<string>& datasets, json data, double significance)
    : models(models)
    , datasets(datasets)
    , data(data)
    , significance(significance)
    , nModels(static_cast<int>(models.size()))
    , nDatasets(static_cast<int>(datasets.size()))
{
}
void Statistics::fit()
{
if (nModels < 3 || nDatasets < 3) {
cerr << "nModels: " << nModels << endl;
cerr << "nDatasets: " << nDatasets << endl;
throw runtime_error("Can't make the Friedman test with less than 3 models and/or less than 3 datasets.");
}
computeRanks();
// Set the control model as the one with the lowest average rank
controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
computeWTL();
fitted = true;
}
/// Ranks the (name, value) pairs by value, highest value first (rank 1).
/// Entries with equal values all receive the average of the ranks they span.
///
/// @param ranksOrder Pairs to rank; sorted in place by descending value.
/// @return Map from name to (possibly fractional) rank.
map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder)
{
    const int total = static_cast<int>(ranksOrder.size());
    sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, double>& lhs, const pair<string, double>& rhs) {
        return lhs.second > rhs.second;
    });
    // First pass: plain positional ranks 1..n
    map<string, float> ranks;
    for (int position = 0; position < total; ++position) {
        ranks[ranksOrder[position].first] = position + 1.0;
    }
    // Second pass: every run [start, stop) of equal values shares its mean rank
    for (int start = 0, stop = 0; start < total; start = stop) {
        int sumRanks = ranks[ranksOrder[start].first];
        for (stop = start + 1; stop < total && ranksOrder[start].second == ranksOrder[stop].second; ++stop) {
            sumRanks += ranks[ranksOrder[stop].first];
        }
        if (stop - start < 2) {
            continue; // no tie here
        }
        const float averageRank = static_cast<float>(sumRanks) / (stop - start);
        for (int k = start; k < stop; ++k) {
            ranks[ranksOrder[k].first] = averageRank;
        }
    }
    return ranks;
}
void Statistics::computeRanks()
{
map<string, float> ranksLine;
for (const auto& dataset : datasets) {
vector<pair<string, double>> ranksOrder;
for (const auto& model : models) {
double value = data[model].at(dataset).at(0).get<double>();
ranksOrder.push_back({ model, value });
}
// Assign the ranks
ranksLine = assignRanks(ranksOrder);
if (ranks.size() == 0) {
ranks = ranksLine;
} else {
for (const auto& rank : ranksLine) {
ranks[rank.first] += rank.second;
}
}
}
// Average the ranks
for (const auto& rank : ranks) {
ranks[rank.first] /= nDatasets;
}
}
void Statistics::computeWTL()
{
// Compute the WTL matrix
for (int i = 0; i < nModels; ++i) {
wtl[i] = { 0, 0, 0 };
}
json origin = data.begin().value();
for (auto const& item : origin.items()) {
auto controlModel = models.at(controlIdx);
double controlValue = data[controlModel].at(item.key()).at(0).get<double>();
for (int i = 0; i < nModels; ++i) {
if (i == controlIdx) {
continue;
}
double value = data[models[i]].at(item.key()).at(0).get<double>();
if (value < controlValue) {
wtl[i].win++;
} else if (value == controlValue) {
wtl[i].tie++;
} else {
wtl[i].loss++;
}
}
}
}
// Post-hoc Holm test: compares every model against the control model (the one
// with the lowest average rank, chosen in fit()) and prints one row per
// non-control model with its adjusted p-value, average rank, win/tie/loss
// counters and whether H0 is accepted at the configured significance level.
// friedmanResult only selects the color of the frame (CYAN when true, YELLOW
// otherwise); it does not change any computed value.
void Statistics::postHocHolmTest(bool friedmanResult)
{
// Lazily compute ranks, control model and WTL table
if (!fitted) {
fit();
}
// Reference https://link.springer.com/article/10.1007/s44196-022-00083-8
// Post-hoc Holm test
// Calculate the p-value for the models paired with the control model
map<int, double> stats; // p-value of each model paired with the control model
boost::math::normal dist(0.0, 1.0);
// Denominator of the z statistic: sqrt(k * (k + 1) / (6 * N)), k = nModels, N = nDatasets
double diff = sqrt(nModels * (nModels + 1) / (6.0 * nDatasets));
for (int i = 0; i < nModels; i++) {
if (i == controlIdx) {
// The control model is stored with p-value 0.0
stats[i] = 0.0;
continue;
}
// z = |average rank difference| / diff; two-sided p-value from the standard normal
double z = abs(ranks.at(models[controlIdx]) - ranks.at(models[i])) / diff;
double p_value = (long double)2 * (1 - cdf(dist, z));
stats[i] = p_value;
}
// Sort the models by p-value
vector<pair<int, double>> statsOrder;
for (const auto& stat : stats) {
statsOrder.push_back({ stat.first, stat.second });
}
sort(statsOrder.begin(), statsOrder.end(), [](const pair<int, double>& a, const pair<int, double>& b) {
return a.second < b.second;
});
// Holm adjustment: the i-th smallest p-value is multiplied by (nModels - i),
// capped at 1.0 and never allowed to fall below the previous adjusted value.
// The control's 0.0 entry is part of the sorted list, which shifts the
// multiplier of the entries that follow it.
for (int i = 0; i < statsOrder.size(); ++i) {
auto item = statsOrder.at(i);
double before = i == 0 ? 0.0 : statsOrder.at(i - 1).second;
double p_value = min((double)1.0, item.second * (nModels - i));
p_value = max(before, p_value);
statsOrder[i] = { item.first, p_value };
}
auto color = friedmanResult ? Colors::CYAN() : Colors::YELLOW();
cout << color;
cout << " *************************************************************************************************************" << endl;
cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
cout << " Control model: " << models[controlIdx] << endl;
cout << " Model p-value rank win tie loss Status" << endl;
cout << " ============ ============ ========= === === ==== =============" << endl;
// sort ranks from lowest to highest
vector<pair<string, float>> ranksOrder;
for (const auto& rank : ranks) {
ranksOrder.push_back({ rank.first, rank.second });
}
sort(ranksOrder.begin(), ranksOrder.end(), [](const pair<string, float>& a, const pair<string, float>& b) {
return a.second < b.second;
});
// One output row per model, in ascending average-rank order, skipping the control
for (const auto& item : ranksOrder) {
if (item.first == models.at(controlIdx)) {
continue;
}
// Map the model name back to its index in `models` to find its p-value and WTL row
auto idx = distance(models.begin(), find(models.begin(), models.end(), item.first));
double pvalue = 0.0;
for (const auto& stat : statsOrder) {
if (stat.first == idx) {
pvalue = stat.second;
}
}
// H0 is accepted when the adjusted p-value is above the significance level
auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
cout << " " << colorStatus << left << setw(12) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second;
cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss;
cout << " " << status << textStatus << endl;
}
cout << color << " *************************************************************************************************************" << endl;
cout << Colors::RESET();
}
// Friedman test over the average ranks of all models.
// Prints the statistic, the chi-squared critical value and the p-value, and
// returns true when H0 ("no significant differences between the classifiers")
// is rejected, i.e. when the p-value is below the significance level.
bool Statistics::friedmanTest()
{
// Lazily compute ranks, control model and WTL table
if (!fitted) {
fit();
}
// Friedman test
// Calculate the Friedman statistic
cout << Colors::BLUE() << endl;
cout << "***************************************************************************************************************" << endl;
cout << Colors::GREEN() << "Friedman test: H0: 'There is no significant differences between all the classifiers.'" << Colors::BLUE() << endl;
double degreesOfFreedom = nModels - 1.0;
// Sum of the squared average ranks
double sumSquared = 0;
for (const auto& rank : ranks) {
sumSquared += pow(rank.second, 2);
}
// Compute the Friedman statistic as in https://link.springer.com/article/10.1007/s44196-022-00083-8
// Q = 12 * N / (k * (k + 1)) * (sum(R_j^2) - k * (k + 1)^2 / 4), with N = nDatasets and k = nModels
double friedmanQ = 12.0 * nDatasets / (nModels * (nModels + 1)) * (sumSquared - (nModels * pow(nModels + 1, 2)) / 4);
cout << "Friedman statistic: " << friedmanQ << endl;
// Calculate the critical value
boost::math::chi_squared chiSquared(degreesOfFreedom);
// Upper-tail probability of the statistic under chi-squared with k - 1 degrees of freedom
long double p_value = (long double)1.0 - cdf(chiSquared, friedmanQ);
// The critical value is only printed; the decision below is taken on the p-value
double criticalValue = quantile(chiSquared, 1 - significance);
std::cout << "Critical Chi-Square Value for df=" << fixed << (int)degreesOfFreedom
<< " and alpha=" << setprecision(2) << fixed << significance << ": " << setprecision(7) << scientific << criticalValue << std::endl;
cout << "p-value: " << scientific << p_value << " is " << (p_value < significance ? "less" : "greater") << " than " << setprecision(2) << fixed << significance << endl;
bool result;
if (p_value < significance) {
cout << Colors::GREEN() << "The null hypothesis H0 is rejected." << endl;
result = true;
} else {
cout << Colors::YELLOW() << "The null hypothesis H0 is accepted. Computed p-values will not be significant." << endl;
result = false;
}
cout << Colors::BLUE() << "***************************************************************************************************************" << Colors::RESET() << endl;
return result;
}
} // namespace platform

37
src/Platform/Statistics.h Normal file
View File

@@ -0,0 +1,37 @@
#ifndef STATISTICS_H
#define STATISTICS_H
#include <iostream>
#include <vector>
#include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json;
namespace platform {
/// Win / tie / loss counters of one model, filled by Statistics::computeWTL().
/// Every counter starts at zero, so a default-constructed WTL is safe to read
/// and to increment.
struct WTL {
    int win = 0;
    int tie = 0;
    int loss = 0;
};
/// Statistical comparison of several models over several datasets:
/// a Friedman test plus a post-hoc Holm test against a control model.
class Statistics {
public:
    /// @param models       Model names.
    /// @param datasets     Dataset names.
    /// @param data         Indexed as data[model][dataset][0] to obtain the score.
    /// @param significance Significance level used by both tests.
    Statistics(vector<string>& models, vector<string>& datasets, json data, double significance = 0.05);

    /// Prints the Friedman test report; returns true when H0 is rejected.
    bool friedmanTest();
    /// Prints the post-hoc Holm report; friedmanResult only selects the frame color.
    void postHocHolmTest(bool friedmanResult);

private:
    // Lazy initialization: ranks, control model and WTL table
    void fit();
    void computeRanks();
    void computeWTL();

private:
    vector<string> models;
    vector<string> datasets;
    json data;
    double significance;
    bool fitted = false;      // set by fit()
    int nModels = 0;
    int nDatasets = 0;
    int controlIdx = 0;       // index into `models` of the control model
    map<int, WTL> wtl;        // keyed by index into `models`
    map<string, float> ranks; // average rank per model name
};
}
#endif // !STATISTICS_H

18
src/Platform/Symbols.h Normal file
View File

@@ -0,0 +1,18 @@
#ifndef SYMBOLS_H
#define SYMBOLS_H
#include <string>
using namespace std;
namespace platform {
/// Unicode glyphs used to decorate console reports.
class Symbols {
public:
    inline static const string check_mark = "\u2714";   // heavy check mark
    inline static const string exclamation = "\u2757";  // heavy exclamation mark
    inline static const string black_star = "\u2605";   // black star
    inline static const string cross = "\u2717";        // ballot X
    inline static const string upward_arrow = "\u27B6"; // north-east arrow
    inline static const string down_arrow = "\u27B4";   // south-east arrow
    // Semantic aliases
    inline static const string equal_best = check_mark;
    inline static const string better_best = black_star;
};
}
#endif // !SYMBOLS_H

View File

@@ -2,22 +2,28 @@
#include <argparse/argparse.hpp>
#include "Paths.h"
#include "BestResults.h"
#include "Colors.h"
using namespace std;
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
argparse::ArgumentParser program("best");
program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)");
program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied");
program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model) (any for all models)");
program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied");
program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
try {
program.parse_args(argc, argv);
auto model = program.get<string>("model");
auto score = program.get<string>("score");
auto build = program.get<bool>("build");
auto report = program.get<bool>("report");
auto friedman = program.get<bool>("friedman");
if (model == "" || score == "") {
throw runtime_error("Model and score name must be supplied");
}
}
catch (const exception& err) {
cerr << err.what() << endl;
@@ -34,16 +40,32 @@ int main(int argc, char** argv)
auto score = program.get<string>("score");
auto build = program.get<bool>("build");
auto report = program.get<bool>("report");
if (!report && !build) {
cout << "Either build, report or both, have to be selected to do anything!" << endl;
auto friedman = program.get<bool>("friedman");
if (friedman && model != "any") {
cerr << "Friedman test can only be used with all models" << endl;
cerr << program;
exit(1);
}
auto results = platform::BestResults(platform::Paths::results(), model, score);
if (!report && !build) {
cerr << "Either build, report or both, have to be selected to do anything!" << endl;
cerr << program;
exit(1);
}
auto results = platform::BestResults(platform::Paths::results(), score, model, friedman);
if (build) {
results.build();
if (model == "any") {
results.buildAll();
} else {
string fileName = results.build();
cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl;
}
}
if (report) {
results.report();
if (model == "any") {
results.reportAll();
} else {
results.reportSingle();
}
}
return 0;
}