Enhance output of Best results reports

This commit is contained in:
Ricardo Montañana Gómez 2023-09-28 12:08:56 +02:00
parent 623ceed396
commit 82acb3cab5
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
9 changed files with 109 additions and 36 deletions

View File

@ -12,14 +12,28 @@ setup: ## Install dependencies for tests and coverage
fi
dest ?= ../discretizbench
dest2 ?= ../covbench
dest3 ?= ../odtebench
copy: ## Copy binary files to selected folder
	@echo "Destination folder: $(dest)"
	@echo "Destination folder: $(dest2)"
	@echo "Destination folder: $(dest3)"
	# Use $(MAKE) rather than a literal "make" so flags given to the parent make reach the sub-make
	$(MAKE) build
	# Copy the same four binaries to every destination; stop at the first failed copy
	@for folder in $(dest) $(dest2) $(dest3); do \
		echo ">>> Copying files to $$folder"; \
		for binary in main list manage best; do \
			cp build/src/Platform/$$binary $$folder || exit 1; \
		done; \
	done
	@echo ">>> Done"
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)

View File

@ -1,4 +1,5 @@
#include <filesystem>
#include <set>
#include <fstream>
#include <iostream>
#include <sstream>
@ -6,7 +7,7 @@
#include "Result.h"
#include "Colors.h"
#include "Statistics.h"
#include "BestResultsExcel.h"
namespace fs = std::filesystem;
@ -124,6 +125,14 @@ namespace platform {
result = vector<string>(models.begin(), models.end());
return result;
}
// Returns the keys of `table`, in the order the JSON container iterates them.
// Callers use these keys as the dataset names.
vector<string> BestResults::getDatasets(json table)
{
    vector<string> names;
    for (const auto& entry : table.items()) {
        names.emplace_back(entry.key());
    }
    return names;
}
void BestResults::buildAll()
{
@ -147,16 +156,18 @@ namespace platform {
}
auto date = ftime_to_string(filesystem::last_write_time(bestFileName));
auto data = loadFile(bestFileName);
auto datasets = getDatasets(data);
int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl;
cout << "--------------------------------------------------------" << endl;
cout << Colors::GREEN() << " # Dataset Score File Hyperparameters" << endl;
cout << "=== ========================= =========== ================================================================== ================================================= " << endl;
cout << Colors::GREEN() << " # " << setw(maxDatasetName + 1) << left << string("Dataset") << "Score File Hyperparameters" << endl;
cout << "=== " << string(maxDatasetName, '=') << " =========== ================================================================== ================================================= " << endl;
auto i = 0;
bool odd = true;
for (auto const& item : data.items()) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
cout << color << setw(3) << fixed << right << i++ << " ";
cout << setw(25) << left << item.key() << " ";
cout << setw(maxDatasetName) << left << item.key() << " ";
cout << setw(11) << setprecision(9) << fixed << item.value().at(0).get<double>() << " ";
cout << setw(66) << item.value().at(2).get<string>() << " ";
cout << item.value().at(1) << " ";
@ -206,14 +217,14 @@ namespace platform {
{
cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
cout << "------------------------------------------------" << endl;
cout << Colors::GREEN() << " # Dataset ";
cout << Colors::GREEN() << " # " << setw(maxDatasetName + 1) << left << string("Dataset");
for (const auto& model : models) {
cout << setw(12) << left << model << " ";
cout << setw(maxModelName) << left << model << " ";
}
cout << endl;
cout << "=== ========================= ";
cout << "=== " << string(maxDatasetName, '=') << " ";
for (const auto& model : models) {
cout << "============ ";
cout << string(maxModelName, '=') << " ";
}
cout << endl;
auto i = 0;
@ -227,7 +238,7 @@ namespace platform {
for (auto const& item : origin.items()) {
auto color = odd ? Colors::BLUE() : Colors::CYAN();
cout << color << setw(3) << fixed << right << i++ << " ";
cout << setw(25) << left << item.key() << " ";
cout << setw(maxDatasetName) << left << item.key() << " ";
double maxValue = 0;
// Find out the max value for this dataset
for (const auto& model : models) {
@ -244,17 +255,17 @@ namespace platform {
efectiveColor = Colors::RED();
}
totals[model] += value;
cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
cout << efectiveColor << setw(maxModelName) << setprecision(maxModelName - 2) << fixed << value << " ";
}
cout << endl;
odd = !odd;
}
cout << Colors::GREEN() << "=== ========================= ";
cout << Colors::GREEN() << "=== " << string(maxDatasetName, '=') << " ";
for (const auto& model : models) {
cout << "============ ";
cout << string(maxModelName, '=') << " ";
}
cout << endl;
cout << Colors::GREEN() << setw(30) << " Totals...................";
cout << Colors::GREEN() << setw(5 + maxDatasetName) << " Totals...................";
double max = 0.0;
for (const auto& total : totals) {
if (total.second > max) {
@ -266,27 +277,32 @@ namespace platform {
if (totals[model] == max) {
efectiveColor = Colors::RED();
}
cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " ";
cout << efectiveColor << right << setw(maxModelName) << setprecision(maxModelName - 4) << fixed << totals[model] << " ";
}
cout << endl;
}
void BestResults::reportAll()
void BestResults::reportAll(bool excel)
{
auto models = getModels();
// Build the table of results
json table = buildTableResults(models);
vector<string> datasets = getDatasets(table.begin().value());
maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
maxModelName = max(12, maxModelName);
maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
maxDatasetName = max(25, maxDatasetName);
// Print the table of results
printTableResults(models, table);
// Compute the Friedman test
if (friedman) {
vector<string> datasets;
for (const auto& dataset : table.begin().value().items()) {
datasets.push_back(dataset.key());
}
double significance = 0.05;
Statistics stats(models, datasets, table, significance);
auto result = stats.friedmanTest();
stats.postHocHolmTest(result);
}
if (excel) {
BestResultsExcel excel(models, datasets, table, friedman);
excel.build();
}
}
}

View File

@ -1,7 +1,6 @@
#ifndef BESTRESULTS_H
#define BESTRESULTS_H
#include <string>
#include <set>
#include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json;
@ -11,10 +10,11 @@ namespace platform {
explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
string build();
void reportSingle();
void reportAll();
void reportAll(bool excel);
void buildAll();
private:
vector<string> getModels();
vector<string> getDatasets(json table);
vector<string> loadResultFiles();
json buildTableResults(vector<string> models);
void printTableResults(vector<string> models, json table);
@ -24,6 +24,8 @@ namespace platform {
string score;
string model;
bool friedman;
int maxModelName = 0;
int maxDatasetName = 0;
};
}
#endif //BESTRESULTS_H

View File

@ -0,0 +1,8 @@
#include "BestResultsExcel.h"
namespace platform {
// Placeholder: the body is empty, so calling it currently produces no output.
void BestResultsExcel::build()
{
}
}

View File

@ -0,0 +1,21 @@
#ifndef BESTRESULTS_EXCEL_H
#define BESTRESULTS_EXCEL_H
#include <string>
#include <utility>
#include <vector>
#include <nlohmann/json.hpp>
using namespace std;
using json = nlohmann::json;
namespace platform {
/// Holds the inputs of the best-results report (models, datasets, results table and
/// Friedman flag) for the Excel output.
class BestResultsExcel {
public:
    /// Stores the report inputs. The arguments arrive by value and are moved into the
    /// members, so each one is copied at most once.
    BestResultsExcel(vector<string> models, vector<string> datasets, json table, bool friedman)
        : models(std::move(models)), datasets(std::move(datasets)), table(std::move(table)), friedman(friedman)
    {
    }
    /// Declared here; the definition lives outside the class.
    void build();
private:
    vector<string> models;   // model names passed to the constructor
    vector<string> datasets; // dataset names passed to the constructor
    json table;              // results table passed to the constructor
    bool friedman;           // Friedman flag passed to the constructor
};
}
#endif //BESTRESULTS_EXCEL_H

View File

@ -8,7 +8,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
add_executable(list list.cc platformUtils Datasets.cc)
add_executable(best best.cc BestResults.cc Result.cc Statistics.cc)
add_executable(best best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)

View File

@ -23,6 +23,8 @@ namespace platform {
// Set the control model as the one with the lowest average rank
controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
computeWTL();
maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
fitted = true;
}
map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder)
@ -145,8 +147,8 @@ namespace platform {
cout << " *************************************************************************************************************" << endl;
cout << " Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
cout << " Control model: " << models[controlIdx] << endl;
cout << " Model p-value rank win tie loss Status" << endl;
cout << " ============ ============ ========= === === ==== =============" << endl;
cout << " " << left << setw(maxModelName) << string("Model") << " p-value rank win tie loss Status" << endl;
cout << " " << string(maxModelName, '=') << " ============ ========= === === ==== =============" << endl;
// sort ranks from lowest to highest
vector<pair<string, float>> ranksOrder;
for (const auto& rank : ranks) {
@ -169,7 +171,7 @@ namespace platform {
auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
cout << " " << colorStatus << left << setw(12) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second;
cout << " " << colorStatus << left << setw(maxModelName) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second;
cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss;
cout << " " << status << textStatus << endl;
}

View File

@ -32,6 +32,8 @@ namespace platform {
int controlIdx = 0;
map<int, WTL> wtl;
map<string, float> ranks;
int maxModelName = 0;
int maxDatasetName = 0;
};
}
#endif // !STATISTICS_H

View File

@ -14,6 +14,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true);
try {
program.parse_args(argc, argv);
auto model = program.get<string>("model");
@ -21,9 +22,25 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
auto build = program.get<bool>("build");
auto report = program.get<bool>("report");
auto friedman = program.get<bool>("friedman");
auto excel = program.get<bool>("excel");
if (model == "" || score == "") {
throw runtime_error("Model and score name must be supplied");
}
if (friedman && model != "any") {
cerr << "Friedman test can only be used with all models" << endl;
cerr << program;
exit(1);
}
if (excel && model != "any") {
    // Excel output is only produced by the all-models report, so reject single-model requests
    cerr << "Excel output can only be used with all models" << endl;
    cerr << program;
    exit(1);
}
if (!report && !build) {
cerr << "Either build, report or both, have to be selected to do anything!" << endl;
cerr << program;
exit(1);
}
}
catch (const exception& err) {
cerr << err.what() << endl;
@ -41,16 +58,7 @@ int main(int argc, char** argv)
auto build = program.get<bool>("build");
auto report = program.get<bool>("report");
auto friedman = program.get<bool>("friedman");
if (friedman && model != "any") {
cerr << "Friedman test can only be used with all models" << endl;
cerr << program;
exit(1);
}
if (!report && !build) {
cerr << "Either build, report or both, have to be selected to do anything!" << endl;
cerr << program;
exit(1);
}
auto excel = program.get<bool>("excel");
auto results = platform::BestResults(platform::Paths::results(), score, model, friedman);
if (build) {
if (model == "any") {
@ -62,7 +70,7 @@ int main(int argc, char** argv)
}
if (report) {
if (model == "any") {
results.reportAll();
results.reportAll(excel);
} else {
results.reportSingle();
}