Enhance output of Best results reports

2023-09-28 12:08:56 +02:00 · 2023-09-28 12:08:56 +02:00 · 82acb3cab5
commit 82acb3cab5
parent 623ceed396
9 changed files with 109 additions and 36 deletions
--- a/14
+++ b/14
@ -12,14 +12,28 @@ setup: ## Install dependencies for tests and coverage
 	fi

 dest ?= ../discretizbench
+dest2 ?= ../covbench
+dest3 ?= ../odtebench
 copy: ## Copy binary files to selected folder
 	@echo "Destination folder: $(dest)"
+	@echo "Destination folder: $(dest2)"
+	@echo "Destination folder: $(dest3)"
 	make build
 	@echo ">>> Copying files to $(dest)"
 	@cp build/src/Platform/main $(dest)
 	@cp build/src/Platform/list $(dest)
 	@cp build/src/Platform/manage $(dest)
 	@cp build/src/Platform/best $(dest)
+	@echo ">>> Copying files to $(dest2)"
+	@cp build/src/Platform/main $(dest2)
+	@cp build/src/Platform/list $(dest2)
+	@cp build/src/Platform/manage $(dest2)
+	@cp build/src/Platform/best $(dest2)
+	@echo ">>> Copying files to $(dest3)"
+	@cp build/src/Platform/main $(dest3)
+	@cp build/src/Platform/list $(dest3)
+	@cp build/src/Platform/manage $(dest3)
+	@cp build/src/Platform/best $(dest3)
 	@echo ">>> Done"

 dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
--- a/src/Platform/BestResults.cc
+++ b/src/Platform/BestResults.cc
@ -1,4 +1,5 @@
 #include <filesystem>
+#include <set>
 #include <fstream>
 #include <iostream>
 #include <sstream>
@ -6,7 +7,7 @@
 #include "Result.h"
 #include "Colors.h"
 #include "Statistics.h"
-
+#include "BestResultsExcel.h"


 namespace fs = std::filesystem;
@ -124,6 +125,14 @@ namespace platform {
        result = vector<string>(models.begin(), models.end());
        return result;
    }
+    vector<string> BestResults::getDatasets(json table) // Returns the keys of `table`, in the order table.items() yields them.
+    {
+        vector<string> datasets;
+        for (const auto& dataset : table.items()) { // walk every entry of the JSON value
+            datasets.push_back(dataset.key()); // keep only the key; the entry's value is ignored
+        }
+        return datasets; // NOTE(review): `table` is received by value, so every call copies the JSON - confirm that is intended
+    }

    void BestResults::buildAll()
    {
@ -147,16 +156,18 @@ namespace platform {
        }
        auto date = ftime_to_string(filesystem::last_write_time(bestFileName));
        auto data = loadFile(bestFileName);
+        auto datasets = getDatasets(data);
+        int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
        cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl;
        cout << "--------------------------------------------------------" << endl;
-        cout << Colors::GREEN() << " #  Dataset                   Score       File                                                               Hyperparameters" << endl;
-        cout << "=== ========================= =========== ================================================================== ================================================= " << endl;
+        cout << Colors::GREEN() << " #  " << setw(maxDatasetName + 1) << left << string("Dataset") << "Score       File                                                               Hyperparameters" << endl;
+        cout << "=== " << string(maxDatasetName, '=') << " =========== ================================================================== ================================================= " << endl;
        auto i = 0;
        bool odd = true;
        for (auto const& item : data.items()) {
            auto color = odd ? Colors::BLUE() : Colors::CYAN();
            cout << color << setw(3) << fixed << right << i++ << " ";
-            cout << setw(25) << left << item.key() << " ";
+            cout << setw(maxDatasetName) << left << item.key() << " ";
            cout << setw(11) << setprecision(9) << fixed << item.value().at(0).get<double>() << " ";
            cout << setw(66) << item.value().at(2).get<string>() << " ";
            cout << item.value().at(1) << " ";
@ -206,14 +217,14 @@ namespace platform {
    {
        cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get<string>() << endl;
        cout << "------------------------------------------------" << endl;
-        cout << Colors::GREEN() << " #  Dataset                   ";
+        cout << Colors::GREEN() << " #  " << setw(maxDatasetName + 1) << left << string("Dataset");
        for (const auto& model : models) {
-            cout << setw(12) << left << model << " ";
+            cout << setw(maxModelName) << left << model << " ";
        }
        cout << endl;
-        cout << "=== ========================= ";
+        cout << "=== " << string(maxDatasetName, '=') << " ";
        for (const auto& model : models) {
-            cout << "============ ";
+            cout << string(maxModelName, '=') << " ";
        }
        cout << endl;
        auto i = 0;
@ -227,7 +238,7 @@ namespace platform {
        for (auto const& item : origin.items()) {
            auto color = odd ? Colors::BLUE() : Colors::CYAN();
            cout << color << setw(3) << fixed << right << i++ << " ";
-            cout << setw(25) << left << item.key() << " ";
+            cout << setw(maxDatasetName) << left << item.key() << " ";
            double maxValue = 0;
            // Find out the max value for this dataset
            for (const auto& model : models) {
@ -244,17 +255,17 @@ namespace platform {
                    efectiveColor = Colors::RED();
                }
                totals[model] += value;
-                cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " ";
+                cout << efectiveColor << setw(maxModelName) << setprecision(maxModelName - 2) << fixed << value << " ";
            }
            cout << endl;
            odd = !odd;
        }
-        cout << Colors::GREEN() << "=== ========================= ";
+        cout << Colors::GREEN() << "=== " << string(maxDatasetName, '=') << " ";
        for (const auto& model : models) {
-            cout << "============ ";
+            cout << string(maxModelName, '=') << " ";
        }
        cout << endl;
-        cout << Colors::GREEN() << setw(30) << "    Totals...................";
+        cout << Colors::GREEN() << setw(5 + maxDatasetName) << "    Totals...................";
        double max = 0.0;
        for (const auto& total : totals) {
            if (total.second > max) {
@ -266,27 +277,32 @@ namespace platform {
            if (totals[model] == max) {
                efectiveColor = Colors::RED();
            }
-            cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " ";
+            cout << efectiveColor << right << setw(maxModelName) << setprecision(maxModelName - 4) << fixed << totals[model] << " ";
        }
        cout << endl;
    }
-    void BestResults::reportAll()
+    void BestResults::reportAll(bool excel) // Prints the all-models table, runs the Friedman test if `friedman` is set, and builds the Excel report if `excel` is true.
    {
        auto models = getModels();
        // Build the table of results
        json table = buildTableResults(models);
+        vector<string> datasets = getDatasets(table.begin().value()); // dataset names = keys of the table's first entry; NOTE(review): assumes the table is not empty
+        maxModelName = 12; // minimum model column width; only grown below, so an empty `models` is safe (no dereference of end())
+        for (const auto& name : models) { maxModelName = max(maxModelName, static_cast<int>(name.size())); }
+        maxDatasetName = 25; // minimum dataset column width; only grown below, so an empty `datasets` is safe
+        for (const auto& name : datasets) { maxDatasetName = max(maxDatasetName, static_cast<int>(name.size())); }
        // Print the table of results
        printTableResults(models, table);
        // Compute the Friedman test
        if (friedman) {
-            vector<string> datasets;
-            for (const auto& dataset : table.begin().value().items()) {
-                datasets.push_back(dataset.key());
-            }
            double significance = 0.05;
            Statistics stats(models, datasets, table, significance);
            auto result = stats.friedmanTest();
            stats.postHocHolmTest(result);
        }
+        if (excel) {
+            BestResultsExcel excelReport(models, datasets, table, friedman); // named differently from the `excel` flag to avoid shadowing the parameter
+            excelReport.build();
+        }
    }
 }
--- a/src/Platform/BestResults.h
+++ b/src/Platform/BestResults.h
@ -1,7 +1,6 @@
 #ifndef BESTRESULTS_H
 #define BESTRESULTS_H
 #include <string>
-#include <set>
 #include <nlohmann/json.hpp>
 using namespace std;
 using json = nlohmann::json;
@ -11,10 +10,11 @@ namespace platform {
        explicit BestResults(const string& path, const string& score, const string& model, bool friedman) : path(path), score(score), model(model), friedman(friedman) {}
        string build();
        void reportSingle();
-        void reportAll();
+        void reportAll(bool excel);
        void buildAll();
    private:
        vector<string> getModels();
+        vector<string> getDatasets(json table);
        vector<string> loadResultFiles();
        json buildTableResults(vector<string> models);
        void printTableResults(vector<string> models, json table);
@ -24,6 +24,8 @@ namespace platform {
        string score;
        string model;
        bool friedman;
+        int maxModelName = 0;
+        int maxDatasetName = 0;
    };
 }
 #endif //BESTRESULTS_H
--- a/src/Platform/BestResultsExcel.cc
+++ b/src/Platform/BestResultsExcel.cc
@ -0,0 +1,8 @@
+#include "BestResultsExcel.h"
+
+namespace platform {
+    void BestResultsExcel::build() // Placeholder: the body is empty, so calling it currently does nothing.
+    {
+
+    }
+}
--- a/src/Platform/BestResultsExcel.h
+++ b/src/Platform/BestResultsExcel.h
@ -0,0 +1,21 @@
+#ifndef BESTRESULTS_EXCEL_H
+#define BESTRESULTS_EXCEL_H
+#include <vector>
+#include <nlohmann/json.hpp>
+
+using namespace std;
+using json = nlohmann::json;
+
+namespace platform {
+    class BestResultsExcel { // Stores the models, datasets, results table and friedman flag it is constructed with; presumably the basis for an Excel report - TODO confirm
+    public:
+        BestResultsExcel(vector<string> models, vector<string> datasets, json table, bool friedman) : models(models), datasets(datasets), table(table), friedman(friedman) {} // copies each argument into the member of the same name
+        void build(); // declared here; no definition in this header
+    private:
+        vector<string> models; // copy of the `models` constructor argument
+        vector<string> datasets; // copy of the `datasets` constructor argument
+        json table; // copy of the `table` constructor argument
+        bool friedman; // copy of the `friedman` constructor argument; NOTE(review): presumably whether Friedman results belong in the report - confirm
+    };
+}
+#endif //BESTRESULTS_EXCEL_H
--- a/src/Platform/CMakeLists.txt
+++ b/src/Platform/CMakeLists.txt
@ -8,7 +8,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include)
 add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc)
 add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc)
 add_executable(list list.cc platformUtils Datasets.cc)
-add_executable(best best.cc BestResults.cc Result.cc Statistics.cc)
+add_executable(best best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc)
 target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")
 if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
    target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs)
--- a/src/Platform/Statistics.cc
+++ b/src/Platform/Statistics.cc
@ -23,6 +23,8 @@ namespace platform {
        // Set the control model as the one with the lowest average rank
        controlIdx = distance(ranks.begin(), min_element(ranks.begin(), ranks.end(), [](const auto& l, const auto& r) { return l.second < r.second; }));
        computeWTL();
+        maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
+        maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size();
        fitted = true;
    }
    map<string, float> assignRanks(vector<pair<string, double>>& ranksOrder)
@ -145,8 +147,8 @@ namespace platform {
        cout << "  *************************************************************************************************************" << endl;
        cout << "  Post-hoc Holm test: H0: 'There is no significant differences between the control model and the other models.'" << endl;
        cout << "  Control model: " << models[controlIdx] << endl;
-        cout << "  Model        p-value      rank      win tie loss Status" << endl;
-        cout << "  ============ ============ ========= === === ==== =============" << endl;
+        cout << "  " << left << setw(maxModelName) << string("Model") << " p-value      rank      win tie loss Status" << endl;
+        cout << "  " << string(maxModelName, '=') << " ============ ========= === === ==== =============" << endl;
        // sort ranks from lowest to highest
        vector<pair<string, float>> ranksOrder;
        for (const auto& rank : ranks) {
@ -169,7 +171,7 @@ namespace platform {
            auto colorStatus = pvalue > significance ? Colors::GREEN() : Colors::MAGENTA();
            auto status = pvalue > significance ? Symbols::check_mark : Symbols::cross;
            auto textStatus = pvalue > significance ? " accepted H0" : " rejected H0";
-            cout << "  " << colorStatus << left << setw(12) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second;
+            cout << "  " << colorStatus << left << setw(maxModelName) << item.first << " " << setprecision(6) << scientific << pvalue << setprecision(7) << fixed << " " << item.second;
            cout << " " << right << setw(3) << wtl.at(idx).win << " " << setw(3) << wtl.at(idx).tie << " " << setw(4) << wtl.at(idx).loss;
            cout << " " << status << textStatus << endl;
        }
--- a/src/Platform/Statistics.h
+++ b/src/Platform/Statistics.h
@ -32,6 +32,8 @@ namespace platform {
        int controlIdx = 0;
        map<int, WTL> wtl;
        map<string, float> ranks;
+        int maxModelName = 0;
+        int maxDatasetName = 0;
    };
 }
 #endif // !STATISTICS_H
--- a/src/Platform/best.cc
+++ b/src/Platform/best.cc
@ -14,6 +14,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
    program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true);
    program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true);
    program.add_argument("--friedman").help("Friedman test").default_value(false).implicit_value(true);
+    program.add_argument("--excel").help("Output to excel").default_value(false).implicit_value(true);
    try {
        program.parse_args(argc, argv);
        auto model = program.get<string>("model");
@ -21,9 +22,25 @@ argparse::ArgumentParser manageArguments(int argc, char** argv)
        auto build = program.get<bool>("build");
        auto report = program.get<bool>("report");
        auto friedman = program.get<bool>("friedman");
+        auto excel = program.get<bool>("excel");
        if (model == "" || score == "") {
            throw runtime_error("Model and score name must be supplied");
        }
+        if (friedman && model != "any") {
+            cerr << "Friedman test can only be used with all models" << endl;
+            cerr << program;
+            exit(1);
+        }
+        if (excel && model != "any") {
+            cerr << "Excel ourput can only be used with all models" << endl;
+            cerr << program;
+            exit(1);
+        }
+        if (!report && !build) {
+            cerr << "Either build, report or both, have to be selected to do anything!" << endl;
+            cerr << program;
+            exit(1);
+        }
    }
    catch (const exception& err) {
        cerr << err.what() << endl;
@ -41,16 +58,7 @@ int main(int argc, char** argv)
    auto build = program.get<bool>("build");
    auto report = program.get<bool>("report");
    auto friedman = program.get<bool>("friedman");
-    if (friedman && model != "any") {
-        cerr << "Friedman test can only be used with all models" << endl;
-        cerr << program;
-        exit(1);
-    }
-    if (!report && !build) {
-        cerr << "Either build, report or both, have to be selected to do anything!" << endl;
-        cerr << program;
-        exit(1);
-    }
+    auto excel = program.get<bool>("excel");
    auto results = platform::BestResults(platform::Paths::results(), score, model, friedman);
    if (build) {
        if (model == "any") {
@ -62,7 +70,7 @@ int main(int argc, char** argv)
    }
    if (report) {
        if (model == "any") {
-            results.reportAll();
+            results.reportAll(excel);
        } else {
            results.reportSingle();
        }