From fa366a4c22cbfee1b034fe93b8f322515420e7eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Sat, 16 Mar 2024 13:48:49 +0100 Subject: [PATCH] Convert DatasetsConsole & ResultsDatasetConsole to string output --- src/CMakeLists.txt | 8 +-- src/commands/b_list.cpp | 4 +- src/manage/ManageResults.cpp | 2 +- src/reports/DatasetsConsole.cpp | 56 ++++++++++++++++ src/reports/DatasetsConsole.h | 32 +++++++++ src/reports/DatasetsConsole.hpp | 78 ---------------------- src/results/ResultsDatasetConsole.cpp | 79 ++++++++++++++++++++++ src/results/ResultsDatasetConsole.h | 27 ++++++++ src/results/ResultsDatasetConsole.hpp | 95 --------------------------- 9 files changed, 201 insertions(+), 180 deletions(-) create mode 100644 src/reports/DatasetsConsole.cpp create mode 100644 src/reports/DatasetsConsole.h delete mode 100644 src/reports/DatasetsConsole.hpp create mode 100644 src/results/ResultsDatasetConsole.cpp create mode 100644 src/results/ResultsDatasetConsole.h delete mode 100644 src/results/ResultsDatasetConsole.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1d39579..cf111dd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,8 +40,8 @@ target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet add_executable(b_list commands/b_list.cpp ${list_sources} common/Datasets.cpp common/Dataset.cpp main/Models.cpp - reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp - results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp + reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp + results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp ) target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}") @@ -61,7 +61,7 @@ list(TRANSFORM manage_sources PREPEND manage/) add_executable( b_manage commands/b_manage.cpp ${manage_sources} common/Datasets.cpp common/Dataset.cpp - reports/ReportConsole.cpp reports/ReportExcel.cpp reports/ReportExcelCompared.cpp reports/ReportBase.cpp reports/ExcelFile.cpp - results/Result.cpp + reports/ReportConsole.cpp reports/ReportExcel.cpp reports/ReportExcelCompared.cpp reports/ReportBase.cpp reports/ExcelFile.cpp reports/DatasetsConsole.cpp + results/Result.cpp results/ResultsDatasetConsole.cpp results/ResultsDataset.cpp ) target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) diff --git a/src/commands/b_list.cpp b/src/commands/b_list.cpp index 5c49ce8..38dc199 100644 --- a/src/commands/b_list.cpp +++ b/src/commands/b_list.cpp @@ -9,10 +9,10 @@ #include "common/Colors.h" #include "common/Datasets.h" #include "reports/DatasetsExcel.h" -#include "reports/DatasetsConsole.hpp" +#include "reports/DatasetsConsole.h" #include "results/ResultsDataset.h" #include "results/ResultsDatasetExcel.h" -#include "results/ResultsDatasetConsole.hpp" +#include "results/ResultsDatasetConsole.h" #include "config.h" diff --git a/src/manage/ManageResults.cpp b/src/manage/ManageResults.cpp index 9aa4b06..d09127d 100644 --- a/src/manage/ManageResults.cpp +++ b/src/manage/ManageResults.cpp @@ -5,7 +5,7 @@ #include "common/Paths.h" #include "CommandParser.h" #include "ManageResults.h" -// #include "reports/DatasetsConsole.hpp" +//#include "reports/DatasetsConsole.h" #include "reports/ReportConsole.h" #include "reports/ReportExcel.h" #include "reports/ReportExcelCompared.h" diff --git a/src/reports/DatasetsConsole.cpp b/src/reports/DatasetsConsole.cpp new file mode 100644 index 0000000..e87b2a6 --- /dev/null +++ b/src/reports/DatasetsConsole.cpp @@ -0,0 +1,56 @@ +#include "common/Colors.h" +#include "common/Datasets.h" +#include "common/Paths.h" +#include "DatasetsConsole.h" + +namespace platform { + const int DatasetsConsole::BALANCE_LENGTH = 75; + std::string DatasetsConsole::outputBalance(const std::string& balance) + { + auto temp = std::string(balance); + while (temp.size() > DatasetsConsole::BALANCE_LENGTH - 1) { + auto part = temp.substr(0, DatasetsConsole::BALANCE_LENGTH); + output << part << std::endl; + output << setw(52) << " "; + temp = temp.substr(DatasetsConsole::BALANCE_LENGTH); + } + return temp; + } + void DatasetsConsole::list_datasets() + { + auto datasets = platform::Datasets(false, platform::Paths::datasets()); + locale mylocale(std::cout.getloc(), new separated_datasets); + locale::global(mylocale); + output.imbue(mylocale); + output << Colors::GREEN() << " # Dataset Sampl. Feat. Cls Balance" << std::endl; + std::string balanceBars = std::string(DatasetsConsole::BALANCE_LENGTH, '='); + output << "=== ============================== ====== ===== === " << balanceBars << std::endl; + int num = 0; + for (const auto& dataset : datasets.getNames()) { + auto color = num % 2 ? Colors::CYAN() : Colors::BLUE(); + output << color << setw(3) << right << num++ << " "; + output << setw(30) << left << dataset << " "; + datasets.loadDataset(dataset); + auto nSamples = datasets.getNSamples(dataset); + output << setw(6) << right << nSamples << " "; + output << setw(5) << right << datasets.getFeatures(dataset).size() << " "; + output << setw(3) << right << datasets.getNClasses(dataset) << " "; + std::stringstream oss; + std::string sep = ""; + for (auto number : datasets.getClassesCounts(dataset)) { + oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; + sep = " / "; + } + auto balance = outputBalance(oss.str()); + output << balance << std::endl; + // Store data for Excel report + data[dataset] = json::object(); + data[dataset]["samples"] = nSamples; + data[dataset]["features"] = datasets.getFeatures(dataset).size(); + data[dataset]["classes"] = datasets.getNClasses(dataset); + data[dataset]["balance"] = oss.str(); + } + numLines = num + 2; + } +} + diff --git a/src/reports/DatasetsConsole.h b/src/reports/DatasetsConsole.h new file mode 100644 index 0000000..69aa9c6 --- /dev/null +++ b/src/reports/DatasetsConsole.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include + +namespace platform { + using json = nlohmann::json; + + struct separated_datasets : numpunct { + char do_decimal_point() const { return ','; } + char do_thousands_sep() const { return '.'; } + std::string do_grouping() const { return "\03"; } + }; + + class DatasetsConsole { + public: + static const int BALANCE_LENGTH; + DatasetsConsole() = default; + ~DatasetsConsole() = default; + std::string getOutput() const { return output.str(); } + int getNumLines() const { return numLines; } + json& getData() { return data; } + std::string outputBalance(const std::string& balance); + void list_datasets(); + private: + std::stringstream output; + json data; + int numLines = 0; + }; +} + diff --git a/src/reports/DatasetsConsole.hpp b/src/reports/DatasetsConsole.hpp deleted file mode 100644 index c1ec0e8..0000000 --- a/src/reports/DatasetsConsole.hpp +++ /dev/null @@ -1,78 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include "common/Colors.h" -#include "common/Datasets.h" - -namespace platform { - const int BALANCE_LENGTH = 75; - struct separated_datasets : numpunct { - char do_decimal_point() const { return ','; } - char do_thousands_sep() const { return '.'; } - std::string do_grouping() const { return "\03"; } - }; - - class DatasetsConsole { - public: - DatasetsConsole() = default; - ~DatasetsConsole() = default; - std::string getOutput() const { return output.str(); } - int getNumLines() const { return numLines; } - json& getData() { return data; } - std::string outputBalance(const std::string& balance) - { - auto temp = std::string(balance); - while (temp.size() > BALANCE_LENGTH - 1) { - auto part = temp.substr(0, BALANCE_LENGTH); - output << part << std::endl; - output << setw(52) << " "; - temp = temp.substr(BALANCE_LENGTH); - } - return temp; - } - void list_datasets() - { - auto datasets = platform::Datasets(false, platform::Paths::datasets()); - locale mylocale(std::cout.getloc(), new separated_datasets); - locale::global(mylocale); - output.imbue(mylocale); - output << Colors::GREEN() << " # Dataset Sampl. Feat. Cls Balance" << std::endl; - std::string balanceBars = std::string(BALANCE_LENGTH, '='); - output << "=== ============================== ====== ===== === " << balanceBars << std::endl; - int num = 0; - for (const auto& dataset : datasets.getNames()) { - auto color = num % 2 ? Colors::CYAN() : Colors::BLUE(); - output << color << setw(3) << right << num++ << " "; - output << setw(30) << left << dataset << " "; - datasets.loadDataset(dataset); - auto nSamples = datasets.getNSamples(dataset); - output << setw(6) << right << nSamples << " "; - output << setw(5) << right << datasets.getFeatures(dataset).size() << " "; - output << setw(3) << right << datasets.getNClasses(dataset) << " "; - std::stringstream oss; - std::string sep = ""; - for (auto number : datasets.getClassesCounts(dataset)) { - oss << sep << std::setprecision(2) << fixed << (float)number / nSamples * 100.0 << "% (" << number << ")"; - sep = " / "; - } - auto balance = outputBalance(oss.str()); - output << balance << std::endl; - // Store data for Excel report - data[dataset] = json::object(); - data[dataset]["samples"] = nSamples; - data[dataset]["features"] = datasets.getFeatures(dataset).size(); - data[dataset]["classes"] = datasets.getNClasses(dataset); - data[dataset]["balance"] = oss.str(); - } - numLines = num + 2; - } - private: - std::stringstream output; - json data; - int numLines = 0; - }; -} - diff --git a/src/results/ResultsDatasetConsole.cpp b/src/results/ResultsDatasetConsole.cpp new file mode 100644 index 0000000..34dd8f2 --- /dev/null +++ b/src/results/ResultsDatasetConsole.cpp @@ -0,0 +1,79 @@ + +#include "common/Colors.h" +#include "results/ResultsDataset.h" +#include "ResultsDatasetConsole.h" +namespace platform { + void ResultsDatasetsConsole::list_results(const std::string& dataset, const std::string& score, const std::string& model) + { + output.str(""); + auto results = platform::ResultsDataset(dataset, model, score); + results.load(); + results.sortModel(); + if (results.empty()) { + output << Colors::RED() << "No results found for dataset " << dataset << " and model " << model << Colors::RESET() << std::endl; + return; + } + int maxModel = results.maxModelSize(); + int maxHyper = results.maxHyperSize(); + double maxResult = results.maxResultScore(); + // Build data for the Report + json data = json::object(); + data["results"] = json::array(); + data["max_models"] = json::object(); // Max score per model + for (const auto& result : results) { + auto results = result.getData(); + if (!data["max_models"].contains(result.getModel())) { + data["max_models"][result.getModel()] = 0; + } + for (const auto& item : results["results"]) { + if (item["dataset"] == dataset) { + + // Store data for Excel report + json res = json::object(); + res["date"] = result.getDate(); + res["time"] = result.getTime(); + res["model"] = result.getModel(); + res["score"] = item["score"].get(); + res["hyperparameters"] = item["hyperparameters"].dump(); + data["results"].push_back(res); + if (item["score"].get() > data["max_models"][result.getModel()]) { + data["max_models"][result.getModel()] = item["score"].get(); + } + break; + } + } + } + // + // List the results + // + output << Colors::GREEN() << "Results of dataset " << dataset << " - for " << model << " model" << std::endl; + output << "There are " << results.size() << " results" << std::endl; + output << Colors::GREEN() << " # " << std::setw(maxModel + 1) << std::left << "Model" << "Date Time Score Hyperparameters" << std::endl; + output << "=== " << std::string(maxModel, '=') << " ========== ======== =========== " << std::string(maxHyper, '=') << std::endl; + numLines = 4; + auto i = 0; + for (const auto& item : data["results"]) { + auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN(); + auto score = item["score"].get(); + color = score == data["max_models"][item["model"].get()] ? Colors::YELLOW() : color; + color = score == maxResult ? Colors::RED() : color; + output << color << std::setw(3) << std::fixed << std::right << i++ << " "; + output << std::setw(maxModel) << std::left << item["model"].get() << " "; + output << color << item["date"].get() << " "; + output << color << item["time"].get() << " "; + output << std::setw(11) << std::setprecision(9) << std::fixed << score << " "; + output << item["hyperparameters"].get() << std::endl; + numLines++; + } + data["dataset"] = dataset; + data["score"] = score; + data["model"] = model; + data["lengths"]["maxModel"] = maxModel; + data["lengths"]["maxHyper"] = maxHyper; + data["maxResult"] = maxResult; + } +} + + + + diff --git a/src/results/ResultsDatasetConsole.h b/src/results/ResultsDatasetConsole.h new file mode 100644 index 0000000..3a87838 --- /dev/null +++ b/src/results/ResultsDatasetConsole.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include +#include +#include "results/ResultsDataset.h" + +namespace platform { + class ResultsDatasetsConsole { + public: + ResultsDatasetsConsole() = default; + ~ResultsDatasetsConsole() = default; + std::string getOutput() const { return output.str(); } + int getNumLines() const { return numLines; } + json getData() { return data; } + void list_results(const std::string& dataset, const std::string& score, const std::string& model); + private: + std::stringstream output; + json data; + int numLines = 0; + }; +} + + + + diff --git a/src/results/ResultsDatasetConsole.hpp b/src/results/ResultsDatasetConsole.hpp deleted file mode 100644 index 3d9fa91..0000000 --- a/src/results/ResultsDatasetConsole.hpp +++ /dev/null @@ -1,95 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include "common/Colors.h" -#include "results/ResultsDataset.h" - -namespace platform { - class ResultsDatasetsConsole { - public: - ResultsDatasetsConsole() = default; - ~ResultsDatasetsConsole() = default; - std::string getOutput() const { return output.str(); } - int getNumLines() const { return numLines; } - json& getData() { return data; } - void list_results(const std::string& dataset, const std::string& score, const std::string& model) - { - auto results = platform::ResultsDataset(dataset, model, score); - results.load(); - results.sortModel(); - if (results.empty()) { - std::cerr << Colors::RED() << "No results found for dataset " << dataset << " and model " << model << Colors::RESET() << std::endl; - exit(1); - } - int maxModel = results.maxModelSize(); - int maxHyper = results.maxHyperSize(); - double maxResult = results.maxResultScore(); - // Build data for the Report - json data = json::object(); - data["results"] = json::array(); - data["max_models"] = json::object(); // Max score per model - for (const auto& result : results) { - auto results = result.getData(); - if (!data["max_models"].contains(result.getModel())) { - data["max_models"][result.getModel()] = 0; - } - for (const auto& item : results["results"]) { - if (item["dataset"] == dataset) { - - // Store data for Excel report - json res = json::object(); - res["date"] = result.getDate(); - res["time"] = result.getTime(); - res["model"] = result.getModel(); - res["score"] = item["score"].get(); - res["hyperparameters"] = item["hyperparameters"].dump(); - data["results"].push_back(res); - if (item["score"].get() > data["max_models"][result.getModel()]) { - data["max_models"][result.getModel()] = item["score"].get(); - } - break; - } - } - } - // - // List the results - // - output << Colors::GREEN() << "Results of dataset " << dataset << " - for " << model << " model" << std::endl; - output << "There are " << results.size() << " results" << std::endl; - output << Colors::GREEN() << " # " << std::setw(maxModel + 1) << std::left << "Model" << "Date Time Score Hyperparameters" << std::endl; - output << "=== " << std::string(maxModel, '=') << " ========== ======== =========== " << std::string(maxHyper, '=') << std::endl; - numLines = 4; - auto i = 0; - for (const auto& item : data["results"]) { - auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN(); - auto score = item["score"].get(); - color = score == data["max_models"][item["model"].get()] ? Colors::YELLOW() : color; - color = score == maxResult ? Colors::RED() : color; - output << color << std::setw(3) << std::fixed << std::right << i++ << " "; - output << std::setw(maxModel) << std::left << item["model"].get() << " "; - output << color << item["date"].get() << " "; - output << color << item["time"].get() << " "; - output << std::setw(11) << std::setprecision(9) << std::fixed << score << " "; - output << item["hyperparameters"].get() << std::endl; - numLines++; - } - data["dataset"] = dataset; - data["score"] = score; - data["model"] = model; - data["lengths"]["maxModel"] = maxModel; - data["lengths"]["maxHyper"] = maxHyper; - data["maxResult"] = maxResult; - } - private: - std::stringstream output; - json data; - int numLines = 0; - }; -} - - - -