From 38423048bd20e51290d346d33870dd1ed36e09aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 19 Oct 2023 18:12:55 +0200 Subject: [PATCH] Add excel to best report of model --- src/Platform/BestResults.cc | 71 ++++++++++++++++++++------- src/Platform/BestResults.h | 9 +++- src/Platform/BestResultsExcel.cc | 83 +++++++++++++++++++++++++++++--- src/Platform/BestResultsExcel.h | 6 ++- src/Platform/b_best.cc | 44 +++++++---------- 5 files changed, 159 insertions(+), 54 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 94c08a3..27c4fd1 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -27,7 +27,6 @@ std::string ftime_to_string(TP tp) return buffer.str(); } namespace platform { - string BestResults::build() { auto files = loadResultFiles(); @@ -65,12 +64,10 @@ namespace platform { file.close(); return bestFileName; } - string BestResults::bestResultFile() { return "best_results_" + score + "_" + model + ".json"; } - pair getModelScore(string name) { // results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json @@ -82,7 +79,6 @@ namespace platform { string model = name.substr(pos2 + 1, pos - pos2 - 1); return { model, score }; } - vector BestResults::loadResultFiles() { vector files; @@ -99,7 +95,6 @@ namespace platform { } return files; } - json BestResults::loadFile(const string& fileName) { ifstream resultData(fileName); @@ -136,7 +131,6 @@ namespace platform { } return datasets; } - void BestResults::buildAll() { auto models = getModels(); @@ -147,8 +141,7 @@ namespace platform { } model = "any"; } - - void BestResults::reportSingle() + void BestResults::listFile() { string bestFileName = path + bestResultFile(); if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { @@ -162,22 +155,29 @@ namespace platform { auto data = loadFile(bestFileName); auto datasets = getDatasets(data); int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), 
[](const string& a, const string& b) { return a.size() < b.size(); })).size(); - cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl; - cout << "--------------------------------------------------------" << endl; + stringstream oss; + oss << Colors::GREEN() << "Best results for " << model << " as of " << date << endl; + cout << oss.str(); + cout << string(oss.str().size() - 8, '-') << endl; cout << Colors::GREEN() << " # " << setw(maxDatasetName + 1) << left << string("Dataset") << "Score File Hyperparameters" << endl; cout << "=== " << string(maxDatasetName, '=') << " =========== ================================================================== ================================================= " << endl; auto i = 0; bool odd = true; + double total = 0; for (auto const& item : data.items()) { auto color = odd ? Colors::BLUE() : Colors::CYAN(); + double value = item.value().at(0).get(); cout << color << setw(3) << fixed << right << i++ << " "; cout << setw(maxDatasetName) << left << item.key() << " "; - cout << setw(11) << setprecision(9) << fixed << item.value().at(0).get() << " "; + cout << setw(11) << setprecision(9) << fixed << value << " "; cout << setw(66) << item.value().at(2).get() << " "; cout << item.value().at(1) << " "; cout << endl; + total += value; odd = !odd; } + cout << Colors::GREEN() << "=== " << string(maxDatasetName, '=') << " ===========" << endl; + cout << setw(5 + maxDatasetName) << "Total.................. 
" << setw(11) << setprecision(8) << fixed << total << endl; } json BestResults::buildTableResults(vector models) { @@ -202,11 +202,12 @@ namespace platform { table["dateTable"] = ftime_to_string(maxDate); return table; } - void BestResults::printTableResults(vector models, json table) { - cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get() << endl; - cout << "------------------------------------------------" << endl; + stringstream oss; + oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get() << endl; + cout << oss.str(); + cout << string(oss.str().size() - 8, '-') << endl; cout << Colors::GREEN() << " # " << setw(maxDatasetName + 1) << left << string("Dataset"); for (const auto& model : models) { cout << setw(maxModelName) << left << model << " "; @@ -271,6 +272,19 @@ namespace platform { } cout << endl; } + void BestResults::reportSingle(bool excel) + { + listFile(); + if (excel) { + auto models = getModels(); + // Build the table of results + json table = buildTableResults(models); + vector datasets = getDatasets(table.begin().value()); + BestResultsExcel excel(score, datasets); + excel.reportSingle(model, path + bestResultFile()); + messageExcelFile(excel.getFileName()); + } + } void BestResults::reportAll(bool excel) { auto models = getModels(); @@ -292,9 +306,32 @@ namespace platform { ranksModels = stats.getRanks(); } if (excel) { - BestResultsExcel excel(score, models, datasets, table, ranksModels, friedman, significance); - excel.build(); - cout << Colors::YELLOW() << "** Excel file generated: " << excel.getFileName() << Colors::RESET() << endl; + BestResultsExcel excel(score, datasets); + excel.reportAll(models, table, ranksModels, friedman, significance); + if (friedman) { + int idx = -1; + double min = 2000; + // Find out the control model + auto totals = vector(models.size(), 0.0); + for (const auto& dataset : datasets) { + for (int i = 0; i < models.size(); 
++i) { + totals[i] += ranksModels[dataset][models[i]]; + } + } + for (int i = 0; i < models.size(); ++i) { + if (totals[i] < min) { + min = totals[i]; + idx = i; + } + } + model = models.at(idx); + excel.reportSingle(model, path + bestResultFile()); + } + messageExcelFile(excel.getFileName()); } } + void BestResults::messageExcelFile(const string& fileName) + { + cout << Colors::YELLOW() << "** Excel file generated: " << fileName << Colors::RESET() << endl; + } } \ No newline at end of file diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h index f830e01..2406d33 100644 --- a/src/Platform/BestResults.h +++ b/src/Platform/BestResults.h @@ -7,19 +7,24 @@ using json = nlohmann::json; namespace platform { class BestResults { public: - explicit BestResults(const string& path, const string& score, const string& model, bool friedman, double significance = 0.05) : path(path), score(score), model(model), friedman(friedman), significance(significance) {} + explicit BestResults(const string& path, const string& score, const string& model, bool friedman, double significance = 0.05) + : path(path), score(score), model(model), friedman(friedman), significance(significance) + { + } string build(); - void reportSingle(); + void reportSingle(bool excel); void reportAll(bool excel); void buildAll(); private: vector getModels(); vector getDatasets(json table); vector loadResultFiles(); + void messageExcelFile(const string& fileName); json buildTableResults(vector models); void printTableResults(vector models, json table); string bestResultFile(); json loadFile(const string& fileName); + void listFile(); string path; string score; string model; diff --git a/src/Platform/BestResultsExcel.cc b/src/Platform/BestResultsExcel.cc index 39675d9..fb711ce 100644 --- a/src/Platform/BestResultsExcel.cc +++ b/src/Platform/BestResultsExcel.cc @@ -4,18 +4,87 @@ #include "Statistics.h" namespace platform { - BestResultsExcel::BestResultsExcel(const string& score, const vector& 
models, const vector& datasets, const json& table, const map>& ranksModels, bool friedman, double significance) : - score(score), models(models), datasets(datasets), table(table), ranksModels(ranksModels), friedman(friedman), significance(significance) + BestResultsExcel::BestResultsExcel(const string& score, const vector& datasets) : score(score), datasets(datasets) { workbook = workbook_new((Paths::excel() + fileName).c_str()); - worksheet = workbook_add_worksheet(workbook, "Best Results"); setProperties("Best Results"); - createFormats(); - int maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); - modelNameSize = max(modelNameSize, maxModelName); int maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); datasetNameSize = max(datasetNameSize, maxDatasetName); + createFormats(); + } + void BestResultsExcel::reportAll(const vector& models, const json& table, const map>& ranks, bool friedman, double significance) + { + this->table = table; + this->models = models; + ranksModels = ranks; + this->friedman = friedman; + this->significance = significance; + worksheet = workbook_add_worksheet(workbook, "Best Results"); + int maxModelName = (*max_element(models.begin(), models.end(), [](const string& a, const string& b) { return a.size() < b.size(); })).size(); + modelNameSize = max(modelNameSize, maxModelName); formatColumns(); + build(); + } + void BestResultsExcel::reportSingle(const string& model, const string& fileName) + { + worksheet = workbook_add_worksheet(workbook, "Report"); + if (FILE* fileTest = fopen(fileName.c_str(), "r")) { + fclose(fileTest); + } else { + cerr << "File " << fileName << " doesn't exist." 
<< endl; + exit(1); + } + json data; + ifstream resultData(fileName); + if (resultData.is_open()) { + data = json::parse(resultData); + } else { + throw invalid_argument("Unable to open result file. [" + fileName + "]"); + } + string title = "Best results for " + model; + worksheet_merge_range(worksheet, 0, 0, 0, 4, title.c_str(), styles["headerFirst"]); + // Body header + row = 3; + int col = 1; + writeString(row, 0, "Nº", "bodyHeader"); + writeString(row, 1, "Dataset", "bodyHeader"); + writeString(row, 2, "Score", "bodyHeader"); + writeString(row, 3, "File", "bodyHeader"); + writeString(row, 4, "Hyperparameters", "bodyHeader"); + auto i = 0; + string hyperparameters; + int hypSize = 0; + for (auto const& item : data.items()) { + row++; + writeInt(row, 0, i++, "ints"); + writeString(row, 1, item.key().c_str(), "text"); + writeDouble(row, 2, item.value().at(0).get(), "result"); + writeString(row, 3, item.value().at(2).get(), "text"); + try { + hyperparameters = item.value().at(1).get(); + } + catch (const exception& err) { + stringstream oss; + oss << item.value().at(1); + hyperparameters = oss.str(); + } + if (hyperparameters.size() > hypSize) { + hypSize = hyperparameters.size(); + } + writeString(row, 4, hyperparameters, "text"); + } + row++; + // Set Totals + writeString(row, 1, "Total", "bodyHeader"); + stringstream oss; + oss << "=sum(indirect(address(5, 3)):indirect(address(" << row << ", 3)))"; + worksheet_write_formula(worksheet, row, 2, oss.str().c_str(), styles["bodyHeader_odd"]); + // Set format + worksheet_freeze_panes(worksheet, 4, 2); + vector columns_sizes = { 5, datasetNameSize, modelNameSize, 66, hypSize + 1 }; + for (int i = 0; i < columns_sizes.size(); ++i) { + worksheet_set_column(worksheet, i, i, columns_sizes.at(i), NULL); + } } BestResultsExcel::~BestResultsExcel() { @@ -99,7 +168,7 @@ namespace platform { int col = 1; for (const auto& model : models) { stringstream oss; - oss << "=sum(indirect(address(" << 5 << "," << col + 2 << 
")):indirect(address(" << row - 1 << "," << col + 2 << ")))/" << datasets.size(); + oss << "=sum(indirect(address(5, " << col + 2 << ")):indirect(address(" << row - 1 << "," << col + 2 << ")))/" << datasets.size(); worksheet_write_formula(worksheet, row, ++col, oss.str().c_str(), styles["bodyHeader_odd"]); } } diff --git a/src/Platform/BestResultsExcel.h b/src/Platform/BestResultsExcel.h index c31c61f..cba334c 100644 --- a/src/Platform/BestResultsExcel.h +++ b/src/Platform/BestResultsExcel.h @@ -12,11 +12,13 @@ namespace platform { class BestResultsExcel : ExcelFile { public: - BestResultsExcel(const string& score, const vector& models, const vector& datasets, const json& table, const map>& ranks, bool friedman, double significance); + BestResultsExcel(const string& score, const vector& datasets); ~BestResultsExcel(); - void build(); + void reportAll(const vector& models, const json& table, const map>& ranks, bool friedman, double significance); + void reportSingle(const string& model, const string& fileName); string getFileName(); private: + void build(); void header(bool ranks); void body(bool ranks); void footer(bool ranks); diff --git a/src/Platform/b_best.cc b/src/Platform/b_best.cc index 10aed7e..b23b3db 100644 --- a/src/Platform/b_best.cc +++ b/src/Platform/b_best.cc @@ -29,15 +29,24 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) catch (...) 
{ throw runtime_error("Number of folds must be an decimal number"); }}); + return program; +} + +int main(int argc, char** argv) +{ + auto program = manageArguments(argc, argv); + string model, score; + bool build, report, friedman, excel; + double level; try { program.parse_args(argc, argv); - auto model = program.get("model"); - auto score = program.get("score"); - auto build = program.get("build"); - auto report = program.get("report"); - auto friedman = program.get("friedman"); - auto excel = program.get("excel"); - auto level = program.get("level"); + model = program.get("model"); + score = program.get("score"); + build = program.get("build"); + report = program.get("report"); + friedman = program.get("friedman"); + excel = program.get("excel"); + level = program.get("level"); if (model == "" || score == "") { throw runtime_error("Model and score name must be supplied"); } @@ -46,11 +55,6 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) cerr << program; exit(1); } - if (excel && model != "any") { - cerr << "Excel ourput can only be used with all models" << endl; - cerr << program; - exit(1); - } if (!report && !build) { cerr << "Either build, report or both, have to be selected to do anything!" 
<< endl; cerr << program; @@ -62,19 +66,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) cerr << program; exit(1); } - return program; -} - -int main(int argc, char** argv) -{ - auto program = manageArguments(argc, argv); - auto model = program.get("model"); - auto score = program.get("score"); - auto build = program.get("build"); - auto report = program.get("report"); - auto friedman = program.get("friedman"); - auto excel = program.get("excel"); - auto level = program.get("level"); + // Generate report auto results = platform::BestResults(platform::Paths::results(), score, model, friedman, level); if (build) { if (model == "any") { @@ -88,7 +80,7 @@ int main(int argc, char** argv) if (model == "any") { results.reportAll(excel); } else { - results.reportSingle(); + results.reportSingle(excel); } } return 0;