#include #include #include #include #include #include #include "common/Colors.h" #include "common/CLocale.h" #include "common/Paths.h" #include "results/Result.h" #include "BestResultsExcel.h" #include "best/Statistics.h" #include "BestResults.h" namespace fs = std::filesystem; // function ftime_to_std::string, Code taken from // https://stackoverflow.com/a/58237530/1389271 template std::string ftime_to_string(TP tp) { auto sctp = std::chrono::time_point_cast(tp - TP::clock::now() + std::chrono::system_clock::now()); auto tt = std::chrono::system_clock::to_time_t(sctp); std::tm* gmt = std::gmtime(&tt); std::stringstream buffer; buffer << std::put_time(gmt, "%Y-%m-%d %H:%M"); return buffer.str(); } namespace platform { std::string BestResults::build() { auto files = loadResultFiles(); if (files.size() == 0) { std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl; exit(1); } json bests; for (const auto& file : files) { auto result = Result(); result.load(path, file); auto data = result.getJson(); for (auto const& item : data.at("results")) { bool update = true; auto datasetName = item.at("dataset").get(); if (dataset != "any" && dataset != datasetName) { continue; } if (bests.contains(datasetName)) { if (item.at("score").get() < bests[datasetName].at(0).get()) { update = false; } } if (update) { bests[datasetName] = { item.at("score").get(), item.at("hyperparameters"), file, item.at("score_std").get() }; } } } if (bests.empty()) { std::cerr << Colors::MAGENTA() << "No results found for model " << model << " and score " << score << Colors::RESET() << std::endl; exit(1); } std::string bestFileName = path + Paths::bestResultsFile(score, model); std::ofstream file(bestFileName); file << bests; file.close(); return bestFileName; } std::pair getModelScore(std::string name) { // results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json auto pos = name.find("_"); auto pos2 = name.find("_", pos + 1); std::string score = name.substr(pos + 1, pos2 - pos - 1); pos = name.find("_", pos2 + 1); std::string model = name.substr(pos2 + 1, pos - pos2 - 1); return { model, score }; } std::vector BestResults::loadResultFiles() { std::vector files; using std::filesystem::directory_iterator; std::string fileModel, fileScore; for (const auto& file : directory_iterator(path)) { auto fileName = file.path().filename().string(); if (fileName.find(".json") != std::string::npos && fileName.find("results_") == 0) { tie(fileModel, fileScore) = getModelScore(fileName); if (score == fileScore && (model == fileModel || model == "any")) { files.push_back(fileName); } } } std::sort(files.begin(), files.end()); return files; } json BestResults::loadFile(const std::string& fileName) { std::ifstream resultData(fileName); if (resultData.is_open()) { json data = json::parse(resultData); return data; } throw std::invalid_argument("Unable to open result file. [" + fileName + "]"); } std::vector BestResults::getModels() { std::set models; std::vector result; auto files = loadResultFiles(); if (files.size() == 0) { std::cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << std::endl; exit(1); } std::string fileModel, fileScore; for (const auto& file : files) { // extract the model from the file name tie(fileModel, fileScore) = getModelScore(file); // add the model to the std::vector of models models.insert(fileModel); } result = std::vector(models.begin(), models.end()); maxModelName = (*max_element(result.begin(), result.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); maxModelName = std::max(12, maxModelName); return result; } std::vector BestResults::getDatasets(json table) { std::vector datasets; for (const auto& dataset_ : table.items()) { datasets.push_back(dataset_.key()); } maxDatasetName = (*max_element(datasets.begin(), datasets.end(), [](const std::string& a, const std::string& b) { return a.size() < b.size(); })).size(); maxDatasetName = std::max(7, maxDatasetName); return datasets; } void BestResults::buildAll() { auto models = getModels(); std::cout << "Building best results for model: "; for (const auto& model : models) { this->model = model; std::cout << model << ", "; build(); } std::cout << "end." << std::endl << std::endl; model = "any"; } void BestResults::listFile() { std::string bestFileName = path + Paths::bestResultsFile(score, model); if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { fclose(fileTest); } else { std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl; exit(1); } auto temp = ConfigLocale(); auto date = ftime_to_string(std::filesystem::last_write_time(bestFileName)); auto data = loadFile(bestFileName); auto datasets = getDatasets(data); int maxFileName = 0; int maxHyper = 15; for (auto const& item : data.items()) { maxHyper = std::max(maxHyper, (int)item.value().at(1).dump().size()); maxFileName = std::max(maxFileName, (int)item.value().at(2).get().size()); } std::stringstream oss; oss << Colors::GREEN() << "Best results for " << model << " as of " << date << std::endl; std::cout << oss.str(); std::cout << std::string(oss.str().size() - 8, '-') << std::endl; std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << "Dataset" << "Score " << std::setw(maxFileName) << "File" << " Hyperparameters" << std::endl; std::cout << "=== " << std::string(maxDatasetName, '=') << " =========== " << std::string(maxFileName, '=') << " " << std::string(maxHyper, '=') << std::endl; auto i = 0; double total = 0; for (auto const& item : data.items()) { auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN(); double value = item.value().at(0).get(); std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " "; std::cout << std::setw(maxDatasetName) << std::left << item.key() << " "; std::cout << std::setw(11) << std::setprecision(9) << std::fixed << value << " "; std::cout << std::setw(maxFileName) << item.value().at(2).get() << " "; std::cout << item.value().at(1) << " "; std::cout << std::endl; total += value; } std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " ===========" << std::endl; std::cout << Colors::GREEN() << " Total" << std::string(maxDatasetName - 5, '.') << " " << std::setw(11) << std::setprecision(8) << std::fixed << total << std::endl; } json BestResults::buildTableResults(std::vector models) { json table; auto maxDate = std::filesystem::file_time_type::max(); for (const auto& model : models) { this->model = model; std::string bestFileName = path + Paths::bestResultsFile(score, model); if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { fclose(fileTest); } else { std::cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << std::endl; exit(1); } auto dateWrite = std::filesystem::last_write_time(bestFileName); if (dateWrite < maxDate) { maxDate = dateWrite; } auto data = loadFile(bestFileName); table[model] = data; } table["dateTable"] = ftime_to_string(maxDate); return table; } double compute_std(std::vector values, double mean) { // Compute standard devation of the values double sum = 0.0; for (const auto& value : values) { sum += std::pow(value - mean, 2); } double variance = sum / values.size(); return std::sqrt(variance); } void BestResults::printTableResults(std::vector models, json table, bool tex) { std::stringstream oss; oss << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get() << std::endl; std::FILE* output_tex; std::cout << oss.str(); std::cout << std::string(oss.str().size() - 8, '-') << std::endl; std::cout << Colors::GREEN() << " # " << std::setw(maxDatasetName + 1) << std::left << std::string("Dataset"); if (tex) { auto file_name = Paths::tex_output(); output_tex = fopen(file_name.c_str(), "w"); if (output_tex == NULL) { std::cerr << "Error opening file "<< file_name << std::endl; exit(1); } fprintf(output_tex, "%% This file has been generated by the platform program\n"); fprintf(output_tex, "%% Date: %s\n", table.at("dateTable").get().c_str()); fprintf(output_tex, "%%\n"); fprintf(output_tex, "%% Table of results\n"); fprintf(output_tex, "%%\n"); fprintf(output_tex, "\\begin{table}[htbp] \n"); fprintf(output_tex, "\\centering \n"); fprintf(output_tex, "\\tiny \n"); fprintf(output_tex, "\\renewcommand{\\arraystretch }{1.2} \n"); fprintf(output_tex, "\\renewcommand{\\tabcolsep }{0.07cm} \n"); fprintf(output_tex, "\\caption{Accuracy results(mean ± std) for all the algorithms and datasets} \n"); fprintf(output_tex, "\\label{tab:results_accuracy}\n"); fprintf(output_tex, "\\begin{tabular} {{r%s}}\n", std::string(models.size(), 'c').c_str()); fprintf(output_tex, "\\hline \n"); fprintf(output_tex, "Id"); } for (const auto& model : models) { std::cout << std::setw(maxModelName) << std::left << model << " "; if (tex) { fprintf(output_tex, "& %s ", model.c_str()); } } if (tex) { fprintf(output_tex, "\\\\ \n"); fprintf(output_tex, "\\hline \n"); } std::cout << std::endl; std::cout << "=== " << std::string(maxDatasetName, '=') << " "; for (const auto& model : models) { std::cout << std::string(maxModelName, '=') << " "; } std::cout << std::endl; auto i = 0; std::map> totals; int nDatasets = table.begin().value().size(); auto datasets = getDatasets(table.begin().value()); for (auto const& dataset_ : datasets) { auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN(); std::cout << color << std::setw(3) << std::fixed << std::right << i++ << " "; std::cout << std::setw(maxDatasetName) << std::left << dataset_ << " "; double maxValue = 0; // Find out the max value for this dataset for (const auto& model : models) { double value; try { value = table[model].at(dataset_).at(0).get(); } catch (nlohmann::json_abi_v3_11_3::detail::out_of_range err) { value = -1.0; } if (value > maxValue) { maxValue = value; } } if (tex) { fprintf(output_tex, "%d ", i); } // Print the row with red colors on max values for (const auto& model : models) { std::string efectiveColor = color; double value; try { value = table[model].at(dataset_).at(0).get(); } catch (nlohmann::json_abi_v3_11_3::detail::out_of_range err) { value = -1.0; } if (value == maxValue) { efectiveColor = Colors::RED(); } if (value == -1) { std::cout << Colors::YELLOW() << std::setw(maxModelName) << std::right << "N/A" << " "; } else { totals[model].push_back(value); std::cout << efectiveColor << std::setw(maxModelName) << std::setprecision(maxModelName - 2) << std::fixed << value << " "; } if (tex) { auto std_value = table[model].at(dataset_).at(3).get(); const char* bold = value == maxValue ? "\\bfseries" : ""; fprintf(output_tex, "& %s %0.4f±%0.3f", bold, value, std_value); } } std::cout << std::endl; if (tex) { fprintf(output_tex, "\\\\\n"); } } std::cout << Colors::GREEN() << "=== " << std::string(maxDatasetName, '=') << " "; for (const auto& model : models) { std::cout << std::string(maxModelName, '=') << " "; } std::cout << std::endl; std::cout << Colors::GREEN() << " Average" << std::string(maxDatasetName - 7, '.') << " "; double max_value = 0.0; std::string best_model = ""; for (const auto& total : totals) { auto actual = std::reduce(total.second.begin(), total.second.end()); if (actual > max_value) { max_value = actual; best_model = total.first; } } if (tex) { fprintf(output_tex, "\\hline \n"); fprintf(output_tex, "Average "); } for (const auto& model : models) { std::string efectiveColor = model == best_model ? Colors::RED() : Colors::GREEN(); double value = std::reduce(totals[model].begin(), totals[model].end()) / nDatasets; double std_value = compute_std(totals[model], value); std::cout << efectiveColor << std::right << std::setw(maxModelName) << std::setprecision(maxModelName - 4) << std::fixed << value << " "; if (tex) { const char* bold = model == best_model ? "\\bfseries" : ""; fprintf(output_tex, "& %s %0.4f±%0.3f", bold, value, std_value); } } if (tex) { // Footer for TeX fprintf(output_tex, "\\ \n\\hline \n\\end{tabular}\n\\end{table}\n"); fclose(output_tex); } std::cout << std::endl; } void BestResults::reportSingle(bool excel) { listFile(); if (excel) { auto models = getModels(); // Build the table of results json table = buildTableResults(models); std::vector datasets = getDatasets(table.begin().value()); BestResultsExcel excel_report(score, datasets); excel_report.reportSingle(model, path + Paths::bestResultsFile(score, model)); messageOutputFile("Excel", excel_report.getFileName()); } } void BestResults::reportAll(bool excel, bool tex) { auto models = getModels(); // Build the table of results json table = buildTableResults(models); std::vector datasets = getDatasets(table.begin().value()); // Print the table of results printTableResults(models, table, tex); // Compute the Friedman test std::map> ranksModels; if (friedman) { Statistics stats(models, datasets, table, significance); auto result = stats.friedmanTest(); stats.postHocHolmTest(result); ranksModels = stats.getRanks(); } if (tex) { messageOutputFile("TeX", Paths::tex_output()); } if (excel) { BestResultsExcel excel(score, datasets); excel.reportAll(models, table, ranksModels, friedman, significance); if (friedman) { int idx = -1; double min = 2000; // Find out the control model auto totals = std::vector(models.size(), 0.0); for (const auto& dataset_ : datasets) { for (int i = 0; i < models.size(); ++i) { totals[i] += ranksModels[dataset_][models[i]]; } } for (int i = 0; i < models.size(); ++i) { if (totals[i] < min) { min = totals[i]; idx = i; } } model = models.at(idx); excel.reportSingle(model, path + Paths::bestResultsFile(score, model)); } messageOutputFile("Excel", excel.getFileName()); } } void BestResults::messageOutputFile(const std::string& title, const std::string& fileName) { std::cout << Colors::YELLOW() << "** " << std::setw(5) << std::left << title << " file generated: " << fileName << Colors::RESET() << std::endl; } }