From 337b6f7e79f61bf6584f4becb1e873149fc89860 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Thu, 21 Sep 2023 19:30:07 +0200 Subject: [PATCH 01/11] Rename BestResult to BestScore --- src/Platform/{BestResult.h => BestScore.h} | 6 +-- src/Platform/CMakeLists.txt | 1 + src/Platform/ReportBase.cc | 2 +- src/Platform/ReportConsole.cc | 6 +-- src/Platform/ReportExcel.cc | 8 ++-- src/Platform/Results.cc | 6 +-- src/Platform/best.cc | 52 ++++++++++++++++++++++ 7 files changed, 67 insertions(+), 14 deletions(-) rename src/Platform/{BestResult.h => BestScore.h} (77%) create mode 100644 src/Platform/best.cc diff --git a/src/Platform/BestResult.h b/src/Platform/BestScore.h similarity index 77% rename from src/Platform/BestResult.h rename to src/Platform/BestScore.h index 8b3f1cb..4e649b2 100644 --- a/src/Platform/BestResult.h +++ b/src/Platform/BestScore.h @@ -1,7 +1,7 @@ -#ifndef BESTRESULT_H -#define BESTRESULT_H +#ifndef BESTSCORE_H +#define BESTSCORE_H #include -class BestResult { +class BestScore { public: static std::string title() { return "STree_default (linear-ovo)"; } static double score() { return 22.109799; } diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 071577f..2b899ea 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -8,6 +8,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc) add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc) add_executable(list list.cc platformUtils Datasets.cc) +add_executable(best list.cc platformUtils Datasets.cc) target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs) diff --git a/src/Platform/ReportBase.cc b/src/Platform/ReportBase.cc index 702c3f0..3cac9d3 100644 --- a/src/Platform/ReportBase.cc +++ b/src/Platform/ReportBase.cc @@ -2,7 +2,7 @@ #include #include "Datasets.h" #include "ReportBase.h" -#include "BestResult.h" +#include "BestScore.h" namespace platform { diff --git a/src/Platform/ReportConsole.cc b/src/Platform/ReportConsole.cc index 621f463..daf6242 100644 --- a/src/Platform/ReportConsole.cc +++ b/src/Platform/ReportConsole.cc @@ -1,7 +1,7 @@ #include #include #include "ReportConsole.h" -#include "BestResult.h" +#include "BestScore.h" namespace platform { @@ -99,9 +99,9 @@ namespace platform { cout << Colors::MAGENTA() << string(MAXL, '*') << endl; showSummary(); auto score = data["score_name"].get(); - if (score == BestResult::scoreName()) { + if (score == BestScore::scoreName()) { stringstream oss; - oss << score << " compared to " << BestResult::title() << " .: " << totalScore / BestResult::score(); + oss << score << " compared to " << BestScore::title() << " .: " << totalScore / BestScore::score(); cout << headerLine(oss.str()); } if (!getExistBestFile() && compare) { diff --git a/src/Platform/ReportExcel.cc b/src/Platform/ReportExcel.cc index 1b6d74e..9398185 100644 --- a/src/Platform/ReportExcel.cc +++ b/src/Platform/ReportExcel.cc @@ -1,7 +1,7 @@ #include #include #include "ReportExcel.h" -#include "BestResult.h" +#include "BestScore.h" namespace platform { @@ -322,9 +322,9 @@ namespace platform { showSummary(); row += 4 + summary.size(); auto score = data["score_name"].get(); - if (score == BestResult::scoreName()) { - worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestResult::title() + " .:").c_str(), efectiveStyle("text")); - writeDouble(row, 6, totalScore / BestResult::score(), "result"); + if (score == BestScore::scoreName()) { + worksheet_merge_range(worksheet, row, 1, row, 5, (score + " compared to " + BestScore::title() + " .:").c_str(), efectiveStyle("text")); + writeDouble(row, 6, totalScore / BestScore::score(), "result"); } if (!getExistBestFile() && compare) { worksheet_write_string(worksheet, row + 1, 0, "*** Best Results File not found. Couldn't compare any result!", styles["summaryStyle"]); diff --git a/src/Platform/Results.cc b/src/Platform/Results.cc index d03f22e..51ecc87 100644 --- a/src/Platform/Results.cc +++ b/src/Platform/Results.cc @@ -3,7 +3,7 @@ #include "Results.h" #include "ReportConsole.h" #include "ReportExcel.h" -#include "BestResult.h" +#include "BestScore.h" #include "Colors.h" namespace platform { Result::Result(const string& path, const string& filename) @@ -17,8 +17,8 @@ namespace platform { score += result["score"].get(); } scoreName = data["score_name"]; - if (scoreName == BestResult::scoreName()) { - score /= BestResult::score(); + if (scoreName == BestScore::scoreName()) { + score /= BestScore::score(); } title = data["title"]; duration = data["duration"]; diff --git a/src/Platform/best.cc b/src/Platform/best.cc new file mode 100644 index 0000000..585cb17 --- /dev/null +++ b/src/Platform/best.cc @@ -0,0 +1,52 @@ +#include +#include +#include "platformUtils.h" +#include "Paths.h" +#include "Results.h" + +using namespace std; + +argparse::ArgumentParser manageArguments(int argc, char** argv) +{ + argparse::ArgumentParser program("best"); + program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>(); + program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)"); + program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied"); + program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true); + program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true); + program.add_argument("--compare").help("Compare with best results").default_value(false).implicit_value(true); + try { + program.parse_args(argc, argv); + auto number = program.get("number"); + if (number < 0) { + throw runtime_error("Number of results must be greater than or equal to 0"); + } + auto model = program.get("model"); + auto score = program.get("score"); + auto complete = program.get("complete"); + auto partial = program.get("partial"); + auto compare = program.get("compare"); + } + catch (const exception& err) { + cerr << err.what() << endl; + cerr << program; + exit(1); + } + return program; +} + +int main(int argc, char** argv) +{ + auto program = manageArguments(argc, argv); + auto number = program.get("number"); + auto model = program.get("model"); + auto score = program.get("score"); + auto complete = program.get("complete"); + auto partial = program.get("partial"); + auto compare = program.get("compare"); + if (complete) + partial = false; + auto results = platform::Results(platform::Paths::results(), number, model, score, complete, partial, compare); + results.manage(); + return 0; +} From 7bfafe555f7ebc522cef4fa148a144979cc5edb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Thu, 21 Sep 2023 23:04:11 +0200 Subject: [PATCH 02/11] Begin BestResults build --- Makefile | 7 +- sample/sample.cc | 344 +++++++++++++++++------------------ src/Platform/BestResults.cc | 68 +++++++ src/Platform/BestResults.h | 20 ++ src/Platform/CMakeLists.txt | 4 +- src/Platform/best.cc | 39 ++-- src/Platform/platformUtils.h | 1 - 7 files changed, 285 insertions(+), 198 deletions(-) create mode 100644 src/Platform/BestResults.cc create mode 100644 src/Platform/BestResults.h diff --git a/Makefile b/Makefile index 099cd8a..c782d1d 100644 --- a/Makefile +++ b/Makefile @@ -19,13 +19,14 @@ copy: ## Copy binary files to selected folder @cp build/src/Platform/main $(dest) @cp build/src/Platform/list $(dest) @cp build/src/Platform/manage $(dest) + @cp build/src/Platform/best $(dest) @echo ">>> Done" dependency: ## Create a dependency graph diagram of the project (build/dependency.png) cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png build: ## Build the main and BayesNetSample - cmake --build build -t main -t BayesNetSample -t manage -t list -j 32 + cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32 clean: ## Clean the debug info @echo ">>> Cleaning Debug BayesNet ..."; @@ -40,7 +41,7 @@ debug: ## Build a debug version of the project @if [ -d ./build ]; then rm -rf ./build; fi @mkdir build; cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \ - cmake --build build -t main -t BayesNetSample -t manage -t list unit_tests -j 32; + cmake --build build -t main -t BayesNetSample -t manage -t list -t best -t unit_tests -j 32; @echo ">>> Done"; release: ## Build a Release version of the project @@ -48,7 +49,7 @@ release: ## Build a Release version of the project @if [ -d ./build ]; then rm -rf ./build; fi @mkdir build; cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \ - cmake --build build -t main -t BayesNetSample -t manage -t list -j 32; + cmake --build build -t main -t BayesNetSample -t manage -t list -t best -j 32; @echo ">>> Done"; test: ## Run tests diff --git a/sample/sample.cc b/sample/sample.cc index 7e9d569..89c491c 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -104,180 +104,180 @@ int main(int argc, char** argv) for (int i = 0; i < 10; i++) { cout << weights_.index({ i }).item() << endl; } - // map datasets = { - // {"diabetes", true}, - // {"ecoli", true}, - // {"glass", true}, - // {"iris", true}, - // {"kdd_JapaneseVowels", false}, - // {"letter", true}, - // {"liver-disorders", true}, - // {"mfeat-factors", true}, - // }; - // auto valid_datasets = vector(); - // transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets), - // [](const pair& pair) { return pair.first; }); - // argparse::ArgumentParser program("BayesNetSample"); - // program.add_argument("-d", "--dataset") - // .help("Dataset file name") - // .action([valid_datasets](const std::string& value) { - // if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { - // return value; - // } - // throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}"); - // } - // ); - // program.add_argument("-p", "--path") - // .help(" folder where the data files are located, default") - // .default_value(string{ PATH } - // ); - // program.add_argument("-m", "--model") - // .help("Model to use " + platform::Models::instance()->toString()) - // .action([](const std::string& value) { - // static const vector choices = platform::Models::instance()->getNames(); - // if (find(choices.begin(), choices.end(), value) != choices.end()) { - // return value; - // } - // throw runtime_error("Model must be one of " + platform::Models::instance()->toString()); - // } - // ); - // program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); - // program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true); - // program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); - // program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true); - // program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { - // try { - // auto k = stoi(value); - // if (k < 2) { - // throw runtime_error("Number of folds must be greater than 1"); - // } - // return k; - // } - // catch (const runtime_error& err) { - // throw runtime_error(err.what()); - // } - // catch (...) { - // throw runtime_error("Number of folds must be an integer"); - // }}); - // program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); - // bool class_last, stratified, tensors, dump_cpt; - // string model_name, file_name, path, complete_file_name; - // int nFolds, seed; - // try { - // program.parse_args(argc, argv); - // file_name = program.get("dataset"); - // path = program.get("path"); - // model_name = program.get("model"); - // complete_file_name = path + file_name + ".arff"; - // stratified = program.get("stratified"); - // tensors = program.get("tensors"); - // nFolds = program.get("folds"); - // seed = program.get("seed"); - // dump_cpt = program.get("dumpcpt"); - // class_last = datasets[file_name]; - // if (!file_exists(complete_file_name)) { - // throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); - // } - // } - // catch (const exception& err) { - // cerr << err.what() << endl; - // cerr << program; - // exit(1); - // } + map datasets = { + {"diabetes", true}, + {"ecoli", true}, + {"glass", true}, + {"iris", true}, + {"kdd_JapaneseVowels", false}, + {"letter", true}, + {"liver-disorders", true}, + {"mfeat-factors", true}, + }; + auto valid_datasets = vector(); + transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets), + [](const pair& pair) { return pair.first; }); + argparse::ArgumentParser program("BayesNetSample"); + program.add_argument("-d", "--dataset") + .help("Dataset file name") + .action([valid_datasets](const std::string& value) { + if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) { + return value; + } + throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}"); + } + ); + program.add_argument("-p", "--path") + .help(" folder where the data files are located, default") + .default_value(string{ PATH } + ); + program.add_argument("-m", "--model") + .help("Model to use " + platform::Models::instance()->toString()) + .action([](const std::string& value) { + static const vector choices = platform::Models::instance()->getNames(); + if (find(choices.begin(), choices.end(), value) != choices.end()) { + return value; + } + throw runtime_error("Model must be one of " + platform::Models::instance()->toString()); + } + ); + program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true); + program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true); + program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true); + program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true); + program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) { + try { + auto k = stoi(value); + if (k < 2) { + throw runtime_error("Number of folds must be greater than 1"); + } + return k; + } + catch (const runtime_error& err) { + throw runtime_error(err.what()); + } + catch (...) { + throw runtime_error("Number of folds must be an integer"); + }}); + program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>(); + bool class_last, stratified, tensors, dump_cpt; + string model_name, file_name, path, complete_file_name; + int nFolds, seed; + try { + program.parse_args(argc, argv); + file_name = program.get("dataset"); + path = program.get("path"); + model_name = program.get("model"); + complete_file_name = path + file_name + ".arff"; + stratified = program.get("stratified"); + tensors = program.get("tensors"); + nFolds = program.get("folds"); + seed = program.get("seed"); + dump_cpt = program.get("dumpcpt"); + class_last = datasets[file_name]; + if (!file_exists(complete_file_name)) { + throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist"); + } + } + catch (const exception& err) { + cerr << err.what() << endl; + cerr << program; + exit(1); + } /* * Begin Processing */ - // auto handler = ArffFiles(); - // handler.load(complete_file_name, class_last); - // // Get Dataset X, y - // vector& X = handler.getX(); - // mdlp::labels_t& y = handler.getY(); - // // Get className & Features - // auto className = handler.getClassName(); - // vector features; - // auto attributes = handler.getAttributes(); - // transform(attributes.begin(), attributes.end(), back_inserter(features), - // [](const pair& item) { return item.first; }); - // // Discretize Dataset - // auto [Xd, maxes] = discretize(X, y, features); - // maxes[className] = *max_element(y.begin(), y.end()) + 1; - // map> states; - // for (auto feature : features) { - // states[feature] = vector(maxes[feature]); - // } - // states[className] = vector(maxes[className]); - // auto clf = platform::Models::instance()->create(model_name); - // clf->fit(Xd, y, features, className, states); - // if (dump_cpt) { - // cout << "--- CPT Tables ---" << endl; - // clf->dump_cpt(); - // } - // auto lines = clf->show(); - // for (auto line : lines) { - // cout << line << endl; - // } - // cout << "--- Topological Order ---" << endl; - // auto order = clf->topological_order(); - // for (auto name : order) { - // cout << name << ", "; - // } - // cout << "end." << endl; - // auto score = clf->score(Xd, y); - // cout << "Score: " << score << endl; - // auto graph = clf->graph(); - // auto dot_file = model_name + "_" + file_name; - // ofstream file(dot_file + ".dot"); - // file << graph; - // file.close(); - // cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl; - // cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl; - // string stratified_string = stratified ? " Stratified" : ""; - // cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl; - // cout << "==========================================" << endl; - // torch::Tensor Xt = torch::zeros({ static_cast(Xd.size()), static_cast(Xd[0].size()) }, torch::kInt32); - // torch::Tensor yt = torch::tensor(y, torch::kInt32); - // for (int i = 0; i < features.size(); ++i) { - // Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32)); - // } - // float total_score = 0, total_score_train = 0, score_train, score_test; - // platform::Fold* fold; - // if (stratified) - // fold = new platform::StratifiedKFold(nFolds, y, seed); - // else - // fold = new platform::KFold(nFolds, y.size(), seed); - // for (auto i = 0; i < nFolds; ++i) { - // auto [train, test] = fold->getFold(i); - // cout << "Fold: " << i + 1 << endl; - // if (tensors) { - // auto ttrain = torch::tensor(train, torch::kInt64); - // auto ttest = torch::tensor(test, torch::kInt64); - // torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain); - // torch::Tensor ytraint = yt.index({ ttrain }); - // torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest); - // torch::Tensor ytestt = yt.index({ ttest }); - // clf->fit(Xtraint, ytraint, features, className, states); - // auto temp = clf->predict(Xtraint); - // score_train = clf->score(Xtraint, ytraint); - // score_test = clf->score(Xtestt, ytestt); - // } else { - // auto [Xtrain, ytrain] = extract_indices(train, Xd, y); - // auto [Xtest, ytest] = extract_indices(test, Xd, y); - // clf->fit(Xtrain, ytrain, features, className, states); - // score_train = clf->score(Xtrain, ytrain); - // score_test = clf->score(Xtest, ytest); - // } - // if (dump_cpt) { - // cout << "--- CPT Tables ---" << endl; - // clf->dump_cpt(); - // } - // total_score_train += score_train; - // total_score += score_test; - // cout << "Score Train: " << score_train << endl; - // cout << "Score Test : " << score_test << endl; - // cout << "-------------------------------------------------------------------------------" << endl; - // } - // cout << "**********************************************************************************" << endl; - // cout << "Average Score Train: " << total_score_train / nFolds << endl; - // cout << "Average Score Test : " << total_score / nFolds << endl;return 0; + auto handler = ArffFiles(); + handler.load(complete_file_name, class_last); + // Get Dataset X, y + vector& X = handler.getX(); + mdlp::labels_t& y = handler.getY(); + // Get className & Features + auto className = handler.getClassName(); + vector features; + auto attributes = handler.getAttributes(); + transform(attributes.begin(), attributes.end(), back_inserter(features), + [](const pair& item) { return item.first; }); + // Discretize Dataset + auto [Xd, maxes] = discretize(X, y, features); + maxes[className] = *max_element(y.begin(), y.end()) + 1; + map> states; + for (auto feature : features) { + states[feature] = vector(maxes[feature]); + } + states[className] = vector(maxes[className]); + auto clf = platform::Models::instance()->create(model_name); + clf->fit(Xd, y, features, className, states); + if (dump_cpt) { + cout << "--- CPT Tables ---" << endl; + clf->dump_cpt(); + } + auto lines = clf->show(); + for (auto line : lines) { + cout << line << endl; + } + cout << "--- Topological Order ---" << endl; + auto order = clf->topological_order(); + for (auto name : order) { + cout << name << ", "; + } + cout << "end." << endl; + auto score = clf->score(Xd, y); + cout << "Score: " << score << endl; + auto graph = clf->graph(); + auto dot_file = model_name + "_" + file_name; + ofstream file(dot_file + ".dot"); + file << graph; + file.close(); + cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl; + cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl; + string stratified_string = stratified ? " Stratified" : ""; + cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl; + cout << "==========================================" << endl; + torch::Tensor Xt = torch::zeros({ static_cast(Xd.size()), static_cast(Xd[0].size()) }, torch::kInt32); + torch::Tensor yt = torch::tensor(y, torch::kInt32); + for (int i = 0; i < features.size(); ++i) { + Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32)); + } + float total_score = 0, total_score_train = 0, score_train, score_test; + platform::Fold* fold; + if (stratified) + fold = new platform::StratifiedKFold(nFolds, y, seed); + else + fold = new platform::KFold(nFolds, y.size(), seed); + for (auto i = 0; i < nFolds; ++i) { + auto [train, test] = fold->getFold(i); + cout << "Fold: " << i + 1 << endl; + if (tensors) { + auto ttrain = torch::tensor(train, torch::kInt64); + auto ttest = torch::tensor(test, torch::kInt64); + torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain); + torch::Tensor ytraint = yt.index({ ttrain }); + torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest); + torch::Tensor ytestt = yt.index({ ttest }); + clf->fit(Xtraint, ytraint, features, className, states); + auto temp = clf->predict(Xtraint); + score_train = clf->score(Xtraint, ytraint); + score_test = clf->score(Xtestt, ytestt); + } else { + auto [Xtrain, ytrain] = extract_indices(train, Xd, y); + auto [Xtest, ytest] = extract_indices(test, Xd, y); + clf->fit(Xtrain, ytrain, features, className, states); + score_train = clf->score(Xtrain, ytrain); + score_test = clf->score(Xtest, ytest); + } + if (dump_cpt) { + cout << "--- CPT Tables ---" << endl; + clf->dump_cpt(); + } + total_score_train += score_train; + total_score += score_test; + cout << "Score Train: " << score_train << endl; + cout << "Score Test : " << score_test << endl; + cout << "-------------------------------------------------------------------------------" << endl; + } + cout << "**********************************************************************************" << endl; + cout << "Average Score Train: " << total_score_train / nFolds << endl; + cout << "Average Score Test : " << total_score / nFolds << endl;return 0; } \ No newline at end of file diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc new file mode 100644 index 0000000..5c06eb6 --- /dev/null +++ b/src/Platform/BestResults.cc @@ -0,0 +1,68 @@ +#include +#include +#include +#include "platformUtils.h" +#include "BestResults.h" +#include "Results.h" +#include "Colors.h" + +namespace platform { + + void BestResults::build() + { + auto files = loadFiles(); + if (files.size() == 0) { + throw runtime_error("No result files were found!"); + } + json bests; + for (const auto& file : files) { + auto result = Result(path, file); + auto data = result.load(); + for (auto const& item : data.at("results")) { + bool update = false; + if (bests.contains(item.at("dataset").get())) { + if (item.at("score").get() > bests["dataset"].at(0).get()) { + update = true; + } + } else { + update = true; + } + if (update) { + bests[item.at("dataset").get()] = { item.at("score").get(), item.at("hyperparameters"), file }; + } + } + } + string bestFileName = path + "/" + bestResultFile(); + if (file_exists(bestFileName)) { + cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET(); + } + ofstream file(bestFileName); + file << bests; + file.close(); + } + + string BestResults::bestResultFile() + { + return "best_results_" + score + "_" + model + ".json"; + } + + vector BestResults::loadFiles() + { + vector files; + using std::filesystem::directory_iterator; + for (const auto& file : directory_iterator(path)) { + auto fileName = file.path().filename().string(); + if (fileName.find(".json") != string::npos && fileName.find("results_") == 0 + && fileName.find("_" + score + "_") != string::npos + && fileName.find("_" + model + "_") != string::npos) { + files.push_back(fileName); + } + } + return files; + } + + void BestResults::report() + { + + } +} \ No newline at end of file diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h new file mode 100644 index 0000000..05c04f7 --- /dev/null +++ b/src/Platform/BestResults.h @@ -0,0 +1,20 @@ +#ifndef BESTRESULTS_H +#define BESTRESULTS_H +#include +using namespace std; + +namespace platform { + class BestResults { + public: + explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {} + void build(); + void report(); + private: + vector loadFiles(); + string bestResultFile(); + string path; + string score; + string model; + }; +} +#endif //BESTRESULTS_H \ No newline at end of file diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 2b899ea..c87fb7f 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -8,11 +8,13 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc) add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc) add_executable(list list.cc platformUtils Datasets.cc) -add_executable(best list.cc platformUtils Datasets.cc) +add_executable(best best.cc BestResults.cc Results.cc ReportBase.cc ReportExcel.cc platformUtils.cc) target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs) + target_link_libraries(best "${TORCH_LIBRARIES}" libxlsxwriter.so stdc++fs) else() target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) + target_link_libraries(best "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}") endif() target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Platform/best.cc b/src/Platform/best.cc index 585cb17..c4bd9fc 100644 --- a/src/Platform/best.cc +++ b/src/Platform/best.cc @@ -1,31 +1,23 @@ #include #include -#include "platformUtils.h" #include "Paths.h" -#include "Results.h" +#include "BestResults.h" using namespace std; argparse::ArgumentParser manageArguments(int argc, char** argv) { argparse::ArgumentParser program("best"); - program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>(); program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)"); program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied"); - program.add_argument("--complete").help("Show only results with all datasets").default_value(false).implicit_value(true); - program.add_argument("--partial").help("Show only partial results").default_value(false).implicit_value(true); - program.add_argument("--compare").help("Compare with best results").default_value(false).implicit_value(true); + program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true); + program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true); try { program.parse_args(argc, argv); - auto number = program.get("number"); - if (number < 0) { - throw runtime_error("Number of results must be greater than or equal to 0"); - } auto model = program.get("model"); auto score = program.get("score"); - auto complete = program.get("complete"); - auto partial = program.get("partial"); - auto compare = program.get("compare"); + auto build = program.get("build"); + auto report = program.get("report"); } catch (const exception& err) { cerr << err.what() << endl; @@ -38,15 +30,20 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) int main(int argc, char** argv) { auto program = manageArguments(argc, argv); - auto number = program.get("number"); auto model = program.get("model"); auto score = program.get("score"); - auto complete = program.get("complete"); - auto partial = program.get("partial"); - auto compare = program.get("compare"); - if (complete) - partial = false; - auto results = platform::Results(platform::Paths::results(), number, model, score, complete, partial, compare); - results.manage(); + auto build = program.get("build"); + auto report = program.get("report"); + if (!report && !build) { + cout << "Either build, report or both, have to be selected to do anything!" << endl; + exit(1); + } + auto results = platform::BestResults(platform::Paths::results(), model, score); + if (build) { + results.build(); + } + if (report) { + results.report(); + } return 0; } diff --git a/src/Platform/platformUtils.h b/src/Platform/platformUtils.h index 2b4ca54..213e28a 100644 --- a/src/Platform/platformUtils.h +++ b/src/Platform/platformUtils.h @@ -8,7 +8,6 @@ #include "ArffFiles.h" #include "CPPFImdlp.h" using namespace std; -const string PATH = "../../data/"; bool file_exists(const std::string& name); vector split(const string& text, char delimiter); From c4d0a5b4e6ba1ed4e7490f7f1822fe8a15407709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Thu, 21 Sep 2023 23:30:17 +0200 Subject: [PATCH 03/11] Split Result from Results --- src/Platform/BestResults.cc | 6 ++--- src/Platform/CMakeLists.txt | 7 +++--- src/Platform/Result.cc | 50 +++++++++++++++++++++++++++++++++++++ src/Platform/Result.h | 37 +++++++++++++++++++++++++++ src/Platform/Results.cc | 41 ------------------------------ src/Platform/Results.h | 25 +------------------ 6 files changed, 94 insertions(+), 72 deletions(-) create mode 100644 src/Platform/Result.cc create mode 100644 src/Platform/Result.h diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 5c06eb6..b0e75f7 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -1,9 +1,8 @@ #include #include #include -#include "platformUtils.h" #include "BestResults.h" -#include "Results.h" +#include "Result.h" #include "Colors.h" namespace platform { @@ -33,7 +32,8 @@ namespace platform { } } string bestFileName = path + "/" + bestResultFile(); - if (file_exists(bestFileName)) { + if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { + fclose(fileTest); cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET(); } ofstream file(bestFileName); diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index c87fb7f..b40a311 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -6,15 +6,14 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include) include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc ReportConsole.cc ReportBase.cc) -add_executable(manage manage.cc Results.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc) +add_executable(manage manage.cc Results.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc platformUtils.cc) add_executable(list list.cc platformUtils Datasets.cc) -add_executable(best best.cc BestResults.cc Results.cc ReportBase.cc ReportExcel.cc platformUtils.cc) +add_executable(best best.cc BestResults.cc Result.cc) target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}") if (${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") target_link_libraries(manage "${TORCH_LIBRARIES}" libxlsxwriter.so ArffFiles mdlp stdc++fs) - target_link_libraries(best "${TORCH_LIBRARIES}" libxlsxwriter.so stdc++fs) + target_link_libraries(best stdc++fs) else() target_link_libraries(manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) - target_link_libraries(best "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}") endif() target_link_libraries(list ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file diff --git a/src/Platform/Result.cc b/src/Platform/Result.cc new file mode 100644 index 0000000..a438238 --- /dev/null +++ b/src/Platform/Result.cc @@ -0,0 +1,50 @@ +#include +#include +#include "Result.h" +#include "Colors.h" +#include "BestScore.h" +namespace platform { + Result::Result(const string& path, const string& filename) + : path(path) + , filename(filename) + { + auto data = load(); + date = data["date"]; + score = 0; + for (const auto& result : data["results"]) { + score += result["score"].get(); + } + scoreName = data["score_name"]; + if (scoreName == BestScore::scoreName()) { + score /= BestScore::score(); + } + title = data["title"]; + duration = data["duration"]; + model = data["model"]; + complete = data["results"].size() > 1; + } + + json Result::load() const + { + ifstream resultData(path + "/" + filename); + if (resultData.is_open()) { + json data = json::parse(resultData); + return data; + } + throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]"); + } + + string Result::to_string() const + { + stringstream oss; + oss << date << " "; + oss << setw(12) << left << model << " "; + oss << setw(11) << left << scoreName << " "; + oss << right << setw(11) << setprecision(7) << fixed << score << " "; + auto completeString = isComplete() ? "C" : "P"; + oss << setw(1) << " " << completeString << " "; + oss << setw(9) << setprecision(3) << fixed << duration << " "; + oss << setw(50) << left << title << " "; + return oss.str(); + } +} \ No newline at end of file diff --git a/src/Platform/Result.h b/src/Platform/Result.h new file mode 100644 index 0000000..76a47d2 --- /dev/null +++ b/src/Platform/Result.h @@ -0,0 +1,37 @@ +#ifndef RESULT_H +#define RESULT_H +#include +#include +#include +#include +namespace platform { + using namespace std; + using json = nlohmann::json; + + class Result { + public: + Result(const string& path, const string& filename); + json load() const; + string to_string() const; + string getFilename() const { return filename; }; + string getDate() const { return date; }; + double getScore() const { return score; }; + string getTitle() const { return title; }; + double getDuration() const { return duration; }; + string getModel() const { return model; }; + string getScoreName() const { return scoreName; }; + bool isComplete() const { return complete; }; + private: + string path; + string filename; + string date; + double score; + string title; + double duration; + string model; + string scoreName; + bool complete; + }; +}; + +#endif \ No newline at end of file diff --git a/src/Platform/Results.cc b/src/Platform/Results.cc index 51ecc87..f5e3481 100644 --- a/src/Platform/Results.cc +++ b/src/Platform/Results.cc @@ -6,34 +6,6 @@ #include "BestScore.h" #include "Colors.h" namespace platform { - Result::Result(const string& path, const string& filename) - : path(path) - , filename(filename) - { - auto data = load(); - date = data["date"]; - score = 0; - for (const auto& result : data["results"]) { - score += result["score"].get(); - } - scoreName = data["score_name"]; - if (scoreName == BestScore::scoreName()) { - score /= BestScore::score(); - } - title = data["title"]; - duration = data["duration"]; - model = data["model"]; - complete = data["results"].size() > 1; - } - json Result::load() const - { - ifstream resultData(path + "/" + filename); - if (resultData.is_open()) { - json data = json::parse(resultData); - return data; - } - throw invalid_argument("Unable to open result file. [" + path + "/" + filename + "]"); - } void Results::load() { using std::filesystem::directory_iterator; @@ -52,19 +24,6 @@ namespace platform { max = files.size(); } } - string Result::to_string() const - { - stringstream oss; - oss << date << " "; - oss << setw(12) << left << model << " "; - oss << setw(11) << left << scoreName << " "; - oss << right << setw(11) << setprecision(7) << fixed << score << " "; - auto completeString = isComplete() ? "C" : "P"; - oss << setw(1) << " " << completeString << " "; - oss << setw(9) << setprecision(3) << fixed << duration << " "; - oss << setw(50) << left << title << " "; - return oss.str(); - } void Results::show() const { cout << Colors::GREEN() << "Results found: " << files.size() << endl; diff --git a/src/Platform/Results.h b/src/Platform/Results.h index 60748ba..b322cfb 100644 --- a/src/Platform/Results.h +++ b/src/Platform/Results.h @@ -5,34 +5,11 @@ #include #include #include +#include "Result.h" namespace platform { using namespace std; using json = nlohmann::json; - class Result { - public: - Result(const string& path, const string& filename); - json load() const; - string to_string() const; - string getFilename() const { return filename; }; - string getDate() const { return date; }; - double getScore() const { return score; }; - string getTitle() const { return title; }; - double getDuration() const { return duration; }; - string getModel() const { return model; }; - string getScoreName() const { return scoreName; }; - bool isComplete() const { return complete; }; - private: - string path; - string filename; - string date; - double score; - string title; - double duration; - string model; - string scoreName; - bool complete; - }; class Results { public: Results(const string& path, const int max, const string& model, const string& score, bool complete, bool partial, bool compare) : From c4f9187e2a811a2f5e2d590e4ab041a3e758e13b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Fri, 22 Sep 2023 01:03:55 +0200 Subject: [PATCH 04/11] Complete best build and report --- src/Platform/BestResults.cc | 46 +++++++++++++++++++++++++++++++++---- src/Platform/BestResults.h | 6 +++-- src/Platform/Result.cc | 1 + src/Platform/best.cc | 14 +++++++---- 4 files changed, 56 insertions(+), 11 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index b0e75f7..95b38bb 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -7,11 +7,12 @@ namespace platform { - void BestResults::build() + string BestResults::build() { auto files = loadFiles(); if (files.size() == 0) { - throw runtime_error("No result files were found!"); + cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl; + exit(1); } json bests; for (const auto& file : files) { @@ -20,7 +21,7 @@ namespace platform { for (auto const& item : data.at("results")) { bool update = false; if (bests.contains(item.at("dataset").get())) { - if (item.at("score").get() > bests["dataset"].at(0).get()) { + if (item.at("score").get() > bests[item.at("dataset").get()].at(0).get()) { update = true; } } else { @@ -31,14 +32,15 @@ namespace platform { } } } - string bestFileName = path + "/" + bestResultFile(); + string bestFileName = path + bestResultFile(); if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { fclose(fileTest); - cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET(); + cout << Colors::MAGENTA() << "File " << bestFileName << " already exists and it shall be overwritten." << Colors::RESET() << endl; } ofstream file(bestFileName); file << bests; file.close(); + return bestFileName; } string BestResults::bestResultFile() @@ -60,9 +62,43 @@ namespace platform { } return files; } + json BestResults::loadFile(const string& fileName) + { + ifstream resultData(fileName); + if (resultData.is_open()) { + json data = json::parse(resultData); + return data; + } + throw invalid_argument("Unable to open result file. [" + fileName + "]"); + } void BestResults::report() { + string bestFileName = path + bestResultFile(); + if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { + fclose(fileTest); + } else { + cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl; + exit(1); + } + auto data = loadFile(bestFileName); + cout << Colors::GREEN() << "Best results for " << model << " and " << score << endl; + cout << "------------------------------------------" << endl; + cout << Colors::GREEN() << " # Dataset Score File Hyperparameters" << endl; + cout << "=== ========================= =========== ================================================================== ================================================= " << endl; + auto i = 0; + bool odd = true; + for (auto const& item : data.items()) { + auto color = odd ? Colors::BLUE() : Colors::CYAN(); + cout << color << setw(3) << fixed << right << i++ << " "; + cout << setw(25) << left << item.key() << " "; + cout << setw(11) << setprecision(9) << fixed << item.value().at(0).get() << " "; + cout << setw(66) << item.value().at(2).get() << " "; + cout << item.value().at(1) << " "; + cout << endl; + odd = !odd; + } + } } \ No newline at end of file diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h index 05c04f7..91bc45a 100644 --- a/src/Platform/BestResults.h +++ b/src/Platform/BestResults.h @@ -1,17 +1,19 @@ #ifndef BESTRESULTS_H #define BESTRESULTS_H #include +#include using namespace std; - +using json = nlohmann::json; namespace platform { class BestResults { public: explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {} - void build(); + string build(); void report(); private: vector loadFiles(); string bestResultFile(); + json loadFile(const string& fileName); string path; string score; string model; diff --git a/src/Platform/Result.cc b/src/Platform/Result.cc index a438238..156ffa7 100644 --- a/src/Platform/Result.cc +++ b/src/Platform/Result.cc @@ -1,5 +1,6 @@ #include #include +#include #include "Result.h" #include "Colors.h" #include "BestScore.h" diff --git a/src/Platform/best.cc b/src/Platform/best.cc index c4bd9fc..e76ccdd 100644 --- a/src/Platform/best.cc +++ b/src/Platform/best.cc @@ -2,14 +2,15 @@ #include #include "Paths.h" #include "BestResults.h" +#include "Colors.h" using namespace std; argparse::ArgumentParser manageArguments(int argc, char** argv) { argparse::ArgumentParser program("best"); - program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)"); - program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied"); + program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model)"); + program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied"); program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true); program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true); try { @@ -18,6 +19,9 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) auto score = program.get("score"); auto build = program.get("build"); auto report = program.get("report"); + if (model == "" || score == "") { + throw runtime_error("Model and score name must be supplied"); + } } catch (const exception& err) { cerr << err.what() << endl; @@ -35,12 +39,14 @@ int main(int argc, char** argv) auto build = program.get("build"); auto report = program.get("report"); if (!report && !build) { - cout << "Either build, report or both, have to be selected to do anything!" << endl; + cerr << "Either build, report or both, have to be selected to do anything!" << endl; + cerr << program; exit(1); } auto results = platform::BestResults(platform::Paths::results(), model, score); if (build) { - results.build(); + string fileName = results.build(); + cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl; } if (report) { results.report(); From 3a16589220bbd14f112153ae3158fefc2089b53b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Fri, 22 Sep 2023 01:04:36 +0200 Subject: [PATCH 05/11] Add best config for debug in vscode --- .vscode/launch.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index 52b3d7a..4e9b5f1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -37,6 +37,20 @@ ], "cwd": "/Users/rmontanana/Code/discretizbench", }, + { + "type": "lldb", + "request": "launch", + "name": "best", + "program": "${workspaceFolder}/build/src/Platform/best", + "args": [ + "-m", + "BoostAODE", + "-s", + "accuracy", + "--build", + ], + "cwd": "/Users/rmontanana/Code/discretizbench", + }, { "type": "lldb", "request": "launch", From b30416364d6d6ed4b70a1cf4b226763e62c5e73e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Fri, 22 Sep 2023 14:14:39 +0200 Subject: [PATCH 06/11] Fix mistake in best results file name --- src/Platform/BestResults.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 95b38bb..b5ea179 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -45,7 +45,7 @@ namespace platform { string BestResults::bestResultFile() { - return "best_results_" + score + "_" + model + ".json"; + return "best_results_" + model + "_" + score + ".json"; } vector BestResults::loadFiles() From c8597a794e6a746df76db87a5ead4380fedbfac1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Fri, 22 Sep 2023 18:13:32 +0200 Subject: [PATCH 07/11] Begin report all models --- src/Platform/BestResults.cc | 12 ++++++++---- src/Platform/BestResults.h | 5 +++-- src/Platform/best.cc | 13 +++++++++++-- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index b5ea179..3220382 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -9,7 +9,7 @@ namespace platform { string BestResults::build() { - auto files = loadFiles(); + auto files = loadResultFiles(); if (files.size() == 0) { cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl; exit(1); @@ -48,7 +48,7 @@ namespace platform { return "best_results_" + model + "_" + score + ".json"; } - vector BestResults::loadFiles() + vector BestResults::loadResultFiles() { vector files; using std::filesystem::directory_iterator; @@ -56,7 +56,7 @@ namespace platform { auto fileName = file.path().filename().string(); if (fileName.find(".json") != string::npos && fileName.find("results_") == 0 && fileName.find("_" + score + "_") != string::npos - && fileName.find("_" + model + "_") != string::npos) { + && (fileName.find("_" + model + "_") != string::npos || model == "any")) { files.push_back(fileName); } } @@ -71,8 +71,12 @@ namespace platform { } throw invalid_argument("Unable to open result file. [" + fileName + "]"); } + void BestResults::reportAll() + { - void BestResults::report() + } + + void BestResults::reportSingle() { string bestFileName = path + bestResultFile(); if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h index 91bc45a..19e10d7 100644 --- a/src/Platform/BestResults.h +++ b/src/Platform/BestResults.h @@ -9,9 +9,10 @@ namespace platform { public: explicit BestResults(const string& path, const string& score, const string& model) : path(path), score(score), model(model) {} string build(); - void report(); + void reportSingle(); + void reportAll(); private: - vector loadFiles(); + vector loadResultFiles(); string bestResultFile(); json loadFile(const string& fileName); string path; diff --git a/src/Platform/best.cc b/src/Platform/best.cc index e76ccdd..0280a8a 100644 --- a/src/Platform/best.cc +++ b/src/Platform/best.cc @@ -9,7 +9,7 @@ using namespace std; argparse::ArgumentParser manageArguments(int argc, char** argv) { argparse::ArgumentParser program("best"); - program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model)"); + program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model) (any for all models)"); program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied"); program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true); program.add_argument("--report").help("report of best score results file").default_value(false).implicit_value(true); @@ -43,13 +43,22 @@ int main(int argc, char** argv) cerr << program; exit(1); } + if (model == "any" && build) { + cerr << "Can't build best results file for all models. \"any\" is only valid for report" << endl; + cerr << program; + exit(1); + } auto results = platform::BestResults(platform::Paths::results(), model, score); if (build) { string fileName = results.build(); cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl; } if (report) { - results.report(); + if (model == "any") { + results.reportAll(); + } else { + results.reportSingle(); + } } return 0; } From cd0bc02a74bac0cfb37e739dd46ca3d554b20c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Sat, 23 Sep 2023 01:14:02 +0200 Subject: [PATCH 08/11] Add report/build all with totals and ranks --- src/Platform/BestResults.cc | 182 ++++++++++++++++++++++++++++++++++-- src/Platform/BestResults.h | 5 + src/Platform/best.cc | 15 ++- 3 files changed, 186 insertions(+), 16 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 3220382..87ad3b3 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -1,6 +1,7 @@ #include #include #include +#include #include "BestResults.h" #include "Result.h" #include "Colors.h" @@ -45,23 +46,38 @@ namespace platform { string BestResults::bestResultFile() { - return "best_results_" + model + "_" + score + ".json"; + return "best_results_" + score + "_" + model + ".json"; + } + + pair getModelScore(string name) + { + // results_accuracy_BoostAODE_MacBookpro16_2023-09-06_12:27:00_1.json + int i = 0; + auto pos = name.find("_"); + auto pos2 = name.find("_", pos + 1); + string score = name.substr(pos + 1, pos2 - pos - 1); + pos = name.find("_", pos2 + 1); + string model = name.substr(pos2 + 1, pos - pos2 - 1); + return { model, score }; } vector BestResults::loadResultFiles() { vector files; using std::filesystem::directory_iterator; + string fileModel, fileScore; for (const auto& file : directory_iterator(path)) { auto fileName = file.path().filename().string(); - if (fileName.find(".json") != string::npos && fileName.find("results_") == 0 - && fileName.find("_" + score + "_") != string::npos - && (fileName.find("_" + model + "_") != string::npos || model == "any")) { - files.push_back(fileName); + if (fileName.find(".json") != string::npos && fileName.find("results_") == 0) { + tie(fileModel, fileScore) = getModelScore(fileName); + if (score == fileScore && (model == fileModel || model == "any")) { + files.push_back(fileName); + } } } return files; } + json BestResults::loadFile(const string& fileName) { ifstream resultData(fileName); @@ -71,9 +87,48 @@ namespace platform { } throw invalid_argument("Unable to open result file. [" + fileName + "]"); } - void BestResults::reportAll() + set BestResults::getModels() { + set models; + auto files = loadResultFiles(); + if (files.size() == 0) { + cerr << Colors::MAGENTA() << "No result files were found!" << Colors::RESET() << endl; + exit(1); + } + string fileModel, fileScore; + for (const auto& file : files) { + // take the model from the file name and add it to a vector of models + // set model to the name of the first model in the vector + // filter files and build the best results file of this model + // repeat for all models + // another for loop to read the best results file of each model and print al together + // each row is a dataset and each column is a model + // the score is the score of the best result of each model for that dataset + // the rows are datasets the columns are models and the cells are the scores + // the first row is the header with the model names + // the first column is the dataset names + // the last column is the average score of each dataset + // the last row is the average score of each model + // the last cell is the average score of all models + // the last row and column are in bold + // extract the model from the file name + tie(fileModel, fileScore) = getModelScore(file); + // add the model to the vector of models + models.insert(fileModel); + } + return models; + } + + void BestResults::buildAll() + { + auto models = getModels(); + for (const auto& model : models) { + cout << "Building best results for model: " << model << endl; + this->model = model; + build(); + } + model = "any"; } void BestResults::reportSingle() @@ -102,7 +157,118 @@ namespace platform { cout << endl; odd = !odd; } - - + } + json BestResults::buildTableResults(set models) + { + int numberOfDatasets = 0; + bool first = true; + json origin; + json table; + for (const auto& model : models) { + this->model = model; + string bestFileName = path + bestResultFile(); + if (FILE* fileTest = fopen(bestFileName.c_str(), "r")) { + fclose(fileTest); + } else { + cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl; + exit(1); + } + auto data = loadFile(bestFileName); + if (first) { + // Get the number of datasets of the first file and check that is the same for all the models + first = false; + numberOfDatasets = data.size(); + origin = data; + } else { + if (numberOfDatasets != data.size()) { + cerr << Colors::MAGENTA() << "The number of datasets in the best results files is not the same for all the models." << Colors::RESET() << endl; + exit(1); + } + } + table[model] = data; + } + return table; + } + void BestResults::printTableResults(set models, json table) + { + cout << Colors::GREEN() << "Best results for " << score << endl; + cout << "------------------------------------------" << endl; + cout << Colors::GREEN() << " # Dataset "; + for (const auto& model : models) { + cout << setw(12) << left << model << " "; + } + cout << endl; + cout << "=== ========================= "; + for (const auto& model : models) { + cout << "============ "; + } + cout << endl; + auto i = 0; + bool odd = true; + map totals; + map ranks; + for (const auto& model : models) { + totals[model] = 0.0; + } + json origin = table.begin().value(); + for (auto const& item : origin.items()) { + auto color = odd ? Colors::BLUE() : Colors::CYAN(); + cout << color << setw(3) << fixed << right << i++ << " "; + cout << setw(25) << left << item.key() << " "; + double maxValue = 0; + vector> ranksOrder; + // Find out the max value for this dataset + for (const auto& model : models) { + double value = table[model].at(item.key()).at(0).get(); + if (value > maxValue) { + maxValue = value; + } + ranksOrder.push_back({ model, value }); + } + // sort the ranksOrder vector by value + sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { + return a.second < b.second; + }); + // Assign the ranks + for (int i = 0; i < ranksOrder.size(); i++) { + ranks[ranksOrder[i].first] = i + 1; + } + // Print the row with red colors on max values + for (const auto& model : models) { + string efectiveColor = color; + double value = table[model].at(item.key()).at(0).get(); + if (value == maxValue) { + efectiveColor = Colors::RED(); + } + totals[model] += value; + cout << efectiveColor << setw(12) << setprecision(10) << fixed << value << " "; + } + cout << endl; + odd = !odd; + } + cout << Colors::GREEN() << "=== ========================= "; + for (const auto& model : models) { + cout << "============ "; + } + cout << endl; + cout << Colors::GREEN() << setw(30) << " Totals..................."; + for (const auto& model : models) { + cout << setw(12) << setprecision(9) << fixed << totals[model] << " "; + } + // Output the averaged ranks + cout << endl; + cout << Colors::GREEN() << setw(30) << " Averaged ranks..........."; + for (const auto& model : models) { + cout << setw(12) << setprecision(10) << fixed << (double)ranks[model] / (double)origin.size() << " "; + } + cout << endl; + } + void BestResults::reportAll() + { + auto models = getModels(); + // Build the table of results + json table = buildTableResults(models); + // Print the table of results + printTableResults(models, table); } } \ No newline at end of file diff --git a/src/Platform/BestResults.h b/src/Platform/BestResults.h index 19e10d7..3ba6b9d 100644 --- a/src/Platform/BestResults.h +++ b/src/Platform/BestResults.h @@ -1,6 +1,7 @@ #ifndef BESTRESULTS_H #define BESTRESULTS_H #include +#include #include using namespace std; using json = nlohmann::json; @@ -11,8 +12,12 @@ namespace platform { string build(); void reportSingle(); void reportAll(); + void buildAll(); private: + set getModels(); vector loadResultFiles(); + json buildTableResults(set models); + void printTableResults(set models, json table); string bestResultFile(); json loadFile(const string& fileName); string path; diff --git a/src/Platform/best.cc b/src/Platform/best.cc index 0280a8a..6e6d432 100644 --- a/src/Platform/best.cc +++ b/src/Platform/best.cc @@ -43,15 +43,14 @@ int main(int argc, char** argv) cerr << program; exit(1); } - if (model == "any" && build) { - cerr << "Can't build best results file for all models. \"any\" is only valid for report" << endl; - cerr << program; - exit(1); - } - auto results = platform::BestResults(platform::Paths::results(), model, score); + auto results = platform::BestResults(platform::Paths::results(), score, model); if (build) { - string fileName = results.build(); - cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl; + if (model == "any") { + results.buildAll(); + } else { + string fileName = results.build(); + cout << Colors::GREEN() << fileName << " created!" << Colors::RESET() << endl; + } } if (report) { if (model == "any") { From 3a7bf4e672f2bc5624bb631385c626c1ad3bec26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Sat, 23 Sep 2023 01:33:23 +0200 Subject: [PATCH 09/11] Fix ranking order mistake --- src/Platform/BestResults.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 87ad3b3..c902c15 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -227,7 +227,7 @@ namespace platform { } // sort the ranksOrder vector by value sort(ranksOrder.begin(), ranksOrder.end(), [](const pair& a, const pair& b) { - return a.second < b.second; + return a.second > b.second; }); // Assign the ranks for (int i = 0; i < ranksOrder.size(); i++) { From de4fa6a04fbefb0bf69fe3996100b9e300fb10c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Sat, 23 Sep 2023 10:30:39 +0200 Subject: [PATCH 10/11] Add color to totals --- src/Platform/BestResults.cc | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index c902c15..6e69043 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -252,14 +252,34 @@ namespace platform { } cout << endl; cout << Colors::GREEN() << setw(30) << " Totals..................."; + double max = 0.0; + for (const auto& total : totals) { + if (total.second > max) { + max = total.second; + } + } for (const auto& model : models) { - cout << setw(12) << setprecision(9) << fixed << totals[model] << " "; + string efectiveColor = Colors::GREEN(); + if (totals[model] == max) { + efectiveColor = Colors::RED(); + } + cout << efectiveColor << setw(12) << setprecision(9) << fixed << totals[model] << " "; } // Output the averaged ranks cout << endl; + int min = 1; + for (const auto& rank : ranks) { + if (rank.second < min) { + min = rank.second; + } + } cout << Colors::GREEN() << setw(30) << " Averaged ranks..........."; for (const auto& model : models) { - cout << setw(12) << setprecision(10) << fixed << (double)ranks[model] / (double)origin.size() << " "; + string efectiveColor = Colors::GREEN(); + if (ranks[model] == min) { + efectiveColor = Colors::RED(); + } + cout << efectiveColor << setw(12) << setprecision(10) << fixed << (double)ranks[model] / (double)origin.size() << " "; } cout << endl; } From 06de13df98ac0730511246f5d03399c9d773446a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Mon, 25 Sep 2023 10:04:53 +0200 Subject: [PATCH 11/11] Add date/time to header of report best --- src/Platform/BestResults.cc | 49 +++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/Platform/BestResults.cc b/src/Platform/BestResults.cc index 6e69043..d529cbf 100644 --- a/src/Platform/BestResults.cc +++ b/src/Platform/BestResults.cc @@ -1,11 +1,30 @@ #include #include #include +#include #include #include "BestResults.h" #include "Result.h" #include "Colors.h" + + +namespace fs = std::filesystem; +// function ftime_to_string, Code taken from +// https://stackoverflow.com/a/58237530/1389271 +template +std::string ftime_to_string(TP tp) +{ + using namespace std::chrono; + auto sctp = time_point_cast(tp - TP::clock::now() + + system_clock::now()); + auto tt = system_clock::to_time_t(sctp); + std::tm* gmt = std::gmtime(&tt); + std::stringstream buffer; + buffer << std::put_time(gmt, "%Y-%m-%d %H:%M"); + return buffer.str(); +} + namespace platform { string BestResults::build() @@ -97,21 +116,6 @@ namespace platform { } string fileModel, fileScore; for (const auto& file : files) { - // take the model from the file name and add it to a vector of models - // set model to the name of the first model in the vector - // filter files and build the best results file of this model - // repeat for all models - // another for loop to read the best results file of each model and print al together - // each row is a dataset and each column is a model - // the score is the score of the best result of each model for that dataset - // the rows are datasets the columns are models and the cells are the scores - // the first row is the header with the model names - // the first column is the dataset names - // the last column is the average score of each dataset - // the last row is the average score of each model - // the last cell is the average score of all models - // the last row and column are in bold - // extract the model from the file name tie(fileModel, fileScore) = getModelScore(file); // add the model to the vector of models @@ -140,9 +144,10 @@ namespace platform { cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl; exit(1); } + auto date = ftime_to_string(filesystem::last_write_time(bestFileName)); auto data = loadFile(bestFileName); - cout << Colors::GREEN() << "Best results for " << model << " and " << score << endl; - cout << "------------------------------------------" << endl; + cout << Colors::GREEN() << "Best results for " << model << " and " << score << " as of " << date << endl; + cout << "--------------------------------------------------------" << endl; cout << Colors::GREEN() << " # Dataset Score File Hyperparameters" << endl; cout << "=== ========================= =========== ================================================================== ================================================= " << endl; auto i = 0; @@ -164,6 +169,7 @@ namespace platform { bool first = true; json origin; json table; + auto maxDate = filesystem::file_time_type::max(); for (const auto& model : models) { this->model = model; string bestFileName = path + bestResultFile(); @@ -173,6 +179,10 @@ namespace platform { cerr << Colors::MAGENTA() << "File " << bestFileName << " doesn't exist." << Colors::RESET() << endl; exit(1); } + auto dateWrite = filesystem::last_write_time(bestFileName); + if (dateWrite < maxDate) { + maxDate = dateWrite; + } auto data = loadFile(bestFileName); if (first) { // Get the number of datasets of the first file and check that is the same for all the models @@ -187,12 +197,13 @@ namespace platform { } table[model] = data; } + table["dateTable"] = ftime_to_string(maxDate); return table; } void BestResults::printTableResults(set models, json table) { - cout << Colors::GREEN() << "Best results for " << score << endl; - cout << "------------------------------------------" << endl; + cout << Colors::GREEN() << "Best results for " << score << " as of " << table.at("dateTable").get() << endl; + cout << "------------------------------------------------" << endl; cout << Colors::GREEN() << " # Dataset "; for (const auto& model : models) { cout << setw(12) << left << model << " ";