From 4628e48d3c923dbb1336efc217a76c45219e1b61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Mon, 20 Nov 2023 23:32:34 +0100 Subject: [PATCH 1/9] Build gridsearch structure --- .gitignore | 3 +- Makefile | 10 ++--- sample/sample.cc | 2 +- src/Platform/CMakeLists.txt | 12 +++--- src/Platform/Experiment.h | 16 +------- src/Platform/GridSearch.cc | 32 +++++++++++++++ src/Platform/GridSearch.h | 30 ++++++++++++++ src/Platform/Paths.h | 1 + src/Platform/Timer.h | 34 ++++++++++++++++ src/Platform/b_best.cc | 2 +- src/Platform/b_grid.cc | 80 +++++++++++++++++++++++++++++++++++++ src/Platform/b_main.cc | 6 +-- src/Platform/b_manage.cc | 2 +- 13 files changed, 197 insertions(+), 33 deletions(-) create mode 100644 src/Platform/GridSearch.cc create mode 100644 src/Platform/GridSearch.h create mode 100644 src/Platform/Timer.h create mode 100644 src/Platform/b_grid.cc diff --git a/.gitignore b/.gitignore index 424b902..268bb77 100644 --- a/.gitignore +++ b/.gitignore @@ -32,8 +32,7 @@ *.out *.app build/** -build_debug/** -build_release/** +build_*/** *.dSYM/** cmake-build*/** .idea diff --git a/Makefile b/Makefile index f6650a2..cb82162 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ SHELL := /bin/bash f_release = build_release f_debug = build_debug -app_targets = b_best b_list b_main b_manage +app_targets = b_best b_list b_main b_manage b_grid test_targets = unit_tests_bayesnet unit_tests_platform n_procs = -j 16 @@ -36,10 +36,10 @@ install: ## Copy binary files to bin folder @echo "Destination folder: $(dest)" make buildr @echo ">>> Copying files to $(dest)" - @cp $(f_release)/src/Platform/b_main $(dest) - @cp $(f_release)/src/Platform/b_list $(dest) - @cp $(f_release)/src/Platform/b_manage $(dest) - @cp $(f_release)/src/Platform/b_best $(dest) + for item in $(app_targets); do \ + echo ">>> Copying $$item" ; \ + cp $(f_release)/src/Platform/$$item $(dest) ; \ + done dependency: ## Create a dependency graph diagram of the project (build/dependency.png) @echo ">>> Creating dependency graph diagram of the project..."; diff --git a/sample/sample.cc b/sample/sample.cc index d5f84e9..8024707 100644 --- a/sample/sample.cc +++ b/sample/sample.cc @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index fba1656..8fc33a4 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -8,12 +8,14 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/json/include) include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) include_directories(${Python3_INCLUDE_DIRS}) +add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) +add_executable(b_grid b_grid.cc GridSearch.cc Folding.cc) +add_executable(b_list b_list.cc Datasets.cc Dataset.cc) add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc) add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) -add_executable(b_list b_list.cc Datasets.cc Dataset.cc) -add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) -target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap) -target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp) -target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}") \ No newline at end of file +target_link_libraries(b_grid BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap) +target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}") +target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap) +target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) \ No newline at end of file diff --git a/src/Platform/Experiment.h b/src/Platform/Experiment.h index c00d7ff..b7aeda6 100644 --- a/src/Platform/Experiment.h +++ b/src/Platform/Experiment.h @@ -3,30 +3,16 @@ #include #include #include -#include #include "Folding.h" #include "BaseClassifier.h" #include "HyperParameters.h" #include "TAN.h" #include "KDB.h" #include "AODE.h" +#include "Timer.h" namespace platform { using json = nlohmann::json; - class Timer { - private: - std::chrono::high_resolution_clock::time_point begin; - public: - Timer() = default; - ~Timer() = default; - void start() { begin = std::chrono::high_resolution_clock::now(); } - double getDuration() - { - std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); - std::chrono::duration time_span = std::chrono::duration_cast> (end - begin); - return time_span.count(); - } - }; class Result { private: std::string dataset, model_version; diff --git a/src/Platform/GridSearch.cc b/src/Platform/GridSearch.cc new file mode 100644 index 0000000..1a9d1f2 --- /dev/null +++ b/src/Platform/GridSearch.cc @@ -0,0 +1,32 @@ +#include "GridSearch.h" + +namespace platform { + + GridSearch::GridSearch(struct ConfigGrid& config) : config(config) + { + this->config.input_file = config.path + "grid_" + config.model + "_input.json"; + this->config.output_file = config.path + "grid_" + config.model + "_output.json"; + } + void GridSearch::go() + { + // // Load datasets + // auto datasets = platform::Datasets(config.input_file); + // // Load hyperparameters + // auto hyperparameters = platform::HyperParameters(datasets.getNames(), config.input_file); + // // Check if hyperparameters are valid + // auto valid_hyperparameters = platform::Models::instance()->getHyperparameters(config.model); + // hyperparameters.check(valid_hyperparameters, config.model); + // // Load model + // auto model = platform::Models::instance()->get(config.model); + // // Run gridsearch + // auto grid = platform::Grid(datasets, hyperparameters, model, config.score, config.discretize, config.stratified, config.n_folds, config.seeds); + // grid.run(); + // // Save results + // grid.save(config.output_file); + } + void GridSearch::save() + { + + } + +} /* namespace platform */ \ No newline at end of file diff --git a/src/Platform/GridSearch.h b/src/Platform/GridSearch.h new file mode 100644 index 0000000..9ded996 --- /dev/null +++ b/src/Platform/GridSearch.h @@ -0,0 +1,30 @@ +#ifndef GRIDSEARCH_H +#define GRIDSEARCH_H +#include +#include + +namespace platform { + struct ConfigGrid { + std::string model; + std::string score; + std::string path; + std::string input_file; + std::string output_file; + bool discretize; + bool stratified; + int n_folds; + std::vector seeds; + }; + class GridSearch { + public: + explicit GridSearch(struct ConfigGrid& config); + void go(); + void save(); + ~GridSearch() = default; + private: + struct ConfigGrid config; + + }; + +} /* namespace platform */ +#endif /* GRIDSEARCH_H */ \ No newline at end of file diff --git a/src/Platform/Paths.h b/src/Platform/Paths.h index c642cae..d3c4422 100644 --- a/src/Platform/Paths.h +++ b/src/Platform/Paths.h @@ -9,6 +9,7 @@ namespace platform { static std::string hiddenResults() { return "hidden_results/"; } static std::string excel() { return "excel/"; } static std::string cfs() { return "cfs/"; } + static std::string grid() { return "grid/"; } static std::string datasets() { auto env = platform::DotEnv(); diff --git a/src/Platform/Timer.h b/src/Platform/Timer.h new file mode 100644 index 0000000..87db481 --- /dev/null +++ b/src/Platform/Timer.h @@ -0,0 +1,34 @@ +#ifndef TIMER_H +#define TIMER_H +#include +#include +#include + +namespace platform { + class Timer { + private: + std::chrono::high_resolution_clock::time_point begin; + std::chrono::high_resolution_clock::time_point end; + public: + Timer() = default; + ~Timer() = default; + void start() { begin = std::chrono::high_resolution_clock::now(); } + void stop() { end = std::chrono::high_resolution_clock::now(); } + double getDuration() + { + stop(); + std::chrono::duration time_span = std::chrono::duration_cast> (end - begin); + return time_span.count(); + } + std::string getDurationString() + { + double duration = getDuration(); + double durationShow = duration > 3600 ? duration / 3600 : duration > 60 ? duration / 60 : duration; + std::string durationUnit = duration > 3600 ? "h" : duration > 60 ? "m" : "s"; + std::stringstream ss; + ss << std::setw(7) << std::setprecision(2) << std::fixed << durationShow << " " << durationUnit << " "; + return ss.str(); + } + }; +} /* namespace platform */ +#endif /* TIMER_H */ \ No newline at end of file diff --git a/src/Platform/b_best.cc b/src/Platform/b_best.cc index b559d03..1ed73c7 100644 --- a/src/Platform/b_best.cc +++ b/src/Platform/b_best.cc @@ -7,7 +7,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) { - argparse::ArgumentParser program("best"); + argparse::ArgumentParser program("b_sbest"); program.add_argument("-m", "--model").default_value("").help("Filter results of the selected model) (any for all models)"); program.add_argument("-s", "--score").default_value("").help("Filter results of the score name supplied"); program.add_argument("--build").help("build best score results file").default_value(false).implicit_value(true); diff --git a/src/Platform/b_grid.cc b/src/Platform/b_grid.cc new file mode 100644 index 0000000..0d0851c --- /dev/null +++ b/src/Platform/b_grid.cc @@ -0,0 +1,80 @@ +#include +#include +#include "DotEnv.h" +#include "Models.h" +#include "modelRegister.h" +#include "GridSearch.h" +#include "Paths.h" +#include "Timer.h" + + + +argparse::ArgumentParser manageArguments(std::string program_name) +{ + auto env = platform::DotEnv(); + argparse::ArgumentParser program(program_name); + program.add_argument("-m", "--model") + .help("Model to use " + platform::Models::instance()->tostring()) + .action([](const std::string& value) { + static const std::vector choices = platform::Models::instance()->getNames(); + if (find(choices.begin(), choices.end(), value) != choices.end()) { + return value; + } + throw std::runtime_error("Model must be one of " + platform::Models::instance()->tostring()); + } + ); + program.add_argument("--discretize").help("Discretize input datasets").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); + program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); + program.add_argument("--score").help("Score used in gridsearch").default_value("accuracy"); + program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) { + try { + auto k = stoi(value); + if (k < 2) { + throw std::runtime_error("Number of folds must be greater than 1"); + } + return k; + } + catch (const runtime_error& err) { + throw std::runtime_error(err.what()); + } + catch (...) { + throw std::runtime_error("Number of folds must be an integer"); + }}); + auto seed_values = env.getSeeds(); + program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); + return program; +} + +int main(int argc, char** argv) +{ + auto program = manageArguments("b_grid"); + struct platform::ConfigGrid config; + try { + program.parse_args(argc, argv); + config.model = program.get("model"); + config.score = program.get("score"); + config.discretize = program.get("discretize"); + config.stratified = program.get("stratified"); + config.n_folds = program.get("folds"); + config.seeds = program.get>("seeds"); + } + catch (const exception& err) { + cerr << err.what() << std::endl; + cerr << program; + exit(1); + } + + /* + * Begin Processing + */ + auto env = platform::DotEnv(); + config.path = platform::Paths::grid(); + auto grid_search = platform::GridSearch(config); + platform::Timer timer; + timer.start(); + grid_search.go(); + std::cout << "Process took " << timer.getDurationString() << std::endl; + grid_search.save(); + std::cout << "Done!" << std::endl; + return 0; +} diff --git a/src/Platform/b_main.cc b/src/Platform/b_main.cc index bf2c703..c09f071 100644 --- a/src/Platform/b_main.cc +++ b/src/Platform/b_main.cc @@ -11,10 +11,10 @@ using json = nlohmann::json; -argparse::ArgumentParser manageArguments() +argparse::ArgumentParser manageArguments(std::string program_name) { auto env = platform::DotEnv(); - argparse::ArgumentParser program("main"); + argparse::ArgumentParser program(program_name); program.add_argument("-d", "--dataset").default_value("").help("Dataset file name"); program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment"); program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \ @@ -61,7 +61,7 @@ int main(int argc, char** argv) std::vector seeds; std::vector filesToTest; int n_folds; - auto program = manageArguments(); + auto program = manageArguments("b_main"); try { program.parse_args(argc, argv); file_name = program.get("dataset"); diff --git a/src/Platform/b_manage.cc b/src/Platform/b_manage.cc index d4b6fa1..1067902 100644 --- a/src/Platform/b_manage.cc +++ b/src/Platform/b_manage.cc @@ -5,7 +5,7 @@ argparse::ArgumentParser manageArguments(int argc, char** argv) { - argparse::ArgumentParser program("manage"); + argparse::ArgumentParser program("b_manage"); program.add_argument("-n", "--number").default_value(0).help("Number of results to show (0 = all)").scan<'i', int>(); program.add_argument("-m", "--model").default_value("any").help("Filter results of the selected model)"); program.add_argument("-s", "--score").default_value("any").help("Filter results of the score name supplied"); From 495d8a8528b857904447db0840c6bec7c341f22c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 21 Nov 2023 13:11:14 +0100 Subject: [PATCH 2/9] Begin implementing grid combinations --- Makefile | 4 ++- src/Platform/CMakeLists.txt | 4 +-- src/Platform/GridData.cc | 67 +++++++++++++++++++++++++++++++++++++ src/Platform/GridData.h | 21 ++++++++++++ src/Platform/GridSearch.cc | 18 ++++++---- src/Platform/GridSearch.h | 4 +-- src/Platform/b_grid.cc | 2 -- 7 files changed, 107 insertions(+), 13 deletions(-) create mode 100644 src/Platform/GridData.cc create mode 100644 src/Platform/GridData.h diff --git a/Makefile b/Makefile index cb82162..2d3c9c1 100644 --- a/Makefile +++ b/Makefile @@ -35,8 +35,10 @@ dest ?= ${HOME}/bin install: ## Copy binary files to bin folder @echo "Destination folder: $(dest)" make buildr + @echo "*******************************************" @echo ">>> Copying files to $(dest)" - for item in $(app_targets); do \ + @echo "*******************************************" + @for item in $(app_targets); do \ echo ">>> Copying $$item" ; \ cp $(f_release)/src/Platform/$$item $(dest) ; \ done diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 8fc33a4..6063bdc 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -9,13 +9,13 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) include_directories(${Python3_INCLUDE_DIRS}) add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) -add_executable(b_grid b_grid.cc GridSearch.cc Folding.cc) +add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc Folding.cc Datasets.cc Dataset.cc) add_executable(b_list b_list.cc Datasets.cc Dataset.cc) add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc) add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) target_link_libraries(b_best Boost::boost "${XLSXWRITER_LIB}" "${TORCH_LIBRARIES}" ArffFiles mdlp) -target_link_libraries(b_grid BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap) +target_link_libraries(b_grid BayesNet PyWrap) target_link_libraries(b_list ArffFiles mdlp "${TORCH_LIBRARIES}") target_link_libraries(b_main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}" PyWrap) target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" ArffFiles mdlp) \ No newline at end of file diff --git a/src/Platform/GridData.cc b/src/Platform/GridData.cc new file mode 100644 index 0000000..6514616 --- /dev/null +++ b/src/Platform/GridData.cc @@ -0,0 +1,67 @@ +#include "GridData.h" +#include + +namespace platform { + GridData::GridData() + { + auto boostaode = R"( + [ + { + "convergence": [true, false], + "ascending": [true, false], + "repeatSparent": [true, false], + "select_features": ["CFS", "FCBF"], + "tolerance": [0, 3, 5], + "threshold": [1e-7] + }, + { + "convergence": [true, false], + "ascending": [true, false], + "repeatSparent": [true, false], + "select_features": ["IWSS"], + "tolerance": [0, 3, 5], + "threshold": [0.5] + + } + ] + )"_json; + grid["BoostAODE"] = boostaode; + } + int GridData::computeNumCombinations(const json& line) + { + int numCombinations = 1; + for (const auto& item : line) { + for (const auto& hyperparam : item.items()) { + numCombinations *= item.size(); + } + } + return numCombinations; + } + std::vector GridData::doCombination(const std::string& model) + { + int numTotal = 0; + for (const auto& item : grid[model]) { + numTotal += computeNumCombinations(item); + } + auto result = std::vector(numTotal); + int base = 0; + for (const auto& item : grid[model]) { + int numCombinations = computeNumCombinations(item); + int line = 0; + for (const auto& hyperparam : item.items()) { + int numValues = hyperparam.value().size(); + for (const auto& value : hyperparam.value()) { + for (int i = 0; i < numCombinations / numValues; i++) { + result[base + line++][hyperparam.key()] = value; + //std::cout << "line=" << base + line << " " << hyperparam.key() << "=" << value << std::endl; + } + } + } + base += numCombinations; + } + for (const auto& item : result) { + std::cout << item.dump() << std::endl; + } + return result; + } +} /* namespace platform */ \ No newline at end of file diff --git a/src/Platform/GridData.h b/src/Platform/GridData.h new file mode 100644 index 0000000..de60986 --- /dev/null +++ b/src/Platform/GridData.h @@ -0,0 +1,21 @@ +#ifndef GRIDDATA_H +#define GRIDDATA_H +#include +#include +#include +#include + +namespace platform { + using json = nlohmann::json; + class GridData { + public: + GridData(); + ~GridData() = default; + std::vector getGrid(const std::string& model) { return doCombination(model); } + private: + int computeNumCombinations(const json& line); + std::vector doCombination(const std::string& model); + std::map grid; + }; +} /* namespace platform */ +#endif /* GRIDDATA_H */ \ No newline at end of file diff --git a/src/Platform/GridSearch.cc b/src/Platform/GridSearch.cc index 1a9d1f2..3bd3f67 100644 --- a/src/Platform/GridSearch.cc +++ b/src/Platform/GridSearch.cc @@ -1,19 +1,25 @@ +#include #include "GridSearch.h" +#include "Paths.h" +#include "Datasets.h" +#include "HyperParameters.h" namespace platform { - GridSearch::GridSearch(struct ConfigGrid& config) : config(config) { - this->config.input_file = config.path + "grid_" + config.model + "_input.json"; this->config.output_file = config.path + "grid_" + config.model + "_output.json"; } void GridSearch::go() { - // // Load datasets - // auto datasets = platform::Datasets(config.input_file); - // // Load hyperparameters + // Load datasets + auto datasets = platform::Datasets(config.discretize, Paths::datasets()); + int i = 0; + for (const auto& item : grid.getGrid("BoostAODE")) { + std::cout << i++ << " hyperparams: " << item.dump() << std::endl; + } + // Load hyperparameters // auto hyperparameters = platform::HyperParameters(datasets.getNames(), config.input_file); - // // Check if hyperparameters are valid + // Check if hyperparameters are valid // auto valid_hyperparameters = platform::Models::instance()->getHyperparameters(config.model); // hyperparameters.check(valid_hyperparameters, config.model); // // Load model diff --git a/src/Platform/GridSearch.h b/src/Platform/GridSearch.h index 9ded996..1db303c 100644 --- a/src/Platform/GridSearch.h +++ b/src/Platform/GridSearch.h @@ -2,6 +2,7 @@ #define GRIDSEARCH_H #include #include +#include "GridData.h" namespace platform { struct ConfigGrid { @@ -23,8 +24,7 @@ namespace platform { ~GridSearch() = default; private: struct ConfigGrid config; - + GridData grid; }; - } /* namespace platform */ #endif /* GRIDSEARCH_H */ \ No newline at end of file diff --git a/src/Platform/b_grid.cc b/src/Platform/b_grid.cc index 0d0851c..de905bf 100644 --- a/src/Platform/b_grid.cc +++ b/src/Platform/b_grid.cc @@ -8,7 +8,6 @@ #include "Timer.h" - argparse::ArgumentParser manageArguments(std::string program_name) { auto env = platform::DotEnv(); @@ -63,7 +62,6 @@ int main(int argc, char** argv) cerr << program; exit(1); } - /* * Begin Processing */ From b657762c0c8d1df91fb9b1af101133da441d3e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Wed, 22 Nov 2023 00:18:24 +0100 Subject: [PATCH 3/9] Generate combinations sample --- src/Platform/combinations.cc | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/Platform/combinations.cc diff --git a/src/Platform/combinations.cc b/src/Platform/combinations.cc new file mode 100644 index 0000000..fbaca83 --- /dev/null +++ b/src/Platform/combinations.cc @@ -0,0 +1,57 @@ +#include +#include +#include + +using json = nlohmann::json; + +json generateCombinations(json::iterator index, const json::iterator last, std::vector& output, json currentCombination) +{ + if (index == last) { + // If we reached the end of input, store the current combination + output.push_back(currentCombination); + return currentCombination; + } + const auto& key = index.key(); + const auto& values = index.value(); + for (const auto& value : values) { + auto combination = currentCombination; + combination[key] = value; + json::iterator nextIndex = index; + generateCombinations(++nextIndex, last, output, combination); + } + return currentCombination; +} + +int main() +{ + json input = R"( + [ + { + "convergence": [true, false], + "ascending": [true, false], + "repeatSparent": [true, false], + "select_features": ["CFS", "FCBF"], + "tolerance": [0, 3, 5], + "threshold": [1e-7] + }, + { + "convergence": [true, false], + "ascending": [true, false], + "repeatSparent": [true, false], + "select_features": ["IWSS"], + "tolerance": [0, 3, 5], + "threshold": [0.5] + } + ] + )"_json; + auto output = std::vector(); + for (json line : input) { + generateCombinations(line.begin(), line.end(), output, json({})); + } + // Print the generated combinations + int i = 0; + for (const auto& item : output) { + std::cout << i++ << " " << item.dump() << std::endl; + } + return 0; +} From fb347ed5b96f8f2df15a93179eba360c068930fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Wed, 22 Nov 2023 12:22:30 +0100 Subject: [PATCH 4/9] Begin gridsearch implementation --- src/Platform/CMakeLists.txt | 2 +- src/Platform/Experiment.cc | 2 +- src/Platform/GridData.cc | 54 +++++++++++---------- src/Platform/GridData.h | 5 +- src/Platform/GridSearch.cc | 93 ++++++++++++++++++++++++++++-------- src/Platform/GridSearch.h | 3 ++ src/Platform/combinations.cc | 57 ---------------------- 7 files changed, 110 insertions(+), 106 deletions(-) delete mode 100644 src/Platform/combinations.cc diff --git a/src/Platform/CMakeLists.txt b/src/Platform/CMakeLists.txt index 6063bdc..d35989f 100644 --- a/src/Platform/CMakeLists.txt +++ b/src/Platform/CMakeLists.txt @@ -9,7 +9,7 @@ include_directories(${BayesNet_SOURCE_DIR}/lib/libxlsxwriter/include) include_directories(${Python3_INCLUDE_DIRS}) add_executable(b_best b_best.cc BestResults.cc Result.cc Statistics.cc BestResultsExcel.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) -add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc Folding.cc Datasets.cc Dataset.cc) +add_executable(b_grid b_grid.cc GridSearch.cc GridData.cc HyperParameters.cc Folding.cc Datasets.cc Dataset.cc) add_executable(b_list b_list.cc Datasets.cc Dataset.cc) add_executable(b_main b_main.cc Folding.cc Experiment.cc Datasets.cc Dataset.cc Models.cc HyperParameters.cc ReportConsole.cc ReportBase.cc) add_executable(b_manage b_manage.cc Results.cc ManageResults.cc CommandParser.cc Result.cc ReportConsole.cc ReportExcel.cc ReportBase.cc Datasets.cc Dataset.cc ExcelFile.cc) diff --git a/src/Platform/Experiment.cc b/src/Platform/Experiment.cc index 5e80fc3..1574f73 100644 --- a/src/Platform/Experiment.cc +++ b/src/Platform/Experiment.cc @@ -133,7 +133,7 @@ namespace platform { } void Experiment::cross_validation(const std::string& fileName, bool quiet) { - auto datasets = platform::Datasets(discretized, Paths::datasets()); + auto datasets = Datasets(discretized, Paths::datasets()); // Get dataset auto [X, y] = datasets.getTensors(fileName); auto states = datasets.getStates(fileName); diff --git a/src/Platform/GridData.cc b/src/Platform/GridData.cc index 6514616..5935d73 100644 --- a/src/Platform/GridData.cc +++ b/src/Platform/GridData.cc @@ -30,37 +30,41 @@ namespace platform { int GridData::computeNumCombinations(const json& line) { int numCombinations = 1; - for (const auto& item : line) { - for (const auto& hyperparam : item.items()) { - numCombinations *= item.size(); - } + for (const auto& item : line.items()) { + numCombinations *= item.value().size(); } return numCombinations; } - std::vector GridData::doCombination(const std::string& model) + int GridData::getNumCombinations(const std::string& model) { - int numTotal = 0; - for (const auto& item : grid[model]) { - numTotal += computeNumCombinations(item); + int numCombinations = 0; + for (const auto& line : grid.at(model)) { + numCombinations += computeNumCombinations(line); } - auto result = std::vector(numTotal); - int base = 0; - for (const auto& item : grid[model]) { - int numCombinations = computeNumCombinations(item); - int line = 0; - for (const auto& hyperparam : item.items()) { - int numValues = hyperparam.value().size(); - for (const auto& value : hyperparam.value()) { - for (int i = 0; i < numCombinations / numValues; i++) { - result[base + line++][hyperparam.key()] = value; - //std::cout << "line=" << base + line << " " << hyperparam.key() << "=" << value << std::endl; - } - } - } - base += numCombinations; + return numCombinations; + } + json GridData::generateCombinations(json::iterator index, const json::iterator last, std::vector& output, json currentCombination) + { + if (index == last) { + // If we reached the end of input, store the current combination + output.push_back(currentCombination); + return currentCombination; } - for (const auto& item : result) { - std::cout << item.dump() << std::endl; + const auto& key = index.key(); + const auto& values = index.value(); + for (const auto& value : values) { + auto combination = currentCombination; + combination[key] = value; + json::iterator nextIndex = index; + generateCombinations(++nextIndex, last, output, combination); + } + return currentCombination; + } + std::vector GridData::getGrid(const std::string& model) + { + auto result = std::vector(); + for (json line : grid.at(model)) { + generateCombinations(line.begin(), line.end(), result, json({})); } return result; } diff --git a/src/Platform/GridData.h b/src/Platform/GridData.h index de60986..87ab74c 100644 --- a/src/Platform/GridData.h +++ b/src/Platform/GridData.h @@ -11,10 +11,11 @@ namespace platform { public: GridData(); ~GridData() = default; - std::vector getGrid(const std::string& model) { return doCombination(model); } + std::vector getGrid(const std::string& model); + int getNumCombinations(const std::string& model); private: + json generateCombinations(json::iterator index, const json::iterator last, std::vector& output, json currentCombination); int computeNumCombinations(const json& line); - std::vector doCombination(const std::string& model); std::map grid; }; } /* namespace platform */ diff --git a/src/Platform/GridSearch.cc b/src/Platform/GridSearch.cc index 3bd3f67..e5f072a 100644 --- a/src/Platform/GridSearch.cc +++ b/src/Platform/GridSearch.cc @@ -1,38 +1,91 @@ #include +#include #include "GridSearch.h" +#include "Models.h" #include "Paths.h" -#include "Datasets.h" -#include "HyperParameters.h" +#include "Folding.h" +#include "Colors.h" namespace platform { GridSearch::GridSearch(struct ConfigGrid& config) : config(config) { this->config.output_file = config.path + "grid_" + config.model + "_output.json"; } + void showProgress(int fold, const std::string& color, const std::string& phase) + { + std::string prefix = phase == "a" ? "" : "\b\b\b\b"; + std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush; + } + std::string getColor(bayesnet::status_t status) + { + switch (status) { + case bayesnet::NORMAL: + return Colors::GREEN(); + case bayesnet::WARNING: + return Colors::YELLOW(); + case bayesnet::ERROR: + return Colors::RED(); + default: + return Colors::RESET(); + } + } + void GridSearch::processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters) + { + // Get dataset + auto [X, y] = datasets.getTensors(fileName); + auto states = datasets.getStates(fileName); + auto features = datasets.getFeatures(fileName); + auto samples = datasets.getNSamples(fileName); + auto className = datasets.getClassName(fileName); + std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush; + for (const auto& seed : config.seeds) { + std::cout << "(" << seed << ") doing Fold: " << flush; + Fold* fold; + if (config.stratified) + fold = new StratifiedKFold(config.n_folds, y, seed); + else + fold = new KFold(config.n_folds, y.size(0), seed); + for (int nfold = 0; nfold < config.n_folds; nfold++) { + auto clf = Models::instance()->create(config.model); + auto [train, test] = fold->getFold(nfold); + // auto train_t = torch::tensor(train); + // auto test_t = torch::tensor(test); + // auto X_train = X.index({ "...", train_t }); + // auto y_train = y.index({ train_t }); + // auto X_test = X.index({ "...", test_t }); + // auto y_test = y.index({ test_t }); + showProgress(nfold + 1, getColor(clf->getStatus()), "a"); + // Train model + // clf->fit(X_train, y_train, features, className, states); + showProgress(nfold + 1, getColor(clf->getStatus()), "b"); + } + delete fold; + } + } void GridSearch::go() { // Load datasets - auto datasets = platform::Datasets(config.discretize, Paths::datasets()); - int i = 0; - for (const auto& item : grid.getGrid("BoostAODE")) { - std::cout << i++ << " hyperparams: " << item.dump() << std::endl; + auto datasets = Datasets(config.discretize, Paths::datasets()); + // Create model + std::cout << "***************** Starting Gridsearch *****************" << std::endl; + std::cout << "* Doing " << grid.getNumCombinations(config.model) << " combinations for each dataset/seed/fold" << std::endl; + // Generate hyperparameters grid & run gridsearch + // Check each combination of hyperparameters for each dataset and each seed + for (const auto& dataset : datasets.getNames()) { + std::cout << "- " << setw(20) << left << dataset << " " << right << flush; + for (const auto& hyperparam_line : grid.getGrid(config.model)) { + auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line); + processFile(dataset, datasets, hyperparameters); + } + std::cout << std::endl; } - // Load hyperparameters - // auto hyperparameters = platform::HyperParameters(datasets.getNames(), config.input_file); - // Check if hyperparameters are valid - // auto valid_hyperparameters = platform::Models::instance()->getHyperparameters(config.model); - // hyperparameters.check(valid_hyperparameters, config.model); - // // Load model - // auto model = platform::Models::instance()->get(config.model); - // // Run gridsearch - // auto grid = platform::Grid(datasets, hyperparameters, model, config.score, config.discretize, config.stratified, config.n_folds, config.seeds); - // grid.run(); - // // Save results - // grid.save(config.output_file); + // Save results + save(); } void GridSearch::save() { - + std::ofstream file(config.output_file); + // file << results.dump(4); + file.close(); } - } /* namespace platform */ \ No newline at end of file diff --git a/src/Platform/GridSearch.h b/src/Platform/GridSearch.h index 1db303c..220eccc 100644 --- a/src/Platform/GridSearch.h +++ b/src/Platform/GridSearch.h @@ -2,6 +2,8 @@ #define GRIDSEARCH_H #include #include +#include "Datasets.h" +#include "HyperParameters.h" #include "GridData.h" namespace platform { @@ -23,6 +25,7 @@ namespace platform { void save(); ~GridSearch() = default; private: + void processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters); struct ConfigGrid config; GridData grid; }; diff --git a/src/Platform/combinations.cc b/src/Platform/combinations.cc deleted file mode 100644 index fbaca83..0000000 --- a/src/Platform/combinations.cc +++ /dev/null @@ -1,57 +0,0 @@ -#include -#include -#include - -using json = nlohmann::json; - -json generateCombinations(json::iterator index, const json::iterator last, std::vector& output, json currentCombination) -{ - if (index == last) { - // If we reached the end of input, store the current combination - output.push_back(currentCombination); - return currentCombination; - } - const auto& key = index.key(); - const auto& values = index.value(); - for (const auto& value : values) { - auto combination = currentCombination; - combination[key] = value; - json::iterator nextIndex = index; - generateCombinations(++nextIndex, last, output, combination); - } - return currentCombination; -} - -int main() -{ - json input = R"( - [ - { - "convergence": [true, false], - "ascending": [true, false], - "repeatSparent": [true, false], - "select_features": ["CFS", "FCBF"], - "tolerance": [0, 3, 5], - "threshold": [1e-7] - }, - { - "convergence": [true, false], - "ascending": [true, false], - "repeatSparent": [true, false], - "select_features": ["IWSS"], - "tolerance": [0, 3, 5], - "threshold": [0.5] - } - ] - )"_json; - auto output = std::vector(); - for (json line : input) { - generateCombinations(line.begin(), line.end(), output, json({})); - } - // Print the generated combinations - int i = 0; - for (const auto& item : output) { - std::cout << i++ << " " << item.dump() << std::endl; - } - return 0; -} From c2eb727fc7cda664d2c183cfb61f04a5f3f631e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Wed, 22 Nov 2023 16:30:04 +0100 Subject: [PATCH 5/9] Complete output interface of gridsearch --- src/Platform/GridSearch.cc | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/Platform/GridSearch.cc b/src/Platform/GridSearch.cc index e5f072a..73c9ba3 100644 --- a/src/Platform/GridSearch.cc +++ b/src/Platform/GridSearch.cc @@ -11,7 +11,14 @@ namespace platform { { this->config.output_file = config.path + "grid_" + config.model + "_output.json"; } - void showProgress(int fold, const std::string& color, const std::string& phase) + void showProgressComb(const int num, const int total, const std::string& color) + { + int spaces = int(log(total) / log(10)) + 1; + int magic = 37 + 2 * spaces; + std::string prefix = num == 1 ? "" : string(magic, '\b') + string(magic + 1, ' ') + string(magic + 1, '\b'); + std::cout << prefix << color << "(" << setw(spaces) << num << "/" << setw(spaces) << total << ") " << Colors::RESET() << flush; + } + void showProgressFold(int fold, const std::string& color, const std::string& phase) { std::string prefix = phase == "a" ? "" : "\b\b\b\b"; std::cout << prefix << color << fold << Colors::RESET() << "(" << color << phase << Colors::RESET() << ")" << flush; @@ -37,7 +44,6 @@ namespace platform { auto features = datasets.getFeatures(fileName); auto samples = datasets.getNSamples(fileName); auto className = datasets.getClassName(fileName); - std::cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush; for (const auto& seed : config.seeds) { std::cout << "(" << seed << ") doing Fold: " << flush; Fold* fold; @@ -54,10 +60,13 @@ namespace platform { // auto y_train = y.index({ train_t }); // auto X_test = X.index({ "...", test_t }); // auto y_test = y.index({ test_t }); - showProgress(nfold + 1, getColor(clf->getStatus()), "a"); + showProgressFold(nfold + 1, getColor(clf->getStatus()), "a"); // Train model // clf->fit(X_train, y_train, features, className, states); - showProgress(nfold + 1, getColor(clf->getStatus()), "b"); + showProgressFold(nfold + 1, getColor(clf->getStatus()), "b"); + showProgressFold(nfold + 1, getColor(clf->getStatus()), "c"); + sleep(1); + std::cout << "\b\b\b, " << flush; } delete fold; } @@ -68,12 +77,15 @@ namespace platform { auto datasets = Datasets(config.discretize, Paths::datasets()); // Create model std::cout << "***************** Starting Gridsearch *****************" << std::endl; - std::cout << "* Doing " << grid.getNumCombinations(config.model) << " combinations for each dataset/seed/fold" << std::endl; + auto totalComb = grid.getNumCombinations(config.model); + std::cout << "* Doing " << totalComb << " combinations for each dataset/seed/fold" << std::endl; // Generate hyperparameters grid & run gridsearch - // Check each combination of hyperparameters for each dataset and each seed + // Check each combination of hyperparameters for each dataset and each seed for (const auto& dataset : datasets.getNames()) { std::cout << "- " << setw(20) << left << dataset << " " << right << flush; + int num = 0; for (const auto& hyperparam_line : grid.getGrid(config.model)) { + showProgressComb(++num, totalComb, Colors::CYAN()); auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line); processFile(dataset, datasets, hyperparameters); } From bbe5302ab1aceb74f063eb8a9b4c8c3310d3f674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Wed, 22 Nov 2023 16:38:50 +0100 Subject: [PATCH 6/9] Add info to output --- src/Platform/GridSearch.cc | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Platform/GridSearch.cc b/src/Platform/GridSearch.cc index 73c9ba3..1241ea8 100644 --- a/src/Platform/GridSearch.cc +++ b/src/Platform/GridSearch.cc @@ -54,19 +54,18 @@ namespace platform { for (int nfold = 0; nfold < config.n_folds; nfold++) { auto clf = Models::instance()->create(config.model); auto [train, test] = fold->getFold(nfold); - // auto train_t = torch::tensor(train); - // auto test_t = torch::tensor(test); - // auto X_train = X.index({ "...", train_t }); - // auto y_train = y.index({ train_t }); - // auto X_test = X.index({ "...", test_t }); - // auto y_test = y.index({ test_t }); + auto train_t = torch::tensor(train); + auto test_t = torch::tensor(test); + auto X_train = X.index({ "...", train_t }); + auto y_train = y.index({ train_t }); + auto X_test = X.index({ "...", test_t }); + auto y_test = y.index({ test_t }); showProgressFold(nfold + 1, getColor(clf->getStatus()), "a"); // Train model // clf->fit(X_train, y_train, features, className, states); showProgressFold(nfold + 1, getColor(clf->getStatus()), "b"); showProgressFold(nfold + 1, getColor(clf->getStatus()), "c"); - sleep(1); - std::cout << "\b\b\b, " << flush; + std::cout << "\b\b\b, \b" << flush; } delete fold; } @@ -89,7 +88,7 @@ namespace platform { auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line); processFile(dataset, datasets, hyperparameters); } - std::cout << std::endl; + std::cout << "end." << std::endl; } // Save results save(); From 8b7b59d42b0576e7169cc67dbfbb13c9979fdcd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Thu, 23 Nov 2023 12:59:21 +0100 Subject: [PATCH 7/9] Complete first step --- src/Platform/GridSearch.cc | 28 ++++++++++++++++++++++------ src/Platform/GridSearch.h | 5 ++++- src/Platform/Paths.h | 12 +++++++++++- src/Platform/b_grid.cc | 1 + 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/Platform/GridSearch.cc b/src/Platform/GridSearch.cc index 1241ea8..239cf50 100644 --- a/src/Platform/GridSearch.cc +++ b/src/Platform/GridSearch.cc @@ -36,7 +36,7 @@ namespace platform { return Colors::RESET(); } } - void GridSearch::processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters) + double GridSearch::processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters) { // Get dataset auto [X, y] = datasets.getTensors(fileName); @@ -44,6 +44,8 @@ namespace platform { auto features = datasets.getFeatures(fileName); auto samples = datasets.getNSamples(fileName); auto className = datasets.getClassName(fileName); + double totalScore = 0.0; + int numItems = 0; for (const auto& seed : config.seeds) { std::cout << "(" << seed << ") doing Fold: " << flush; Fold* fold; @@ -51,8 +53,10 @@ namespace platform { fold = new StratifiedKFold(config.n_folds, y, seed); else fold = new KFold(config.n_folds, y.size(0), seed); + double bestScore = 0.0; for (int nfold = 0; nfold < config.n_folds; nfold++) { auto clf = Models::instance()->create(config.model); + clf->setHyperparameters(hyperparameters.get(fileName)); auto [train, test] = fold->getFold(nfold); auto train_t = torch::tensor(train); auto test_t = torch::tensor(test); @@ -60,15 +64,18 @@ namespace platform { auto y_train = y.index({ train_t }); auto X_test = X.index({ "...", test_t }); auto y_test = y.index({ test_t }); - showProgressFold(nfold + 1, getColor(clf->getStatus()), "a"); // Train model - // clf->fit(X_train, y_train, features, className, states); + clf->fit(X_train, y_train, features, className, states); + showProgressFold(nfold + 1, getColor(clf->getStatus()), "a"); showProgressFold(nfold + 1, getColor(clf->getStatus()), "b"); + totalScore += clf->score(X_test, y_test); + numItems++; showProgressFold(nfold + 1, getColor(clf->getStatus()), "c"); std::cout << "\b\b\b, \b" << flush; } delete fold; } + return numItems == 0 ? 0.0 : totalScore / numItems; } void GridSearch::go() { @@ -83,12 +90,21 @@ namespace platform { for (const auto& dataset : datasets.getNames()) { std::cout << "- " << setw(20) << left << dataset << " " << right << flush; int num = 0; + double bestScore = 0.0; + json bestHyperparameters; for (const auto& hyperparam_line : grid.getGrid(config.model)) { showProgressComb(++num, totalComb, Colors::CYAN()); auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line); - processFile(dataset, datasets, hyperparameters); + double score = processFile(dataset, datasets, hyperparameters); + if (score > bestScore) { + bestScore = score; + bestHyperparameters = hyperparam_line; + } } - std::cout << "end." << std::endl; + std::cout << "end." << " Score: " << setw(9) << setprecision(7) << fixed + << bestScore << " [" << bestHyperparameters.dump() << "]" << std::endl; + results[dataset]["score"] = bestScore; + results[dataset]["hyperparameters"] = bestHyperparameters; } // Save results save(); @@ -96,7 +112,7 @@ namespace platform { void GridSearch::save() { std::ofstream file(config.output_file); - // file << results.dump(4); + file << results.dump(4); file.close(); } } /* namespace platform */ \ No newline at end of file diff --git a/src/Platform/GridSearch.h b/src/Platform/GridSearch.h index 220eccc..6bf9f1a 100644 --- a/src/Platform/GridSearch.h +++ b/src/Platform/GridSearch.h @@ -2,11 +2,13 @@ #define GRIDSEARCH_H #include #include +#include #include "Datasets.h" #include "HyperParameters.h" #include "GridData.h" namespace platform { + using json = nlohmann::json; struct ConfigGrid { std::string model; std::string score; @@ -25,7 +27,8 @@ namespace platform { void save(); ~GridSearch() = default; private: - void processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters); + double processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters); + json results; struct ConfigGrid config; GridData grid; }; diff --git a/src/Platform/Paths.h b/src/Platform/Paths.h index d3c4422..3f5d135 100644 --- a/src/Platform/Paths.h +++ b/src/Platform/Paths.h @@ -1,6 +1,7 @@ #ifndef PATHS_H #define PATHS_H #include +#include #include "DotEnv.h" namespace platform { class Paths { @@ -8,13 +9,22 @@ namespace platform { static std::string results() { return "results/"; } static std::string hiddenResults() { return "hidden_results/"; } static std::string excel() { return "excel/"; } - static std::string cfs() { return "cfs/"; } static std::string grid() { return "grid/"; } static std::string datasets() { auto env = platform::DotEnv(); return env.get("source_data"); } + static void createPath(const std::string& path) + { + // Create directory if it does not exist + try { + std::filesystem::create_directory(path); + } + catch (std::exception& e) { + throw std::runtime_error("Could not create directory " + path); + } + } static std::string excelResults() { return "some_results.xlsx"; } }; } diff --git a/src/Platform/b_grid.cc b/src/Platform/b_grid.cc index de905bf..b66050d 100644 --- a/src/Platform/b_grid.cc +++ b/src/Platform/b_grid.cc @@ -66,6 +66,7 @@ int main(int argc, char** argv) * Begin Processing */ auto env = platform::DotEnv(); + platform::Paths::createPath(platform::Paths::grid()); config.path = platform::Paths::grid(); auto grid_search = platform::GridSearch(config); platform::Timer timer; From 2121ba9b986749381057a7cfb76e52f7db0d6b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Fri, 24 Nov 2023 09:57:29 +0100 Subject: [PATCH 8/9] Refactor input grid parameters to json file --- src/Platform/GridData.cc | 40 ++++++++++++-------------------------- src/Platform/GridData.h | 8 ++++---- src/Platform/GridSearch.cc | 7 +++++-- src/Platform/GridSearch.h | 1 - 4 files changed, 21 insertions(+), 35 deletions(-) diff --git a/src/Platform/GridData.cc b/src/Platform/GridData.cc index 5935d73..0150ff3 100644 --- a/src/Platform/GridData.cc +++ b/src/Platform/GridData.cc @@ -1,31 +1,15 @@ #include "GridData.h" -#include +#include namespace platform { - GridData::GridData() + GridData::GridData(const std::string& fileName) { - auto boostaode = R"( - [ - { - "convergence": [true, false], - "ascending": [true, false], - "repeatSparent": [true, false], - "select_features": ["CFS", "FCBF"], - "tolerance": [0, 3, 5], - "threshold": [1e-7] - }, - { - "convergence": [true, false], - "ascending": [true, false], - "repeatSparent": [true, false], - "select_features": ["IWSS"], - "tolerance": [0, 3, 5], - "threshold": [0.5] - - } - ] - )"_json; - grid["BoostAODE"] = boostaode; + std::ifstream resultData(fileName); + if (resultData.is_open()) { + grid = json::parse(resultData); + } else { + throw std::invalid_argument("Unable to open input file. [" + fileName + "]"); + } } int GridData::computeNumCombinations(const json& line) { @@ -35,10 +19,10 @@ namespace platform { } return numCombinations; } - int GridData::getNumCombinations(const std::string& model) + int GridData::getNumCombinations() { int numCombinations = 0; - for (const auto& line : grid.at(model)) { + for (const auto& line : grid) { numCombinations += computeNumCombinations(line); } return numCombinations; @@ -60,10 +44,10 @@ namespace platform { } return currentCombination; } - std::vector GridData::getGrid(const std::string& model) + std::vector GridData::getGrid() { auto result = std::vector(); - for (json line : grid.at(model)) { + for (json line : grid) { generateCombinations(line.begin(), line.end(), result, json({})); } return result; diff --git a/src/Platform/GridData.h b/src/Platform/GridData.h index 87ab74c..b68a54a 100644 --- a/src/Platform/GridData.h +++ b/src/Platform/GridData.h @@ -9,14 +9,14 @@ namespace platform { using json = nlohmann::json; class GridData { public: - GridData(); + explicit GridData(const std::string& fileName); ~GridData() = default; - std::vector getGrid(const std::string& model); - int getNumCombinations(const std::string& model); + std::vector getGrid(); + int getNumCombinations(); private: json generateCombinations(json::iterator index, const json::iterator last, std::vector& output, json currentCombination); int computeNumCombinations(const json& line); - std::map grid; + json grid; }; } /* namespace platform */ #endif /* GRIDDATA_H */ \ No newline at end of file diff --git a/src/Platform/GridSearch.cc b/src/Platform/GridSearch.cc index 239cf50..d0b84ed 100644 --- a/src/Platform/GridSearch.cc +++ b/src/Platform/GridSearch.cc @@ -10,6 +10,7 @@ namespace platform { GridSearch::GridSearch(struct ConfigGrid& config) : config(config) { this->config.output_file = config.path + "grid_" + config.model + "_output.json"; + this->config.input_file = config.path + "grid_" + config.model + "_input.json"; } void showProgressComb(const int num, const int total, const std::string& color) { @@ -83,7 +84,9 @@ namespace platform { auto datasets = Datasets(config.discretize, Paths::datasets()); // Create model std::cout << "***************** Starting Gridsearch *****************" << std::endl; - auto totalComb = grid.getNumCombinations(config.model); + std::cout << "input file=" << config.input_file << std::endl; + auto grid = GridData(config.input_file); + auto totalComb = grid.getNumCombinations(); std::cout << "* Doing " << totalComb << " combinations for each dataset/seed/fold" << std::endl; // Generate hyperparameters grid & run gridsearch // Check each combination of hyperparameters for each dataset and each seed @@ -92,7 +95,7 @@ namespace platform { int num = 0; double bestScore = 0.0; json bestHyperparameters; - for (const auto& hyperparam_line : grid.getGrid(config.model)) { + for (const auto& hyperparam_line : grid.getGrid()) { showProgressComb(++num, totalComb, Colors::CYAN()); auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line); double score = processFile(dataset, datasets, hyperparameters); diff --git a/src/Platform/GridSearch.h b/src/Platform/GridSearch.h index 6bf9f1a..81f06b5 100644 --- a/src/Platform/GridSearch.h +++ b/src/Platform/GridSearch.h @@ -30,7 +30,6 @@ namespace platform { double processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters); json results; struct ConfigGrid config; - GridData grid; }; } /* namespace platform */ #endif /* GRIDSEARCH_H */ \ No newline at end of file From f94e2d6a27e54208fa3ba449b2b52e2f2b4e7933 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Fri, 24 Nov 2023 21:16:20 +0100 Subject: [PATCH 9/9] Add quiet parameter --- src/Platform/GridSearch.cc | 29 +++++++++++++++++++---------- src/Platform/GridSearch.h | 3 ++- src/Platform/b_grid.cc | 2 ++ 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/Platform/GridSearch.cc b/src/Platform/GridSearch.cc index d0b84ed..fd2cbbe 100644 --- a/src/Platform/GridSearch.cc +++ b/src/Platform/GridSearch.cc @@ -48,7 +48,8 @@ namespace platform { double totalScore = 0.0; int numItems = 0; for (const auto& seed : config.seeds) { - std::cout << "(" << seed << ") doing Fold: " << flush; + if (!config.quiet) + std::cout << "(" << seed << ") doing Fold: " << flush; Fold* fold; if (config.stratified) fold = new StratifiedKFold(config.n_folds, y, seed); @@ -66,13 +67,16 @@ namespace platform { auto X_test = X.index({ "...", test_t }); auto y_test = y.index({ test_t }); // Train model + if (!config.quiet) + showProgressFold(nfold + 1, getColor(clf->getStatus()), "a"); clf->fit(X_train, y_train, features, className, states); - showProgressFold(nfold + 1, getColor(clf->getStatus()), "a"); - showProgressFold(nfold + 1, getColor(clf->getStatus()), "b"); + // Test model + if (!config.quiet) + showProgressFold(nfold + 1, getColor(clf->getStatus()), "b"); totalScore += clf->score(X_test, y_test); numItems++; - showProgressFold(nfold + 1, getColor(clf->getStatus()), "c"); - std::cout << "\b\b\b, \b" << flush; + if (!config.quiet) + std::cout << "\b\b\b, \b" << flush; } delete fold; } @@ -91,12 +95,14 @@ namespace platform { // Generate hyperparameters grid & run gridsearch // Check each combination of hyperparameters for each dataset and each seed for (const auto& dataset : datasets.getNames()) { - std::cout << "- " << setw(20) << left << dataset << " " << right << flush; + if (!config.quiet) + std::cout << "- " << setw(20) << left << dataset << " " << right << flush; int num = 0; double bestScore = 0.0; json bestHyperparameters; for (const auto& hyperparam_line : grid.getGrid()) { - showProgressComb(++num, totalComb, Colors::CYAN()); + if (!config.quiet) + showProgressComb(++num, totalComb, Colors::CYAN()); auto hyperparameters = platform::HyperParameters(datasets.getNames(), hyperparam_line); double score = processFile(dataset, datasets, hyperparameters); if (score > bestScore) { @@ -104,15 +110,18 @@ namespace platform { bestHyperparameters = hyperparam_line; } } - std::cout << "end." << " Score: " << setw(9) << setprecision(7) << fixed - << bestScore << " [" << bestHyperparameters.dump() << "]" << std::endl; + if (!config.quiet) { + std::cout << "end." << " Score: " << setw(9) << setprecision(7) << fixed + << bestScore << " [" << bestHyperparameters.dump() << "]" << std::endl; + } results[dataset]["score"] = bestScore; results[dataset]["hyperparameters"] = bestHyperparameters; } // Save results save(); + std::cout << "***************** Ending Gridsearch *******************" << std::endl; } - void GridSearch::save() + void GridSearch::save() const { std::ofstream file(config.output_file); file << results.dump(4); diff --git a/src/Platform/GridSearch.h b/src/Platform/GridSearch.h index 81f06b5..c5528d0 100644 --- a/src/Platform/GridSearch.h +++ b/src/Platform/GridSearch.h @@ -15,6 +15,7 @@ namespace platform { std::string path; std::string input_file; std::string output_file; + bool quiet; bool discretize; bool stratified; int n_folds; @@ -24,7 +25,7 @@ namespace platform { public: explicit GridSearch(struct ConfigGrid& config); void go(); - void save(); + void save() const; ~GridSearch() = default; private: double processFile(std::string fileName, Datasets& datasets, HyperParameters& hyperparameters); diff --git a/src/Platform/b_grid.cc b/src/Platform/b_grid.cc index b66050d..8887892 100644 --- a/src/Platform/b_grid.cc +++ b/src/Platform/b_grid.cc @@ -23,6 +23,7 @@ argparse::ArgumentParser manageArguments(std::string program_name) } ); program.add_argument("--discretize").help("Discretize input datasets").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); + program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); program.add_argument("--score").help("Score used in gridsearch").default_value("accuracy"); program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) { @@ -55,6 +56,7 @@ int main(int argc, char** argv) config.discretize = program.get("discretize"); config.stratified = program.get("stratified"); config.n_folds = program.get("folds"); + config.quiet = program.get("quiet"); config.seeds = program.get>("seeds"); } catch (const exception& err) {