diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f9908eb..5c8a536 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,7 +33,7 @@ set(grid_sources GridSearch.cpp GridData.cpp GridExperiment.cpp GridBase.cpp ) list(TRANSFORM grid_sources PREPEND grid/) add_executable(b_grid commands/b_grid.cpp ${grid_sources} common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp - main/HyperParameters.cpp main/Models.cpp main/Experiment.cpp main/Scores.cpp + main/HyperParameters.cpp main/Models.cpp main/Experiment.cpp main/Scores.cpp main/ArgumentsExperiment.cpp reports/ReportConsole.cpp reports/ReportBase.cpp results/Result.cpp ) @@ -49,7 +49,7 @@ add_executable(b_list commands/b_list.cpp target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}") # b_main -set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp) +set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp ArgumentsExperiment.cpp) list(TRANSFORM main_sources PREPEND main/) add_executable(b_main commands/b_main.cpp ${main_sources} common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp diff --git a/src/commands/b_grid.cpp b/src/commands/b_grid.cpp index 7653319..29f89eb 100644 --- a/src/commands/b_grid.cpp +++ b/src/commands/b_grid.cpp @@ -6,6 +6,7 @@ #include #include "main/Models.h" #include "main/modelRegister.h" +#include "main/ArgumentsExperiment.h" #include "common/Paths.h" #include "common/Timer.h" #include "common/Colors.h" @@ -32,76 +33,7 @@ void assignModel(argparse::ArgumentParser& parser) } ); } -void add_experiment_args(argparse::ArgumentParser& program) -{ - auto env = platform::DotEnv(); - auto datasets = platform::Datasets(false, platform::Paths::datasets()); - auto& group = program.add_mutually_exclusive_group(true); - group.add_argument("-d", "--dataset") - .help("Dataset file name: " + datasets.toString()) - .default_value("all") - .action([](const std::string& value) { - auto datasets = platform::Datasets(false, platform::Paths::datasets()); - static std::vector choices_datasets(datasets.getNames()); - choices_datasets.push_back("all"); - if (find(choices_datasets.begin(), choices_datasets.end(), value) != choices_datasets.end()) { - return value; - } - throw std::runtime_error("Dataset must be one of: " + datasets.toString()); - } - ); - group.add_argument("--datasets").nargs(1, 50).help("Datasets file names 1..50 separated by spaces").default_value(std::vector()); - group.add_argument("--datasets-file").default_value("").help("Datasets file name. Mutually exclusive with dataset. This file should contain a list of datasets to test."); - program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment"); - program.add_argument("--save").help("Save result (always save even if a dataset is supplied)").default_value(false).implicit_value(true); - program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \ - "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format."); - program.add_argument("--hyper-best").default_value(false).help("Use best results of the model as source of hyperparameters").implicit_value(true); - program.add_argument("-m", "--model") - .help("Model to use: " + platform::Models::instance()->toString()) - .action([](const std::string& value) { - static const std::vector choices = platform::Models::instance()->getNames(); - if (find(choices.begin(), choices.end(), value) != choices.end()) { - return value; - } - throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString()); - } - ); - program.add_argument("--title").default_value("").help("Experiment title"); - program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); - auto valid_choices = env.valid_tokens("discretize_algo"); - auto& disc_arg = program.add_argument("--discretize-algo").help("Algorithm to use in discretization. Valid values: " + env.valid_values("discretize_algo")).default_value(env.get("discretize_algo")); - for (auto choice : valid_choices) { - disc_arg.choices(choice); - } - valid_choices = env.valid_tokens("smooth_strat"); - auto& smooth_arg = program.add_argument("--smooth-strat").help("Smooth strategy used in Bayes Network node initialization. Valid values: " + env.valid_values("smooth_strat")).default_value(env.get("smooth_strat")); - for (auto choice : valid_choices) { - smooth_arg.choices(choice); - } - auto& score_arg = program.add_argument("-s", "--score").help("Score to use. Valid values: " + env.valid_values("score")).default_value(env.get("score")); - valid_choices = env.valid_tokens("score"); - for (auto choice : valid_choices) { - score_arg.choices(choice); - } - program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); - program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) { - try { - auto k = stoi(value); - if (k < 2) { - throw std::runtime_error("Number of folds must be greater than 1"); - } - return k; - } - catch (const runtime_error& err) { - throw std::runtime_error(err.what()); - } - catch (...) { - throw std::runtime_error("Number of folds must be an integer"); - }}); - auto seed_values = env.getSeeds(); - program.add_argument("--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); -} + void add_search_args(argparse::ArgumentParser& program) { auto env = platform::DotEnv(); @@ -276,9 +208,6 @@ void search(argparse::ArgumentParser& program) } auto excluded = program.get("exclude"); config.excluded = json::parse(excluded); - - auto env = platform::DotEnv(); - config.platform = env.get("platform"); platform::Paths::createPath(platform::Paths::grid()); auto grid_search = platform::GridSearch(config); platform::Timer timer; @@ -303,10 +232,9 @@ void search(argparse::ArgumentParser& program) void experiment(argparse::ArgumentParser& program) { struct platform::ConfigGrid config; - - auto env = platform::DotEnv(); - config.platform = env.get("platform"); - auto grid_experiment = platform::GridExperiment(program, config); + auto arguments = platform::ArgumentsExperiment(program, platform::experiment_t::GRID); + arguments.parse(); + auto grid_experiment = platform::GridExperiment(arguments, config); platform::Timer timer; timer.start(); struct platform::ConfigMPI mpi_config; @@ -326,7 +254,7 @@ void experiment(argparse::ArgumentParser& program) if (grid_experiment.haveToSaveResults()) { experiment.saveResult(); } - experiment.report(grid_experiment.numFiles() == 1); + experiment.report(); std::cout << "Process took " << duration << std::endl; } MPI_Finalize(); @@ -356,9 +284,7 @@ int main(int argc, char** argv) // grid experiment subparser argparse::ArgumentParser experiment_command("experiment"); experiment_command.add_description("Experiment like b_main using mpi."); - assignModel(experiment_command); - add_experiment_args(experiment_command); - + auto arguments = platform::ArgumentsExperiment(experiment_command, platform::experiment_t::GRID); program.add_subparser(dump_command); program.add_subparser(report_command); program.add_subparser(search_command); diff --git a/src/commands/b_main.cpp b/src/commands/b_main.cpp index 4fc38f2..98dca58 100644 --- a/src/commands/b_main.cpp +++ b/src/commands/b_main.cpp @@ -1,234 +1,35 @@ -#include #include -#include #include "main/Experiment.h" -#include "common/Datasets.h" -#include "common/DotEnv.h" -#include "common/Paths.h" -#include "main/Models.h" -#include "main/modelRegister.h" +#include "main/ArgumentsExperiment.h" #include "config_platform.h" using json = nlohmann::ordered_json; -void manageArguments(argparse::ArgumentParser& program) -{ - auto env = platform::DotEnv(); - auto datasets = platform::Datasets(false, platform::Paths::datasets()); - auto& group = program.add_mutually_exclusive_group(true); - group.add_argument("-d", "--dataset") - .help("Dataset file name: " + datasets.toString()) - .default_value("all") - .action([](const std::string& value) { - auto datasets = platform::Datasets(false, platform::Paths::datasets()); - static std::vector choices_datasets(datasets.getNames()); - choices_datasets.push_back("all"); - if (find(choices_datasets.begin(), choices_datasets.end(), value) != choices_datasets.end()) { - return value; - } - throw std::runtime_error("Dataset must be one of: " + datasets.toString()); - } - ); - group.add_argument("--datasets").nargs(1, 50).help("Datasets file names 1..50 separated by spaces").default_value(std::vector()); - group.add_argument("--datasets-file").default_value("").help("Datasets file name. Mutually exclusive with dataset. This file should contain a list of datasets to test."); - program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment"); - program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \ - "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format."); - program.add_argument("--hyper-best").default_value(false).help("Use best results of the model as source of hyperparameters").implicit_value(true); - program.add_argument("-m", "--model") - .help("Model to use: " + platform::Models::instance()->toString()) - .action([](const std::string& value) { - static const std::vector choices = platform::Models::instance()->getNames(); - if (find(choices.begin(), choices.end(), value) != choices.end()) { - return value; - } - throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString()); - } - ); - program.add_argument("--title").default_value("").help("Experiment title"); - program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); - auto valid_choices = env.valid_tokens("discretize_algo"); - auto& disc_arg = program.add_argument("--discretize-algo").help("Algorithm to use in discretization. Valid values: " + env.valid_values("discretize_algo")).default_value(env.get("discretize_algo")); - for (auto choice : valid_choices) { - disc_arg.choices(choice); - } - valid_choices = env.valid_tokens("smooth_strat"); - auto& smooth_arg = program.add_argument("--smooth-strat").help("Smooth strategy used in Bayes Network node initialization. Valid values: " + env.valid_values("smooth_strat")).default_value(env.get("smooth_strat")); - for (auto choice : valid_choices) { - smooth_arg.choices(choice); - } - auto& score_arg = program.add_argument("-s", "--score").help("Score to use. Valid values: " + env.valid_values("score")).default_value(env.get("score")); - valid_choices = env.valid_tokens("score"); - for (auto choice : valid_choices) { - score_arg.choices(choice); - } - program.add_argument("--generate-fold-files").help("generate fold information in datasets_experiment folder").default_value(false).implicit_value(true); - program.add_argument("--graph").help("generate graphviz dot files with the model").default_value(false).implicit_value(true); - program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true); - program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true); - program.add_argument("--save").help("Save result (always save even if a dataset is supplied)").default_value(false).implicit_value(true); - program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); - program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) { - try { - auto k = stoi(value); - if (k < 2) { - throw std::runtime_error("Number of folds must be greater than 1"); - } - return k; - } - catch (const runtime_error& err) { - throw std::runtime_error(err.what()); - } - catch (...) { - throw std::runtime_error("Number of folds must be an integer"); - }}); - auto seed_values = env.getSeeds(); - program.add_argument("--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); -} - int main(int argc, char** argv) { argparse::ArgumentParser program("b_main", { platform_project_version.begin(), platform_project_version.end() }); - manageArguments(program); - std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat, score; - json hyperparameters_json; - bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files, graph, hyper_best; - std::vector seeds; - std::vector file_names; - std::vector filesToTest; - int n_folds; - try { - program.parse_args(argc, argv); - file_name = program.get("dataset"); - file_names = program.get>("datasets"); - datasets_file = program.get("datasets-file"); - model_name = program.get("model"); - discretize_dataset = program.get("discretize"); - discretize_algo = program.get("discretize-algo"); - smooth_strat = program.get("smooth-strat"); - stratified = program.get("stratified"); - quiet = program.get("quiet"); - graph = program.get("graph"); - n_folds = program.get("folds"); - score = program.get("score"); - seeds = program.get>("seeds"); - auto hyperparameters = program.get("hyperparameters"); - hyperparameters_json = json::parse(hyperparameters); - hyperparameters_file = program.get("hyper-file"); - no_train_score = program.get("no-train-score"); - hyper_best = program.get("hyper-best"); - generate_fold_files = program.get("generate-fold-files"); - if (hyper_best) { - // Build the best results file_name - hyperparameters_file = platform::Paths::results() + platform::Paths::bestResultsFile(score, model_name); - // ignore this parameter - hyperparameters = "{}"; - } else { - if (hyperparameters_file != "" && hyperparameters != "{}") { - throw runtime_error("hyperparameters and hyper_file are mutually exclusive"); - } - } - title = program.get("title"); - if (title == "" && file_name == "all") { - throw runtime_error("title is mandatory if all datasets are to be tested"); - } - saveResults = program.get("save"); - } - catch (const exception& err) { - cerr << err.what() << std::endl; - cerr << program; - exit(1); - } - auto datasets = platform::Datasets(false, platform::Paths::datasets()); - if (datasets_file != "") { - ifstream catalog(datasets_file); - if (catalog.is_open()) { - std::string line; - while (getline(catalog, line)) { - if (line.empty() || line[0] == '#') { - continue; - } - if (!datasets.isDataset(line)) { - cerr << "Dataset " << line << " not found" << std::endl; - exit(1); - } - filesToTest.push_back(line); - } - catalog.close(); - saveResults = true; - if (title == "") { - title = "Test " + to_string(filesToTest.size()) + " datasets (" + datasets_file + ") "\ - + model_name + " " + to_string(n_folds) + " folds"; - } - } else { - throw std::invalid_argument("Unable to open catalog file. [" + datasets_file + "]"); - } - } else { - if (file_names.size() > 0) { - for (auto file : file_names) { - if (!datasets.isDataset(file)) { - cerr << "Dataset " << file << " not found" << std::endl; - exit(1); - } - } - filesToTest = file_names; - saveResults = true; - if (title == "") { - title = "Test " + to_string(file_names.size()) + " datasets " + model_name + " " + to_string(n_folds) + " folds"; - } - } else { - if (file_name != "all") { - if (!datasets.isDataset(file_name)) { - cerr << "Dataset " << file_name << " not found" << std::endl; - exit(1); - } - if (title == "") { - title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds"; - } - filesToTest.push_back(file_name); - } else { - filesToTest = datasets.getNames(); - saveResults = true; - } - } - } - - platform::HyperParameters test_hyperparams; - if (hyperparameters_file != "") { - test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file, hyper_best); - } else { - test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json); - } - + auto arguments = platform::ArgumentsExperiment(program, platform::experiment_t::NORMAL); + arguments.parse_args(argc, argv); /* * Begin Processing */ - auto env = platform::DotEnv(); - auto experiment = platform::Experiment(); - experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1"); - experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat); - experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); - experiment.setStratified(stratified).setNFolds(n_folds).setScoreName(score); - experiment.setHyperparameters(test_hyperparams); - for (auto seed : seeds) { - experiment.addRandomSeed(seed); - } + // Initialize the experiment class with the command line arguments + auto experiment = arguments.initializedExperiment(); platform::Timer timer; timer.start(); - experiment.go(filesToTest, quiet, no_train_score, generate_fold_files, graph); + experiment.go(); experiment.setDuration(timer.getDuration()); - if (!quiet) { + if (!arguments.isQuiet()) { // Classification report if only one dataset is tested - experiment.report(filesToTest.size() == 1); + experiment.report(); } - if (saveResults) { + if (arguments.haveToSaveResults()) { experiment.saveResult(); } - if (graph) { + if (arguments.doGraph()) { experiment.saveGraph(); } - std::cout << "Done!" << std::endl; return 0; } diff --git a/src/grid/GridBase.cpp b/src/grid/GridBase.cpp index 70ef323..3626db7 100644 --- a/src/grid/GridBase.cpp +++ b/src/grid/GridBase.cpp @@ -2,6 +2,7 @@ #include #include "common/DotEnv.h" #include "common/Paths.h" +#include "common/DotEnv.h" #include "GridBase.h" namespace platform { @@ -9,6 +10,8 @@ namespace platform { GridBase::GridBase(struct ConfigGrid& config) { this->config = config; + auto env = platform::DotEnv(); + this->config.platform = env.get("platform"); } void GridBase::validate_config() diff --git a/src/grid/GridExperiment.cpp b/src/grid/GridExperiment.cpp index 67ce91b..62e9699 100644 --- a/src/grid/GridExperiment.cpp +++ b/src/grid/GridExperiment.cpp @@ -8,120 +8,18 @@ #include "GridExperiment.h" namespace platform { - GridExperiment::GridExperiment(argparse::ArgumentParser& program, struct ConfigGrid& config) : arguments(program), GridBase(config) + // GridExperiment::GridExperiment(argparse::ArgumentParser& program, struct ConfigGrid& config) : arguments(program), GridBase(config) + GridExperiment::GridExperiment(ArgumentsExperiment& program, struct ConfigGrid& config) : arguments(program), GridBase(config) { - std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat, score; - json hyperparameters_json; - bool discretize_dataset, stratified, hyper_best; - std::vector seeds; - std::vector file_names; - int n_folds; - file_name = program.get("dataset"); - file_names = program.get>("datasets"); - datasets_file = program.get("datasets-file"); - model_name = program.get("model"); - discretize_dataset = program.get("discretize"); - saveResults = program.get("save"); - discretize_algo = program.get("discretize-algo"); - smooth_strat = program.get("smooth-strat"); - stratified = program.get("stratified"); - n_folds = program.get("folds"); - score = program.get("score"); - seeds = program.get>("seeds"); - auto hyperparameters = program.get("hyperparameters"); - hyperparameters_json = json::parse(hyperparameters); - hyperparameters_file = program.get("hyper-file"); - hyper_best = program.get("hyper-best"); - if (hyper_best) { - // Build the best results file_name - hyperparameters_file = platform::Paths::results() + platform::Paths::bestResultsFile(score, model_name); - // ignore this parameter - hyperparameters = "{}"; - } else { - if (hyperparameters_file != "" && hyperparameters != "{}") { - throw runtime_error("hyperparameters and hyper_file are mutually exclusive"); - } - } - title = program.get("title"); - if (title == "" && file_name == "all") { - throw runtime_error("title is mandatory if all datasets are to be tested"); - } - auto datasets = platform::Datasets(false, platform::Paths::datasets()); - if (datasets_file != "") { - ifstream catalog(datasets_file); - if (catalog.is_open()) { - std::string line; - while (getline(catalog, line)) { - if (line.empty() || line[0] == '#') { - continue; - } - if (!datasets.isDataset(line)) { - cerr << "Dataset " << line << " not found" << std::endl; - exit(1); - } - filesToTest.push_back(line); - } - catalog.close(); - saveResults = true; - if (title == "") { - title = "Test " + to_string(filesToTest.size()) + " datasets (" + datasets_file + ") "\ - + model_name + " " + to_string(n_folds) + " folds"; - } - } else { - throw std::invalid_argument("Unable to open catalog file. [" + datasets_file + "]"); - } - } else { - if (file_names.size() > 0) { - for (auto file : file_names) { - if (!datasets.isDataset(file)) { - cerr << "Dataset " << file << " not found" << std::endl; - exit(1); - } - } - filesToTest = file_names; - saveResults = true; - if (title == "") { - title = "Test " + to_string(file_names.size()) + " datasets " + model_name + " " + to_string(n_folds) + " folds"; - } - } else { - if (file_name != "all") { - if (!datasets.isDataset(file_name)) { - cerr << "Dataset " << file_name << " not found" << std::endl; - exit(1); - } - if (title == "") { - title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds"; - } - filesToTest.push_back(file_name); - } else { - filesToTest = datasets.getNames(); - saveResults = true; - } - } - } - platform::HyperParameters test_hyperparams; - if (hyperparameters_file != "") { - test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file, hyper_best); - } else { - test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json); - } - this->config.model = model_name; - this->config.score = score; - this->config.discretize = discretize_dataset; - this->config.stratified = stratified; - this->config.smooth_strategy = smooth_strat; - this->config.n_folds = n_folds; - this->config.seeds = seeds; - this->config.quiet = false; - auto env = platform::DotEnv(); - experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1"); - experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat); - experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); - experiment.setStratified(stratified).setNFolds(n_folds).setScoreName(score); - experiment.setHyperparameters(test_hyperparams); - for (auto seed : seeds) { - experiment.addRandomSeed(seed); - } + experiment = arguments.initializedExperiment(); + this->config.model = experiment.getModel(); + this->config.score = experiment.getScore(); + this->config.discretize = experiment.isDiscretized(); + this->config.stratified = experiment.isStratified(); + this->config.smooth_strategy = experiment.getSmoothStrategy(); + this->config.n_folds = experiment.getNFolds(); + this->config.seeds = experiment.getRandomSeeds(); + this->config.quiet = experiment.isQuiet(); } json GridExperiment::getResults() { diff --git a/src/grid/GridExperiment.h b/src/grid/GridExperiment.h index dddb8e9..8bcb27c 100644 --- a/src/grid/GridExperiment.h +++ b/src/grid/GridExperiment.h @@ -9,6 +9,7 @@ #include "common/DotEnv.h" #include "main/Experiment.h" #include "main/HyperParameters.h" +#include "main/ArgumentsExperiment.h" #include "GridData.h" #include "GridBase.h" #include "bayesnet/network/Network.h" @@ -18,14 +19,14 @@ namespace platform { using json = nlohmann::ordered_json; class GridExperiment : public GridBase { public: - explicit GridExperiment(argparse::ArgumentParser& program, struct ConfigGrid& config); + explicit GridExperiment(ArgumentsExperiment& program, struct ConfigGrid& config); ~GridExperiment() = default; json getResults(); Experiment& getExperiment() { return experiment; } size_t numFiles() const { return filesToTest.size(); } bool haveToSaveResults() const { return saveResults; } private: - argparse::ArgumentParser& arguments; + ArgumentsExperiment& arguments; Experiment experiment; json computed_results; bool saveResults; diff --git a/src/main/ArgumentsExperiment.cpp b/src/main/ArgumentsExperiment.cpp new file mode 100644 index 0000000..f77b23a --- /dev/null +++ b/src/main/ArgumentsExperiment.cpp @@ -0,0 +1,224 @@ +#include "common/Datasets.h" +#include "common/DotEnv.h" +#include "common/Paths.h" +#include "main/Models.h" +#include "main/modelRegister.h" +#include "ArgumentsExperiment.h" +namespace platform { + ArgumentsExperiment::ArgumentsExperiment(argparse::ArgumentParser& program, experiment_t type) : arguments{ program }, type{ type } + { + auto env = platform::DotEnv(); + auto datasets = platform::Datasets(false, platform::Paths::datasets()); + auto& group = arguments.add_mutually_exclusive_group(true); + group.add_argument("-d", "--dataset") + .help("Dataset file name: " + datasets.toString()) + .default_value("all") + .action([](const std::string& value) { + auto datasets = platform::Datasets(false, platform::Paths::datasets()); + static std::vector choices_datasets(datasets.getNames()); + choices_datasets.push_back("all"); + if (find(choices_datasets.begin(), choices_datasets.end(), value) != choices_datasets.end()) { + return value; + } + throw std::runtime_error("Dataset must be one of: " + datasets.toString()); + } + ); + group.add_argument("--datasets").nargs(1, 50).help("Datasets file names 1..50 separated by spaces").default_value(std::vector()); + group.add_argument("--datasets-file").default_value("").help("Datasets file name. Mutually exclusive with dataset. This file should contain a list of datasets to test."); + arguments.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment"); + arguments.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \ + "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format."); + arguments.add_argument("--hyper-best").default_value(false).help("Use best results of the model as source of hyperparameters").implicit_value(true); + arguments.add_argument("-m", "--model") + .help("Model to use: " + platform::Models::instance()->toString()) + .action([](const std::string& value) { + static const std::vector choices = platform::Models::instance()->getNames(); + if (find(choices.begin(), choices.end(), value) != choices.end()) { + return value; + } + throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString()); + } + ); + arguments.add_argument("--title").default_value("").help("Experiment title"); + arguments.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); + auto valid_choices = env.valid_tokens("discretize_algo"); + auto& disc_arg = arguments.add_argument("--discretize-algo").help("Algorithm to use in discretization. Valid values: " + env.valid_values("discretize_algo")).default_value(env.get("discretize_algo")); + for (auto choice : valid_choices) { + disc_arg.choices(choice); + } + valid_choices = env.valid_tokens("smooth_strat"); + auto& smooth_arg = arguments.add_argument("--smooth-strat").help("Smooth strategy used in Bayes Network node initialization. Valid values: " + env.valid_values("smooth_strat")).default_value(env.get("smooth_strat")); + for (auto choice : valid_choices) { + smooth_arg.choices(choice); + } + auto& score_arg = arguments.add_argument("-s", "--score").help("Score to use. Valid values: " + env.valid_values("score")).default_value(env.get("score")); + valid_choices = env.valid_tokens("score"); + for (auto choice : valid_choices) { + score_arg.choices(choice); + } + arguments.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true); + arguments.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true); + arguments.add_argument("--save").help("Save result (always save even if a dataset is supplied)").default_value(false).implicit_value(true); + arguments.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); + arguments.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) { + try { + auto k = stoi(value); + if (k < 2) { + throw std::runtime_error("Number of folds must be greater than 1"); + } + return k; + } + catch (const runtime_error& err) { + throw std::runtime_error(err.what()); + } + catch (...) { + throw std::runtime_error("Number of folds must be an integer"); + }}); + auto seed_values = env.getSeeds(); + arguments.add_argument("--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); + if (type == experiment_t::NORMAL) { + arguments.add_argument("--generate-fold-files").help("generate fold information in datasets_experiment folder").default_value(false).implicit_value(true); + arguments.add_argument("--graph").help("generate graphviz dot files with the model").default_value(false).implicit_value(true); + } + } + void ArgumentsExperiment::parse_args(int argc, char** argv) + { + try { + arguments.parse_args(argc, argv); + } + catch (const exception& err) { + cerr << err.what() << std::endl; + cerr << arguments; + exit(1); + } + parse(); + } + + void ArgumentsExperiment::parse() + { + try { + file_name = arguments.get("dataset"); + file_names = arguments.get>("datasets"); + datasets_file = arguments.get("datasets-file"); + model_name = arguments.get("model"); + discretize_dataset = arguments.get("discretize"); + discretize_algo = arguments.get("discretize-algo"); + smooth_strat = arguments.get("smooth-strat"); + stratified = arguments.get("stratified"); + quiet = arguments.get("quiet"); + + n_folds = arguments.get("folds"); + score = arguments.get("score"); + seeds = arguments.get>("seeds"); + auto hyperparameters = arguments.get("hyperparameters"); + hyperparameters_json = json::parse(hyperparameters); + hyperparameters_file = arguments.get("hyper-file"); + no_train_score = arguments.get("no-train-score"); + hyper_best = arguments.get("hyper-best"); + if (hyper_best) { + // Build the best results file_name + hyperparameters_file = platform::Paths::results() + platform::Paths::bestResultsFile(score, model_name); + // ignore this parameter + hyperparameters = "{}"; + } else { + if (hyperparameters_file != "" && hyperparameters != "{}") { + throw runtime_error("hyperparameters and hyper_file are mutually exclusive"); + } + } + title = arguments.get("title"); + if (title == "" && file_name == "all") { + throw runtime_error("title is mandatory if all datasets are to be tested"); + } + saveResults = arguments.get("save"); + if (type == experiment_t::NORMAL) { + graph = arguments.get("graph"); + generate_fold_files = arguments.get("generate-fold-files"); + } else { + graph = false; + generate_fold_files = false; + } + } + catch (const exception& err) { + cerr << err.what() << std::endl; + cerr << arguments; + exit(1); + } + auto datasets = platform::Datasets(false, platform::Paths::datasets()); + if (datasets_file != "") { + ifstream catalog(datasets_file); + if (catalog.is_open()) { + std::string line; + while (getline(catalog, line)) { + if (line.empty() || line[0] == '#') { + continue; + } + if (!datasets.isDataset(line)) { + cerr << "Dataset " << line << " not found" << std::endl; + exit(1); + } + filesToTest.push_back(line); + } + catalog.close(); + saveResults = true; + if (title == "") { + title = "Test " + to_string(filesToTest.size()) + " datasets (" + datasets_file + ") "\ + + model_name + " " + to_string(n_folds) + " folds"; + } + } else { + throw std::invalid_argument("Unable to open catalog file. [" + datasets_file + "]"); + } + } else { + if (file_names.size() > 0) { + for (auto file : file_names) { + if (!datasets.isDataset(file)) { + cerr << "Dataset " << file << " not found" << std::endl; + exit(1); + } + } + filesToTest = file_names; + saveResults = true; + if (title == "") { + title = "Test " + to_string(file_names.size()) + " datasets " + model_name + " " + to_string(n_folds) + " folds"; + } + } else { + if (file_name != "all") { + if (!datasets.isDataset(file_name)) { + cerr << "Dataset " << file_name << " not found" << std::endl; + exit(1); + } + if (title == "") { + title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds"; + } + filesToTest.push_back(file_name); + } else { + filesToTest = datasets.getNames(); + saveResults = true; + } + } + } + + if (hyperparameters_file != "") { + test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file, hyper_best); + } else { + test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json); + } + } + Experiment& ArgumentsExperiment::initializedExperiment() + { + auto env = platform::DotEnv(); + experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1"); + experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat); + experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); + experiment.setStratified(stratified).setNFolds(n_folds).setScoreName(score); + experiment.setHyperparameters(test_hyperparams); + for (auto seed : seeds) { + experiment.addRandomSeed(seed); + } + experiment.setFilesToTest(filesToTest); + experiment.setQuiet(quiet); + experiment.setNoTrainScore(no_train_score); + experiment.setGenerateFoldFiles(generate_fold_files); + experiment.setGraph(graph); + return experiment; + } +} \ No newline at end of file diff --git a/src/main/ArgumentsExperiment.h b/src/main/ArgumentsExperiment.h new file mode 100644 index 0000000..06cd08e --- /dev/null +++ b/src/main/ArgumentsExperiment.h @@ -0,0 +1,38 @@ +#ifndef ARGUMENTSEXPERIMENT_H +#define ARGUMENTSEXPERIMENT_H +#include +#include +#include +#include +#include +#include "Experiment.h" + +namespace platform { + using json = nlohmann::ordered_json; + enum class experiment_t { NORMAL, GRID }; + class ArgumentsExperiment { + public: + ArgumentsExperiment(argparse::ArgumentParser& program, experiment_t type); + ~ArgumentsExperiment() = default; + std::vector getFilesToTest() const { return filesToTest; } + void parse_args(int argc, char** argv); + void parse(); + Experiment& initializedExperiment(); + bool isQuiet() const { return quiet; } + bool haveToSaveResults() const { return saveResults; } + bool doGraph() const { return graph; } + private: + Experiment experiment; + experiment_t type; + argparse::ArgumentParser& arguments; + std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat, score; + json hyperparameters_json; + bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files, graph, hyper_best; + std::vector seeds; + std::vector file_names; + std::vector filesToTest; + platform::HyperParameters test_hyperparams; + int n_folds; + }; +} +#endif \ No newline at end of file diff --git a/src/main/Experiment.cpp b/src/main/Experiment.cpp index f33a99f..0208175 100644 --- a/src/main/Experiment.cpp +++ b/src/main/Experiment.cpp @@ -14,11 +14,11 @@ namespace platform { result.save(); std::cout << "Result saved in " << Paths::results() << result.getFilename() << std::endl; } - void Experiment::report(bool classification_report) + void Experiment::report() { ReportConsole report(result.getJson()); report.show(); - if (classification_report) { + if (filesToTest.size() == 1) { std::cout << report.showClassificationReport(Colors::BLUE()); } } @@ -43,9 +43,9 @@ namespace platform { } } } - void Experiment::go(std::vector filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph) + void Experiment::go() { - for (auto fileName : filesToProcess) { + for (auto fileName : filesToTest) { if (fileName.size() > max_name) max_name = fileName.size(); } @@ -64,10 +64,10 @@ namespace platform { std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << " ----------" << Colors::RESET() << std::endl; } int num = 0; - for (auto fileName : filesToProcess) { + for (auto fileName : filesToTest) { if (!quiet) std::cout << " " << setw(3) << right << num++ << " " << setw(max_name) << left << fileName << right << flush; - cross_validation(fileName, quiet, no_train_score, generate_fold_files, graph); + cross_validation(fileName); if (!quiet) std::cout << std::endl; } @@ -139,7 +139,7 @@ namespace platform { file << output.dump(4); file.close(); } - void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph) + void Experiment::cross_validation(const std::string& fileName) { // // Load dataset and prepare data diff --git a/src/main/Experiment.h b/src/main/Experiment.h index 838d80d..0553833 100644 --- a/src/main/Experiment.h +++ b/src/main/Experiment.h @@ -20,7 +20,6 @@ namespace platform { Experiment& setTitle(const std::string& title) { this->result.setTitle(title); return *this; } Experiment& setModelVersion(const std::string& model_version) { this->result.setModelVersion(model_version); return *this; } Experiment& setModel(const std::string& model) { this->result.setModel(model); return *this; } - std::string getModel() const { return result.getModel(); } Experiment& setLanguage(const std::string& language) { this->result.setLanguage(language); return *this; } Experiment& setDiscretizationAlgorithm(const std::string& discretization_algo) { @@ -28,7 +27,8 @@ namespace platform { } Experiment& setSmoothSrategy(const std::string& smooth_strategy) { - this->smooth_strategy = smooth_strategy; this->result.setSmoothStrategy(smooth_strategy); + this->smooth_strategy = smooth_strategy; + this->result.setSmoothStrategy(smooth_strategy); if (smooth_strategy == "ORIGINAL") smooth_type = bayesnet::Smoothing_t::ORIGINAL; else if (smooth_strategy == "LAPLACE") @@ -50,18 +50,32 @@ namespace platform { Experiment& setDuration(float duration) { this->result.setDuration(duration); return *this; } Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; } HyperParameters& getHyperParameters() { return hyperparameters; } - void cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph); - void go(std::vector filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph); + std::string getModel() const { return result.getModel(); } + std::string getScore() const { return result.getScoreName(); } + bool isDiscretized() const { return discretized; } + bool isStratified() const { return stratified; } + bool isQuiet() const { return quiet; } + std::string getSmoothStrategy() const { return smooth_strategy; } + int getNFolds() const { return nfolds; } + std::vector getRandomSeeds() const { return randomSeeds; } + void cross_validation(const std::string& fileName); + void go(); void saveResult(); void show(); void saveGraph(); - void report(bool classification_report = false); + void report(); + void setFilesToTest(const std::vector& filesToTest) { this->filesToTest = filesToTest; } + void setQuiet(bool quiet) { this->quiet = quiet; } + void setNoTrainScore(bool no_train_score) { this->no_train_score = no_train_score; } + void setGenerateFoldFiles(bool generate_fold_files) { this->generate_fold_files = generate_fold_files; } + void setGraph(bool graph) { this->graph = graph; } private: score_t parse_score() const; Result result; - bool discretized{ false }, stratified{ false }; + bool discretized{ false }, stratified{ false }, generate_fold_files{ false }, graph{ false }, quiet{ false }, no_train_score{ false }; std::vector results; std::vector randomSeeds; + std::vector filesToTest; std::string discretization_algo; std::string smooth_strategy; bayesnet::Smoothing_t smooth_type{ bayesnet::Smoothing_t::NONE };