#include #include #include #include "main/Experiment.h" #include "common/Datasets.h" #include "common/DotEnv.h" #include "common/Paths.h" #include "main/Models.h" #include "main/modelRegister.h" #include "config.h" using json = nlohmann::ordered_json; void manageArguments(argparse::ArgumentParser& program) { auto env = platform::DotEnv(); auto datasets = platform::Datasets(false, platform::Paths::datasets()); auto& group = program.add_mutually_exclusive_group(true); group.add_argument("-d", "--dataset") .help("Dataset file name: " + datasets.toString()) .default_value("all") .action([](const std::string& value) { auto datasets = platform::Datasets(false, platform::Paths::datasets()); static std::vector choices_datasets(datasets.getNames()); choices_datasets.push_back("all"); if (find(choices_datasets.begin(), choices_datasets.end(), value) != choices_datasets.end()) { return value; } throw std::runtime_error("Dataset must be one of: " + datasets.toString()); } ); group.add_argument("--datasets").nargs(1, 50).help("Datasets file names 1..50 separated by spaces").default_value(std::vector()); group.add_argument("--datasets-file").default_value("").help("Datasets file name. Mutually exclusive with dataset. This file should contain a list of datasets to test."); program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment"); program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name." \ "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format."); program.add_argument("-m", "--model") .help("Model to use: " + platform::Models::instance()->toString()) .action([](const std::string& value) { static const std::vector choices = platform::Models::instance()->getNames(); if (find(choices.begin(), choices.end(), value) != choices.end()) { return value; } throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString()); } ); program.add_argument("--title").default_value("").help("Experiment title"); program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true); auto valid_choices = env.valid_tokens("discretize_algo"); auto& disc_arg = program.add_argument("--discretize-algo").help("Algorithm to use in discretization. Valid values: " + env.valid_values("discretize_algo")).default_value(env.get("discretize_algo")); for (auto choice : valid_choices) { disc_arg.choices(choice); } valid_choices = env.valid_tokens("smooth_strat"); auto& smooth_arg = program.add_argument("--smooth-strat").help("Smooth strategy used in Bayes Network node initialization. Valid values: " + env.valid_values("smooth_strat")).default_value(env.get("smooth_strat")); for (auto choice : valid_choices) { smooth_arg.choices(choice); } program.add_argument("--generate-fold-files").help("generate fold information in datasets_experiment folder").default_value(false).implicit_value(true); program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true); program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true); program.add_argument("--save").help("Save result (always save if no dataset is supplied)").default_value(false).implicit_value(true); program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true); program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) { try { auto k = stoi(value); if (k < 2) { throw std::runtime_error("Number of folds must be greater than 1"); } return k; } catch (const runtime_error& err) { throw std::runtime_error(err.what()); } catch (...) { throw std::runtime_error("Number of folds must be an integer"); }}); auto seed_values = env.getSeeds(); program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values); } int main(int argc, char** argv) { argparse::ArgumentParser program("b_main", { platform_project_version.begin(), platform_project_version.end() }); manageArguments(program); std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat; json hyperparameters_json; bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files; std::vector seeds; std::vector file_names; std::vector filesToTest; int n_folds; try { program.parse_args(argc, argv); file_name = program.get("dataset"); file_names = program.get>("datasets"); datasets_file = program.get("datasets-file"); model_name = program.get("model"); discretize_dataset = program.get("discretize"); discretize_algo = program.get("discretize-algo"); smooth_strat = program.get("smooth-strat"); stratified = program.get("stratified"); quiet = program.get("quiet"); n_folds = program.get("folds"); seeds = program.get>("seeds"); auto hyperparameters = program.get("hyperparameters"); hyperparameters_json = json::parse(hyperparameters); hyperparameters_file = program.get("hyper-file"); no_train_score = program.get("no-train-score"); generate_fold_files = program.get("generate-fold-files"); if (hyperparameters_file != "" && hyperparameters != "{}") { throw runtime_error("hyperparameters and hyper_file are mutually exclusive"); } title = program.get("title"); if (title == "" && file_name == "all") { throw runtime_error("title is mandatory if all datasets are to be tested"); } saveResults = program.get("save"); } catch (const exception& err) { cerr << err.what() << std::endl; cerr << program; exit(1); } auto datasets = platform::Datasets(false, platform::Paths::datasets()); if (datasets_file != "") { ifstream catalog(datasets_file); if (catalog.is_open()) { std::string line; while (getline(catalog, line)) { if (line.empty() || line[0] == '#') { continue; } if (!datasets.isDataset(line)) { cerr << "Dataset " << line << " not found" << std::endl; exit(1); } filesToTest.push_back(line); } catalog.close(); saveResults = true; if (title == "") { title = "Test " + to_string(filesToTest.size()) + " datasets (" + datasets_file + ") "\ + model_name + " " + to_string(n_folds) + " folds"; } } else { throw std::invalid_argument("Unable to open catalog file. [" + datasets_file + "]"); } } else { if (file_names.size() > 0) { for (auto file : file_names) { if (!datasets.isDataset(file)) { cerr << "Dataset " << file << " not found" << std::endl; exit(1); } } filesToTest = file_names; saveResults = true; if (title == "") { title = "Test " + to_string(file_names.size()) + " datasets " + model_name + " " + to_string(n_folds) + " folds"; } } else { if (file_name != "all") { if (!datasets.isDataset(file_name)) { cerr << "Dataset " << file_name << " not found" << std::endl; exit(1); } if (title == "") { title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds"; } filesToTest.push_back(file_name); } else { filesToTest = datasets.getNames(); saveResults = true; } } } platform::HyperParameters test_hyperparams; if (hyperparameters_file != "") { test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file); } else { test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json); } /* * Begin Processing */ auto env = platform::DotEnv(); auto experiment = platform::Experiment(); experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1"); experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat); experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform")); experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy"); experiment.setHyperparameters(test_hyperparams); for (auto seed : seeds) { experiment.addRandomSeed(seed); } platform::Timer timer; timer.start(); experiment.go(filesToTest, quiet, no_train_score, generate_fold_files); experiment.setDuration(timer.getDuration()); if (!quiet) { // Classification report if only one dataset is tested experiment.report(filesToTest.size() == 1); } if (saveResults) { experiment.saveResult(); } std::cout << "Done!" << std::endl; return 0; }