Refactor arguments management for Experimentation

This commit is contained in:
2025-01-18 18:26:34 +01:00
parent 7aaf6d1bf8
commit 3397d0962f
10 changed files with 325 additions and 420 deletions

View File

@@ -33,7 +33,7 @@ set(grid_sources GridSearch.cpp GridData.cpp GridExperiment.cpp GridBase.cpp )
list(TRANSFORM grid_sources PREPEND grid/)
add_executable(b_grid commands/b_grid.cpp ${grid_sources}
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
main/HyperParameters.cpp main/Models.cpp main/Experiment.cpp main/Scores.cpp
main/HyperParameters.cpp main/Models.cpp main/Experiment.cpp main/Scores.cpp main/ArgumentsExperiment.cpp
reports/ReportConsole.cpp reports/ReportBase.cpp
results/Result.cpp
)
@@ -49,7 +49,7 @@ add_executable(b_list commands/b_list.cpp
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
# b_main
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp ArgumentsExperiment.cpp)
list(TRANSFORM main_sources PREPEND main/)
add_executable(b_main commands/b_main.cpp ${main_sources}
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp

View File

@@ -6,6 +6,7 @@
#include <mpi.h>
#include "main/Models.h"
#include "main/modelRegister.h"
#include "main/ArgumentsExperiment.h"
#include "common/Paths.h"
#include "common/Timer.h"
#include "common/Colors.h"
@@ -32,76 +33,7 @@ void assignModel(argparse::ArgumentParser& parser)
}
);
}
/**
 * Register on `program` the command line options used to configure an experiment:
 * dataset selection, hyperparameter sources, model, title, discretization,
 * smoothing strategy, score, stratification, number of folds and random seeds.
 * Several defaults and the lists of valid values are read from platform::DotEnv.
 *
 * @param program parser that receives the argument definitions
 */
void add_experiment_args(argparse::ArgumentParser& program)
{
    auto env = platform::DotEnv();
    auto datasets = platform::Datasets(false, platform::Paths::datasets());
    // -d / --datasets / --datasets-file live in a mutually exclusive group created with required = true
    auto& group = program.add_mutually_exclusive_group(true);
    group.add_argument("-d", "--dataset")
        .help("Dataset file name: " + datasets.toString())
        .default_value("all")
        .action([](const std::string& value) {
            auto datasets = platform::Datasets(false, platform::Paths::datasets());
            // "all" is a pseudo dataset name that is always accepted
            if (value == "all") {
                return value;
            }
            // The list is rebuilt on each call instead of being cached in a function-local
            // static that kept growing with one extra "all" entry per invocation
            const auto names = datasets.getNames();
            if (std::find(names.begin(), names.end(), value) != names.end()) {
                return value;
            }
            throw std::runtime_error("Dataset must be one of: " + datasets.toString());
            }
        );
    group.add_argument("--datasets").nargs(1, 50).help("Datasets file names 1..50 separated by spaces").default_value(std::vector<std::string>());
    group.add_argument("--datasets-file").default_value("").help("Datasets file name. Mutually exclusive with dataset. This file should contain a list of datasets to test.");
    program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
    program.add_argument("--save").help("Save result (always save even if a dataset is supplied)").default_value(false).implicit_value(true);
    // A blank was missing between the two sentences of this help text
    program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name. "
        "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
    program.add_argument("--hyper-best").default_value(false).help("Use best results of the model as source of hyperparameters").implicit_value(true);
    program.add_argument("-m", "--model")
        .help("Model to use: " + platform::Models::instance()->toString())
        .action([](const std::string& value) {
            static const std::vector<std::string> choices = platform::Models::instance()->getNames();
            if (std::find(choices.begin(), choices.end(), value) != choices.end()) {
                return value;
            }
            throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString());
            }
        );
    program.add_argument("--title").default_value("").help("Experiment title");
    program.add_argument("--discretize").help("Discretize input dataset").default_value(std::stoi(env.get("discretize")) != 0).implicit_value(true);
    auto valid_choices = env.valid_tokens("discretize_algo");
    auto& disc_arg = program.add_argument("--discretize-algo").help("Algorithm to use in discretization. Valid values: " + env.valid_values("discretize_algo")).default_value(env.get("discretize_algo"));
    for (const auto& choice : valid_choices) {
        disc_arg.choices(choice);
    }
    valid_choices = env.valid_tokens("smooth_strat");
    auto& smooth_arg = program.add_argument("--smooth-strat").help("Smooth strategy used in Bayes Network node initialization. Valid values: " + env.valid_values("smooth_strat")).default_value(env.get("smooth_strat"));
    for (const auto& choice : valid_choices) {
        smooth_arg.choices(choice);
    }
    auto& score_arg = program.add_argument("-s", "--score").help("Score to use. Valid values: " + env.valid_values("score")).default_value(env.get("score"));
    valid_choices = env.valid_tokens("score");
    for (const auto& choice : valid_choices) {
        score_arg.choices(choice);
    }
    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(std::stoi(env.get("stratified")) != 0).implicit_value(true);
    program.add_argument("-f", "--folds").help("Number of folds").default_value(std::stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
        int k = 0;
        std::size_t consumed = 0;
        try {
            k = std::stoi(value, &consumed);
        }
        catch (const std::exception&) {
            throw std::runtime_error("Number of folds must be an integer");
        }
        // stoi stops at the first non numeric character, so "5x" has to be rejected explicitly
        if (consumed != value.size()) {
            throw std::runtime_error("Number of folds must be an integer");
        }
        if (k < 2) {
            throw std::runtime_error("Number of folds must be greater than 1");
        }
        return k;
        });
    auto seed_values = env.getSeeds();
    program.add_argument("--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
}
void add_search_args(argparse::ArgumentParser& program)
{
auto env = platform::DotEnv();
@@ -276,9 +208,6 @@ void search(argparse::ArgumentParser& program)
}
auto excluded = program.get<std::string>("exclude");
config.excluded = json::parse(excluded);
auto env = platform::DotEnv();
config.platform = env.get("platform");
platform::Paths::createPath(platform::Paths::grid());
auto grid_search = platform::GridSearch(config);
platform::Timer timer;
@@ -303,10 +232,9 @@ void search(argparse::ArgumentParser& program)
void experiment(argparse::ArgumentParser& program)
{
struct platform::ConfigGrid config;
auto env = platform::DotEnv();
config.platform = env.get("platform");
auto grid_experiment = platform::GridExperiment(program, config);
auto arguments = platform::ArgumentsExperiment(program, platform::experiment_t::GRID);
arguments.parse();
auto grid_experiment = platform::GridExperiment(arguments, config);
platform::Timer timer;
timer.start();
struct platform::ConfigMPI mpi_config;
@@ -326,7 +254,7 @@ void experiment(argparse::ArgumentParser& program)
if (grid_experiment.haveToSaveResults()) {
experiment.saveResult();
}
experiment.report(grid_experiment.numFiles() == 1);
experiment.report();
std::cout << "Process took " << duration << std::endl;
}
MPI_Finalize();
@@ -356,9 +284,7 @@ int main(int argc, char** argv)
// grid experiment subparser
argparse::ArgumentParser experiment_command("experiment");
experiment_command.add_description("Experiment like b_main using mpi.");
assignModel(experiment_command);
add_experiment_args(experiment_command);
auto arguments = platform::ArgumentsExperiment(experiment_command, platform::experiment_t::GRID);
program.add_subparser(dump_command);
program.add_subparser(report_command);
program.add_subparser(search_command);

View File

@@ -1,234 +1,35 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "main/Experiment.h"
#include "common/Datasets.h"
#include "common/DotEnv.h"
#include "common/Paths.h"
#include "main/Models.h"
#include "main/modelRegister.h"
#include "main/ArgumentsExperiment.h"
#include "config_platform.h"
using json = nlohmann::ordered_json;
/**
 * Register on `program` every command line option understood by b_main:
 * dataset selection, hyperparameter sources, model, title, discretization,
 * smoothing strategy, score, output flags, stratification, folds and seeds.
 * Several defaults and the lists of valid values are read from platform::DotEnv.
 *
 * @param program parser that receives the argument definitions
 */
void manageArguments(argparse::ArgumentParser& program)
{
    auto env = platform::DotEnv();
    auto datasets = platform::Datasets(false, platform::Paths::datasets());
    // -d / --datasets / --datasets-file live in a mutually exclusive group created with required = true
    auto& group = program.add_mutually_exclusive_group(true);
    group.add_argument("-d", "--dataset")
        .help("Dataset file name: " + datasets.toString())
        .default_value("all")
        .action([](const std::string& value) {
            auto datasets = platform::Datasets(false, platform::Paths::datasets());
            // "all" is a pseudo dataset name that is always accepted
            if (value == "all") {
                return value;
            }
            // The list is rebuilt on each call instead of being cached in a function-local
            // static that kept growing with one extra "all" entry per invocation
            const auto names = datasets.getNames();
            if (std::find(names.begin(), names.end(), value) != names.end()) {
                return value;
            }
            throw std::runtime_error("Dataset must be one of: " + datasets.toString());
            }
        );
    group.add_argument("--datasets").nargs(1, 50).help("Datasets file names 1..50 separated by spaces").default_value(std::vector<std::string>());
    group.add_argument("--datasets-file").default_value("").help("Datasets file name. Mutually exclusive with dataset. This file should contain a list of datasets to test.");
    program.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
    // A blank was missing between the two sentences of this help text
    program.add_argument("--hyper-file").default_value("").help("Hyperparameters file name. "
        "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
    program.add_argument("--hyper-best").default_value(false).help("Use best results of the model as source of hyperparameters").implicit_value(true);
    program.add_argument("-m", "--model")
        .help("Model to use: " + platform::Models::instance()->toString())
        .action([](const std::string& value) {
            static const std::vector<std::string> choices = platform::Models::instance()->getNames();
            if (std::find(choices.begin(), choices.end(), value) != choices.end()) {
                return value;
            }
            throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString());
            }
        );
    program.add_argument("--title").default_value("").help("Experiment title");
    program.add_argument("--discretize").help("Discretize input dataset").default_value(std::stoi(env.get("discretize")) != 0).implicit_value(true);
    auto valid_choices = env.valid_tokens("discretize_algo");
    auto& disc_arg = program.add_argument("--discretize-algo").help("Algorithm to use in discretization. Valid values: " + env.valid_values("discretize_algo")).default_value(env.get("discretize_algo"));
    for (const auto& choice : valid_choices) {
        disc_arg.choices(choice);
    }
    valid_choices = env.valid_tokens("smooth_strat");
    auto& smooth_arg = program.add_argument("--smooth-strat").help("Smooth strategy used in Bayes Network node initialization. Valid values: " + env.valid_values("smooth_strat")).default_value(env.get("smooth_strat"));
    for (const auto& choice : valid_choices) {
        smooth_arg.choices(choice);
    }
    auto& score_arg = program.add_argument("-s", "--score").help("Score to use. Valid values: " + env.valid_values("score")).default_value(env.get("score"));
    valid_choices = env.valid_tokens("score");
    for (const auto& choice : valid_choices) {
        score_arg.choices(choice);
    }
    program.add_argument("--generate-fold-files").help("generate fold information in datasets_experiment folder").default_value(false).implicit_value(true);
    program.add_argument("--graph").help("generate graphviz dot files with the model").default_value(false).implicit_value(true);
    program.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true);
    program.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
    program.add_argument("--save").help("Save result (always save even if a dataset is supplied)").default_value(false).implicit_value(true);
    program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(std::stoi(env.get("stratified")) != 0).implicit_value(true);
    program.add_argument("-f", "--folds").help("Number of folds").default_value(std::stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
        int k = 0;
        std::size_t consumed = 0;
        try {
            k = std::stoi(value, &consumed);
        }
        catch (const std::exception&) {
            throw std::runtime_error("Number of folds must be an integer");
        }
        // stoi stops at the first non numeric character, so "5x" has to be rejected explicitly
        if (consumed != value.size()) {
            throw std::runtime_error("Number of folds must be an integer");
        }
        if (k < 2) {
            throw std::runtime_error("Number of folds must be greater than 1");
        }
        return k;
        });
    auto seed_values = env.getSeeds();
    program.add_argument("--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
}
int main(int argc, char** argv)
{
argparse::ArgumentParser program("b_main", { platform_project_version.begin(), platform_project_version.end() });
manageArguments(program);
std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat, score;
json hyperparameters_json;
bool discretize_dataset, stratified, saveResults, quiet, no_train_score, generate_fold_files, graph, hyper_best;
std::vector<int> seeds;
std::vector<std::string> file_names;
std::vector<std::string> filesToTest;
int n_folds;
try {
program.parse_args(argc, argv);
file_name = program.get<std::string>("dataset");
file_names = program.get<std::vector<std::string>>("datasets");
datasets_file = program.get<std::string>("datasets-file");
model_name = program.get<std::string>("model");
discretize_dataset = program.get<bool>("discretize");
discretize_algo = program.get<std::string>("discretize-algo");
smooth_strat = program.get<std::string>("smooth-strat");
stratified = program.get<bool>("stratified");
quiet = program.get<bool>("quiet");
graph = program.get<bool>("graph");
n_folds = program.get<int>("folds");
score = program.get<std::string>("score");
seeds = program.get<std::vector<int>>("seeds");
auto hyperparameters = program.get<std::string>("hyperparameters");
hyperparameters_json = json::parse(hyperparameters);
hyperparameters_file = program.get<std::string>("hyper-file");
no_train_score = program.get<bool>("no-train-score");
hyper_best = program.get<bool>("hyper-best");
generate_fold_files = program.get<bool>("generate-fold-files");
if (hyper_best) {
// Build the best results file_name
hyperparameters_file = platform::Paths::results() + platform::Paths::bestResultsFile(score, model_name);
// ignore this parameter
hyperparameters = "{}";
} else {
if (hyperparameters_file != "" && hyperparameters != "{}") {
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
}
}
title = program.get<std::string>("title");
if (title == "" && file_name == "all") {
throw runtime_error("title is mandatory if all datasets are to be tested");
}
saveResults = program.get<bool>("save");
}
catch (const exception& err) {
cerr << err.what() << std::endl;
cerr << program;
exit(1);
}
auto datasets = platform::Datasets(false, platform::Paths::datasets());
if (datasets_file != "") {
ifstream catalog(datasets_file);
if (catalog.is_open()) {
std::string line;
while (getline(catalog, line)) {
if (line.empty() || line[0] == '#') {
continue;
}
if (!datasets.isDataset(line)) {
cerr << "Dataset " << line << " not found" << std::endl;
exit(1);
}
filesToTest.push_back(line);
}
catalog.close();
saveResults = true;
if (title == "") {
title = "Test " + to_string(filesToTest.size()) + " datasets (" + datasets_file + ") "\
+ model_name + " " + to_string(n_folds) + " folds";
}
} else {
throw std::invalid_argument("Unable to open catalog file. [" + datasets_file + "]");
}
} else {
if (file_names.size() > 0) {
for (auto file : file_names) {
if (!datasets.isDataset(file)) {
cerr << "Dataset " << file << " not found" << std::endl;
exit(1);
}
}
filesToTest = file_names;
saveResults = true;
if (title == "") {
title = "Test " + to_string(file_names.size()) + " datasets " + model_name + " " + to_string(n_folds) + " folds";
}
} else {
if (file_name != "all") {
if (!datasets.isDataset(file_name)) {
cerr << "Dataset " << file_name << " not found" << std::endl;
exit(1);
}
if (title == "") {
title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds";
}
filesToTest.push_back(file_name);
} else {
filesToTest = datasets.getNames();
saveResults = true;
}
}
}
platform::HyperParameters test_hyperparams;
if (hyperparameters_file != "") {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file, hyper_best);
} else {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
}
auto arguments = platform::ArgumentsExperiment(program, platform::experiment_t::NORMAL);
arguments.parse_args(argc, argv);
/*
* Begin Processing
*/
auto env = platform::DotEnv();
auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1");
experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat);
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName(score);
experiment.setHyperparameters(test_hyperparams);
for (auto seed : seeds) {
experiment.addRandomSeed(seed);
}
// Initialize the experiment class with the command line arguments
auto experiment = arguments.initializedExperiment();
platform::Timer timer;
timer.start();
experiment.go(filesToTest, quiet, no_train_score, generate_fold_files, graph);
experiment.go();
experiment.setDuration(timer.getDuration());
if (!quiet) {
if (!arguments.isQuiet()) {
// Classification report if only one dataset is tested
experiment.report(filesToTest.size() == 1);
experiment.report();
}
if (saveResults) {
if (arguments.haveToSaveResults()) {
experiment.saveResult();
}
if (graph) {
if (arguments.doGraph()) {
experiment.saveGraph();
}
std::cout << "Done!" << std::endl;
return 0;
}

View File

@@ -2,6 +2,7 @@
#include <cstddef>
#include "common/DotEnv.h"
#include "common/Paths.h"
#include "common/DotEnv.h"
#include "GridBase.h"
namespace platform {
@@ -9,6 +10,8 @@ namespace platform {
// Store a copy of the supplied configuration and fill in its platform field
// with the "platform" entry obtained from DotEnv.
GridBase::GridBase(struct ConfigGrid& config)
{
    this->config = config;
    this->config.platform = platform::DotEnv().get("platform");
}
void GridBase::validate_config()

View File

@@ -8,120 +8,18 @@
#include "GridExperiment.h"
namespace platform {
GridExperiment::GridExperiment(argparse::ArgumentParser& program, struct ConfigGrid& config) : arguments(program), GridBase(config)
// GridExperiment::GridExperiment(argparse::ArgumentParser& program, struct ConfigGrid& config) : arguments(program), GridBase(config)
GridExperiment::GridExperiment(ArgumentsExperiment& program, struct ConfigGrid& config) : arguments(program), GridBase(config)
{
std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat, score;
json hyperparameters_json;
bool discretize_dataset, stratified, hyper_best;
std::vector<int> seeds;
std::vector<std::string> file_names;
int n_folds;
file_name = program.get<std::string>("dataset");
file_names = program.get<std::vector<std::string>>("datasets");
datasets_file = program.get<std::string>("datasets-file");
model_name = program.get<std::string>("model");
discretize_dataset = program.get<bool>("discretize");
saveResults = program.get<bool>("save");
discretize_algo = program.get<std::string>("discretize-algo");
smooth_strat = program.get<std::string>("smooth-strat");
stratified = program.get<bool>("stratified");
n_folds = program.get<int>("folds");
score = program.get<std::string>("score");
seeds = program.get<std::vector<int>>("seeds");
auto hyperparameters = program.get<std::string>("hyperparameters");
hyperparameters_json = json::parse(hyperparameters);
hyperparameters_file = program.get<std::string>("hyper-file");
hyper_best = program.get<bool>("hyper-best");
if (hyper_best) {
// Build the best results file_name
hyperparameters_file = platform::Paths::results() + platform::Paths::bestResultsFile(score, model_name);
// ignore this parameter
hyperparameters = "{}";
} else {
if (hyperparameters_file != "" && hyperparameters != "{}") {
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
}
}
title = program.get<std::string>("title");
if (title == "" && file_name == "all") {
throw runtime_error("title is mandatory if all datasets are to be tested");
}
auto datasets = platform::Datasets(false, platform::Paths::datasets());
if (datasets_file != "") {
ifstream catalog(datasets_file);
if (catalog.is_open()) {
std::string line;
while (getline(catalog, line)) {
if (line.empty() || line[0] == '#') {
continue;
}
if (!datasets.isDataset(line)) {
cerr << "Dataset " << line << " not found" << std::endl;
exit(1);
}
filesToTest.push_back(line);
}
catalog.close();
saveResults = true;
if (title == "") {
title = "Test " + to_string(filesToTest.size()) + " datasets (" + datasets_file + ") "\
+ model_name + " " + to_string(n_folds) + " folds";
}
} else {
throw std::invalid_argument("Unable to open catalog file. [" + datasets_file + "]");
}
} else {
if (file_names.size() > 0) {
for (auto file : file_names) {
if (!datasets.isDataset(file)) {
cerr << "Dataset " << file << " not found" << std::endl;
exit(1);
}
}
filesToTest = file_names;
saveResults = true;
if (title == "") {
title = "Test " + to_string(file_names.size()) + " datasets " + model_name + " " + to_string(n_folds) + " folds";
}
} else {
if (file_name != "all") {
if (!datasets.isDataset(file_name)) {
cerr << "Dataset " << file_name << " not found" << std::endl;
exit(1);
}
if (title == "") {
title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds";
}
filesToTest.push_back(file_name);
} else {
filesToTest = datasets.getNames();
saveResults = true;
}
}
}
platform::HyperParameters test_hyperparams;
if (hyperparameters_file != "") {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file, hyper_best);
} else {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
}
this->config.model = model_name;
this->config.score = score;
this->config.discretize = discretize_dataset;
this->config.stratified = stratified;
this->config.smooth_strategy = smooth_strat;
this->config.n_folds = n_folds;
this->config.seeds = seeds;
this->config.quiet = false;
auto env = platform::DotEnv();
experiment.setTitle(title).setLanguage("c++").setLanguageVersion("gcc 14.1.1");
experiment.setDiscretizationAlgorithm(discretize_algo).setSmoothSrategy(smooth_strat);
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName(score);
experiment.setHyperparameters(test_hyperparams);
for (auto seed : seeds) {
experiment.addRandomSeed(seed);
}
experiment = arguments.initializedExperiment();
this->config.model = experiment.getModel();
this->config.score = experiment.getScore();
this->config.discretize = experiment.isDiscretized();
this->config.stratified = experiment.isStratified();
this->config.smooth_strategy = experiment.getSmoothStrategy();
this->config.n_folds = experiment.getNFolds();
this->config.seeds = experiment.getRandomSeeds();
this->config.quiet = experiment.isQuiet();
}
json GridExperiment::getResults()
{

View File

@@ -9,6 +9,7 @@
#include "common/DotEnv.h"
#include "main/Experiment.h"
#include "main/HyperParameters.h"
#include "main/ArgumentsExperiment.h"
#include "GridData.h"
#include "GridBase.h"
#include "bayesnet/network/Network.h"
@@ -18,14 +19,14 @@ namespace platform {
using json = nlohmann::ordered_json;
class GridExperiment : public GridBase {
public:
explicit GridExperiment(argparse::ArgumentParser& program, struct ConfigGrid& config);
explicit GridExperiment(ArgumentsExperiment& program, struct ConfigGrid& config);
~GridExperiment() = default;
json getResults();
Experiment& getExperiment() { return experiment; }
size_t numFiles() const { return filesToTest.size(); }
bool haveToSaveResults() const { return saveResults; }
private:
argparse::ArgumentParser& arguments;
ArgumentsExperiment& arguments;
Experiment experiment;
json computed_results;
bool saveResults;

View File

@@ -0,0 +1,224 @@
#include "common/Datasets.h"
#include "common/DotEnv.h"
#include "common/Paths.h"
#include "main/Models.h"
#include "main/modelRegister.h"
#include "ArgumentsExperiment.h"
namespace platform {
/**
 * Bind this object to `program` and register on it the command line options used
 * to configure an experiment. Several defaults and the lists of valid values are
 * read from platform::DotEnv.
 *
 * @param program parser that receives the argument definitions; it is kept by
 *                reference, so it has to outlive this object
 * @param type    experiment_t::NORMAL additionally registers --generate-fold-files
 *                and --graph
 */
ArgumentsExperiment::ArgumentsExperiment(argparse::ArgumentParser& program, experiment_t type) : type{ type }, arguments{ program }
{
    // The initializer list above follows the declaration order of the members (type, arguments)
    auto env = platform::DotEnv();
    auto datasets = platform::Datasets(false, platform::Paths::datasets());
    // -d / --datasets / --datasets-file live in a mutually exclusive group created with required = true
    auto& group = arguments.add_mutually_exclusive_group(true);
    group.add_argument("-d", "--dataset")
        .help("Dataset file name: " + datasets.toString())
        .default_value("all")
        .action([](const std::string& value) {
            auto datasets = platform::Datasets(false, platform::Paths::datasets());
            // "all" is a pseudo dataset name that is always accepted
            if (value == "all") {
                return value;
            }
            // The list is rebuilt on each call instead of being cached in a function-local
            // static that kept growing with one extra "all" entry per invocation
            const auto names = datasets.getNames();
            if (std::find(names.begin(), names.end(), value) != names.end()) {
                return value;
            }
            throw std::runtime_error("Dataset must be one of: " + datasets.toString());
            }
        );
    group.add_argument("--datasets").nargs(1, 50).help("Datasets file names 1..50 separated by spaces").default_value(std::vector<std::string>());
    group.add_argument("--datasets-file").default_value("").help("Datasets file name. Mutually exclusive with dataset. This file should contain a list of datasets to test.");
    arguments.add_argument("--hyperparameters").default_value("{}").help("Hyperparameters passed to the model in Experiment");
    // A blank was missing between the two sentences of this help text
    arguments.add_argument("--hyper-file").default_value("").help("Hyperparameters file name. "
        "Mutually exclusive with hyperparameters. This file should contain hyperparameters for each dataset in json format.");
    arguments.add_argument("--hyper-best").default_value(false).help("Use best results of the model as source of hyperparameters").implicit_value(true);
    arguments.add_argument("-m", "--model")
        .help("Model to use: " + platform::Models::instance()->toString())
        .action([](const std::string& value) {
            static const std::vector<std::string> choices = platform::Models::instance()->getNames();
            if (std::find(choices.begin(), choices.end(), value) != choices.end()) {
                return value;
            }
            throw std::runtime_error("Model must be one of " + platform::Models::instance()->toString());
            }
        );
    arguments.add_argument("--title").default_value("").help("Experiment title");
    arguments.add_argument("--discretize").help("Discretize input dataset").default_value(std::stoi(env.get("discretize")) != 0).implicit_value(true);
    auto valid_choices = env.valid_tokens("discretize_algo");
    auto& disc_arg = arguments.add_argument("--discretize-algo").help("Algorithm to use in discretization. Valid values: " + env.valid_values("discretize_algo")).default_value(env.get("discretize_algo"));
    for (const auto& choice : valid_choices) {
        disc_arg.choices(choice);
    }
    valid_choices = env.valid_tokens("smooth_strat");
    auto& smooth_arg = arguments.add_argument("--smooth-strat").help("Smooth strategy used in Bayes Network node initialization. Valid values: " + env.valid_values("smooth_strat")).default_value(env.get("smooth_strat"));
    for (const auto& choice : valid_choices) {
        smooth_arg.choices(choice);
    }
    auto& score_arg = arguments.add_argument("-s", "--score").help("Score to use. Valid values: " + env.valid_values("score")).default_value(env.get("score"));
    valid_choices = env.valid_tokens("score");
    for (const auto& choice : valid_choices) {
        score_arg.choices(choice);
    }
    arguments.add_argument("--no-train-score").help("Don't compute train score").default_value(false).implicit_value(true);
    arguments.add_argument("--quiet").help("Don't display detailed progress").default_value(false).implicit_value(true);
    arguments.add_argument("--save").help("Save result (always save even if a dataset is supplied)").default_value(false).implicit_value(true);
    arguments.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(std::stoi(env.get("stratified")) != 0).implicit_value(true);
    arguments.add_argument("-f", "--folds").help("Number of folds").default_value(std::stoi(env.get("n_folds"))).scan<'i', int>().action([](const std::string& value) {
        int k = 0;
        std::size_t consumed = 0;
        try {
            k = std::stoi(value, &consumed);
        }
        catch (const std::exception&) {
            throw std::runtime_error("Number of folds must be an integer");
        }
        // stoi stops at the first non numeric character, so "5x" has to be rejected explicitly
        if (consumed != value.size()) {
            throw std::runtime_error("Number of folds must be an integer");
        }
        if (k < 2) {
            throw std::runtime_error("Number of folds must be greater than 1");
        }
        return k;
        });
    auto seed_values = env.getSeeds();
    arguments.add_argument("--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
    // Options that are only offered by the NORMAL experiment type
    if (type == experiment_t::NORMAL) {
        arguments.add_argument("--generate-fold-files").help("generate fold information in datasets_experiment folder").default_value(false).implicit_value(true);
        arguments.add_argument("--graph").help("generate graphviz dot files with the model").default_value(false).implicit_value(true);
    }
}
/**
 * Feed the raw command line to the underlying parser and then load the values
 * through parse(). If the parser throws, the message and the parser itself are
 * written to stderr and the process exits with status 1.
 *
 * @param argc number of command line tokens
 * @param argv command line tokens
 */
void ArgumentsExperiment::parse_args(int argc, char** argv)
{
    try {
        arguments.parse_args(argc, argv);
    }
    catch (const std::exception& failure) {
        std::cerr << failure.what() << std::endl << arguments;
        exit(1);
    }
    parse();
}
void ArgumentsExperiment::parse()
{
try {
file_name = arguments.get<std::string>("dataset");
file_names = arguments.get<std::vector<std::string>>("datasets");
datasets_file = arguments.get<std::string>("datasets-file");
model_name = arguments.get<std::string>("model");
discretize_dataset = arguments.get<bool>("discretize");
discretize_algo = arguments.get<std::string>("discretize-algo");
smooth_strat = arguments.get<std::string>("smooth-strat");
stratified = arguments.get<bool>("stratified");
quiet = arguments.get<bool>("quiet");
n_folds = arguments.get<int>("folds");
score = arguments.get<std::string>("score");
seeds = arguments.get<std::vector<int>>("seeds");
auto hyperparameters = arguments.get<std::string>("hyperparameters");
hyperparameters_json = json::parse(hyperparameters);
hyperparameters_file = arguments.get<std::string>("hyper-file");
no_train_score = arguments.get<bool>("no-train-score");
hyper_best = arguments.get<bool>("hyper-best");
if (hyper_best) {
// Build the best results file_name
hyperparameters_file = platform::Paths::results() + platform::Paths::bestResultsFile(score, model_name);
// ignore this parameter
hyperparameters = "{}";
} else {
if (hyperparameters_file != "" && hyperparameters != "{}") {
throw runtime_error("hyperparameters and hyper_file are mutually exclusive");
}
}
title = arguments.get<std::string>("title");
if (title == "" && file_name == "all") {
throw runtime_error("title is mandatory if all datasets are to be tested");
}
saveResults = arguments.get<bool>("save");
if (type == experiment_t::NORMAL) {
graph = arguments.get<bool>("graph");
generate_fold_files = arguments.get<bool>("generate-fold-files");
} else {
graph = false;
generate_fold_files = false;
}
}
catch (const exception& err) {
cerr << err.what() << std::endl;
cerr << arguments;
exit(1);
}
auto datasets = platform::Datasets(false, platform::Paths::datasets());
if (datasets_file != "") {
ifstream catalog(datasets_file);
if (catalog.is_open()) {
std::string line;
while (getline(catalog, line)) {
if (line.empty() || line[0] == '#') {
continue;
}
if (!datasets.isDataset(line)) {
cerr << "Dataset " << line << " not found" << std::endl;
exit(1);
}
filesToTest.push_back(line);
}
catalog.close();
saveResults = true;
if (title == "") {
title = "Test " + to_string(filesToTest.size()) + " datasets (" + datasets_file + ") "\
+ model_name + " " + to_string(n_folds) + " folds";
}
} else {
throw std::invalid_argument("Unable to open catalog file. [" + datasets_file + "]");
}
} else {
if (file_names.size() > 0) {
for (auto file : file_names) {
if (!datasets.isDataset(file)) {
cerr << "Dataset " << file << " not found" << std::endl;
exit(1);
}
}
filesToTest = file_names;
saveResults = true;
if (title == "") {
title = "Test " + to_string(file_names.size()) + " datasets " + model_name + " " + to_string(n_folds) + " folds";
}
} else {
if (file_name != "all") {
if (!datasets.isDataset(file_name)) {
cerr << "Dataset " << file_name << " not found" << std::endl;
exit(1);
}
if (title == "") {
title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds";
}
filesToTest.push_back(file_name);
} else {
filesToTest = datasets.getNames();
saveResults = true;
}
}
}
if (hyperparameters_file != "") {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_file, hyper_best);
} else {
test_hyperparams = platform::HyperParameters(datasets.getNames(), hyperparameters_json);
}
}
/**
 * Transfer the values held in the member fields to the owned Experiment object
 * and hand it back.
 *
 * @return reference to the Experiment member of this object
 */
Experiment& ArgumentsExperiment::initializedExperiment()
{
    auto env = platform::DotEnv();
    experiment
        .setTitle(title)
        .setLanguage("c++")
        .setLanguageVersion("gcc 14.1.1");
    experiment
        .setDiscretizationAlgorithm(discretize_algo)
        .setSmoothSrategy(smooth_strat);
    experiment
        .setDiscretized(discretize_dataset)
        .setModel(model_name)
        .setPlatform(env.get("platform"));
    experiment
        .setStratified(stratified)
        .setNFolds(n_folds)
        .setScoreName(score);
    experiment.setHyperparameters(test_hyperparams);
    // Seeds are added one at a time, in the order they were supplied
    for (auto random_seed : seeds) {
        experiment.addRandomSeed(random_seed);
    }
    experiment.setFilesToTest(filesToTest);
    experiment.setQuiet(quiet);
    experiment.setNoTrainScore(no_train_score);
    experiment.setGenerateFoldFiles(generate_fold_files);
    experiment.setGraph(graph);
    return experiment;
}
}

View File

@@ -0,0 +1,38 @@
#ifndef ARGUMENTSEXPERIMENT_H
#define ARGUMENTSEXPERIMENT_H
#include <string>
#include <iostream>
#include <vector>
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include "Experiment.h"
namespace platform {
    using json = nlohmann::ordered_json;

    // Kind of run the arguments are collected for. parse() only reads the
    // "graph" and "generate-fold-files" options for NORMAL; for any other
    // value (GRID) both flags are forced to false.
    enum class experiment_t { NORMAL, GRID };

    // Gathers the experiment-related command-line options from an
    // argparse::ArgumentParser and turns them into a configured Experiment.
    class ArgumentsExperiment {
    public:
        // Binds this object to `program` (kept by reference, so the parser must
        // outlive this object) and records the experiment type.
        // NOTE(review): presumably also registers the experiment options on
        // `program` — the constructor body is not visible here, confirm in
        // ArgumentsExperiment.cpp.
        ArgumentsExperiment(argparse::ArgumentParser& program, experiment_t type);
        ~ArgumentsExperiment() = default;
        // Dataset names selected by parse(); returned by value (copy).
        std::vector<std::string> getFilesToTest() const { return filesToTest; }
        // NOTE(review): presumably parses argv with the bound parser and then
        // runs parse() — implementation not visible here, confirm.
        void parse_args(int argc, char** argv);
        // Reads the option values from the parser into the members below,
        // validates the requested dataset names and builds the list of files to
        // test, the default title and the hyperparameters. Writes to cerr and
        // calls exit(1) when an option cannot be read or a dataset is unknown;
        // throws std::invalid_argument when the datasets catalog file cannot be
        // opened.
        void parse();
        // Copies the parsed settings into the owned Experiment and returns a
        // reference to it (valid while this object is alive).
        Experiment& initializedExperiment();
        bool isQuiet() const { return quiet; }
        bool haveToSaveResults() const { return saveResults; }
        bool doGraph() const { return graph; }
    private:
        Experiment experiment;
        experiment_t type;
        argparse::ArgumentParser& arguments;
        std::string file_name, model_name, title, hyperparameters_file, datasets_file, discretize_algo, smooth_strat, score;
        json hyperparameters_json;
        // Default-initialized so the accessors above return a defined value even
        // if they are queried before parse() has run.
        bool discretize_dataset{ false }, stratified{ false }, saveResults{ false }, quiet{ false }, no_train_score{ false }, generate_fold_files{ false }, graph{ false }, hyper_best{ false };
        std::vector<int> seeds;
        std::vector<std::string> file_names;
        std::vector<std::string> filesToTest;
        platform::HyperParameters test_hyperparams;
        // 0 means "not parsed yet"; the real value is stored before it is used in parse().
        int n_folds{ 0 };
    };
}
#endif

View File

@@ -14,11 +14,11 @@ namespace platform {
result.save();
std::cout << "Result saved in " << Paths::results() << result.getFilename() << std::endl;
}
void Experiment::report(bool classification_report)
void Experiment::report()
{
ReportConsole report(result.getJson());
report.show();
if (classification_report) {
if (filesToTest.size() == 1) {
std::cout << report.showClassificationReport(Colors::BLUE());
}
}
@@ -43,9 +43,9 @@ namespace platform {
}
}
}
void Experiment::go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph)
void Experiment::go()
{
for (auto fileName : filesToProcess) {
for (auto fileName : filesToTest) {
if (fileName.size() > max_name)
max_name = fileName.size();
}
@@ -64,10 +64,10 @@ namespace platform {
std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << " ----------" << Colors::RESET() << std::endl;
}
int num = 0;
for (auto fileName : filesToProcess) {
for (auto fileName : filesToTest) {
if (!quiet)
std::cout << " " << setw(3) << right << num++ << " " << setw(max_name) << left << fileName << right << flush;
cross_validation(fileName, quiet, no_train_score, generate_fold_files, graph);
cross_validation(fileName);
if (!quiet)
std::cout << std::endl;
}
@@ -139,7 +139,7 @@ namespace platform {
file << output.dump(4);
file.close();
}
void Experiment::cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph)
void Experiment::cross_validation(const std::string& fileName)
{
//
// Load dataset and prepare data

View File

@@ -20,7 +20,6 @@ namespace platform {
Experiment& setTitle(const std::string& title) { this->result.setTitle(title); return *this; }
Experiment& setModelVersion(const std::string& model_version) { this->result.setModelVersion(model_version); return *this; }
Experiment& setModel(const std::string& model) { this->result.setModel(model); return *this; }
std::string getModel() const { return result.getModel(); }
Experiment& setLanguage(const std::string& language) { this->result.setLanguage(language); return *this; }
Experiment& setDiscretizationAlgorithm(const std::string& discretization_algo)
{
@@ -28,7 +27,8 @@ namespace platform {
}
Experiment& setSmoothSrategy(const std::string& smooth_strategy)
{
this->smooth_strategy = smooth_strategy; this->result.setSmoothStrategy(smooth_strategy);
this->smooth_strategy = smooth_strategy;
this->result.setSmoothStrategy(smooth_strategy);
if (smooth_strategy == "ORIGINAL")
smooth_type = bayesnet::Smoothing_t::ORIGINAL;
else if (smooth_strategy == "LAPLACE")
@@ -50,18 +50,32 @@ namespace platform {
Experiment& setDuration(float duration) { this->result.setDuration(duration); return *this; }
Experiment& setHyperparameters(const HyperParameters& hyperparameters_) { this->hyperparameters = hyperparameters_; return *this; }
HyperParameters& getHyperParameters() { return hyperparameters; }
void cross_validation(const std::string& fileName, bool quiet, bool no_train_score, bool generate_fold_files, bool graph);
void go(std::vector<std::string> filesToProcess, bool quiet, bool no_train_score, bool generate_fold_files, bool graph);
// Read-only accessors for the current configuration.
// getModel() and getScore() read the value back from the stored Result;
// the remaining ones return (copies of) members of Experiment itself.
std::string getModel() const { return result.getModel(); }
std::string getScore() const { return result.getScoreName(); }
bool isDiscretized() const { return discretized; }
bool isStratified() const { return stratified; }
bool isQuiet() const { return quiet; }
std::string getSmoothStrategy() const { return smooth_strategy; }
int getNFolds() const { return nfolds; }
// Returns the seed list by value, so callers get their own copy.
std::vector<int> getRandomSeeds() const { return randomSeeds; }
void cross_validation(const std::string& fileName);
void go();
void saveResult();
void show();
void saveGraph();
void report(bool classification_report = false);
void report();
// Run options stored as members; go() and cross_validation() no longer take
// them as parameters. These setters return void, so they cannot be chained
// like the fluent set* methods that return Experiment&.
// Datasets iterated by go(); report() also checks whether exactly one was given.
void setFilesToTest(const std::vector<std::string>& filesToTest) { this->filesToTest = filesToTest; }
// When true, go() skips its per-dataset console output.
void setQuiet(bool quiet) { this->quiet = quiet; }
// NOTE(review): the three flags below were formerly cross_validation() arguments;
// presumably they are now read there from these members — confirm in Experiment.cpp.
void setNoTrainScore(bool no_train_score) { this->no_train_score = no_train_score; }
void setGenerateFoldFiles(bool generate_fold_files) { this->generate_fold_files = generate_fold_files; }
void setGraph(bool graph) { this->graph = graph; }
private:
score_t parse_score() const;
Result result;
bool discretized{ false }, stratified{ false };
bool discretized{ false }, stratified{ false }, generate_fold_files{ false }, graph{ false }, quiet{ false }, no_train_score{ false };
std::vector<PartialResult> results;
std::vector<int> randomSeeds;
std::vector<std::string> filesToTest;
std::string discretization_algo;
std::string smooth_strategy;
bayesnet::Smoothing_t smooth_type{ bayesnet::Smoothing_t::NONE };