Complete b_grid experiment
This commit is contained in:
@@ -318,12 +318,13 @@ void experiment(argparse::ArgumentParser& program)
|
|||||||
}
|
}
|
||||||
grid_experiment.go(mpi_config);
|
grid_experiment.go(mpi_config);
|
||||||
if (mpi_config.rank == mpi_config.manager) {
|
if (mpi_config.rank == mpi_config.manager) {
|
||||||
auto results = grid_experiment.getResults();
|
auto experiment = grid_experiment.getExperiment();
|
||||||
//build_experiment_result(results);
|
std::cout << "* Report of the computed hyperparameters" << std::endl;
|
||||||
std::cout << "****** RESULTS ********" << std::endl;
|
auto duration = timer.getDuration();
|
||||||
std::cout << results.dump(4) << std::endl;
|
experiment.setDuration(duration);
|
||||||
// list_results(results, config.model);
|
// experiment.report(grid_experiment.numFiles() == 1);
|
||||||
std::cout << "Process took " << timer.getDurationString() << std::endl;
|
experiment.saveResult();
|
||||||
|
std::cout << "Process took " << duration << std::endl;
|
||||||
}
|
}
|
||||||
MPI_Finalize();
|
MPI_Finalize();
|
||||||
}
|
}
|
||||||
|
@@ -46,6 +46,41 @@ namespace platform {
|
|||||||
}
|
}
|
||||||
std::cout << separator << std::endl << separator << std::flush;
|
std::cout << separator << std::endl << separator << std::flush;
|
||||||
}
|
}
|
||||||
|
json GridBase::build_tasks(Datasets& datasets)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Each task is a json object with the following structure:
|
||||||
|
* {
|
||||||
|
* "dataset": "dataset_name",
|
||||||
|
* "idx_dataset": idx_dataset, // used to identify the dataset in the results
|
||||||
|
* // this index is relative to the list of used datasets in the actual run not to the whole datasets list
|
||||||
|
* "seed": # of seed to use,
|
||||||
|
* "fold": # of fold to process
|
||||||
|
* }
|
||||||
|
* This way a task consists in process all combinations of hyperparameters for a dataset, seed and fold
|
||||||
|
*/
|
||||||
|
auto tasks = json::array();
|
||||||
|
auto grid = GridData(Paths::grid_input(config.model));
|
||||||
|
auto all_datasets = datasets.getNames();
|
||||||
|
auto datasets_names = filterDatasets(datasets);
|
||||||
|
for (int idx_dataset = 0; idx_dataset < datasets_names.size(); ++idx_dataset) {
|
||||||
|
auto dataset = datasets_names[idx_dataset];
|
||||||
|
for (const auto& seed : config.seeds) {
|
||||||
|
auto combinations = grid.getGrid(dataset);
|
||||||
|
for (int n_fold = 0; n_fold < config.n_folds; n_fold++) {
|
||||||
|
json task = {
|
||||||
|
{ "dataset", dataset },
|
||||||
|
{ "idx_dataset", idx_dataset},
|
||||||
|
{ "seed", seed },
|
||||||
|
{ "fold", n_fold},
|
||||||
|
};
|
||||||
|
tasks.push_back(task);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
shuffle_and_progress_bar(tasks);
|
||||||
|
return tasks;
|
||||||
|
}
|
||||||
void GridBase::summary(json& all_results, json& tasks, struct ConfigMPI& config_mpi)
|
void GridBase::summary(json& all_results, json& tasks, struct ConfigMPI& config_mpi)
|
||||||
{
|
{
|
||||||
// Report the tasks done by each worker, showing dataset number, seed, fold and time spent
|
// Report the tasks done by each worker, showing dataset number, seed, fold and time spent
|
||||||
@@ -146,20 +181,21 @@ namespace platform {
|
|||||||
Task_Result result;
|
Task_Result result;
|
||||||
int tasks_size;
|
int tasks_size;
|
||||||
MPI_Datatype MPI_Result;
|
MPI_Datatype MPI_Result;
|
||||||
MPI_Datatype type[10] = { MPI_UNSIGNED, MPI_UNSIGNED, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT };
|
MPI_Datatype type[11] = { MPI_UNSIGNED, MPI_UNSIGNED, MPI_INT, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT };
|
||||||
int blocklen[10] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
|
int blocklen[11] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
|
||||||
MPI_Aint disp[10];
|
MPI_Aint disp[11];
|
||||||
disp[0] = offsetof(Task_Result, idx_dataset);
|
disp[0] = offsetof(Task_Result, idx_dataset);
|
||||||
disp[1] = offsetof(Task_Result, idx_combination);
|
disp[1] = offsetof(Task_Result, idx_combination);
|
||||||
disp[2] = offsetof(Task_Result, n_fold);
|
disp[2] = offsetof(Task_Result, n_fold);
|
||||||
disp[3] = offsetof(Task_Result, score);
|
disp[3] = offsetof(Task_Result, score);
|
||||||
disp[4] = offsetof(Task_Result, time);
|
disp[4] = offsetof(Task_Result, time);
|
||||||
disp[5] = offsetof(Task_Result, nodes);
|
disp[5] = offsetof(Task_Result, time_train);
|
||||||
disp[6] = offsetof(Task_Result, leaves);
|
disp[6] = offsetof(Task_Result, nodes);
|
||||||
disp[7] = offsetof(Task_Result, depth);
|
disp[7] = offsetof(Task_Result, leaves);
|
||||||
disp[8] = offsetof(Task_Result, process);
|
disp[8] = offsetof(Task_Result, depth);
|
||||||
disp[9] = offsetof(Task_Result, task);
|
disp[9] = offsetof(Task_Result, process);
|
||||||
MPI_Type_create_struct(10, blocklen, disp, type, &MPI_Result);
|
disp[10] = offsetof(Task_Result, task);
|
||||||
|
MPI_Type_create_struct(11, blocklen, disp, type, &MPI_Result);
|
||||||
MPI_Type_commit(&MPI_Result);
|
MPI_Type_commit(&MPI_Result);
|
||||||
//
|
//
|
||||||
// 0.2 Manager creates the tasks
|
// 0.2 Manager creates the tasks
|
||||||
|
@@ -22,7 +22,7 @@ namespace platform {
|
|||||||
void go(struct ConfigMPI& config_mpi);
|
void go(struct ConfigMPI& config_mpi);
|
||||||
void validate_config();
|
void validate_config();
|
||||||
protected:
|
protected:
|
||||||
virtual json build_tasks(Datasets& datasets) = 0;
|
json build_tasks(Datasets& datasets);
|
||||||
virtual void save(json& results) = 0;
|
virtual void save(json& results) = 0;
|
||||||
virtual std::vector<std::string> filterDatasets(Datasets& datasets) const = 0;
|
virtual std::vector<std::string> filterDatasets(Datasets& datasets) const = 0;
|
||||||
virtual json initializeResults() = 0;
|
virtual json initializeResults() = 0;
|
||||||
|
@@ -39,7 +39,8 @@ namespace platform {
|
|||||||
uint idx_combination;
|
uint idx_combination;
|
||||||
int n_fold;
|
int n_fold;
|
||||||
double score; // Experiment: Score test, no score train in this case
|
double score; // Experiment: Score test, no score train in this case
|
||||||
double time; // Experiment: Time train+test, no time train and/or time test in this case
|
double time; // Experiment: Time test
|
||||||
|
double time_train;
|
||||||
double nodes; // Experiment specific
|
double nodes; // Experiment specific
|
||||||
double leaves; // Experiment specific
|
double leaves; // Experiment specific
|
||||||
double depth; // Experiment specific
|
double depth; // Experiment specific
|
||||||
|
@@ -123,46 +123,8 @@ namespace platform {
|
|||||||
{
|
{
|
||||||
return computed_results;
|
return computed_results;
|
||||||
}
|
}
|
||||||
json GridExperiment::build_tasks(Datasets& datasets)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Each task is a json object with the following structure:
|
|
||||||
* {
|
|
||||||
* "dataset": "dataset_name",
|
|
||||||
* "idx_dataset": idx_dataset, // used to identify the dataset in the results
|
|
||||||
* // this index is relative to the list of used datasets in the actual run not to the whole datasets list
|
|
||||||
* "seed": # of seed to use,
|
|
||||||
* "fold": # of fold to process
|
|
||||||
* }
|
|
||||||
*/
|
|
||||||
auto tasks = json::array();
|
|
||||||
auto all_datasets = datasets.getNames();
|
|
||||||
auto datasets_names = filterDatasets(datasets);
|
|
||||||
for (int idx_dataset = 0; idx_dataset < datasets_names.size(); ++idx_dataset) {
|
|
||||||
auto dataset = datasets_names[idx_dataset];
|
|
||||||
for (const auto& seed : config.seeds) {
|
|
||||||
for (int n_fold = 0; n_fold < config.n_folds; n_fold++) {
|
|
||||||
json task = {
|
|
||||||
{ "dataset", dataset },
|
|
||||||
{ "idx_dataset", idx_dataset},
|
|
||||||
{ "seed", seed },
|
|
||||||
{ "fold", n_fold},
|
|
||||||
};
|
|
||||||
tasks.push_back(task);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
shuffle_and_progress_bar(tasks);
|
|
||||||
return tasks;
|
|
||||||
}
|
|
||||||
std::vector<std::string> GridExperiment::filterDatasets(Datasets& datasets) const
|
std::vector<std::string> GridExperiment::filterDatasets(Datasets& datasets) const
|
||||||
{
|
{
|
||||||
// Load datasets
|
|
||||||
// auto datasets_names = datasets.getNames();
|
|
||||||
// datasets_names.clear();
|
|
||||||
// datasets_names.push_back("iris");
|
|
||||||
// datasets_names.push_back("wine");
|
|
||||||
// datasets_names.push_back("balance-scale");
|
|
||||||
return filesToTest;
|
return filesToTest;
|
||||||
}
|
}
|
||||||
json GridExperiment::initializeResults()
|
json GridExperiment::initializeResults()
|
||||||
@@ -172,25 +134,9 @@ namespace platform {
|
|||||||
}
|
}
|
||||||
void GridExperiment::save(json& results)
|
void GridExperiment::save(json& results)
|
||||||
{
|
{
|
||||||
// std::ofstream file(Paths::grid_output(config.model));
|
|
||||||
// json output = {
|
|
||||||
// { "model", config.model },
|
|
||||||
// { "score", config.score },
|
|
||||||
// { "discretize", config.discretize },
|
|
||||||
// { "stratified", config.stratified },
|
|
||||||
// { "n_folds", config.n_folds },
|
|
||||||
// { "seeds", config.seeds },
|
|
||||||
// { "date", get_date() + " " + get_time()},
|
|
||||||
// { "nested", config.nested},
|
|
||||||
// { "platform", config.platform },
|
|
||||||
// { "duration", timer.getDurationString(true)},
|
|
||||||
// { "results", results }
|
|
||||||
// };
|
|
||||||
// file << output.dump(4);
|
|
||||||
}
|
}
|
||||||
void GridExperiment::compile_results(json& results, json& all_results, std::string& model)
|
void GridExperiment::compile_results(json& results, json& all_results, std::string& model)
|
||||||
{
|
{
|
||||||
results = json::array();
|
|
||||||
auto datasets = Datasets(false, Paths::datasets());
|
auto datasets = Datasets(false, Paths::datasets());
|
||||||
for (const auto& result_item : all_results.items()) {
|
for (const auto& result_item : all_results.items()) {
|
||||||
// each result has the results of all the outer folds as each one were a different task
|
// each result has the results of all the outer folds as each one were a different task
|
||||||
@@ -199,52 +145,44 @@ namespace platform {
|
|||||||
auto result = json::object();
|
auto result = json::object();
|
||||||
int data_size = data.size();
|
int data_size = data.size();
|
||||||
auto score = torch::zeros({ data_size }, torch::kFloat64);
|
auto score = torch::zeros({ data_size }, torch::kFloat64);
|
||||||
auto time_t = torch::zeros({ data_size }, torch::kFloat64);
|
auto score_train = torch::zeros({ data_size }, torch::kFloat64);
|
||||||
|
auto time_test = torch::zeros({ data_size }, torch::kFloat64);
|
||||||
|
auto time_train = torch::zeros({ data_size }, torch::kFloat64);
|
||||||
auto nodes = torch::zeros({ data_size }, torch::kFloat64);
|
auto nodes = torch::zeros({ data_size }, torch::kFloat64);
|
||||||
auto leaves = torch::zeros({ data_size }, torch::kFloat64);
|
auto leaves = torch::zeros({ data_size }, torch::kFloat64);
|
||||||
auto depth = torch::zeros({ data_size }, torch::kFloat64);
|
auto depth = torch::zeros({ data_size }, torch::kFloat64);
|
||||||
|
auto& dataset = datasets.getDataset(dataset_name);
|
||||||
|
dataset.load();
|
||||||
|
//
|
||||||
|
// Prepare Result
|
||||||
|
//
|
||||||
|
auto partial_result = PartialResult();
|
||||||
|
partial_result.setSamples(dataset.getNSamples()).setFeatures(dataset.getNFeatures()).setClasses(dataset.getNClasses());
|
||||||
|
partial_result.setHyperparameters(experiment.getHyperParameters().get(dataset_name));
|
||||||
for (int fold = 0; fold < data_size; ++fold) {
|
for (int fold = 0; fold < data_size; ++fold) {
|
||||||
result["scores_test"].push_back(data[fold]["score"]);
|
partial_result.addScoreTest(data[fold]["score"]);
|
||||||
|
partial_result.addScoreTrain(0.0);
|
||||||
|
partial_result.addTimeTest(data[fold]["time"]);
|
||||||
|
partial_result.addTimeTrain(data[fold]["time_train"]);
|
||||||
score[fold] = data[fold]["score"].get<double>();
|
score[fold] = data[fold]["score"].get<double>();
|
||||||
time_t[fold] = data[fold]["time"].get<double>();
|
time_test[fold] = data[fold]["time"].get<double>();
|
||||||
|
time_train[fold] = data[fold]["time_train"].get<double>();
|
||||||
nodes[fold] = data[fold]["nodes"].get<double>();
|
nodes[fold] = data[fold]["nodes"].get<double>();
|
||||||
leaves[fold] = data[fold]["leaves"].get<double>();
|
leaves[fold] = data[fold]["leaves"].get<double>();
|
||||||
depth[fold] = data[fold]["depth"].get<double>();
|
depth[fold] = data[fold]["depth"].get<double>();
|
||||||
}
|
}
|
||||||
double score_mean = torch::mean(score).item<double>();
|
partial_result.setGraph(std::vector<std::string>());
|
||||||
double score_std = torch::std(score).item<double>();
|
partial_result.setScoreTest(torch::mean(score).item<double>()).setScoreTrain(0.0);
|
||||||
double time_mean = torch::mean(time_t).item<double>();
|
partial_result.setScoreTestStd(torch::std(score).item<double>()).setScoreTrainStd(0.0);
|
||||||
double time_std = torch::std(time_t).item<double>();
|
partial_result.setTrainTime(torch::mean(time_train).item<double>()).setTestTime(torch::mean(time_test).item<double>());
|
||||||
double nodes_mean = torch::mean(nodes).item<double>();
|
partial_result.setTrainTimeStd(torch::std(time_train).item<double>()).setTestTimeStd(torch::std(time_test).item<double>());
|
||||||
double leaves_mean = torch::mean(leaves).item<double>();
|
partial_result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(leaves).item<double>()).setDepth(torch::mean(depth).item<double>());
|
||||||
double depth_mean = torch::mean(depth).item<double>();
|
partial_result.setDataset(dataset_name).setNotes(std::vector<std::string>());
|
||||||
auto& dataset = datasets.getDataset(dataset_name);
|
partial_result.setConfusionMatrices(json::array());
|
||||||
dataset.load();
|
experiment.addResult(partial_result);
|
||||||
result["samples"] = dataset.getNSamples();
|
|
||||||
result["features"] = dataset.getNFeatures();
|
|
||||||
result["classes"] = dataset.getNClasses();
|
|
||||||
result["hyperparameters"] = experiment.getHyperParameters().get(dataset_name);
|
|
||||||
result["score"] = score_mean;
|
|
||||||
result["score_std"] = score_std;
|
|
||||||
result["time"] = time_mean;
|
|
||||||
result["time_std"] = time_std;
|
|
||||||
result["nodes"] = nodes_mean;
|
|
||||||
result["leaves"] = leaves_mean;
|
|
||||||
result["depth"] = depth_mean;
|
|
||||||
result["dataset"] = dataset_name;
|
|
||||||
// Fixed data
|
|
||||||
result["scores_train"] = json::array();
|
|
||||||
result["times_train"] = json::array();
|
|
||||||
result["times_test"] = json::array();
|
|
||||||
result["train_time"] = 0.0;
|
|
||||||
result["train_time_std"] = 0.0;
|
|
||||||
result["test_time"] = 0.0;
|
|
||||||
result["test_time_std"] = 0.0;
|
|
||||||
result["score_train"] = 0.0;
|
|
||||||
result["score_train_std"] = 0.0;
|
|
||||||
result["confusion_matrices"] = json::array();
|
|
||||||
results.push_back(result);
|
|
||||||
}
|
}
|
||||||
|
auto clf = Models::instance()->create(experiment.getModel());
|
||||||
|
experiment.setModelVersion(clf->getVersion());
|
||||||
computed_results = results;
|
computed_results = results;
|
||||||
}
|
}
|
||||||
json GridExperiment::store_result(std::vector<std::string>& names, Task_Result& result, json& results)
|
json GridExperiment::store_result(std::vector<std::string>& names, Task_Result& result, json& results)
|
||||||
@@ -254,6 +192,7 @@ namespace platform {
|
|||||||
{ "combination", result.idx_combination },
|
{ "combination", result.idx_combination },
|
||||||
{ "fold", result.n_fold },
|
{ "fold", result.n_fold },
|
||||||
{ "time", result.time },
|
{ "time", result.time },
|
||||||
|
{ "time_train", result.time_train },
|
||||||
{ "dataset", result.idx_dataset },
|
{ "dataset", result.idx_dataset },
|
||||||
{ "nodes", result.nodes },
|
{ "nodes", result.nodes },
|
||||||
{ "leaves", result.leaves },
|
{ "leaves", result.leaves },
|
||||||
@@ -273,8 +212,7 @@ namespace platform {
|
|||||||
//
|
//
|
||||||
// initialize
|
// initialize
|
||||||
//
|
//
|
||||||
Timer timer;
|
Timer train_timer, test_timer;
|
||||||
timer.start();
|
|
||||||
json task = tasks[n_task];
|
json task = tasks[n_task];
|
||||||
auto model = config.model;
|
auto model = config.model;
|
||||||
auto dataset_name = task["dataset"].get<std::string>();
|
auto dataset_name = task["dataset"].get<std::string>();
|
||||||
@@ -305,6 +243,7 @@ namespace platform {
|
|||||||
fold = new folding::StratifiedKFold(config.n_folds, y, seed);
|
fold = new folding::StratifiedKFold(config.n_folds, y, seed);
|
||||||
else
|
else
|
||||||
fold = new folding::KFold(config.n_folds, y.size(0), seed);
|
fold = new folding::KFold(config.n_folds, y.size(0), seed);
|
||||||
|
train_timer.start();
|
||||||
auto [train, test] = fold->getFold(n_fold);
|
auto [train, test] = fold->getFold(n_fold);
|
||||||
auto [X_train, X_test, y_train, y_test] = dataset.getTrainTestTensors(train, test);
|
auto [X_train, X_test, y_train, y_test] = dataset.getTrainTestTensors(train, test);
|
||||||
auto states = dataset.getStates(); // Get the states of the features Once they are discretized
|
auto states = dataset.getStates(); // Get the states of the features Once they are discretized
|
||||||
@@ -321,11 +260,14 @@ namespace platform {
|
|||||||
// Train model
|
// Train model
|
||||||
//
|
//
|
||||||
clf->fit(X_train, y_train, features, className, states, smooth);
|
clf->fit(X_train, y_train, features, className, states, smooth);
|
||||||
|
auto train_time = train_timer.getDuration();
|
||||||
//
|
//
|
||||||
// Test model
|
// Test model
|
||||||
//
|
//
|
||||||
|
test_timer.start();
|
||||||
double score = clf->score(X_test, y_test);
|
double score = clf->score(X_test, y_test);
|
||||||
delete fold;
|
delete fold;
|
||||||
|
auto test_time = test_timer.getDuration();
|
||||||
//
|
//
|
||||||
// Return the result
|
// Return the result
|
||||||
//
|
//
|
||||||
@@ -333,7 +275,8 @@ namespace platform {
|
|||||||
result->idx_combination = 0;
|
result->idx_combination = 0;
|
||||||
result->score = score;
|
result->score = score;
|
||||||
result->n_fold = n_fold;
|
result->n_fold = n_fold;
|
||||||
result->time = timer.getDuration();
|
result->time = test_time;
|
||||||
|
result->time_train = train_time;
|
||||||
result->nodes = clf->getNumberOfNodes();
|
result->nodes = clf->getNumberOfNodes();
|
||||||
result->leaves = clf->getNumberOfEdges();
|
result->leaves = clf->getNumberOfEdges();
|
||||||
result->depth = clf->getNumberOfStates();
|
result->depth = clf->getNumberOfStates();
|
||||||
|
@@ -21,6 +21,8 @@ namespace platform {
|
|||||||
explicit GridExperiment(argparse::ArgumentParser& program, struct ConfigGrid& config);
|
explicit GridExperiment(argparse::ArgumentParser& program, struct ConfigGrid& config);
|
||||||
~GridExperiment() = default;
|
~GridExperiment() = default;
|
||||||
json getResults();
|
json getResults();
|
||||||
|
Experiment& getExperiment() { return experiment; }
|
||||||
|
size_t numFiles() const { return filesToTest.size(); }
|
||||||
private:
|
private:
|
||||||
argparse::ArgumentParser& arguments;
|
argparse::ArgumentParser& arguments;
|
||||||
Experiment experiment;
|
Experiment experiment;
|
||||||
@@ -28,7 +30,6 @@ namespace platform {
|
|||||||
std::vector<std::string> filesToTest;
|
std::vector<std::string> filesToTest;
|
||||||
void save(json& results);
|
void save(json& results);
|
||||||
json initializeResults();
|
json initializeResults();
|
||||||
json build_tasks(Datasets& datasets);
|
|
||||||
std::vector<std::string> filterDatasets(Datasets& datasets) const;
|
std::vector<std::string> filterDatasets(Datasets& datasets) const;
|
||||||
void compile_results(json& results, json& all_results, std::string& model);
|
void compile_results(json& results, json& all_results, std::string& model);
|
||||||
json store_result(std::vector<std::string>& names, Task_Result& result, json& results);
|
json store_result(std::vector<std::string>& names, Task_Result& result, json& results);
|
||||||
|
@@ -19,41 +19,6 @@ namespace platform {
|
|||||||
}
|
}
|
||||||
return json();
|
return json();
|
||||||
}
|
}
|
||||||
json GridSearch::build_tasks(Datasets& datasets)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Each task is a json object with the following structure:
|
|
||||||
* {
|
|
||||||
* "dataset": "dataset_name",
|
|
||||||
* "idx_dataset": idx_dataset, // used to identify the dataset in the results
|
|
||||||
* // this index is relative to the list of used datasets in the actual run not to the whole datasets list
|
|
||||||
* "seed": # of seed to use,
|
|
||||||
* "fold": # of fold to process
|
|
||||||
* }
|
|
||||||
* This way a task consists in process all combinations of hyperparameters for a dataset, seed and fold
|
|
||||||
*/
|
|
||||||
auto tasks = json::array();
|
|
||||||
auto grid = GridData(Paths::grid_input(config.model));
|
|
||||||
auto all_datasets = datasets.getNames();
|
|
||||||
auto datasets_names = filterDatasets(datasets);
|
|
||||||
for (int idx_dataset = 0; idx_dataset < datasets_names.size(); ++idx_dataset) {
|
|
||||||
auto dataset = datasets_names[idx_dataset];
|
|
||||||
for (const auto& seed : config.seeds) {
|
|
||||||
auto combinations = grid.getGrid(dataset);
|
|
||||||
for (int n_fold = 0; n_fold < config.n_folds; n_fold++) {
|
|
||||||
json task = {
|
|
||||||
{ "dataset", dataset },
|
|
||||||
{ "idx_dataset", idx_dataset},
|
|
||||||
{ "seed", seed },
|
|
||||||
{ "fold", n_fold},
|
|
||||||
};
|
|
||||||
tasks.push_back(task);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
shuffle_and_progress_bar(tasks);
|
|
||||||
return tasks;
|
|
||||||
}
|
|
||||||
std::vector<std::string> GridSearch::filterDatasets(Datasets& datasets) const
|
std::vector<std::string> GridSearch::filterDatasets(Datasets& datasets) const
|
||||||
{
|
{
|
||||||
// Load datasets
|
// Load datasets
|
||||||
|
@@ -24,7 +24,6 @@ namespace platform {
|
|||||||
private:
|
private:
|
||||||
void save(json& results);
|
void save(json& results);
|
||||||
json initializeResults();
|
json initializeResults();
|
||||||
json build_tasks(Datasets& datasets);
|
|
||||||
std::vector<std::string> filterDatasets(Datasets& datasets) const;
|
std::vector<std::string> filterDatasets(Datasets& datasets) const;
|
||||||
void compile_results(json& results, json& all_results, std::string& model);
|
void compile_results(json& results, json& all_results, std::string& model);
|
||||||
json store_result(std::vector<std::string>& names, Task_Result& result, json& results);
|
json store_result(std::vector<std::string>& names, Task_Result& result, json& results);
|
||||||
|
@@ -9,6 +9,7 @@ namespace platform {
|
|||||||
|
|
||||||
void Experiment::saveResult()
|
void Experiment::saveResult()
|
||||||
{
|
{
|
||||||
|
result.setSchemaVersion("1.0");
|
||||||
result.check();
|
result.check();
|
||||||
result.save();
|
result.save();
|
||||||
std::cout << "Result saved in " << Paths::results() << result.getFilename() << std::endl;
|
std::cout << "Result saved in " << Paths::results() << result.getFilename() << std::endl;
|
||||||
|
@@ -20,6 +20,7 @@ namespace platform {
|
|||||||
Experiment& setTitle(const std::string& title) { this->result.setTitle(title); return *this; }
|
Experiment& setTitle(const std::string& title) { this->result.setTitle(title); return *this; }
|
||||||
Experiment& setModelVersion(const std::string& model_version) { this->result.setModelVersion(model_version); return *this; }
|
Experiment& setModelVersion(const std::string& model_version) { this->result.setModelVersion(model_version); return *this; }
|
||||||
Experiment& setModel(const std::string& model) { this->result.setModel(model); return *this; }
|
Experiment& setModel(const std::string& model) { this->result.setModel(model); return *this; }
|
||||||
|
std::string getModel() const { return result.getModel(); }
|
||||||
Experiment& setLanguage(const std::string& language) { this->result.setLanguage(language); return *this; }
|
Experiment& setLanguage(const std::string& language) { this->result.setLanguage(language); return *this; }
|
||||||
Experiment& setDiscretizationAlgorithm(const std::string& discretization_algo)
|
Experiment& setDiscretizationAlgorithm(const std::string& discretization_algo)
|
||||||
{
|
{
|
||||||
|
@@ -257,8 +257,9 @@ namespace platform {
|
|||||||
auto [index_from, index_to] = paginator[static_cast<int>(output_type)].getOffset();
|
auto [index_from, index_to] = paginator[static_cast<int>(output_type)].getOffset();
|
||||||
for (int i = index_from; i <= index_to; i++) {
|
for (int i = index_from; i <= index_to; i++) {
|
||||||
auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN();
|
auto color = (i % 2) ? Colors::BLUE() : Colors::CYAN();
|
||||||
std::cout << color << std::setw(3) << std::fixed << std::right << i << " ";
|
auto color_status = results.at(i).check().size() == 0 ? color : Colors::RED();
|
||||||
std::cout << results.at(i).to_string(maxModel, maxTitle) << std::endl;
|
std::cout << color_status << std::setw(3) << std::fixed << std::right << i << " ";
|
||||||
|
std::cout << color << results.at(i).to_string(maxModel, maxTitle) << std::endl;
|
||||||
}
|
}
|
||||||
//
|
//
|
||||||
// Status Area
|
// Status Area
|
||||||
|
@@ -49,7 +49,8 @@ namespace platform {
|
|||||||
oss << "Execution took " << timer.translate2String(data["duration"].get<float>())
|
oss << "Execution took " << timer.translate2String(data["duration"].get<float>())
|
||||||
<< " on " << data["platform"].get<std::string>() << " Language: " << data["language"].get<std::string>();
|
<< " on " << data["platform"].get<std::string>() << " Language: " << data["language"].get<std::string>();
|
||||||
sheader << headerLine(oss.str());
|
sheader << headerLine(oss.str());
|
||||||
sheader << headerLine("Score is " + data["score_name"].get<std::string>());
|
std::string schema_version = data.find("schema_version") != data.end() ? data["schema_version"].get<std::string>() : "-";
|
||||||
|
sheader << headerLine("Score is " + data["score_name"].get<std::string>() + " Schema version: " + schema_version);
|
||||||
sheader << std::string(MAXL, '*') << std::endl;
|
sheader << std::string(MAXL, '*') << std::endl;
|
||||||
sheader << std::endl;
|
sheader << std::endl;
|
||||||
}
|
}
|
||||||
@@ -250,7 +251,7 @@ namespace platform {
|
|||||||
if (train_data) {
|
if (train_data) {
|
||||||
oss << color_line << std::left << std::setw(maxLine) << output_train[i]
|
oss << color_line << std::left << std::setw(maxLine) << output_train[i]
|
||||||
<< suffix << Colors::BLUE() << " | " << color_line << std::left << std::setw(maxLine)
|
<< suffix << Colors::BLUE() << " | " << color_line << std::left << std::setw(maxLine)
|
||||||
<< output_test[i] << std::endl;
|
<< output_test[i] << std::endl;
|
||||||
} else {
|
} else {
|
||||||
oss << color_line << output_test[i] << std::endl;
|
oss << color_line << output_test[i] << std::endl;
|
||||||
}
|
}
|
||||||
|
@@ -64,18 +64,10 @@ namespace platform {
|
|||||||
{
|
{
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
void Result::check()
|
std::vector<std::string> Result::check()
|
||||||
{
|
{
|
||||||
platform::JsonValidator validator(platform::SchemaV1_0::schema);
|
platform::JsonValidator validator(platform::SchemaV1_0::schema);
|
||||||
data["schema_version"] = "1.0";
|
return validator.validate(data);
|
||||||
std::vector<std::string> errors = validator.validate(data);
|
|
||||||
if (!errors.empty()) {
|
|
||||||
std::string message;
|
|
||||||
for (const auto& error : errors) {
|
|
||||||
message += " - " + error + "\n";
|
|
||||||
}
|
|
||||||
throw std::runtime_error("* Result file has validation errors:\n" + message);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
void Result::save()
|
void Result::save()
|
||||||
{
|
{
|
||||||
|
@@ -16,7 +16,7 @@ namespace platform {
|
|||||||
Result();
|
Result();
|
||||||
Result& load(const std::string& path, const std::string& filename);
|
Result& load(const std::string& path, const std::string& filename);
|
||||||
void save();
|
void save();
|
||||||
void check();
|
std::vector<std::string> check();
|
||||||
// Getters
|
// Getters
|
||||||
json getJson();
|
json getJson();
|
||||||
std::string to_string(int maxModel, int maxTitle) const;
|
std::string to_string(int maxModel, int maxTitle) const;
|
||||||
@@ -29,7 +29,7 @@ namespace platform {
|
|||||||
std::string getModel() const { return data["model"].get<std::string>(); };
|
std::string getModel() const { return data["model"].get<std::string>(); };
|
||||||
std::string getPlatform() const { return data["platform"].get<std::string>(); };
|
std::string getPlatform() const { return data["platform"].get<std::string>(); };
|
||||||
std::string getScoreName() const { return data["score_name"].get<std::string>(); };
|
std::string getScoreName() const { return data["score_name"].get<std::string>(); };
|
||||||
|
void setSchemaVersion(const std::string& version) { data["schema_version"] = version; };
|
||||||
bool isComplete() const { return complete; };
|
bool isComplete() const { return complete; };
|
||||||
json getData() const { return data; }
|
json getData() const { return data; }
|
||||||
// Setters
|
// Setters
|
||||||
|
Reference in New Issue
Block a user