#include #include #include #include #include "main/Models.h" #include "common/Paths.h" #include "common/Utils.h" #include "GridExperiment.h" namespace platform { // GridExperiment::GridExperiment(argparse::ArgumentParser& program, struct ConfigGrid& config) : arguments(program), GridBase(config) GridExperiment::GridExperiment(ArgumentsExperiment& program, struct ConfigGrid& config) : arguments(program), GridBase(config) { experiment = arguments.initializedExperiment(); filesToTest = arguments.getFilesToTest(); saveResults = arguments.haveToSaveResults(); this->config.model = experiment.getModel(); this->config.score = experiment.getScore(); this->config.discretize = experiment.isDiscretized(); this->config.stratified = experiment.isStratified(); this->config.smooth_strategy = experiment.getSmoothStrategy(); this->config.n_folds = experiment.getNFolds(); this->config.seeds = experiment.getRandomSeeds(); this->config.quiet = experiment.isQuiet(); } json GridExperiment::getResults() { return computed_results; } std::vector GridExperiment::filterDatasets(Datasets& datasets) const { return filesToTest; } json GridExperiment::initializeResults() { json results; return results; } void GridExperiment::save(json& results) { } void GridExperiment::compile_results(json& results, json& all_results, std::string& model) { auto datasets = Datasets(false, Paths::datasets()); nlohmann::json temp = all_results; // To restore the order of the data by dataset name all_results = temp; for (const auto& result_item : all_results.items()) { // each result has the results of all the outer folds as each one were a different task auto dataset_name = result_item.key(); auto data = result_item.value(); auto result = json::object(); int data_size = data.size(); auto score = torch::zeros({ data_size }, torch::kFloat64); auto score_train = torch::zeros({ data_size }, torch::kFloat64); auto time_test = torch::zeros({ data_size }, torch::kFloat64); auto time_train = torch::zeros({ data_size }, torch::kFloat64); auto nodes = torch::zeros({ data_size }, torch::kFloat64); auto leaves = torch::zeros({ data_size }, torch::kFloat64); auto depth = torch::zeros({ data_size }, torch::kFloat64); auto& dataset = datasets.getDataset(dataset_name); dataset.load(); // // Prepare Result // auto partial_result = PartialResult(); partial_result.setSamples(dataset.getNSamples()).setFeatures(dataset.getNFeatures()).setClasses(dataset.getNClasses()); partial_result.setHyperparameters(experiment.getHyperParameters().get(dataset_name)); for (int fold = 0; fold < data_size; ++fold) { partial_result.addScoreTest(data[fold]["score"]); partial_result.addScoreTrain(0.0); partial_result.addTimeTest(data[fold]["time"]); partial_result.addTimeTrain(data[fold]["time_train"]); score[fold] = data[fold]["score"].get(); time_test[fold] = data[fold]["time"].get(); time_train[fold] = data[fold]["time_train"].get(); nodes[fold] = data[fold]["nodes"].get(); leaves[fold] = data[fold]["leaves"].get(); depth[fold] = data[fold]["depth"].get(); } partial_result.setGraph(std::vector()); partial_result.setScoreTest(torch::mean(score).item()).setScoreTrain(0.0); partial_result.setScoreTestStd(torch::std(score).item()).setScoreTrainStd(0.0); partial_result.setTrainTime(torch::mean(time_train).item()).setTestTime(torch::mean(time_test).item()); partial_result.setTrainTimeStd(torch::std(time_train).item()).setTestTimeStd(torch::std(time_test).item()); 
            partial_result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(leaves).item<double>()).setDepth(torch::mean(depth).item<double>());
            partial_result.setDataset(dataset_name).setNotes(std::vector<std::string>());
            partial_result.setConfusionMatrices(json::array());
            experiment.addResult(partial_result);
        }
        auto clf = Models::instance()->create(experiment.getModel());
        experiment.setModelVersion(clf->getVersion());
        computed_results = results;
    }
    json GridExperiment::store_result(std::vector<std::string>& names, Task_Result& result, json& results)
    {
        json json_result = {
            { "score", result.score },
            { "combination", result.idx_combination },
            { "fold", result.n_fold },
            { "time", result.time },
            { "time_train", result.time_train },
            { "dataset", result.idx_dataset },
            { "nodes", result.nodes },
            { "leaves", result.leaves },
            { "depth", result.depth },
            { "process", result.process },
            { "task", result.task }
        };
        auto name = names[result.idx_dataset];
        if (!results.contains(name)) {
            results[name] = json::array();
        }
        results[name].push_back(json_result);
        return results;
    }
    void GridExperiment::consumer_go(struct ConfigGrid& config, struct ConfigMPI& config_mpi, json& tasks, int n_task, Datasets& datasets, Task_Result* result)
    {
        //
        // initialize
        //
        Timer train_timer, test_timer;
        json task = tasks[n_task];
        auto model = config.model;
        auto dataset_name = task["dataset"].get<std::string>();
        auto idx_dataset = task["idx_dataset"].get<int>();
        auto seed = task["seed"].get<int>();
        auto n_fold = task["fold"].get<int>();
        bool stratified = config.stratified;
        bayesnet::Smoothing_t smooth;
        if (config.smooth_strategy == "ORIGINAL")
            smooth = bayesnet::Smoothing_t::ORIGINAL;
        else if (config.smooth_strategy == "LAPLACE")
            smooth = bayesnet::Smoothing_t::LAPLACE;
        else if (config.smooth_strategy == "CESTNIK")
            smooth = bayesnet::Smoothing_t::CESTNIK;
        //
        // Generate the hyperparameters combinations
        //
        auto& dataset = datasets.getDataset(dataset_name);
        dataset.load();
        auto [X, y] = dataset.getTensors();
        auto features = dataset.getFeatures();
        auto className = dataset.getClassName();
        //
        // Start working on task
        //
        folding::Fold* fold;
        if (stratified)
            fold = new folding::StratifiedKFold(config.n_folds, y, seed);
        else
            fold = new folding::KFold(config.n_folds, y.size(0), seed);
        train_timer.start();
        auto [train, test] = fold->getFold(n_fold);
        auto [X_train, X_test, y_train, y_test] = dataset.getTrainTestTensors(train, test);
        auto states = dataset.getStates(); // Get the states of the features once they are discretized
        //
        // Build Classifier with selected hyperparameters
        //
        auto clf = Models::instance()->create(config.model);
        auto valid = clf->getValidHyperparameters();
        auto hyperparameters = experiment.getHyperParameters();
        hyperparameters.check(valid, dataset_name);
        clf->setHyperparameters(hyperparameters.get(dataset_name));
        //
        // Train model
        //
        clf->fit(X_train, y_train, features, className, states, smooth);
        auto train_time = train_timer.getDuration();
        //
        // Test model
        //
        test_timer.start();
        double score = clf->score(X_test, y_test);
        delete fold;
        auto test_time = test_timer.getDuration();
        //
        // Return the result
        //
        result->idx_dataset = task["idx_dataset"].get<int>();
        result->idx_combination = 0;
        result->score = score;
        result->n_fold = n_fold;
        result->time = test_time;
        result->time_train = train_time;
        result->nodes = clf->getNumberOfNodes();
        result->leaves = clf->getNumberOfEdges();   // "leaves" field carries the classifier's edge count
        result->depth = clf->getNumberOfStates();   // "depth" field carries the classifier's state count
        result->process = config_mpi.rank;
        result->task = n_task;
        //
        // Update progress bar
        //
        std::cout << get_color_rank(config_mpi.rank) << std::flush;
    }
} /* namespace platform */