diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 62a7d9a..b18256e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -29,7 +29,7 @@ add_executable(
 target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
 
 # b_grid
-set(grid_sources GridSearch.cpp GridData.cpp GridExperiment.cpp)
+set(grid_sources GridSearch.cpp GridData.cpp GridExperiment.cpp GridBase.cpp)
 list(TRANSFORM grid_sources PREPEND grid/)
 add_executable(b_grid commands/b_grid.cpp ${grid_sources}
 common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
diff --git a/src/grid/GridBase.cpp b/src/grid/GridBase.cpp
new file mode 100644
index 0000000..f64b45c
--- /dev/null
+++ b/src/grid/GridBase.cpp
@@ -0,0 +1,22 @@
+#include "common/DotEnv.h"
+#include "common/Paths.h"
+#include "GridBase.h"
+
+namespace platform {
+
+    GridBase::GridBase(struct ConfigGrid& config)
+    {
+        this->config = config;
+        if (config.smooth_strategy == "ORIGINAL")
+            smooth_type = bayesnet::Smoothing_t::ORIGINAL;
+        else if (config.smooth_strategy == "LAPLACE")
+            smooth_type = bayesnet::Smoothing_t::LAPLACE;
+        else if (config.smooth_strategy == "CESTNIK")
+            smooth_type = bayesnet::Smoothing_t::CESTNIK;
+        else {
+            std::cerr << "GridBase: Unknown smoothing strategy: " << config.smooth_strategy << std::endl;
+            exit(1);
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/src/grid/GridBase.h b/src/grid/GridBase.h
index 66343a0..70f519a 100644
--- a/src/grid/GridBase.h
+++ b/src/grid/GridBase.h
@@ -6,6 +6,7 @@
 #include
 #include "common/Datasets.h"
 #include "common/Timer.h"
+#include "common/Colors.h"
 #include "main/HyperParameters.h"
 #include "GridData.h"
 #include "GridConfig.h"
@@ -16,24 +17,11 @@ namespace platform {
     using json = nlohmann::ordered_json;
     class GridBase {
     public:
-        explicit GridBase(struct ConfigGrid& config)
-        {
-            this->config = config;
-            if (config.smooth_strategy == "ORIGINAL")
-                smooth_type = bayesnet::Smoothing_t::ORIGINAL;
-            else if (config.smooth_strategy == "LAPLACE")
-                smooth_type = bayesnet::Smoothing_t::LAPLACE;
-            else if (config.smooth_strategy == "CESTNIK")
-                smooth_type = bayesnet::Smoothing_t::CESTNIK;
-            else {
-                std::cerr << "GridBase: Unknown smoothing strategy: " << config.smooth_strategy << std::endl;
-                exit(1);
-            }
-        };
+        explicit GridBase(struct ConfigGrid& config);
         ~GridBase() = default;
-        virtual void go(struct ConfigMPI& config_mpi) = 0;
     protected:
         virtual json build_tasks() = 0;
+        virtual void save(json& results) = 0;
         struct ConfigGrid config;
         Timer timer; // used to measure the time of the whole process
         const std::string separator = "|";
diff --git a/src/grid/GridExperiment.cpp b/src/grid/GridExperiment.cpp
index 8baec34..180ae71 100644
--- a/src/grid/GridExperiment.cpp
+++ b/src/grid/GridExperiment.cpp
@@ -23,6 +23,16 @@ namespace platform {
     }
     json GridExperiment::build_tasks()
     {
+        /*
+        * Each task is a json object with the following structure:
+        * {
+        *    "dataset": "dataset_name",
+        *    "idx_dataset": idx_dataset, // used to identify the dataset in the results
+        *         // this index is relative to the list of used datasets in the actual run not to the whole datasets list
+        *    "seed": # of seed to use,
+        *    "fold": # of fold to process
+        * }
+        */
         auto tasks = json::array();
         auto grid = GridData(Paths::grid_input(config.model));
         auto datasets = Datasets(false, Paths::datasets());
@@ -57,104 +67,6 @@
         std::cout << separator << std::endl << separator << std::flush;
         return tasks;
     }
-    void GridExperiment::go(struct ConfigMPI& config_mpi)
-    {
-        /*
-        * Each task is a json object with the following structure:
-        * {
-        *    "dataset": "dataset_name",
-        *    "idx_dataset": idx_dataset, // used to identify the dataset in the results
-        *         // this index is relative to the list of used datasets in the actual run not to the whole datasets list
-        *    "seed": # of seed to use,
-        *    "fold": # of fold to process
-        * }
-        *
-        * This way a task consists in process all combinations of hyperparameters for a dataset, seed and fold
-        *
-        * The overall process consists in these steps:
-        *    0. Create the MPI result type & tasks
-        *       0.1 Create the MPI result type
-        *       0.2 Manager creates the tasks
-        *    1. Manager will broadcast the tasks to all the processes
-        *       1.1 Broadcast the number of tasks
-        *       1.2 Broadcast the length of the following string
-        *       1.2 Broadcast the tasks as a char* string
-        *    2a. Producer delivers the tasks to the consumers
-        *       2a.1 Producer will loop to send all the tasks to the consumers and receive the results
-        *       2a.2 Producer will send the end message to all the consumers
-        *    2b. Consumers process the tasks and send the results to the producer
-        *       2b.1 Consumers announce to the producer that they are ready to receive a task
-        *       2b.2 Consumers receive the task from the producer and process it
-        *       2b.3 Consumers send the result to the producer
-        *    3. Manager select the bests scores for each dataset
-        *       3.1 Loop thru all the results obtained from each outer fold (task) and select the best
-        *       3.2 Save the results
-        */
-        //
-        // 0.1 Create the MPI result type
-        //
-        Task_Result result;
-        int tasks_size;
-        MPI_Datatype MPI_Result;
-        MPI_Datatype type[5] = { MPI_UNSIGNED, MPI_UNSIGNED, MPI_INT, MPI_DOUBLE, MPI_DOUBLE };
-        int blocklen[5] = { 1, 1, 1, 1, 1 };
-        MPI_Aint disp[5];
-        disp[0] = offsetof(Task_Result, idx_dataset);
-        disp[1] = offsetof(Task_Result, idx_combination);
-        disp[2] = offsetof(Task_Result, n_fold);
-        disp[3] = offsetof(Task_Result, score);
-        disp[4] = offsetof(Task_Result, time);
-        MPI_Type_create_struct(5, blocklen, disp, type, &MPI_Result);
-        MPI_Type_commit(&MPI_Result);
-        //
-        // 0.2 Manager creates the tasks
-        //
-        char* msg;
-        json tasks;
-        if (config_mpi.rank == config_mpi.manager) {
-            timer.start();
-            tasks = build_tasks();
-            auto tasks_str = tasks.dump();
-            tasks_size = tasks_str.size();
-            msg = new char[tasks_size + 1];
-            strcpy(msg, tasks_str.c_str());
-        }
-        //
-        // 1. Manager will broadcast the tasks to all the processes
-        //
-        MPI_Bcast(&tasks_size, 1, MPI_INT, config_mpi.manager, MPI_COMM_WORLD);
-        if (config_mpi.rank != config_mpi.manager) {
-            msg = new char[tasks_size + 1];
-        }
-        MPI_Bcast(msg, tasks_size + 1, MPI_CHAR, config_mpi.manager, MPI_COMM_WORLD);
-        tasks = json::parse(msg);
-        delete[] msg;
-        auto env = platform::DotEnv();
-        auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretize_algo"));
-
-        if (config_mpi.rank == config_mpi.manager) {
-            //
-            // 2a. Producer delivers the tasks to the consumers
-            //
-            auto datasets_names = std::vector();
-            json all_results = MPI_EXPERIMENT::producer(datasets_names, tasks, config_mpi, MPI_Result);
-            std::cout << separator << std::endl;
-            //
-            // 3. Manager select the bests sccores for each dataset
-            //
-            auto results = initializeResults();
-            //select_best_results_folds(results, all_results, config.model);
-            //
-            // 3.2 Save the results
-            //
-            save(results);
-        } else {
-            //
-            // 2b. Consumers process the tasks and send the results to the producer
-            //
-            MPI_EXPERIMENT::consumer(datasets, tasks, config, config_mpi, MPI_Result);
-        }
-    }
     json GridExperiment::initializeResults()
     {
         // Load previous results if continue is set
diff --git a/src/grid/GridExperiment.h b/src/grid/GridExperiment.h
index 6a3b7bb..556152b 100644
--- a/src/grid/GridExperiment.h
+++ b/src/grid/GridExperiment.h
@@ -5,7 +5,6 @@
 #include
 #include
 #include "common/Datasets.h"
-#include "common/Timer.h"
 #include "main/HyperParameters.h"
 #include "GridData.h"
 #include "GridBase.h"
@@ -17,9 +16,9 @@ namespace platform {
     class GridExperiment : public GridBase {
     public:
         explicit GridExperiment(struct ConfigGrid& config);
-        void go(struct ConfigMPI& config_mpi);
         ~GridExperiment() = default;
         json loadResults();
+        void go(struct ConfigMPI& config_mpi);
     private:
         void save(json& results);
         json initializeResults();
@@ -27,7 +26,7 @@ namespace platform {
     };
     /* *************************************************************************************************************
     //
-    // MPI Search Functions
+    // MPI Experiment Functions
     //
     ************************************************************************************************************* */
     class MPI_EXPERIMENT :public MPI_Base {
diff --git a/src/grid/GridSearch.cpp b/src/grid/GridSearch.cpp
index 9bb6574..e6b7b74 100644
--- a/src/grid/GridSearch.cpp
+++ b/src/grid/GridSearch.cpp
@@ -4,7 +4,6 @@
 #include
 #include "main/Models.h"
 #include "common/Paths.h"
-#include "common/Colors.h"
 #include "common/Utils.h"
 #include "GridSearch.h"
 
@@ -55,6 +54,16 @@
     }
     json GridSearch::build_tasks()
     {
+        /*
+        * Each task is a json object with the following structure:
+        * {
+        *    "dataset": "dataset_name",
+        *    "idx_dataset": idx_dataset, // used to identify the dataset in the results
+        *         // this index is relative to the list of used datasets in the actual run not to the whole datasets list
+        *    "seed": # of seed to use,
+        *    "fold": # of fold to process
+        * }
+        */
         auto tasks = json::array();
         auto grid = GridData(Paths::grid_input(config.model));
         auto datasets = Datasets(false, Paths::datasets());
diff --git a/src/grid/GridSearch.h b/src/grid/GridSearch.h
index a30d148..6593ef9 100644
--- a/src/grid/GridSearch.h
+++ b/src/grid/GridSearch.h
@@ -18,10 +18,10 @@ namespace platform {
     class GridSearch : public GridBase {
     public:
         explicit GridSearch(struct ConfigGrid& config);
-        void go(struct ConfigMPI& config_mpi);
         ~GridSearch() = default;
         json loadResults();
        static inline std::string NO_CONTINUE() { return "NO_CONTINUE"; }
+        void go(struct ConfigMPI& config_mpi);
    private:
        void save(json& results);
        json initializeResults();
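
Note (not part of the patch): the comment added to build_tasks() in both GridExperiment.cpp and GridSearch.cpp documents one task per (dataset, seed, fold) combination. The sketch below only illustrates that task layout with nlohmann::ordered_json, the same alias the grid code uses; the dataset names, seeds and fold count are invented for the example and do not come from the repository.

// Standalone illustration of the task list shape described in build_tasks().
#include <iostream>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

using json = nlohmann::ordered_json;

int main()
{
    // Hypothetical inputs, stand-ins for the datasets/seeds/folds selected in a real run.
    std::vector<std::string> datasets = { "iris", "glass" };
    std::vector<int> seeds = { 271, 314 };
    int n_folds = 5;

    auto tasks = json::array();
    for (std::size_t idx_dataset = 0; idx_dataset < datasets.size(); ++idx_dataset) {
        for (auto seed : seeds) {
            for (int fold = 0; fold < n_folds; ++fold) {
                // Same four fields as the documented task object.
                json task = {
                    { "dataset", datasets[idx_dataset] },
                    { "idx_dataset", idx_dataset }, // index into the datasets used in this run
                    { "seed", seed },
                    { "fold", fold }
                };
                tasks.push_back(task);
            }
        }
    }
    // One task per (dataset, seed, fold) combination; 2 * 2 * 5 = 20 tasks here.
    std::cout << tasks.dump(4) << std::endl;
    return 0;
}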