Compare commits

11 Commits

31 changed files with 67 additions and 54 deletions

.gitmodules vendored (7 changed lines)
View File

@@ -10,13 +10,12 @@
[submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
update = merge
[submodule "lib/folding"]
path = lib/folding
url = https://github.com/rmontanana/folding
[submodule "lib/Files"]
path = lib/Files
url = https://github.com/rmontanana/ArffFiles
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp

View File

@@ -90,7 +90,7 @@ cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h")
add_subdirectory(config)
add_subdirectory(src)
-add_subdirectory(sample)
+# add_subdirectory(sample)
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${Platform_SOURCE_DIR}/src/*.cpp)
# Testing

View File

@@ -24,7 +24,14 @@ The solution is to erase the libstdc++ library from the miniconda installation a
### MPI
-In Linux just install openmpi & openmpi-devel packages. Only if cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
+In Linux just install openmpi & openmpi-devel packages.
+```bash
+source /etc/profile.d/modules.sh
+module load mpi/openmpi-x86_64
+```
+If cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
```bash
export MPI_HOME="/usr/lib64/openmpi"
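For reference, a minimal sketch of how the `MPI_HOME` hint is typically consumed on the CMake side, assuming the project relies on CMake's bundled `FindMPI` module (the `mpi_probe` target and `main.cpp` below are placeholder names, not files from this repository):

```cmake
# Sketch only (not from this repo): FindMPI uses the MPI_HOME environment
# variable as a search hint when locating the OpenMPI installation.
cmake_minimum_required(VERSION 3.20)
project(mpi_probe CXX)

find_package(MPI REQUIRED COMPONENTS CXX)
message(STATUS "MPI C++ compiler wrapper: ${MPI_CXX_COMPILER}")

# A target then links the C++ bindings, as src/CMakeLists.txt does for b_grid:
add_executable(mpi_probe main.cpp)
target_link_libraries(mpi_probe ${MPI_CXX_LIBRARIES})
```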

View File

@@ -1,4 +1,4 @@
configure_file(
"config.h.in"
"${CMAKE_BINARY_DIR}/configured_files/include/config.h" ESCAPE_QUOTES
"${CMAKE_BINARY_DIR}/configured_files/include/config_platform.h" ESCAPE_QUOTES
)

View File

@@ -1,8 +1,3 @@
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"]
path = lib/catch2
main = v2.x

View File

@@ -12,4 +12,4 @@ include_directories(
${Bayesnet_INCLUDE_DIRS}
)
add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp)
-target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
+target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)

View File

@@ -6,12 +6,12 @@
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include <ArffFiles.hpp>
-#include <CPPFImdlp.h>
+#include <fimdlp/CPPFImdlp.h>
#include <folding.hpp>
#include <bayesnet/utils/BayesMetrics.h>
#include "Models.h"
#include "modelRegister.h"
#include "config.h"
#include "config_platform.h"
const std::string PATH = { platform_data_path.begin(), platform_data_path.end() };

View File

@@ -26,7 +26,7 @@ add_executable(
reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp
results/Result.cpp
)
-target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
+target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
# b_grid
set(grid_sources GridSearch.cpp GridData.cpp)
@@ -35,7 +35,7 @@ add_executable(b_grid commands/b_grid.cpp ${grid_sources}
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
main/HyperParameters.cpp main/Models.cpp
)
-target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
+target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
# b_list
add_executable(b_list commands/b_list.cpp
@@ -44,7 +44,7 @@ add_executable(b_list commands/b_list.cpp
reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
)
-target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
+target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
# b_main
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
@@ -54,7 +54,7 @@ add_executable(b_main commands/b_main.cpp ${main_sources}
reports/ReportConsole.cpp reports/ReportBase.cpp
results/Result.cpp
)
-target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
+target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
# b_manage
set(manage_sources ManageScreen.cpp OptionsMenu.cpp ResultsManager.cpp)
@@ -66,4 +66,4 @@ add_executable(
results/Result.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
main/Scores.cpp
)
-target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" mdlp "${BayesNet}")
+target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" fimdlp "${BayesNet}")

View File

@@ -5,7 +5,7 @@
#include "common/Paths.h"
#include "common/Colors.h"
#include "best/BestResults.h"
#include "config.h"
#include "config_platform.h"
void manageArguments(argparse::ArgumentParser& program)
{

View File

@@ -11,7 +11,7 @@
#include "common/Colors.h"
#include "common/DotEnv.h"
#include "grid/GridSearch.h"
#include "config.h"
#include "config_platform.h"
using json = nlohmann::ordered_json;
const int MAXL = 133;
@@ -93,8 +93,10 @@ void list_dump(std::string& model)
if (item.first.size() > max_dataset) {
max_dataset = item.first.size();
}
-if (item.second.dump().size() > max_hyper) {
-max_hyper = item.second.dump().size();
+for (auto const& [key, value] : item.second.items()) {
+if (value.dump().size() > max_hyper) {
+max_hyper = value.dump().size();
+}
}
}
std::cout << Colors::GREEN() << left << " # " << left << setw(max_dataset) << "Dataset" << " #Com. "
@@ -106,7 +108,12 @@ void list_dump(std::string& model)
std::cout << color;
auto num_combinations = data.getNumCombinations(item.first);
std::cout << setw(3) << fixed << right << ++index << left << " " << setw(max_dataset) << item.first
<< " " << setw(5) << right << num_combinations << " " << setw(max_hyper) << left << item.second.dump() << std::endl;
<< " " << setw(5) << right << num_combinations << " ";
std::string prefix = "";
for (auto const& [key, value] : item.second.items()) {
std::cout << prefix << setw(max_hyper) << std::left << value.dump() << std::endl;
prefix = string(11 + max_dataset, ' ');
}
}
std::cout << Colors::RESET() << std::endl;
}

View File

@@ -13,7 +13,7 @@
#include "results/ResultsDatasetConsole.h"
#include "results/ResultsDataset.h"
#include "results/ResultsDatasetExcel.h"
#include "config.h"
#include "config_platform.h"
void list_datasets(argparse::ArgumentParser& program)

View File

@@ -7,7 +7,7 @@
#include "common/Paths.h"
#include "main/Models.h"
#include "main/modelRegister.h"
#include "config.h"
#include "config_platform.h"
using json = nlohmann::ordered_json;

View File

@@ -5,7 +5,7 @@
#include <argparse/argparse.hpp>
#include "manage/ManageScreen.h"
#include <signal.h>
#include "config.h"
#include "config_platform.h"
platform::ManageScreen* manager = nullptr;

View File

@@ -8,7 +8,7 @@ namespace platform {
Datasets::Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm) :
discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm)
{
if (discretizer_algorithm == "none" && discretize) {
if ((discretizer_algorithm == "none" || discretizer_algorithm == "") && discretize) {
throw std::runtime_error("Can't discretize without discretization algorithm");
}
load();

View File

@@ -5,9 +5,9 @@
#include <string>
#include <functional>
#include <vector>
-#include <Discretizer.h>
-#include <BinDisc.h>
-#include <CPPFImdlp.h>
+#include <fimdlp/Discretizer.h>
+#include <fimdlp/BinDisc.h>
+#include <fimdlp/CPPFImdlp.h>
namespace platform {
class Discretization {
public:

View File

@@ -108,6 +108,7 @@ namespace platform {
// Generate the hyperparamters combinations
auto& dataset = datasets.getDataset(dataset_name);
auto combinations = grid.getGrid(dataset_name);
+dataset.load();
auto [X, y] = dataset.getTensors();
auto features = dataset.getFeatures();
auto className = dataset.getClassName();
@@ -353,7 +354,8 @@ namespace platform {
tasks = json::parse(msg);
delete[] msg;
auto env = platform::DotEnv();
-auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretiz_algo"));
+auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretize_algo"));
if (config_mpi.rank == config_mpi.manager) {
//
// 2a. Producer delivers the tasks to the consumers

View File

@@ -58,8 +58,8 @@ namespace platform {
std::cout << " ( " << Colors::GREEN() << "b" << Colors::RESET() << " ) Scoring train dataset" << std::endl;
std::cout << " ( " << Colors::GREEN() << "c" << Colors::RESET() << " ) Scoring test dataset" << std::endl << std::endl;
std::cout << Colors::YELLOW() << "Note: fold number in this color means fitting had issues such as not using all features in BoostAODE classifier" << std::endl << std::endl;
-std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << std::endl;
-std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << Colors::RESET() << std::endl;
+std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << string(3 * nfolds - 2, ' ') << " Time" << std::endl;
+std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << " ----------" << Colors::RESET() << std::endl;
}
int num = 0;
for (auto fileName : filesToProcess) {
@@ -176,7 +176,7 @@ namespace platform {
json confusion_matrices_train = json::array();
std::vector<std::string> notes;
std::vector<std::string> graphs;
-Timer train_timer, test_timer;
+Timer train_timer, test_timer, seed_timer;
int item = 0;
bool first_seed = true;
//
@@ -184,6 +184,7 @@ namespace platform {
//
auto score = parse_score();
for (auto seed : randomSeeds) {
+seed_timer.start();
if (!quiet) {
string prefix = " ";
if (!first_seed) {
@@ -274,8 +275,10 @@ namespace platform {
graphs.push_back(result);
}
}
-if (!quiet)
-std::cout << "end. " << flush;
+if (!quiet) {
+seed_timer.stop();
+std::cout << "end. [" << seed_timer.getDurationString() << "]" << std::endl;
+}
delete fold;
}
//

View File

@@ -12,7 +12,7 @@
#include "reports/ReportExcel.h"
#include "reports/ReportExcelCompared.h"
#include <bayesnet/classifiers/TAN.h>
#include "CPPFImdlp.h"
#include <fimdlp/CPPFImdlp.h>
namespace platform {
const std::string STATUS_OK = "Ok.";

View File

@@ -24,8 +24,8 @@ namespace platform {
+ " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>()
);
sheader << headerLine(data["title"].get<std::string>());
-std::string discretiz_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "ORIGINAL";
-std::string algorithm = data["discretized"].get<bool>() ? " (" + discretiz_algo + ")" : "";
+std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "ORIGINAL";
+std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
std::string smooth = data.find("smooth_strategy") != data.end() ? data["smooth_strategy"].get<std::string>() : "ORIGINAL";
std::string stratified;
try {

View File

@@ -68,8 +68,8 @@ namespace platform {
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
-std::string discretiz_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "mdlp";
-std::string algorithm = data["discretized"].get<bool>() ? " (" + discretiz_algo + ")" : "";
+std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "mdlp";
+std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False") << algorithm;
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
}

View File

@@ -9,7 +9,7 @@
#include "folding.hpp"
#include <ArffFiles.hpp>
#include <bayesnet/classifiers/TAN.h>
#include "config.h"
#include "config_platform.h"
TEST_CASE("Test Platform version", "[Platform]")

View File

@@ -7,7 +7,7 @@
#include "common/DotEnv.h"
#include "common/Datasets.h"
#include "common/Paths.h"
#include "config.h"
#include "config_platform.h"
TEST_CASE("ZeroR comparison in reports", "[Report]")

View File

@@ -9,7 +9,7 @@
#include "common/Paths.h"
#include "common/Colors.h"
#include "main/Scores.h"
#include "config.h"
#include "config_platform.h"
using json = nlohmann::ordered_json;
auto epsilon = 1e-4;

View File

@@ -1,5 +1,5 @@
#include "TestUtils.h"
#include "config.h"
#include "config_platform.h"
class Paths {
public:

View File

@@ -6,7 +6,7 @@
#include <map>
#include <tuple>
#include <ArffFiles.hpp>
#include "CPPFImdlp.h"
#include <fimdlp/CPPFImdlp.h>
bool file_exists(const std::string& name);
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);