Compare commits

11 Commits

31 changed files with 67 additions and 54 deletions

7
.gitmodules vendored
View File

@@ -10,13 +10,12 @@
[submodule "lib/libxlsxwriter"] [submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git url = https://github.com/jmcnamara/libxlsxwriter.git
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
update = merge
[submodule "lib/folding"] [submodule "lib/folding"]
path = lib/folding path = lib/folding
url = https://github.com/rmontanana/folding url = https://github.com/rmontanana/folding
[submodule "lib/Files"] [submodule "lib/Files"]
path = lib/Files path = lib/Files
url = https://github.com/rmontanana/ArffFiles url = https://github.com/rmontanana/ArffFiles
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp

View File

@@ -90,7 +90,7 @@ cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h") configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h")
add_subdirectory(config) add_subdirectory(config)
add_subdirectory(src) add_subdirectory(src)
add_subdirectory(sample) # add_subdirectory(sample)
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${Platform_SOURCE_DIR}/src/*.cpp) file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${Platform_SOURCE_DIR}/src/*.cpp)
# Testing # Testing

View File

@@ -24,7 +24,14 @@ The solution is to erase the libstdc++ library from the miniconda installation a
### MPI ### MPI
In Linux just install openmpi & openmpi-devel packages. Only if cmake can't find openmpi installation (like in Oracle Linux) set the following variable: In Linux just install openmpi & openmpi-devel packages.
```bash
source /etc/profile.d/modules.sh
module load mpi/openmpi-x86_64
```
If cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
```bash ```bash
export MPI_HOME="/usr/lib64/openmpi" export MPI_HOME="/usr/lib64/openmpi"

View File

@@ -1,4 +1,4 @@
configure_file( configure_file(
"config.h.in" "config.h.in"
"${CMAKE_BINARY_DIR}/configured_files/include/config.h" ESCAPE_QUOTES "${CMAKE_BINARY_DIR}/configured_files/include/config_platform.h" ESCAPE_QUOTES
) )

View File

@@ -1,8 +1,3 @@
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"] [submodule "lib/catch2"]
path = lib/catch2 path = lib/catch2
main = v2.x main = v2.x

View File

@@ -12,4 +12,4 @@ include_directories(
${Bayesnet_INCLUDE_DIRS} ${Bayesnet_INCLUDE_DIRS}
) )
add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp) add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp)
target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy) target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)

View File

@@ -6,12 +6,12 @@
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include <ArffFiles.hpp> #include <ArffFiles.hpp>
#include <CPPFImdlp.h> #include <fimdlp/CPPFImdlp.h>
#include <folding.hpp> #include <folding.hpp>
#include <bayesnet/utils/BayesMetrics.h> #include <bayesnet/utils/BayesMetrics.h>
#include "Models.h" #include "Models.h"
#include "modelRegister.h" #include "modelRegister.h"
#include "config.h" #include "config_platform.h"
const std::string PATH = { platform_data_path.begin(), platform_data_path.end() }; const std::string PATH = { platform_data_path.begin(), platform_data_path.end() };

View File

@@ -26,7 +26,7 @@ add_executable(
reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp
results/Result.cpp results/Result.cpp
) )
target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}") target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
# b_grid # b_grid
set(grid_sources GridSearch.cpp GridData.cpp) set(grid_sources GridSearch.cpp GridData.cpp)
@@ -35,7 +35,7 @@ add_executable(b_grid commands/b_grid.cpp ${grid_sources}
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
main/HyperParameters.cpp main/Models.cpp main/HyperParameters.cpp main/Models.cpp
) )
target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy) target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
# b_list # b_list
add_executable(b_list commands/b_list.cpp add_executable(b_list commands/b_list.cpp
@@ -44,7 +44,7 @@ add_executable(b_list commands/b_list.cpp
reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
) )
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}") target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
# b_main # b_main
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp) set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
@@ -54,7 +54,7 @@ add_executable(b_main commands/b_main.cpp ${main_sources}
reports/ReportConsole.cpp reports/ReportBase.cpp reports/ReportConsole.cpp reports/ReportBase.cpp
results/Result.cpp results/Result.cpp
) )
target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy) target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
# b_manage # b_manage
set(manage_sources ManageScreen.cpp OptionsMenu.cpp ResultsManager.cpp) set(manage_sources ManageScreen.cpp OptionsMenu.cpp ResultsManager.cpp)
@@ -66,4 +66,4 @@ add_executable(
results/Result.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp results/Result.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
main/Scores.cpp main/Scores.cpp
) )
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" mdlp "${BayesNet}") target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" fimdlp "${BayesNet}")

View File

@@ -5,7 +5,7 @@
#include "common/Paths.h" #include "common/Paths.h"
#include "common/Colors.h" #include "common/Colors.h"
#include "best/BestResults.h" #include "best/BestResults.h"
#include "config.h" #include "config_platform.h"
void manageArguments(argparse::ArgumentParser& program) void manageArguments(argparse::ArgumentParser& program)
{ {

View File

@@ -11,7 +11,7 @@
#include "common/Colors.h" #include "common/Colors.h"
#include "common/DotEnv.h" #include "common/DotEnv.h"
#include "grid/GridSearch.h" #include "grid/GridSearch.h"
#include "config.h" #include "config_platform.h"
using json = nlohmann::ordered_json; using json = nlohmann::ordered_json;
const int MAXL = 133; const int MAXL = 133;
@@ -93,8 +93,10 @@ void list_dump(std::string& model)
if (item.first.size() > max_dataset) { if (item.first.size() > max_dataset) {
max_dataset = item.first.size(); max_dataset = item.first.size();
} }
if (item.second.dump().size() > max_hyper) { for (auto const& [key, value] : item.second.items()) {
max_hyper = item.second.dump().size(); if (value.dump().size() > max_hyper) {
max_hyper = value.dump().size();
}
} }
} }
std::cout << Colors::GREEN() << left << " # " << left << setw(max_dataset) << "Dataset" << " #Com. " std::cout << Colors::GREEN() << left << " # " << left << setw(max_dataset) << "Dataset" << " #Com. "
@@ -106,7 +108,12 @@ void list_dump(std::string& model)
std::cout << color; std::cout << color;
auto num_combinations = data.getNumCombinations(item.first); auto num_combinations = data.getNumCombinations(item.first);
std::cout << setw(3) << fixed << right << ++index << left << " " << setw(max_dataset) << item.first std::cout << setw(3) << fixed << right << ++index << left << " " << setw(max_dataset) << item.first
<< " " << setw(5) << right << num_combinations << " " << setw(max_hyper) << left << item.second.dump() << std::endl; << " " << setw(5) << right << num_combinations << " ";
std::string prefix = "";
for (auto const& [key, value] : item.second.items()) {
std::cout << prefix << setw(max_hyper) << std::left << value.dump() << std::endl;
prefix = string(11 + max_dataset, ' ');
}
} }
std::cout << Colors::RESET() << std::endl; std::cout << Colors::RESET() << std::endl;
} }

View File

@@ -13,7 +13,7 @@
#include "results/ResultsDatasetConsole.h" #include "results/ResultsDatasetConsole.h"
#include "results/ResultsDataset.h" #include "results/ResultsDataset.h"
#include "results/ResultsDatasetExcel.h" #include "results/ResultsDatasetExcel.h"
#include "config.h" #include "config_platform.h"
void list_datasets(argparse::ArgumentParser& program) void list_datasets(argparse::ArgumentParser& program)

View File

@@ -7,7 +7,7 @@
#include "common/Paths.h" #include "common/Paths.h"
#include "main/Models.h" #include "main/Models.h"
#include "main/modelRegister.h" #include "main/modelRegister.h"
#include "config.h" #include "config_platform.h"
using json = nlohmann::ordered_json; using json = nlohmann::ordered_json;

View File

@@ -5,7 +5,7 @@
#include <argparse/argparse.hpp> #include <argparse/argparse.hpp>
#include "manage/ManageScreen.h" #include "manage/ManageScreen.h"
#include <signal.h> #include <signal.h>
#include "config.h" #include "config_platform.h"
platform::ManageScreen* manager = nullptr; platform::ManageScreen* manager = nullptr;

View File

@@ -8,7 +8,7 @@ namespace platform {
Datasets::Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm) : Datasets::Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm) :
discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm) discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm)
{ {
if (discretizer_algorithm == "none" && discretize) { if ((discretizer_algorithm == "none" || discretizer_algorithm == "") && discretize) {
throw std::runtime_error("Can't discretize without discretization algorithm"); throw std::runtime_error("Can't discretize without discretization algorithm");
} }
load(); load();

View File

@@ -5,9 +5,9 @@
#include <string> #include <string>
#include <functional> #include <functional>
#include <vector> #include <vector>
#include <Discretizer.h> #include <fimdlp/Discretizer.h>
#include <BinDisc.h> #include <fimdlp/BinDisc.h>
#include <CPPFImdlp.h> #include <fimdlp/CPPFImdlp.h>
namespace platform { namespace platform {
class Discretization { class Discretization {
public: public:

View File

@@ -108,6 +108,7 @@ namespace platform {
// Generate the hyperparameters combinations // Generate the hyperparameters combinations
auto& dataset = datasets.getDataset(dataset_name); auto& dataset = datasets.getDataset(dataset_name);
auto combinations = grid.getGrid(dataset_name); auto combinations = grid.getGrid(dataset_name);
dataset.load();
auto [X, y] = dataset.getTensors(); auto [X, y] = dataset.getTensors();
auto features = dataset.getFeatures(); auto features = dataset.getFeatures();
auto className = dataset.getClassName(); auto className = dataset.getClassName();
@@ -353,7 +354,8 @@ namespace platform {
tasks = json::parse(msg); tasks = json::parse(msg);
delete[] msg; delete[] msg;
auto env = platform::DotEnv(); auto env = platform::DotEnv();
auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretiz_algo")); auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretize_algo"));
if (config_mpi.rank == config_mpi.manager) { if (config_mpi.rank == config_mpi.manager) {
// //
// 2a. Producer delivers the tasks to the consumers // 2a. Producer delivers the tasks to the consumers

View File

@@ -58,8 +58,8 @@ namespace platform {
std::cout << " ( " << Colors::GREEN() << "b" << Colors::RESET() << " ) Scoring train dataset" << std::endl; std::cout << " ( " << Colors::GREEN() << "b" << Colors::RESET() << " ) Scoring train dataset" << std::endl;
std::cout << " ( " << Colors::GREEN() << "c" << Colors::RESET() << " ) Scoring test dataset" << std::endl << std::endl; std::cout << " ( " << Colors::GREEN() << "c" << Colors::RESET() << " ) Scoring test dataset" << std::endl << std::endl;
std::cout << Colors::YELLOW() << "Note: fold number in this color means fitting had issues such as not using all features in BoostAODE classifier" << std::endl << std::endl; std::cout << Colors::YELLOW() << "Note: fold number in this color means fitting had issues such as not using all features in BoostAODE classifier" << std::endl << std::endl;
std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << std::endl; std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << string(3 * nfolds - 2, ' ') << " Time" << std::endl;
std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << Colors::RESET() << std::endl; std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << " ----------" << Colors::RESET() << std::endl;
} }
int num = 0; int num = 0;
for (auto fileName : filesToProcess) { for (auto fileName : filesToProcess) {
@@ -176,7 +176,7 @@ namespace platform {
json confusion_matrices_train = json::array(); json confusion_matrices_train = json::array();
std::vector<std::string> notes; std::vector<std::string> notes;
std::vector<std::string> graphs; std::vector<std::string> graphs;
Timer train_timer, test_timer; Timer train_timer, test_timer, seed_timer;
int item = 0; int item = 0;
bool first_seed = true; bool first_seed = true;
// //
@@ -184,6 +184,7 @@ namespace platform {
// //
auto score = parse_score(); auto score = parse_score();
for (auto seed : randomSeeds) { for (auto seed : randomSeeds) {
seed_timer.start();
if (!quiet) { if (!quiet) {
string prefix = " "; string prefix = " ";
if (!first_seed) { if (!first_seed) {
@@ -274,8 +275,10 @@ namespace platform {
graphs.push_back(result); graphs.push_back(result);
} }
} }
if (!quiet) if (!quiet) {
std::cout << "end. " << flush; seed_timer.stop();
std::cout << "end. [" << seed_timer.getDurationString() << "]" << std::endl;
}
delete fold; delete fold;
} }
// //

View File

@@ -12,7 +12,7 @@
#include "reports/ReportExcel.h" #include "reports/ReportExcel.h"
#include "reports/ReportExcelCompared.h" #include "reports/ReportExcelCompared.h"
#include <bayesnet/classifiers/TAN.h> #include <bayesnet/classifiers/TAN.h>
#include "CPPFImdlp.h" #include <fimdlp/CPPFImdlp.h>
namespace platform { namespace platform {
const std::string STATUS_OK = "Ok."; const std::string STATUS_OK = "Ok.";

View File

@@ -24,8 +24,8 @@ namespace platform {
+ " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>() + " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>()
); );
sheader << headerLine(data["title"].get<std::string>()); sheader << headerLine(data["title"].get<std::string>());
std::string discretiz_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "ORIGINAL"; std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "ORIGINAL";
std::string algorithm = data["discretized"].get<bool>() ? " (" + discretiz_algo + ")" : ""; std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
std::string smooth = data.find("smooth_strategy") != data.end() ? data["smooth_strategy"].get<std::string>() : "ORIGINAL"; std::string smooth = data.find("smooth_strategy") != data.end() ? data["smooth_strategy"].get<std::string>() : "ORIGINAL";
std::string stratified; std::string stratified;
try { try {

View File

@@ -68,8 +68,8 @@ namespace platform {
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]); worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
oss.str(""); oss.str("");
oss.clear(); oss.clear();
std::string discretiz_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "mdlp"; std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "mdlp";
std::string algorithm = data["discretized"].get<bool>() ? " (" + discretiz_algo + ")" : ""; std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False") << algorithm; oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False") << algorithm;
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]); worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
} }

View File

@@ -9,7 +9,7 @@
#include "folding.hpp" #include "folding.hpp"
#include <ArffFiles.hpp> #include <ArffFiles.hpp>
#include <bayesnet/classifiers/TAN.h> #include <bayesnet/classifiers/TAN.h>
#include "config.h" #include "config_platform.h"
TEST_CASE("Test Platform version", "[Platform]") TEST_CASE("Test Platform version", "[Platform]")

View File

@@ -7,7 +7,7 @@
#include "common/DotEnv.h" #include "common/DotEnv.h"
#include "common/Datasets.h" #include "common/Datasets.h"
#include "common/Paths.h" #include "common/Paths.h"
#include "config.h" #include "config_platform.h"
TEST_CASE("ZeroR comparison in reports", "[Report]") TEST_CASE("ZeroR comparison in reports", "[Report]")

View File

@@ -9,7 +9,7 @@
#include "common/Paths.h" #include "common/Paths.h"
#include "common/Colors.h" #include "common/Colors.h"
#include "main/Scores.h" #include "main/Scores.h"
#include "config.h" #include "config_platform.h"
using json = nlohmann::ordered_json; using json = nlohmann::ordered_json;
auto epsilon = 1e-4; auto epsilon = 1e-4;

View File

@@ -1,5 +1,5 @@
#include "TestUtils.h" #include "TestUtils.h"
#include "config.h" #include "config_platform.h"
class Paths { class Paths {
public: public:

View File

@@ -6,7 +6,7 @@
#include <map> #include <map>
#include <tuple> #include <tuple>
#include <ArffFiles.hpp> #include <ArffFiles.hpp>
#include "CPPFImdlp.h" #include <fimdlp/CPPFImdlp.h>
bool file_exists(const std::string& name); bool file_exists(const std::string& name);
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features); std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);