Compare commits

11 Commits

31 changed files with 67 additions and 54 deletions

.gitmodules vendored (7 changed lines)
View File

@@ -10,13 +10,12 @@
[submodule "lib/libxlsxwriter"]
path = lib/libxlsxwriter
url = https://github.com/jmcnamara/libxlsxwriter.git
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
update = merge
[submodule "lib/folding"]
path = lib/folding
url = https://github.com/rmontanana/folding
[submodule "lib/Files"]
path = lib/Files
url = https://github.com/rmontanana/ArffFiles
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp

View File

@@ -90,7 +90,7 @@ cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h")
add_subdirectory(config)
add_subdirectory(src)
-add_subdirectory(sample)
+# add_subdirectory(sample)
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${Platform_SOURCE_DIR}/src/*.cpp)
# Testing

View File

@@ -24,7 +24,14 @@ The solution is to erase the libstdc++ library from the miniconda installation a
### MPI
-In Linux just install openmpi & openmpi-devel packages. Only if cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
+In Linux just install openmpi & openmpi-devel packages.
+```bash
+source /etc/profile.d/modules.sh
+module load mpi/openmpi-x86_64
+```
+If cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
```bash
export MPI_HOME="/usr/lib64/openmpi"
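For reference, a minimal sketch of how the `MPI_HOME` hint is typically consumed on the CMake side, assuming the project relies on CMake's bundled `FindMPI` module (the `mpi_probe` target and `main.cpp` below are placeholder names, not files from this repository):

```cmake
# Sketch only (not from this repo): FindMPI uses the MPI_HOME environment
# variable as a search hint when locating the OpenMPI installation.
cmake_minimum_required(VERSION 3.20)
project(mpi_probe CXX)

find_package(MPI REQUIRED COMPONENTS CXX)
message(STATUS "MPI C++ compiler wrapper: ${MPI_CXX_COMPILER}")

# A target then links the C++ bindings, as src/CMakeLists.txt does for b_grid:
add_executable(mpi_probe main.cpp)
target_link_libraries(mpi_probe ${MPI_CXX_LIBRARIES})
```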

View File

@@ -1,4 +1,4 @@
configure_file(
"config.h.in"
"${CMAKE_BINARY_DIR}/configured_files/include/config.h" ESCAPE_QUOTES
"${CMAKE_BINARY_DIR}/configured_files/include/config_platform.h" ESCAPE_QUOTES
)

View File

@@ -1,8 +1,3 @@
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"]
path = lib/catch2
main = v2.x

View File

@@ -12,4 +12,4 @@ include_directories(
${Bayesnet_INCLUDE_DIRS}
)
add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp)
-target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
+target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)

View File

@@ -6,12 +6,12 @@
#include <argparse/argparse.hpp>
#include <nlohmann/json.hpp>
#include <ArffFiles.hpp>
-#include <CPPFImdlp.h>
+#include <fimdlp/CPPFImdlp.h>
#include <folding.hpp>
#include <bayesnet/utils/BayesMetrics.h>
#include "Models.h"
#include "modelRegister.h"
#include "config.h"
#include "config_platform.h"
const std::string PATH = { platform_data_path.begin(), platform_data_path.end() };

View File

@@ -26,7 +26,7 @@ add_executable(
reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp
results/Result.cpp
)
-target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
+target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
# b_grid
set(grid_sources GridSearch.cpp GridData.cpp)
@@ -35,7 +35,7 @@ add_executable(b_grid commands/b_grid.cpp ${grid_sources}
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
main/HyperParameters.cpp main/Models.cpp
)
-target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
+target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
# b_list
add_executable(b_list commands/b_list.cpp
@@ -44,7 +44,7 @@ add_executable(b_list commands/b_list.cpp
reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
)
-target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
+target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
# b_main
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
@@ -54,7 +54,7 @@ add_executable(b_main commands/b_main.cpp ${main_sources}
reports/ReportConsole.cpp reports/ReportBase.cpp
results/Result.cpp
)
-target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
+target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
# b_manage
set(manage_sources ManageScreen.cpp OptionsMenu.cpp ResultsManager.cpp)
@@ -66,4 +66,4 @@ add_executable(
results/Result.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
main/Scores.cpp
)
-target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" mdlp "${BayesNet}")
+target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" fimdlp "${BayesNet}")

View File

@@ -5,7 +5,7 @@
#include "common/Paths.h"
#include "common/Colors.h"
#include "best/BestResults.h"
#include "config.h"
#include "config_platform.h"
void manageArguments(argparse::ArgumentParser& program)
{

View File

@@ -11,7 +11,7 @@
#include "common/Colors.h"
#include "common/DotEnv.h"
#include "grid/GridSearch.h"
#include "config.h"
#include "config_platform.h"
using json = nlohmann::ordered_json;
const int MAXL = 133;
@@ -93,8 +93,10 @@ void list_dump(std::string& model)
if (item.first.size() > max_dataset) {
max_dataset = item.first.size();
}
-if (item.second.dump().size() > max_hyper) {
-max_hyper = item.second.dump().size();
+for (auto const& [key, value] : item.second.items()) {
+if (value.dump().size() > max_hyper) {
+max_hyper = value.dump().size();
+}
}
}
std::cout << Colors::GREEN() << left << " # " << left << setw(max_dataset) << "Dataset" << " #Com. "
@@ -106,7 +108,12 @@ void list_dump(std::string& model)
std::cout << color;
auto num_combinations = data.getNumCombinations(item.first);
std::cout << setw(3) << fixed << right << ++index << left << " " << setw(max_dataset) << item.first
<< " " << setw(5) << right << num_combinations << " " << setw(max_hyper) << left << item.second.dump() << std::endl;
<< " " << setw(5) << right << num_combinations << " ";
std::string prefix = "";
for (auto const& [key, value] : item.second.items()) {
std::cout << prefix << setw(max_hyper) << std::left << value.dump() << std::endl;
prefix = string(11 + max_dataset, ' ');
}
}
std::cout << Colors::RESET() << std::endl;
}

View File

@@ -13,7 +13,7 @@
#include "results/ResultsDatasetConsole.h"
#include "results/ResultsDataset.h"
#include "results/ResultsDatasetExcel.h"
#include "config.h"
#include "config_platform.h"
void list_datasets(argparse::ArgumentParser& program)

View File

@@ -7,7 +7,7 @@
#include "common/Paths.h"
#include "main/Models.h"
#include "main/modelRegister.h"
#include "config.h"
#include "config_platform.h"
using json = nlohmann::ordered_json;

View File

@@ -5,7 +5,7 @@
#include <argparse/argparse.hpp>
#include "manage/ManageScreen.h"
#include <signal.h>
#include "config.h"
#include "config_platform.h"
platform::ManageScreen* manager = nullptr;

View File

@@ -8,7 +8,7 @@ namespace platform {
Datasets::Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm) :
discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm)
{
if (discretizer_algorithm == "none" && discretize) {
if ((discretizer_algorithm == "none" || discretizer_algorithm == "") && discretize) {
throw std::runtime_error("Can't discretize without discretization algorithm");
}
load();

View File

@@ -5,9 +5,9 @@
#include <string>
#include <functional>
#include <vector>
-#include <Discretizer.h>
-#include <BinDisc.h>
-#include <CPPFImdlp.h>
+#include <fimdlp/Discretizer.h>
+#include <fimdlp/BinDisc.h>
+#include <fimdlp/CPPFImdlp.h>
namespace platform {
class Discretization {
public:

View File

@@ -108,6 +108,7 @@ namespace platform {
// Generate the hyperparamters combinations
auto& dataset = datasets.getDataset(dataset_name);
auto combinations = grid.getGrid(dataset_name);
+dataset.load();
auto [X, y] = dataset.getTensors();
auto features = dataset.getFeatures();
auto className = dataset.getClassName();
@@ -353,7 +354,8 @@ namespace platform {
tasks = json::parse(msg);
delete[] msg;
auto env = platform::DotEnv();
-auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretiz_algo"));
+auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretize_algo"));
if (config_mpi.rank == config_mpi.manager) {
//
// 2a. Producer delivers the tasks to the consumers

View File

@@ -58,8 +58,8 @@ namespace platform {
std::cout << " ( " << Colors::GREEN() << "b" << Colors::RESET() << " ) Scoring train dataset" << std::endl;
std::cout << " ( " << Colors::GREEN() << "c" << Colors::RESET() << " ) Scoring test dataset" << std::endl << std::endl;
std::cout << Colors::YELLOW() << "Note: fold number in this color means fitting had issues such as not using all features in BoostAODE classifier" << std::endl << std::endl;
-std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << std::endl;
-std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << Colors::RESET() << std::endl;
+std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << string(3 * nfolds - 2, ' ') << " Time" << std::endl;
+std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << " ----------" << Colors::RESET() << std::endl;
}
int num = 0;
for (auto fileName : filesToProcess) {
@@ -176,7 +176,7 @@ namespace platform {
json confusion_matrices_train = json::array();
std::vector<std::string> notes;
std::vector<std::string> graphs;
-Timer train_timer, test_timer;
+Timer train_timer, test_timer, seed_timer;
int item = 0;
bool first_seed = true;
//
@@ -184,6 +184,7 @@ namespace platform {
//
auto score = parse_score();
for (auto seed : randomSeeds) {
+seed_timer.start();
if (!quiet) {
string prefix = " ";
if (!first_seed) {
@@ -274,8 +275,10 @@ namespace platform {
graphs.push_back(result);
}
}
-if (!quiet)
-std::cout << "end. " << flush;
+if (!quiet) {
+seed_timer.stop();
+std::cout << "end. [" << seed_timer.getDurationString() << "]" << std::endl;
+}
delete fold;
}
//

View File

@@ -12,7 +12,7 @@
#include "reports/ReportExcel.h"
#include "reports/ReportExcelCompared.h"
#include <bayesnet/classifiers/TAN.h>
#include "CPPFImdlp.h"
#include <fimdlp/CPPFImdlp.h>
namespace platform {
const std::string STATUS_OK = "Ok.";

View File

@@ -24,8 +24,8 @@ namespace platform {
+ " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>()
);
sheader << headerLine(data["title"].get<std::string>());
-std::string discretiz_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "ORIGINAL";
-std::string algorithm = data["discretized"].get<bool>() ? " (" + discretiz_algo + ")" : "";
+std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "ORIGINAL";
+std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
std::string smooth = data.find("smooth_strategy") != data.end() ? data["smooth_strategy"].get<std::string>() : "ORIGINAL";
std::string stratified;
try {

View File

@@ -68,8 +68,8 @@ namespace platform {
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
oss.str("");
oss.clear();
-std::string discretiz_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "mdlp";
-std::string algorithm = data["discretized"].get<bool>() ? " (" + discretiz_algo + ")" : "";
+std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "mdlp";
+std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False") << algorithm;
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
}

View File

@@ -9,7 +9,7 @@
#include "folding.hpp"
#include <ArffFiles.hpp>
#include <bayesnet/classifiers/TAN.h>
#include "config.h"
#include "config_platform.h"
TEST_CASE("Test Platform version", "[Platform]")

View File

@@ -7,7 +7,7 @@
#include "common/DotEnv.h"
#include "common/Datasets.h"
#include "common/Paths.h"
#include "config.h"
#include "config_platform.h"
TEST_CASE("ZeroR comparison in reports", "[Report]")

View File

@@ -9,7 +9,7 @@
#include "common/Paths.h"
#include "common/Colors.h"
#include "main/Scores.h"
#include "config.h"
#include "config_platform.h"
using json = nlohmann::ordered_json;
auto epsilon = 1e-4;

View File

@@ -1,5 +1,5 @@
#include "TestUtils.h"
#include "config.h"
#include "config_platform.h"
class Paths {
public:

View File

@@ -6,7 +6,7 @@
#include <map>
#include <tuple>
#include <ArffFiles.hpp>
#include "CPPFImdlp.h"
#include <fimdlp/CPPFImdlp.h>
bool file_exists(const std::string& name);
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);