Compare commits
11 Commits
26f8e07774
...
alphablock
Author | SHA1 | Date | |
---|---|---|---|
ba455bb934
|
|||
a65955248a
|
|||
84930b0537
|
|||
10c65f44a0
|
|||
6d112f01e7
|
|||
401296293b
|
|||
9566ae4cf6
|
|||
55187ee521
|
|||
68ea06d129
|
|||
6c1d1d0d32
|
|||
b0853d169b
|
7
.gitmodules
vendored
7
.gitmodules
vendored
@@ -10,13 +10,12 @@
|
||||
[submodule "lib/libxlsxwriter"]
|
||||
path = lib/libxlsxwriter
|
||||
url = https://github.com/jmcnamara/libxlsxwriter.git
|
||||
[submodule "lib/mdlp"]
|
||||
path = lib/mdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
||||
update = merge
|
||||
[submodule "lib/folding"]
|
||||
path = lib/folding
|
||||
url = https://github.com/rmontanana/folding
|
||||
[submodule "lib/Files"]
|
||||
path = lib/Files
|
||||
url = https://github.com/rmontanana/ArffFiles
|
||||
[submodule "lib/mdlp"]
|
||||
path = lib/mdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
||||
|
@@ -90,7 +90,7 @@ cmake_path(SET TEST_DATA_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
|
||||
configure_file(src/common/SourceData.h.in "${CMAKE_BINARY_DIR}/configured_files/include/SourceData.h")
|
||||
add_subdirectory(config)
|
||||
add_subdirectory(src)
|
||||
add_subdirectory(sample)
|
||||
# add_subdirectory(sample)
|
||||
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${Platform_SOURCE_DIR}/src/*.cpp)
|
||||
|
||||
# Testing
|
||||
|
@@ -24,7 +24,14 @@ The solution is to erase the libstdc++ library from the miniconda installation a
|
||||
|
||||
### MPI
|
||||
|
||||
In Linux just install openmpi & openmpi-devel packages. Only if cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
|
||||
In Linux just install openmpi & openmpi-devel packages.
|
||||
|
||||
```bash
|
||||
source /etc/profile.d/modules.sh
|
||||
module load mpi/openmpi-x86_64
|
||||
```
|
||||
|
||||
If cmake can't find openmpi installation (like in Oracle Linux) set the following variable:
|
||||
|
||||
```bash
|
||||
export MPI_HOME="/usr/lib64/openmpi"
|
||||
|
@@ -1,4 +1,4 @@
|
||||
configure_file(
|
||||
"config.h.in"
|
||||
"${CMAKE_BINARY_DIR}/configured_files/include/config.h" ESCAPE_QUOTES
|
||||
"${CMAKE_BINARY_DIR}/configured_files/include/config_platform.h" ESCAPE_QUOTES
|
||||
)
|
||||
|
@@ -1,8 +1,3 @@
|
||||
[submodule "lib/mdlp"]
|
||||
path = lib/mdlp
|
||||
url = https://github.com/rmontanana/mdlp
|
||||
main = main
|
||||
update = merge
|
||||
[submodule "lib/catch2"]
|
||||
path = lib/catch2
|
||||
main = v2.x
|
||||
|
Submodule lib/Files updated: a5316928d4...a4329f5f9d
Submodule lib/argparse updated: e462ab980c...cbd9fd8ed6
Submodule lib/catch2 updated: 4e8d92bf02...0321d2fce3
2
lib/json
2
lib/json
Submodule lib/json updated: 960b763ecd...620034ecec
Submodule lib/libxlsxwriter updated: cf887d65ce...8206bda64a
2
lib/mdlp
2
lib/mdlp
Submodule lib/mdlp updated: 2db60e007d...cfb993f5ec
@@ -12,4 +12,4 @@ include_directories(
|
||||
${Bayesnet_INCLUDE_DIRS}
|
||||
)
|
||||
add_executable(PlatformSample sample.cpp ${Platform_SOURCE_DIR}/src/main/Models.cpp)
|
||||
target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||
target_link_libraries(PlatformSample "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
@@ -6,12 +6,12 @@
|
||||
#include <argparse/argparse.hpp>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <ArffFiles.hpp>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
#include <folding.hpp>
|
||||
#include <bayesnet/utils/BayesMetrics.h>
|
||||
#include "Models.h"
|
||||
#include "modelRegister.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
const std::string PATH = { platform_data_path.begin(), platform_data_path.end() };
|
||||
|
||||
|
@@ -26,7 +26,7 @@ add_executable(
|
||||
reports/ReportExcel.cpp reports/ReportBase.cpp reports/ExcelFile.cpp
|
||||
results/Result.cpp
|
||||
)
|
||||
target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||
target_link_libraries(b_best Boost::boost "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||
|
||||
# b_grid
|
||||
set(grid_sources GridSearch.cpp GridData.cpp)
|
||||
@@ -35,7 +35,7 @@ add_executable(b_grid commands/b_grid.cpp ${grid_sources}
|
||||
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
|
||||
main/HyperParameters.cpp main/Models.cpp
|
||||
)
|
||||
target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||
target_link_libraries(b_grid ${MPI_CXX_LIBRARIES} "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||
|
||||
# b_list
|
||||
add_executable(b_list commands/b_list.cpp
|
||||
@@ -44,7 +44,7 @@ add_executable(b_list commands/b_list.cpp
|
||||
reports/ReportExcel.cpp reports/ExcelFile.cpp reports/ReportBase.cpp reports/DatasetsExcel.cpp reports/DatasetsConsole.cpp reports/ReportsPaged.cpp
|
||||
results/Result.cpp results/ResultsDatasetExcel.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
|
||||
)
|
||||
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||
|
||||
# b_main
|
||||
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
|
||||
@@ -54,7 +54,7 @@ add_executable(b_main commands/b_main.cpp ${main_sources}
|
||||
reports/ReportConsole.cpp reports/ReportBase.cpp
|
||||
results/Result.cpp
|
||||
)
|
||||
target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||
target_link_libraries(b_main "${PyClassifiers}" "${BayesNet}" fimdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy)
|
||||
|
||||
# b_manage
|
||||
set(manage_sources ManageScreen.cpp OptionsMenu.cpp ResultsManager.cpp)
|
||||
@@ -66,4 +66,4 @@ add_executable(
|
||||
results/Result.cpp results/ResultsDataset.cpp results/ResultsDatasetConsole.cpp
|
||||
main/Scores.cpp
|
||||
)
|
||||
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" mdlp "${BayesNet}")
|
||||
target_link_libraries(b_manage "${TORCH_LIBRARIES}" "${XLSXWRITER_LIB}" fimdlp "${BayesNet}")
|
||||
|
@@ -5,7 +5,7 @@
|
||||
#include "common/Paths.h"
|
||||
#include "common/Colors.h"
|
||||
#include "best/BestResults.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
void manageArguments(argparse::ArgumentParser& program)
|
||||
{
|
||||
|
@@ -11,7 +11,7 @@
|
||||
#include "common/Colors.h"
|
||||
#include "common/DotEnv.h"
|
||||
#include "grid/GridSearch.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
const int MAXL = 133;
|
||||
@@ -93,8 +93,10 @@ void list_dump(std::string& model)
|
||||
if (item.first.size() > max_dataset) {
|
||||
max_dataset = item.first.size();
|
||||
}
|
||||
if (item.second.dump().size() > max_hyper) {
|
||||
max_hyper = item.second.dump().size();
|
||||
for (auto const& [key, value] : item.second.items()) {
|
||||
if (value.dump().size() > max_hyper) {
|
||||
max_hyper = value.dump().size();
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << Colors::GREEN() << left << " # " << left << setw(max_dataset) << "Dataset" << " #Com. "
|
||||
@@ -106,7 +108,12 @@ void list_dump(std::string& model)
|
||||
std::cout << color;
|
||||
auto num_combinations = data.getNumCombinations(item.first);
|
||||
std::cout << setw(3) << fixed << right << ++index << left << " " << setw(max_dataset) << item.first
|
||||
<< " " << setw(5) << right << num_combinations << " " << setw(max_hyper) << left << item.second.dump() << std::endl;
|
||||
<< " " << setw(5) << right << num_combinations << " ";
|
||||
std::string prefix = "";
|
||||
for (auto const& [key, value] : item.second.items()) {
|
||||
std::cout << prefix << setw(max_hyper) << std::left << value.dump() << std::endl;
|
||||
prefix = string(11 + max_dataset, ' ');
|
||||
}
|
||||
}
|
||||
std::cout << Colors::RESET() << std::endl;
|
||||
}
|
||||
|
@@ -13,7 +13,7 @@
|
||||
#include "results/ResultsDatasetConsole.h"
|
||||
#include "results/ResultsDataset.h"
|
||||
#include "results/ResultsDatasetExcel.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
|
||||
void list_datasets(argparse::ArgumentParser& program)
|
||||
|
@@ -7,7 +7,7 @@
|
||||
#include "common/Paths.h"
|
||||
#include "main/Models.h"
|
||||
#include "main/modelRegister.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
@@ -5,7 +5,7 @@
|
||||
#include <argparse/argparse.hpp>
|
||||
#include "manage/ManageScreen.h"
|
||||
#include <signal.h>
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
platform::ManageScreen* manager = nullptr;
|
||||
|
||||
|
@@ -8,7 +8,7 @@ namespace platform {
|
||||
Datasets::Datasets(bool discretize, std::string sfileType, std::string discretizer_algorithm) :
|
||||
discretize(discretize), sfileType(sfileType), discretizer_algorithm(discretizer_algorithm)
|
||||
{
|
||||
if (discretizer_algorithm == "none" && discretize) {
|
||||
if ((discretizer_algorithm == "none" || discretizer_algorithm == "") && discretize) {
|
||||
throw std::runtime_error("Can't discretize without discretization algorithm");
|
||||
}
|
||||
load();
|
||||
|
@@ -5,9 +5,9 @@
|
||||
#include <string>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
#include <Discretizer.h>
|
||||
#include <BinDisc.h>
|
||||
#include <CPPFImdlp.h>
|
||||
#include <fimdlp/Discretizer.h>
|
||||
#include <fimdlp/BinDisc.h>
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
namespace platform {
|
||||
class Discretization {
|
||||
public:
|
||||
|
@@ -108,6 +108,7 @@ namespace platform {
|
||||
// Generate the hyperparamters combinations
|
||||
auto& dataset = datasets.getDataset(dataset_name);
|
||||
auto combinations = grid.getGrid(dataset_name);
|
||||
dataset.load();
|
||||
auto [X, y] = dataset.getTensors();
|
||||
auto features = dataset.getFeatures();
|
||||
auto className = dataset.getClassName();
|
||||
@@ -353,7 +354,8 @@ namespace platform {
|
||||
tasks = json::parse(msg);
|
||||
delete[] msg;
|
||||
auto env = platform::DotEnv();
|
||||
auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretiz_algo"));
|
||||
auto datasets = Datasets(config.discretize, Paths::datasets(), env.get("discretize_algo"));
|
||||
|
||||
if (config_mpi.rank == config_mpi.manager) {
|
||||
//
|
||||
// 2a. Producer delivers the tasks to the consumers
|
||||
|
@@ -58,8 +58,8 @@ namespace platform {
|
||||
std::cout << " ( " << Colors::GREEN() << "b" << Colors::RESET() << " ) Scoring train dataset" << std::endl;
|
||||
std::cout << " ( " << Colors::GREEN() << "c" << Colors::RESET() << " ) Scoring test dataset" << std::endl << std::endl;
|
||||
std::cout << Colors::YELLOW() << "Note: fold number in this color means fitting had issues such as not using all features in BoostAODE classifier" << std::endl << std::endl;
|
||||
std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << std::endl;
|
||||
std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << Colors::RESET() << std::endl;
|
||||
std::cout << Colors::GREEN() << left << " # " << setw(max_name) << "Dataset" << " #Samp #Feat Seed Status" << string(3 * nfolds - 2, ' ') << " Time" << std::endl;
|
||||
std::cout << " --- " << string(max_name, '-') << " ----- ----- ---- " << string(4 + 3 * nfolds, '-') << " ----------" << Colors::RESET() << std::endl;
|
||||
}
|
||||
int num = 0;
|
||||
for (auto fileName : filesToProcess) {
|
||||
@@ -176,7 +176,7 @@ namespace platform {
|
||||
json confusion_matrices_train = json::array();
|
||||
std::vector<std::string> notes;
|
||||
std::vector<std::string> graphs;
|
||||
Timer train_timer, test_timer;
|
||||
Timer train_timer, test_timer, seed_timer;
|
||||
int item = 0;
|
||||
bool first_seed = true;
|
||||
//
|
||||
@@ -184,6 +184,7 @@ namespace platform {
|
||||
//
|
||||
auto score = parse_score();
|
||||
for (auto seed : randomSeeds) {
|
||||
seed_timer.start();
|
||||
if (!quiet) {
|
||||
string prefix = " ";
|
||||
if (!first_seed) {
|
||||
@@ -274,8 +275,10 @@ namespace platform {
|
||||
graphs.push_back(result);
|
||||
}
|
||||
}
|
||||
if (!quiet)
|
||||
std::cout << "end. " << flush;
|
||||
if (!quiet) {
|
||||
seed_timer.stop();
|
||||
std::cout << "end. [" << seed_timer.getDurationString() << "]" << std::endl;
|
||||
}
|
||||
delete fold;
|
||||
}
|
||||
//
|
||||
|
@@ -12,7 +12,7 @@
|
||||
#include "reports/ReportExcel.h"
|
||||
#include "reports/ReportExcelCompared.h"
|
||||
#include <bayesnet/classifiers/TAN.h>
|
||||
#include "CPPFImdlp.h"
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
|
||||
namespace platform {
|
||||
const std::string STATUS_OK = "Ok.";
|
||||
|
@@ -24,8 +24,8 @@ namespace platform {
|
||||
+ " random seeds. " + data["date"].get<std::string>() + " " + data["time"].get<std::string>()
|
||||
);
|
||||
sheader << headerLine(data["title"].get<std::string>());
|
||||
std::string discretiz_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "ORIGINAL";
|
||||
std::string algorithm = data["discretized"].get<bool>() ? " (" + discretiz_algo + ")" : "";
|
||||
std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "ORIGINAL";
|
||||
std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
|
||||
std::string smooth = data.find("smooth_strategy") != data.end() ? data["smooth_strategy"].get<std::string>() : "ORIGINAL";
|
||||
std::string stratified;
|
||||
try {
|
||||
|
@@ -68,8 +68,8 @@ namespace platform {
|
||||
worksheet_merge_range(worksheet, 3, 10, 3, 11, oss.str().c_str(), styles["headerSmall"]);
|
||||
oss.str("");
|
||||
oss.clear();
|
||||
std::string discretiz_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "mdlp";
|
||||
std::string algorithm = data["discretized"].get<bool>() ? " (" + discretiz_algo + ")" : "";
|
||||
std::string discretize_algo = data.find("discretization_algorithm") != data.end() ? data["discretization_algorithm"].get<std::string>() : "mdlp";
|
||||
std::string algorithm = data["discretized"].get<bool>() ? " (" + discretize_algo + ")" : "";
|
||||
oss << "Discretized: " << (data["discretized"].get<bool>() ? "True" : "False") << algorithm;
|
||||
worksheet_write_string(worksheet, 3, 12, oss.str().c_str(), styles["headerSmall"]);
|
||||
}
|
||||
|
@@ -9,7 +9,7 @@
|
||||
#include "folding.hpp"
|
||||
#include <ArffFiles.hpp>
|
||||
#include <bayesnet/classifiers/TAN.h>
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
|
||||
TEST_CASE("Test Platform version", "[Platform]")
|
||||
|
@@ -7,7 +7,7 @@
|
||||
#include "common/DotEnv.h"
|
||||
#include "common/Datasets.h"
|
||||
#include "common/Paths.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
|
||||
TEST_CASE("ZeroR comparison in reports", "[Report]")
|
||||
|
@@ -9,7 +9,7 @@
|
||||
#include "common/Paths.h"
|
||||
#include "common/Colors.h"
|
||||
#include "main/Scores.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
auto epsilon = 1e-4;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#include "TestUtils.h"
|
||||
#include "config.h"
|
||||
#include "config_platform.h"
|
||||
|
||||
class Paths {
|
||||
public:
|
||||
|
@@ -6,7 +6,7 @@
|
||||
#include <map>
|
||||
#include <tuple>
|
||||
#include <ArffFiles.hpp>
|
||||
#include "CPPFImdlp.h"
|
||||
#include <fimdlp/CPPFImdlp.h>
|
||||
|
||||
bool file_exists(const std::string& name);
|
||||
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);
|
||||
|
Reference in New Issue
Block a user