Compare commits

..

1 Commits

Author SHA1 Message Date
9966ba4af8 Move tests library to tests/lib
Add PBC4cip classifier
2025-01-09 10:04:16 +01:00
20 changed files with 96 additions and 187 deletions

13
.gitmodules vendored Normal file
View File

@@ -0,0 +1,13 @@
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
[submodule "lib/catch2"]
path = tests/lib/catch2
url = https://github.com/catchorg/Catch2.git
[submodule "lib/mdlp"]
path = tests/lib/mdlp
url = https://github.com/rmontanana/mdlp
[submodule "tests/lib/Files"]
path = tests/lib/Files
url = https://github.com/rmontanana/ArffFiles

View File

@@ -45,8 +45,6 @@ endif()
find_package(Python3 3.11 COMPONENTS Interpreter Development REQUIRED) find_package(Python3 3.11 COMPONENTS Interpreter Development REQUIRED)
message("Python3_LIBRARIES=${Python3_LIBRARIES}") message("Python3_LIBRARIES=${Python3_LIBRARIES}")
find_package(nlohmann_json CONFIG REQUIRED)
# CMakes modules # CMakes modules
# -------------- # --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
@@ -66,10 +64,9 @@ endif (ENABLE_CLANG_TIDY)
# External libraries - dependencies of PyClassifiers # External libraries - dependencies of PyClassifiers
# -------------------------------------------------- # --------------------------------------------------
find_library(bayesnet NAMES libbayesnet bayesnet libbayesnet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED) find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED)
find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ../lib/include) find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ${PyClassifiers_SOURCE_DIR}/../lib/include)
message(STATUS "BayesNet=${BayesNet}")
message(STATUS "BayesNet=${bayesnet}")
message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}") message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
@@ -81,8 +78,9 @@ add_subdirectory(pyclfs)
# ------- # -------
if (ENABLE_TESTING) if (ENABLE_TESTING)
MESSAGE("Testing enabled") MESSAGE("Testing enabled")
find_package(Catch2 CONFIG REQUIRED) add_git_submodule(tests/lib/catch2)
find_package(arff-files CONFIG REQUIRED) add_git_submodule(tests/lib/mdlp)
add_subdirectory(tests/lib/Files)
include(CTest) include(CTest)
add_subdirectory(tests) add_subdirectory(tests)
endif (ENABLE_TESTING) endif (ENABLE_TESTING)

View File

@@ -52,14 +52,14 @@ debug: ## Build a debug version of the project
@echo ">>> Building Debug PyClassifiers..."; @echo ">>> Building Debug PyClassifiers...";
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi @if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
@mkdir $(f_debug); @mkdir $(f_debug);
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake @cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON
@echo ">>> Done"; @echo ">>> Done";
release: ## Build a Release version of the project release: ## Build a Release version of the project
@echo ">>> Building Release PyClassifiers..."; @echo ">>> Building Release PyClassifiers...";
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi @if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
@mkdir $(f_release); @mkdir $(f_release);
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake @cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
@echo ">>> Done"; @echo ">>> Done";
opt = "" opt = ""

1
lib/json Submodule

Submodule lib/json added at 48e7b4c23b

View File

@@ -1,20 +0,0 @@
#include "AdaBoostPy.h"
namespace pywrap {
    // Constructs the wrapper by forwarding "sklearn.ensemble" / "AdaBoostClassifier"
    // and the flag `true` to the PyClassifier base constructor.
    // (Presumably these are the Python module and class name; the base
    // constructor is not visible here -- TODO confirm.)
    AdaBoostPy::AdaBoostPy()
        : PyClassifier("sklearn.ensemble", "AdaBoostClassifier", true)
    {
        // Hyperparameter names this wrapper declares as valid.
        // NOTE(review): scikit-learn's documented AdaBoostClassifier signature
        // does not appear to include "n_jobs" -- confirm before relying on it.
        validHyperparameters = { "n_estimators", "n_jobs", "random_state" };
    }

    // Returns whatever callMethodSumOfItems yields for "get_n_leaves".
    int AdaBoostPy::getNumberOfEdges() const
    {
        const int leavesTotal = callMethodSumOfItems("get_n_leaves");
        return leavesTotal;
    }

    // Returns whatever callMethodSumOfItems yields for "get_depth".
    int AdaBoostPy::getNumberOfStates() const
    {
        const int depthTotal = callMethodSumOfItems("get_depth");
        return depthTotal;
    }

    // Returns whatever callMethodSumOfItems yields for "node_count".
    int AdaBoostPy::getNumberOfNodes() const
    {
        const int nodesTotal = callMethodSumOfItems("node_count");
        return nodesTotal;
    }
} /* namespace pywrap */

View File

@@ -1,15 +0,0 @@
#ifndef ADABOOSTPY_H
#define ADABOOSTPY_H
#include "PyClassifier.h"
namespace pywrap {
    // Classifier wrapper derived from PyClassifier.
    // It adds no data members of its own; it only supplies a default
    // constructor and overrides the three size-reporting accessors below.
    class AdaBoostPy : public PyClassifier {
    public:
        // Defined out of line in the implementation file.
        AdaBoostPy();
        ~AdaBoostPy() = default;
        // Overrides of the base-class accessors; each returns an int count.
        int getNumberOfEdges() const override;
        int getNumberOfStates() const override;
        int getNumberOfNodes() const override;
    };
} /* namespace pywrap */
#endif /* ADABOOSTPY_H */

View File

@@ -4,5 +4,5 @@ include_directories(
${PyClassifiers_SOURCE_DIR}/lib/json/include ${PyClassifiers_SOURCE_DIR}/lib/json/include
${Bayesnet_INCLUDE_DIRS} ${Bayesnet_INCLUDE_DIRS}
) )
add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc AdaBoostPy.cc PyClassifier.cc PyWrap.cc) add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc PyClassifier.cc PyWrap.cc PBC4cip.cc)
target_link_libraries(PyClassifiers nlohmann_json::nlohmann_json ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy) target_link_libraries(PyClassifiers ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy)

8
pyclfs/PBC4cip.cc Normal file
View File

@@ -0,0 +1,8 @@
#include "PBC4cip.h"
namespace pywrap {
    // Constructs the wrapper by forwarding "core.PBC4cip" / "PBC4cip" and the
    // flag `true` to the PyClassifier base constructor.
    // (Presumably these are the Python module and class name; the base
    // constructor is not visible here -- TODO confirm.)
    PBC4cip::PBC4cip()
        : PyClassifier("core.PBC4cip", "PBC4cip", true)
    {
        // "random_state" is the only hyperparameter name declared valid here.
        validHyperparameters = { "random_state" };
    }
} /* namespace pywrap */

13
pyclfs/PBC4cip.h Normal file
View File

@@ -0,0 +1,13 @@
#ifndef PBC4CIP_H
#define PBC4CIP_H
#include "PyClassifier.h"
namespace pywrap {
    // Classifier wrapper derived from PyClassifier.
    // It declares only a default constructor and a defaulted destructor;
    // every other member is inherited unchanged from the base class.
    class PBC4cip : public PyClassifier {
    public:
        // Defined out of line in the implementation file.
        PBC4cip();
        ~PBC4cip() = default;
    };
} /* namespace pywrap */
#endif /* PBC4CIP_H */

View File

@@ -93,19 +93,11 @@ namespace pywrap {
PyErr_Print(); PyErr_Print();
throw std::runtime_error("Error creating object for predict in " + module + " and class " + className); throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
} }
if (xgboost) { int* data = reinterpret_cast<int*>(prediction.get_data());
long* data = reinterpret_cast<long*>(prediction.get_data()); std::vector<int> vPrediction(data, data + prediction.shape(0));
std::vector<int> vPrediction(data, data + prediction.shape(0)); auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
auto resultTensor = torch::tensor(vPrediction, torch::kInt32); Py_XDECREF(incoming);
Py_XDECREF(incoming); return resultTensor;
return resultTensor;
} else {
int* data = reinterpret_cast<int*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
}
} }
torch::Tensor PyClassifier::predict_proba(torch::Tensor& X) torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
{ {
@@ -126,19 +118,11 @@ namespace pywrap {
PyErr_Print(); PyErr_Print();
throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className); throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
} }
if (xgboost) { double* data = reinterpret_cast<double*>(prediction.get_data());
float* data = reinterpret_cast<float*>(prediction.get_data()); std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
std::vector<float> vPrediction(data, data + prediction.shape(0) * prediction.shape(1)); auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) }); Py_XDECREF(incoming);
Py_XDECREF(incoming); return resultTensor;
return resultTensor;
} else {
double* data = reinterpret_cast<double*>(prediction.get_data());
std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
}
} }
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y) float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{ {

View File

@@ -49,7 +49,6 @@ namespace pywrap {
nlohmann::json hyperparameters; nlohmann::json hyperparameters;
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {}; void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {};
std::vector<std::string> notes; std::vector<std::string> notes;
bool xgboost = false;
private: private:
PyWrap* pyWrap; PyWrap* pyWrap;
std::string module; std::string module;

View File

@@ -5,6 +5,5 @@ namespace pywrap {
XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true) XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true)
{ {
validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" }; validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" };
xgboost = true;
} }
} /* namespace pywrap */ } /* namespace pywrap */

View File

@@ -2,13 +2,15 @@ if(ENABLE_TESTING)
set(TEST_PYCLASSIFIERS "unit_tests_pyclassifiers") set(TEST_PYCLASSIFIERS "unit_tests_pyclassifiers")
include_directories( include_directories(
${PyClassifiers_SOURCE_DIR} ${PyClassifiers_SOURCE_DIR}
${PyClassifiers_SOURCE_DIR}/lib/Files
${PyClassifiers_SOURCE_DIR}/lib/mdlp
${PyClassifiers_SOURCE_DIR}/lib/json/include
${Python3_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS}
${TORCH_INCLUDE_DIRS} ${TORCH_INCLUDE_DIRS}
${CMAKE_BINARY_DIR}/configured_files/include
/usr/local/include /usr/local/include
) )
file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc") file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc")
set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES}) set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES})
add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS}) add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS})
target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy fimdlp Catch2::Catch2WithMain) target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles mdlp Catch2::Catch2WithMain)
endif(ENABLE_TESTING) endif(ENABLE_TESTING)

View File

@@ -10,16 +10,14 @@
#include "pyclfs/SVC.h" #include "pyclfs/SVC.h"
#include "pyclfs/RandomForest.h" #include "pyclfs/RandomForest.h"
#include "pyclfs/XGBoost.h" #include "pyclfs/XGBoost.h"
#include "pyclfs/AdaBoostPy.h"
#include "pyclfs/ODTE.h" #include "pyclfs/ODTE.h"
#include "TestUtils.h" #include "TestUtils.h"
#include <iostream>
TEST_CASE("Test Python Classifiers score", "[PyClassifiers]") TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{ {
map <pair<std::string, std::string>, float> scores = { map <pair<std::string, std::string>, float> scores = {
// Diabetes // Diabetes
{{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.856770813f}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0}, {{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.854166687}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
// Ecoli // Ecoli
{{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.875}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0}, {{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.875}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0},
// Glass // Glass
@@ -35,10 +33,10 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{"RandomForest", new pywrap::RandomForest()} {"RandomForest", new pywrap::RandomForest()}
}; };
map<std::string, std::string> versions = { map<std::string, std::string> versions = {
{"ODTE", "1.0.0-1"}, {"ODTE", "1.0.0"},
{"STree", "1.4.0"}, {"STree", "1.3.2"},
{"SVC", "1.5.2"}, {"SVC", "1.5.1"},
{"RandomForest", "1.5.2"} {"RandomForest", "1.5.1"}
}; };
auto clf = models[name]; auto clf = models[name];
@@ -60,15 +58,6 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
REQUIRE(clf->getVersion() == versions[name]); REQUIRE(clf->getVersion() == versions[name]);
} }
} }
TEST_CASE("AdaBoostClassifier", "[PyClassifiers]")
{
auto raw = RawDatasets("iris", false);
auto clf = pywrap::AdaBoostPy();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
clf.setHyperparameters(nlohmann::json::parse("{ \"n_estimators\": 100 }"));
auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.9599999f).epsilon(raw.epsilon));
}
TEST_CASE("Classifiers features", "[PyClassifiers]") TEST_CASE("Classifiers features", "[PyClassifiers]")
{ {
auto raw = RawDatasets("iris", false); auto raw = RawDatasets("iris", false);
@@ -127,30 +116,33 @@ TEST_CASE("XGBoost", "[PyClassifiers]")
clf.setHyperparameters(hyperparameters); clf.setHyperparameters(hyperparameters);
auto score = clf.score(raw.Xt, raw.yt); auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon)); REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
std::cout << "XGBoost score: " << score << std::endl;
} }
TEST_CASE("XGBoost predict proba", "[PyClassifiers]") // TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
// {
// auto raw = RawDatasets("iris", true);
// auto clf = pywrap::XGBoost();
// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// // nlohmann::json hyperparameters = { "n_jobs=1" };
// // clf.setHyperparameters(hyperparameters);
// auto predict = clf.predict(raw.Xt);
// for (int row = 0; row < predict.size(0); row++) {
// auto sum = 0.0;
// for (int col = 0; col < predict.size(1); col++) {
// std::cout << std::setw(12) << std::setprecision(10) << predict[row][col].item<double>() << " ";
// sum += predict[row][col].item<int>();
// }
// std::cout << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
// }
// std::cout << predict << std::endl;
// }
TEST_CASE("PBC4cip", "[PyClassifiers]")
{ {
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto clf = pywrap::XGBoost(); auto clf = pywrap::PBC4cip();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// nlohmann::json hyperparameters = { "n_jobs=1" }; nlohmann::json hyperparameters = { };
// clf.setHyperparameters(hyperparameters); clf.setHyperparameters(hyperparameters);
auto predict_proba = clf.predict_proba(raw.Xt); auto score = clf.score(raw.Xt, raw.yt);
auto predict = clf.predict(raw.Xt); REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
// std::cout << "Predict proba: " << predict_proba << std::endl;
// std::cout << "Predict proba size: " << predict_proba.sizes() << std::endl;
// assert(predict.size(0) == predict_proba.size(0));
for (int row = 0; row < predict_proba.size(0); row++) {
// auto sum = 0.0;
// std::cout << "Row " << std::setw(3) << row << ": ";
// for (int col = 0; col < predict_proba.size(1); col++) {
// std::cout << std::setw(9) << std::fixed << std::setprecision(7) << predict_proba[row][col].item<double>() << " ";
// sum += predict_proba[row][col].item<double>();
// }
// std::cout << " -> " << std::setw(9) << std::fixed << std::setprecision(7) << sum << " -> " << torch::argmax(predict_proba[row]).item<int>() << " = " << predict[row].item<int>() << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
}
} }

View File

@@ -5,8 +5,8 @@
#include <vector> #include <vector>
#include <map> #include <map>
#include <tuple> #include <tuple>
#include "ArffFiles/ArffFiles.hpp" #include "ArffFiles.h"
#include "fimdlp/CPPFImdlp.h" #include "CPPFImdlp.h"
bool file_exists(const std::string& name); bool file_exists(const std::string& name);
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features); std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);

1
tests/lib/Files Submodule

Submodule tests/lib/Files added at a4329f5f9d

1
tests/lib/catch2 Submodule

Submodule tests/lib/catch2 added at 506276c592

1
tests/lib/mdlp Submodule

Submodule tests/lib/mdlp added at 7d62d6af4a

View File

@@ -1,21 +0,0 @@
{
"default-registry": {
"kind": "git",
"baseline": "760bfd0c8d7c89ec640aec4df89418b7c2745605",
"repository": "https://github.com/microsoft/vcpkg"
},
"registries": [
{
"kind": "git",
"repository": "https://github.com/rmontanana/vcpkg-stash",
"baseline": "1ea69243c0e8b0de77c9d1dd6e1d7593ae7f3627",
"packages": [
"arff-files",
"bayesnet",
"fimdlp",
"folding",
"libtorch-bin"
]
}
]
}

View File

@@ -1,47 +0,0 @@
{
"name": "platform",
"version-string": "1.1.0",
"dependencies": [
"arff-files",
"nlohmann-json",
"fimdlp",
"libtorch-bin",
"folding",
"argparse",
"catch2"
],
"overrides": [
{
"name": "arff-files",
"version": "1.1.0"
},
{
"name": "fimdlp",
"version": "2.0.1"
},
{
"name": "libtorch-bin",
"version": "2.7.0"
},
{
"name": "bayesnet",
"version": "1.1.1"
},
{
"name": "folding",
"version": "1.1.1"
},
{
"name": "argparse",
"version": "3.2"
},
{
"name": "catch2",
"version": "3.8.1"
},
{
"name": "nlohmann-json",
"version": "3.11.3"
}
]
}