Compare commits

..

1 Commits

Author SHA1 Message Date
9966ba4af8 Move tests library to tests/lib
Add PBC4cip classifier
2025-01-09 10:04:16 +01:00
20 changed files with 96 additions and 187 deletions

13
.gitmodules vendored Normal file
View File

@@ -0,0 +1,13 @@
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
[submodule "lib/catch2"]
path = tests/lib/catch2
url = https://github.com/catchorg/Catch2.git
[submodule "lib/mdlp"]
path = tests/lib/mdlp
url = https://github.com/rmontanana/mdlp
[submodule "tests/lib/Files"]
path = tests/lib/Files
url = https://github.com/rmontanana/ArffFiles

View File

@@ -45,8 +45,6 @@ endif()
find_package(Python3 3.11 COMPONENTS Interpreter Development REQUIRED)
message("Python3_LIBRARIES=${Python3_LIBRARIES}")
find_package(nlohmann_json CONFIG REQUIRED)
# CMakes modules
# --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
@@ -66,10 +64,9 @@ endif (ENABLE_CLANG_TIDY)
# External libraries - dependencies of PyClassifiers
# --------------------------------------------------
find_library(bayesnet NAMES libbayesnet bayesnet libbayesnet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED)
find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ../lib/include)
message(STATUS "BayesNet=${bayesnet}")
find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED)
find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ${PyClassifiers_SOURCE_DIR}/../lib/include)
message(STATUS "BayesNet=${BayesNet}")
message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
@@ -81,8 +78,9 @@ add_subdirectory(pyclfs)
# -------
if (ENABLE_TESTING)
MESSAGE("Testing enabled")
find_package(Catch2 CONFIG REQUIRED)
find_package(arff-files CONFIG REQUIRED)
add_git_submodule(tests/lib/catch2)
add_git_submodule(tests/lib/mdlp)
add_subdirectory(tests/lib/Files)
include(CTest)
add_subdirectory(tests)
endif (ENABLE_TESTING)
@@ -94,4 +92,4 @@ install(TARGETS PyClassifiers
LIBRARY DESTINATION lib
CONFIGURATIONS Release)
install(DIRECTORY pyclfs/ DESTINATION include/pyclassifiers FILES_MATCHING CONFIGURATIONS Release PATTERN "*.h" PATTERN "*.hpp")
install(FILES ${Bayesnet_INCLUDE_DIRS}/bayesnet/config.h DESTINATION include/pyclassifiers CONFIGURATIONS Release)
install(FILES ${Bayesnet_INCLUDE_DIRS}/bayesnet/config.h DESTINATION include/pyclassifiers CONFIGURATIONS Release)

View File

@@ -52,14 +52,14 @@ debug: ## Build a debug version of the project
@echo ">>> Building Debug PyClassifiers...";
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
@mkdir $(f_debug);
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON
@echo ">>> Done";
release: ## Build a Release version of the project
@echo ">>> Building Release PyClassifiers...";
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
@mkdir $(f_release);
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
@echo ">>> Done";
opt = ""

1
lib/json Submodule

Submodule lib/json added at 48e7b4c23b

View File

@@ -1,20 +0,0 @@
#include "AdaBoostPy.h"
namespace pywrap {
AdaBoostPy::AdaBoostPy() : PyClassifier("sklearn.ensemble", "AdaBoostClassifier", true)
{
validHyperparameters = { "n_estimators", "n_jobs", "random_state" };
}
int AdaBoostPy::getNumberOfEdges() const
{
return callMethodSumOfItems("get_n_leaves");
}
int AdaBoostPy::getNumberOfStates() const
{
return callMethodSumOfItems("get_depth");
}
int AdaBoostPy::getNumberOfNodes() const
{
return callMethodSumOfItems("node_count");
}
} /* namespace pywrap */

View File

@@ -1,15 +0,0 @@
#ifndef ADABOOSTPY_H
#define ADABOOSTPY_H
#include "PyClassifier.h"
namespace pywrap {
class AdaBoostPy : public PyClassifier {
public:
AdaBoostPy();
~AdaBoostPy() = default;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
int getNumberOfNodes() const override;
};
} /* namespace pywrap */
#endif /* ADABOOST_H */

View File

@@ -4,5 +4,5 @@ include_directories(
${PyClassifiers_SOURCE_DIR}/lib/json/include
${Bayesnet_INCLUDE_DIRS}
)
add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc AdaBoostPy.cc PyClassifier.cc PyWrap.cc)
target_link_libraries(PyClassifiers nlohmann_json::nlohmann_json ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy)
add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc PyClassifier.cc PyWrap.cc PBC4cip.cc)
target_link_libraries(PyClassifiers ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy)

8
pyclfs/PBC4cip.cc Normal file
View File

@@ -0,0 +1,8 @@
#include "PBC4cip.h"
namespace pywrap {
PBC4cip::PBC4cip() : PyClassifier("core.PBC4cip", "PBC4cip", true)
{
validHyperparameters = { "random_state" };
}
} /* namespace pywrap */

13
pyclfs/PBC4cip.h Normal file
View File

@@ -0,0 +1,13 @@
#ifndef PBC4CIP_H
#define PBC4CIP_H
#include "PyClassifier.h"
namespace pywrap {
class PBC4cip : public PyClassifier {
public:
PBC4cip();
~PBC4cip() = default;
};
} /* namespace pywrap */
#endif /* PBC4CIP_H */

View File

@@ -93,19 +93,11 @@ namespace pywrap {
PyErr_Print();
throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
}
if (xgboost) {
long* data = reinterpret_cast<long*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
} else {
int* data = reinterpret_cast<int*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
}
int* data = reinterpret_cast<int*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
}
torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
{
@@ -126,19 +118,11 @@ namespace pywrap {
PyErr_Print();
throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
}
if (xgboost) {
float* data = reinterpret_cast<float*>(prediction.get_data());
std::vector<float> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
} else {
double* data = reinterpret_cast<double*>(prediction.get_data());
std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
}
double* data = reinterpret_cast<double*>(prediction.get_data());
std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
}
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{
@@ -151,4 +135,4 @@ namespace pywrap {
{
this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */
} /* namespace pywrap */

View File

@@ -49,7 +49,6 @@ namespace pywrap {
nlohmann::json hyperparameters;
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {};
std::vector<std::string> notes;
bool xgboost = false;
private:
PyWrap* pyWrap;
std::string module;

View File

@@ -5,6 +5,5 @@ namespace pywrap {
XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true)
{
validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" };
xgboost = true;
}
} /* namespace pywrap */

View File

@@ -2,13 +2,15 @@ if(ENABLE_TESTING)
set(TEST_PYCLASSIFIERS "unit_tests_pyclassifiers")
include_directories(
${PyClassifiers_SOURCE_DIR}
${PyClassifiers_SOURCE_DIR}/lib/Files
${PyClassifiers_SOURCE_DIR}/lib/mdlp
${PyClassifiers_SOURCE_DIR}/lib/json/include
${Python3_INCLUDE_DIRS}
${TORCH_INCLUDE_DIRS}
${CMAKE_BINARY_DIR}/configured_files/include
/usr/local/include
)
file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc")
set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES})
add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS})
target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy fimdlp Catch2::Catch2WithMain)
target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles mdlp Catch2::Catch2WithMain)
endif(ENABLE_TESTING)

View File

@@ -10,16 +10,14 @@
#include "pyclfs/SVC.h"
#include "pyclfs/RandomForest.h"
#include "pyclfs/XGBoost.h"
#include "pyclfs/AdaBoostPy.h"
#include "pyclfs/ODTE.h"
#include "TestUtils.h"
#include <iostream>
TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{
map <pair<std::string, std::string>, float> scores = {
// Diabetes
{{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.856770813f}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
{{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.854166687}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
// Ecoli
{{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.875}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0},
// Glass
@@ -35,10 +33,10 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{"RandomForest", new pywrap::RandomForest()}
};
map<std::string, std::string> versions = {
{"ODTE", "1.0.0-1"},
{"STree", "1.4.0"},
{"SVC", "1.5.2"},
{"RandomForest", "1.5.2"}
{"ODTE", "1.0.0"},
{"STree", "1.3.2"},
{"SVC", "1.5.1"},
{"RandomForest", "1.5.1"}
};
auto clf = models[name];
@@ -60,15 +58,6 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
REQUIRE(clf->getVersion() == versions[name]);
}
}
TEST_CASE("AdaBoostClassifier", "[PyClassifiers]")
{
auto raw = RawDatasets("iris", false);
auto clf = pywrap::AdaBoostPy();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
clf.setHyperparameters(nlohmann::json::parse("{ \"n_estimators\": 100 }"));
auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.9599999f).epsilon(raw.epsilon));
}
TEST_CASE("Classifiers features", "[PyClassifiers]")
{
auto raw = RawDatasets("iris", false);
@@ -127,30 +116,33 @@ TEST_CASE("XGBoost", "[PyClassifiers]")
clf.setHyperparameters(hyperparameters);
auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
std::cout << "XGBoost score: " << score << std::endl;
}
TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
// TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
// {
// auto raw = RawDatasets("iris", true);
// auto clf = pywrap::XGBoost();
// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// // nlohmann::json hyperparameters = { "n_jobs=1" };
// // clf.setHyperparameters(hyperparameters);
// auto predict = clf.predict(raw.Xt);
// for (int row = 0; row < predict.size(0); row++) {
// auto sum = 0.0;
// for (int col = 0; col < predict.size(1); col++) {
// std::cout << std::setw(12) << std::setprecision(10) << predict[row][col].item<double>() << " ";
// sum += predict[row][col].item<int>();
// }
// std::cout << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
// }
// std::cout << predict << std::endl;
// }
TEST_CASE("PBC4cip", "[PyClassifiers]")
{
auto raw = RawDatasets("iris", true);
auto clf = pywrap::XGBoost();
auto clf = pywrap::PBC4cip();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// nlohmann::json hyperparameters = { "n_jobs=1" };
// clf.setHyperparameters(hyperparameters);
auto predict_proba = clf.predict_proba(raw.Xt);
auto predict = clf.predict(raw.Xt);
// std::cout << "Predict proba: " << predict_proba << std::endl;
// std::cout << "Predict proba size: " << predict_proba.sizes() << std::endl;
// assert(predict.size(0) == predict_proba.size(0));
for (int row = 0; row < predict_proba.size(0); row++) {
// auto sum = 0.0;
// std::cout << "Row " << std::setw(3) << row << ": ";
// for (int col = 0; col < predict_proba.size(1); col++) {
// std::cout << std::setw(9) << std::fixed << std::setprecision(7) << predict_proba[row][col].item<double>() << " ";
// sum += predict_proba[row][col].item<double>();
// }
// std::cout << " -> " << std::setw(9) << std::fixed << std::setprecision(7) << sum << " -> " << torch::argmax(predict_proba[row]).item<int>() << " = " << predict[row].item<int>() << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
}
nlohmann::json hyperparameters = { };
clf.setHyperparameters(hyperparameters);
auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
}

View File

@@ -5,8 +5,8 @@
#include <vector>
#include <map>
#include <tuple>
#include "ArffFiles/ArffFiles.hpp"
#include "fimdlp/CPPFImdlp.h"
#include "ArffFiles.h"
#include "CPPFImdlp.h"
bool file_exists(const std::string& name);
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);

1
tests/lib/Files Submodule

Submodule tests/lib/Files added at a4329f5f9d

1
tests/lib/catch2 Submodule

Submodule tests/lib/catch2 added at 506276c592

1
tests/lib/mdlp Submodule

Submodule tests/lib/mdlp added at 7d62d6af4a

View File

@@ -1,21 +0,0 @@
{
"default-registry": {
"kind": "git",
"baseline": "760bfd0c8d7c89ec640aec4df89418b7c2745605",
"repository": "https://github.com/microsoft/vcpkg"
},
"registries": [
{
"kind": "git",
"repository": "https://github.com/rmontanana/vcpkg-stash",
"baseline": "1ea69243c0e8b0de77c9d1dd6e1d7593ae7f3627",
"packages": [
"arff-files",
"bayesnet",
"fimdlp",
"folding",
"libtorch-bin"
]
}
]
}

View File

@@ -1,47 +0,0 @@
{
"name": "platform",
"version-string": "1.1.0",
"dependencies": [
"arff-files",
"nlohmann-json",
"fimdlp",
"libtorch-bin",
"folding",
"argparse",
"catch2"
],
"overrides": [
{
"name": "arff-files",
"version": "1.1.0"
},
{
"name": "fimdlp",
"version": "2.0.1"
},
{
"name": "libtorch-bin",
"version": "2.7.0"
},
{
"name": "bayesnet",
"version": "1.1.1"
},
{
"name": "folding",
"version": "1.1.1"
},
{
"name": "argpase",
"version": "3.2"
},
{
"name": "catch2",
"version": "3.8.1"
},
{
"name": "nlohmann-json",
"version": "3.11.3"
}
]
}