Compare commits

..

8 Commits

20 changed files with 187 additions and 96 deletions

13
.gitmodules vendored
View File

@@ -1,13 +0,0 @@
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
[submodule "lib/catch2"]
path = tests/lib/catch2
url = https://github.com/catchorg/Catch2.git
[submodule "lib/mdlp"]
path = tests/lib/mdlp
url = https://github.com/rmontanana/mdlp
[submodule "tests/lib/Files"]
path = tests/lib/Files
url = https://github.com/rmontanana/ArffFiles

View File

@@ -45,6 +45,8 @@ endif()
find_package(Python3 3.11 COMPONENTS Interpreter Development REQUIRED) find_package(Python3 3.11 COMPONENTS Interpreter Development REQUIRED)
message("Python3_LIBRARIES=${Python3_LIBRARIES}") message("Python3_LIBRARIES=${Python3_LIBRARIES}")
find_package(nlohmann_json CONFIG REQUIRED)
# CMakes modules # CMakes modules
# -------------- # --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
@@ -64,9 +66,10 @@ endif (ENABLE_CLANG_TIDY)
# External libraries - dependencies of PyClassifiers # External libraries - dependencies of PyClassifiers
# -------------------------------------------------- # --------------------------------------------------
find_library(BayesNet NAMES libBayesNet BayesNet libBayesNet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED) find_library(bayesnet NAMES libbayesnet bayesnet libbayesnet.a PATHS ${PyClassifiers_SOURCE_DIR}/../lib/lib REQUIRED)
find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ${PyClassifiers_SOURCE_DIR}/../lib/include) find_path(Bayesnet_INCLUDE_DIRS REQUIRED NAMES bayesnet PATHS ../lib/include)
message(STATUS "BayesNet=${BayesNet}")
message(STATUS "BayesNet=${bayesnet}")
message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}") message(STATUS "Bayesnet_INCLUDE_DIRS=${Bayesnet_INCLUDE_DIRS}")
@@ -78,9 +81,8 @@ add_subdirectory(pyclfs)
# ------- # -------
if (ENABLE_TESTING) if (ENABLE_TESTING)
MESSAGE("Testing enabled") MESSAGE("Testing enabled")
add_git_submodule(tests/lib/catch2) find_package(Catch2 CONFIG REQUIRED)
add_git_submodule(tests/lib/mdlp) find_package(arff-files CONFIG REQUIRED)
add_subdirectory(tests/lib/Files)
include(CTest) include(CTest)
add_subdirectory(tests) add_subdirectory(tests)
endif (ENABLE_TESTING) endif (ENABLE_TESTING)

View File

@@ -52,14 +52,14 @@ debug: ## Build a debug version of the project
@echo ">>> Building Debug PyClassifiers..."; @echo ">>> Building Debug PyClassifiers...";
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi @if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
@mkdir $(f_debug); @mkdir $(f_debug);
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON @cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake
@echo ">>> Done"; @echo ">>> Done";
release: ## Build a Release version of the project release: ## Build a Release version of the project
@echo ">>> Building Release PyClassifiers..."; @echo ">>> Building Release PyClassifiers...";
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi @if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
@mkdir $(f_release); @mkdir $(f_release);
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release @cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake
@echo ">>> Done"; @echo ">>> Done";
opt = "" opt = ""

Submodule lib/json deleted from 48e7b4c23b

20
pyclfs/AdaBoostPy.cc Normal file
View File

@@ -0,0 +1,20 @@
#include "AdaBoostPy.h"
namespace pywrap {
    /**
     * Builds a wrapper around the Python class sklearn.ensemble.AdaBoostClassifier.
     *
     * The module name, class name and the trailing flag are forwarded unchanged to the
     * PyClassifier base constructor; the list below declares which hyperparameter keys
     * this wrapper accepts.
     */
    AdaBoostPy::AdaBoostPy() : PyClassifier("sklearn.ensemble", "AdaBoostClassifier", true)
    {
        // AdaBoostClassifier has no "n_jobs" constructor argument (boosting rounds are
        // sequential), so that key could only fail once it reached Python. Expose
        // "learning_rate", which the estimator does accept, instead.
        validHyperparameters = { "n_estimators", "learning_rate", "random_state" };
    }

    /**
     * @return the value of callMethodSumOfItems("get_n_leaves").
     * NOTE(review): presumably the leaf count summed over the fitted base estimators;
     * confirm against callMethodSumOfItems in PyClassifier.
     */
    int AdaBoostPy::getNumberOfEdges() const
    {
        return callMethodSumOfItems("get_n_leaves");
    }

    /**
     * @return the value of callMethodSumOfItems("get_depth").
     * NOTE(review): presumably the tree depth summed over the fitted base estimators;
     * confirm against callMethodSumOfItems in PyClassifier.
     */
    int AdaBoostPy::getNumberOfStates() const
    {
        return callMethodSumOfItems("get_depth");
    }

    /**
     * @return the value of callMethodSumOfItems("node_count").
     * NOTE(review): "node_count" looks like an attribute name rather than a method;
     * verify that callMethodSumOfItems resolves it the same way as the two calls above.
     */
    int AdaBoostPy::getNumberOfNodes() const
    {
        return callMethodSumOfItems("node_count");
    }
} /* namespace pywrap */

15
pyclfs/AdaBoostPy.h Normal file
View File

@@ -0,0 +1,15 @@
#ifndef ADABOOSTPY_H
#define ADABOOSTPY_H
#include "PyClassifier.h"
namespace pywrap {
/**
 * PyClassifier specialisation for scikit-learn's AdaBoostClassifier.
 *
 * The constructor (see AdaBoostPy.cc) hands the module "sklearn.ensemble" and the
 * class "AdaBoostClassifier" to the PyClassifier base. The three getters override
 * the base-class queries and are implemented through callMethodSumOfItems.
 */
class AdaBoostPy : public PyClassifier {
public:
// Configures the base class for sklearn.ensemble.AdaBoostClassifier.
AdaBoostPy();
~AdaBoostPy() = default;
// Implemented as callMethodSumOfItems("get_n_leaves").
int getNumberOfEdges() const override;
// Implemented as callMethodSumOfItems("get_depth").
int getNumberOfStates() const override;
// Implemented as callMethodSumOfItems("node_count").
int getNumberOfNodes() const override;
};
} /* namespace pywrap */
#endif /* ADABOOSTPY_H */

View File

@@ -4,5 +4,5 @@ include_directories(
${PyClassifiers_SOURCE_DIR}/lib/json/include ${PyClassifiers_SOURCE_DIR}/lib/json/include
${Bayesnet_INCLUDE_DIRS} ${Bayesnet_INCLUDE_DIRS}
) )
add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc PyClassifier.cc PyWrap.cc PBC4cip.cc) add_library(PyClassifiers ODTE.cc STree.cc SVC.cc RandomForest.cc XGBoost.cc AdaBoostPy.cc PyClassifier.cc PyWrap.cc)
target_link_libraries(PyClassifiers ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy) target_link_libraries(PyClassifiers nlohmann_json::nlohmann_json ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy)

View File

@@ -1,8 +0,0 @@
#include "PBC4cip.h"
namespace pywrap {
PBC4cip::PBC4cip() : PyClassifier("core.PBC4cip", "PBC4cip", true)
{
validHyperparameters = { "random_state" };
}
} /* namespace pywrap */

View File

@@ -1,13 +0,0 @@
#ifndef PBC4CIP_H
#define PBC4CIP_H
#include "PyClassifier.h"
namespace pywrap {
class PBC4cip : public PyClassifier {
public:
PBC4cip();
~PBC4cip() = default;
};
} /* namespace pywrap */
#endif /* PBC4CIP_H */

View File

@@ -93,11 +93,19 @@ namespace pywrap {
PyErr_Print(); PyErr_Print();
throw std::runtime_error("Error creating object for predict in " + module + " and class " + className); throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
} }
int* data = reinterpret_cast<int*>(prediction.get_data()); if (xgboost) {
std::vector<int> vPrediction(data, data + prediction.shape(0)); long* data = reinterpret_cast<long*>(prediction.get_data());
auto resultTensor = torch::tensor(vPrediction, torch::kInt32); std::vector<int> vPrediction(data, data + prediction.shape(0));
Py_XDECREF(incoming); auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
return resultTensor; Py_XDECREF(incoming);
return resultTensor;
} else {
int* data = reinterpret_cast<int*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
}
} }
torch::Tensor PyClassifier::predict_proba(torch::Tensor& X) torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
{ {
@@ -118,11 +126,19 @@ namespace pywrap {
PyErr_Print(); PyErr_Print();
throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className); throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
} }
double* data = reinterpret_cast<double*>(prediction.get_data()); if (xgboost) {
std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1)); float* data = reinterpret_cast<float*>(prediction.get_data());
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) }); std::vector<float> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
Py_XDECREF(incoming); auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
return resultTensor; Py_XDECREF(incoming);
return resultTensor;
} else {
double* data = reinterpret_cast<double*>(prediction.get_data());
std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
}
} }
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y) float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{ {

View File

@@ -49,6 +49,7 @@ namespace pywrap {
nlohmann::json hyperparameters; nlohmann::json hyperparameters;
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {}; void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {};
std::vector<std::string> notes; std::vector<std::string> notes;
bool xgboost = false;
private: private:
PyWrap* pyWrap; PyWrap* pyWrap;
std::string module; std::string module;

View File

@@ -5,5 +5,6 @@ namespace pywrap {
XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true) XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true)
{ {
validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" }; validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" };
xgboost = true;
} }
} /* namespace pywrap */ } /* namespace pywrap */

View File

@@ -2,15 +2,13 @@ if(ENABLE_TESTING)
set(TEST_PYCLASSIFIERS "unit_tests_pyclassifiers") set(TEST_PYCLASSIFIERS "unit_tests_pyclassifiers")
include_directories( include_directories(
${PyClassifiers_SOURCE_DIR} ${PyClassifiers_SOURCE_DIR}
${PyClassifiers_SOURCE_DIR}/lib/Files
${PyClassifiers_SOURCE_DIR}/lib/mdlp
${PyClassifiers_SOURCE_DIR}/lib/json/include
${Python3_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS}
${TORCH_INCLUDE_DIRS} ${TORCH_INCLUDE_DIRS}
${CMAKE_BINARY_DIR}/configured_files/include
/usr/local/include /usr/local/include
) )
file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc") file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc")
set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES}) set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES})
add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS}) add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS})
target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles mdlp Catch2::Catch2WithMain) target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy fimdlp Catch2::Catch2WithMain)
endif(ENABLE_TESTING) endif(ENABLE_TESTING)

View File

@@ -10,14 +10,16 @@
#include "pyclfs/SVC.h" #include "pyclfs/SVC.h"
#include "pyclfs/RandomForest.h" #include "pyclfs/RandomForest.h"
#include "pyclfs/XGBoost.h" #include "pyclfs/XGBoost.h"
#include "pyclfs/AdaBoostPy.h"
#include "pyclfs/ODTE.h" #include "pyclfs/ODTE.h"
#include "TestUtils.h" #include "TestUtils.h"
#include <iostream>
TEST_CASE("Test Python Classifiers score", "[PyClassifiers]") TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{ {
map <pair<std::string, std::string>, float> scores = { map <pair<std::string, std::string>, float> scores = {
// Diabetes // Diabetes
{{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.854166687}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0}, {{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.856770813f}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
// Ecoli // Ecoli
{{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.875}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0}, {{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.875}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0},
// Glass // Glass
@@ -33,10 +35,10 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{"RandomForest", new pywrap::RandomForest()} {"RandomForest", new pywrap::RandomForest()}
}; };
map<std::string, std::string> versions = { map<std::string, std::string> versions = {
{"ODTE", "1.0.0"}, {"ODTE", "1.0.0-1"},
{"STree", "1.3.2"}, {"STree", "1.4.0"},
{"SVC", "1.5.1"}, {"SVC", "1.5.2"},
{"RandomForest", "1.5.1"} {"RandomForest", "1.5.2"}
}; };
auto clf = models[name]; auto clf = models[name];
@@ -58,6 +60,15 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
REQUIRE(clf->getVersion() == versions[name]); REQUIRE(clf->getVersion() == versions[name]);
} }
} }
// Fits the AdaBoost wrapper on the iris dataset and checks the resulting score.
TEST_CASE("AdaBoostClassifier", "[PyClassifiers]")
{
    auto raw = RawDatasets("iris", false);
    pywrap::AdaBoostPy classifier;
    classifier.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);

    // NOTE(review): the hyperparameters are applied after fit(), mirroring the original
    // statement order; confirm whether they are meant to influence the scored model.
    nlohmann::json hyperparameters = nlohmann::json::parse("{ \"n_estimators\": 100 }");
    classifier.setHyperparameters(hyperparameters);

    const float accuracy = classifier.score(raw.Xt, raw.yt);
    REQUIRE(accuracy == Catch::Approx(0.9599999f).epsilon(raw.epsilon));
}
TEST_CASE("Classifiers features", "[PyClassifiers]") TEST_CASE("Classifiers features", "[PyClassifiers]")
{ {
auto raw = RawDatasets("iris", false); auto raw = RawDatasets("iris", false);
@@ -116,33 +127,30 @@ TEST_CASE("XGBoost", "[PyClassifiers]")
clf.setHyperparameters(hyperparameters); clf.setHyperparameters(hyperparameters);
auto score = clf.score(raw.Xt, raw.yt); auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon)); REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
std::cout << "XGBoost score: " << score << std::endl;
} }
// TEST_CASE("XGBoost predict proba", "[PyClassifiers]") TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
// {
// auto raw = RawDatasets("iris", true);
// auto clf = pywrap::XGBoost();
// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// // nlohmann::json hyperparameters = { "n_jobs=1" };
// // clf.setHyperparameters(hyperparameters);
// auto predict = clf.predict(raw.Xt);
// for (int row = 0; row < predict.size(0); row++) {
// auto sum = 0.0;
// for (int col = 0; col < predict.size(1); col++) {
// std::cout << std::setw(12) << std::setprecision(10) << predict[row][col].item<double>() << " ";
// sum += predict[row][col].item<int>();
// }
// std::cout << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
// }
// std::cout << predict << std::endl;
// }
TEST_CASE("PBC4cip", "[PyClassifiers]")
{ {
auto raw = RawDatasets("iris", true); auto raw = RawDatasets("iris", true);
auto clf = pywrap::PBC4cip(); auto clf = pywrap::XGBoost();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
nlohmann::json hyperparameters = { }; // nlohmann::json hyperparameters = { "n_jobs=1" };
clf.setHyperparameters(hyperparameters); // clf.setHyperparameters(hyperparameters);
auto score = clf.score(raw.Xt, raw.yt); auto predict_proba = clf.predict_proba(raw.Xt);
REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon)); auto predict = clf.predict(raw.Xt);
// std::cout << "Predict proba: " << predict_proba << std::endl;
// std::cout << "Predict proba size: " << predict_proba.sizes() << std::endl;
// assert(predict.size(0) == predict_proba.size(0));
for (int row = 0; row < predict_proba.size(0); row++) {
// auto sum = 0.0;
// std::cout << "Row " << std::setw(3) << row << ": ";
// for (int col = 0; col < predict_proba.size(1); col++) {
// std::cout << std::setw(9) << std::fixed << std::setprecision(7) << predict_proba[row][col].item<double>() << " ";
// sum += predict_proba[row][col].item<double>();
// }
// std::cout << " -> " << std::setw(9) << std::fixed << std::setprecision(7) << sum << " -> " << torch::argmax(predict_proba[row]).item<int>() << " = " << predict[row].item<int>() << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
}
} }

View File

@@ -5,8 +5,8 @@
#include <vector> #include <vector>
#include <map> #include <map>
#include <tuple> #include <tuple>
#include "ArffFiles.h" #include "ArffFiles/ArffFiles.hpp"
#include "CPPFImdlp.h" #include "fimdlp/CPPFImdlp.h"
bool file_exists(const std::string& name); bool file_exists(const std::string& name);
std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features); std::pair<vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<string> features);

Submodule tests/lib/Files deleted from a4329f5f9d

Submodule tests/lib/catch2 deleted from 506276c592

Submodule tests/lib/mdlp deleted from 7d62d6af4a

21
vcpkg-configuration.json Normal file
View File

@@ -0,0 +1,21 @@
{
"default-registry": {
"kind": "git",
"baseline": "760bfd0c8d7c89ec640aec4df89418b7c2745605",
"repository": "https://github.com/microsoft/vcpkg"
},
"registries": [
{
"kind": "git",
"repository": "https://github.com/rmontanana/vcpkg-stash",
"baseline": "1ea69243c0e8b0de77c9d1dd6e1d7593ae7f3627",
"packages": [
"arff-files",
"bayesnet",
"fimdlp",
"folding",
"libtorch-bin"
]
}
]
}

47
vcpkg.json Normal file
View File

@@ -0,0 +1,47 @@
{
"name": "platform",
"version-string": "1.1.0",
"dependencies": [
"arff-files",
"nlohmann-json",
"fimdlp",
"libtorch-bin",
"folding",
"argparse",
"catch2"
],
"overrides": [
{
"name": "arff-files",
"version": "1.1.0"
},
{
"name": "fimdlp",
"version": "2.0.1"
},
{
"name": "libtorch-bin",
"version": "2.7.0"
},
{
"name": "bayesnet",
"version": "1.1.1"
},
{
"name": "folding",
"version": "1.1.1"
},
{
"name": "argparse",
"version": "3.2"
},
{
"name": "catch2",
"version": "3.8.1"
},
{
"name": "nlohmann-json",
"version": "3.11.3"
}
]
}