Add Pywrap sources

This commit is contained in:
Ricardo Montañana Gómez 2023-11-12 21:43:07 +01:00
parent f9258e43b9
commit f6e00530be
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
18 changed files with 642 additions and 9 deletions

View File

@ -1,9 +1,8 @@
# NOTE(review): this fragment comes from a diff view without +/- markers; the BayesNet lines are
# presumably the removed side and the PyWrap lines the added side — confirm against the repository.
# Header search paths for the BayesNet library target.
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
# BayesNet library built from the classifier sources plus Platform/Models.cc, linked with mdlp and Torch.
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")
# Header search paths for the PyWrap target: project Files and json directories, Python 3 and Torch.
include_directories(${PyWrap_SOURCE_DIR}/lib/Files)
include_directories(${PyWrap_SOURCE_DIR}/lib/json/include)
include_directories(${Python3_INCLUDE_DIRS})
include_directories(${TORCH_INCLUDE_DIRS})
# PyWrap is built as a shared library from the wrapper and the Python-backed classifier sources.
add_library(PyWrap SHARED PyWrap.cc STree.cc ODTE.cc SVC.cc RandomForest.cc PyClassifier.cc)
# Disabled variant of the link line below; it differs only by also linking xgboost::xgboost.
#target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy xgboost::xgboost ArffFiles)
# Active link line: Python 3, Torch, Boost (core, python, numpy) and ArffFiles.
target_link_libraries(PyWrap ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles)

View File

@ -0,0 +1,25 @@
#ifndef CLASSIFIER_H
#define CLASSIFIER_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <string>
#include <map>
#include <vector>
namespace pywrap {
// Abstract classifier interface: training, prediction, scoring, version reporting and
// hyperparameter handling. Every operation is pure virtual and must be supplied by a subclass.
class Classifier {
public:
Classifier() = default;
// Virtual so that objects can be destroyed through a Classifier pointer or reference.
virtual ~Classifier() = default;
// Trains on X and y; also receives the feature names, the class name and a states map.
// Returns a reference to a Classifier (implementations in this project return *this).
virtual Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
// Trains on X and y only.
virtual Classifier& fit(torch::Tensor& X, torch::Tensor& y) = 0;
// Returns a tensor of predictions for X.
virtual torch::Tensor predict(torch::Tensor& X) = 0;
// Returns a score computed from X and y as a double.
virtual double score(torch::Tensor& X, torch::Tensor& y) = 0;
// Returns a version string for the classifier.
virtual std::string version() = 0;
// Returns a scikit-learn version string.
virtual std::string sklearnVersion() = 0;
// Receives the hyperparameters as a JSON value.
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
protected:
// Validation hook: receives the list of accepted keys and the hyperparameters to check against it.
virtual void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters) = 0;
};
} /* namespace pywrap */
#endif /* CLASSIFIER_H */

15
src/PyClassifiers/ODTE.cc Normal file
View File

@ -0,0 +1,15 @@
#include "ODTE.h"
namespace pywrap {
/// Returns the string produced by calling the wrapped object's `graph` method.
std::string ODTE::graph()
{
    const std::string methodName = "graph";
    return callMethodString(methodName);
}
/// Stores the hyperparameters after passing them, together with the list of
/// keys this classifier accepts, to checkHyperparameters.
void ODTE::setHyperparameters(const nlohmann::json& hyperparameters)
{
    // Keys accepted by this classifier
    const std::vector<std::string> acceptedKeys = { "n_jobs", "n_estimators", "random_state" };
    checkHyperparameters(acceptedKeys, hyperparameters);
    // Stored only if the check above returned normally
    this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */

15
src/PyClassifiers/ODTE.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef ODTE_H
#define ODTE_H
#include "nlohmann/json.hpp"
#include "PyClassifier.h"
namespace pywrap {
/// PyClassifier bound to module "odte" and class "Odte".
class ODTE : public PyClassifier {
public:
    ODTE() : PyClassifier("odte", "Odte")
    {
    }
    ~ODTE() = default;
    /// Returns the string produced by the wrapped object's `graph` method.
    std::string graph();
    /// Validates the keys against this classifier's accepted list and stores the hyperparameters.
    void setHyperparameters(const nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* ODTE_H */

View File

@ -0,0 +1,100 @@
#include "PyClassifier.h"
#include <algorithm>
#include <stdexcept>
namespace pywrap {
namespace bp = boost::python;
namespace np = boost::python::numpy;
/// Registers a Python class with the PyWrap singleton under an id derived
/// from this object's address.
/// @param module    name of the Python module passed to importClass
/// @param className name of the class passed to importClass
PyClassifier::PyClassifier(const std::string& module, const std::string& className)
    : module(module), className(className), fitted(false)
{
    pyWrap = PyWrap::GetInstance();
    // The object's own address is the key, so several instances of the same module/class can coexist
    id = reinterpret_cast<clfId_t>(this);
    pyWrap->importClass(id, module, className);
}
/// Asks the PyWrap singleton to clean up whatever it holds under this object's id.
PyClassifier::~PyClassifier()
{
    this->pyWrap->clean(this->id);
}
/// Wraps a 2-D tensor as a NumPy array over the tensor's data pointer and
/// returns its transpose.
///
/// Byte strides are computed from the tensor's element size and its own
/// strides, rather than from the size of the dtype descriptor object.
/// The array is created with an empty owner object, so the tensor has to
/// outlive the returned array.
/// NOTE(review): the buffer is described to NumPy as `float`; this assumes X
/// holds 32-bit floats — confirm at the call sites.
/// @param X tensor with at least two dimensions
/// @return transposed NumPy array built on X's data pointer
np::ndarray tensor2numpy(torch::Tensor& X)
{
    const int64_t m = X.size(0);
    const int64_t n = X.size(1);
    // NumPy strides are expressed in bytes, torch strides in elements
    const int64_t itemSize = static_cast<int64_t>(X.element_size());
    const int64_t rowStride = X.stride(0) * itemSize;
    const int64_t colStride = X.stride(1) * itemSize;
    auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin<float>(), bp::make_tuple(m, n), bp::make_tuple(rowStride, colStride), bp::object());
    Xn = Xn.transpose();
    return Xn;
}
/// Converts a feature tensor and a label tensor to a pair of NumPy arrays.
///
/// X is converted with tensor2numpy; y is wrapped as a 1-D int32 array whose
/// length is X.size(1). The byte stride of y is its element size, rather than
/// a value derived from the size of the dtype descriptor object.
/// The label array is created with an empty owner object, so y has to outlive it.
/// NOTE(review): assumes y is a contiguous 1-D int32 tensor with X.size(1)
/// elements — confirm at the call sites.
/// @return {array for X, array for y}
std::pair<np::ndarray, np::ndarray> tensors2numpy(torch::Tensor& X, torch::Tensor& y)
{
    const int64_t n = X.size(1);
    // NumPy strides are expressed in bytes
    const int64_t itemSize = static_cast<int64_t>(y.element_size());
    auto yn = np::from_data(y.data_ptr(), np::dtype::get_builtin<int32_t>(), bp::make_tuple(n), bp::make_tuple(itemSize), bp::object());
    return { tensor2numpy(X), yn };
}
/// Returns the version string the PyWrap singleton reports for this object's id.
std::string PyClassifier::version()
{
    std::string reported = pyWrap->version(id);
    return reported;
}
/// Returns the scikit-learn version string reported by the PyWrap singleton.
std::string PyClassifier::sklearnVersion()
{
    std::string reported = pyWrap->sklearnVersion();
    return reported;
}
/// Forwards to PyWrap::callMethodString for this object's id.
/// @param method name handed to the wrapper as the method to call
/// @return the string the wrapper returns
std::string PyClassifier::callMethodString(const std::string& method)
{
    std::string answer = pyWrap->callMethodString(id, method);
    return answer;
}
/// Trains the wrapped object on X and y.
///
/// Stored hyperparameters are pushed to the wrapper only while the classifier
/// has not been fitted yet and only when there are any.
/// @return *this
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y)
{
    const bool pushHyperparameters = !fitted && hyperparameters.size() > 0;
    if (pushHyperparameters) {
        pyWrap->setHyperparameters(id, hyperparameters);
    }
    auto arrays = tensors2numpy(X, y);
    // Each CPyObject receives its own reference to the array object
    CPyObject Xp = bp::incref(bp::object(arrays.first).ptr());
    CPyObject yp = bp::incref(bp::object(arrays.second).ptr());
    pyWrap->fit(id, Xp, yp);
    fitted = true;
    return *this;
}
/// Overload taking feature names, class name and states; these three
/// arguments are not used here and the call is forwarded to fit(X, y).
/// @return the reference returned by fit(X, y)
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
    PyClassifier& self = fit(X, y);
    return self;
}
/// Runs the wrapped object's prediction on X and returns the result as an
/// int32 tensor.
///
/// Changes over the previous version: the unused local holding X.size(1) is
/// gone; the reference returned by PyWrap::predict is released on every exit
/// path (including the throw below); and the result is cast to int32 before
/// its buffer is read, instead of reinterpreting the raw buffer as `int`.
/// @param X features; converted with tensor2numpy
/// @throws std::runtime_error when a Python error is pending after the
///         result has been converted to an ndarray
torch::Tensor PyClassifier::predict(torch::Tensor& X)
{
    auto Xn = tensor2numpy(X);
    CPyObject Xp = bp::incref(bp::object(Xn).ptr());
    PyObject* incoming = pyWrap->predict(id, Xp);
    // Releases one reference to the result at scope exit; the handle below takes over another one
    CPyObject incomingGuard(incoming);
    bp::handle<> handle(incoming);
    bp::object object(handle);
    np::ndarray prediction = np::from_object(object);
    if (PyErr_Occurred()) {
        PyErr_Print();
        throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
    }
    // astype yields a fresh array of the requested dtype, so the buffer can be read as int32 safely
    np::ndarray labels = prediction.astype(np::dtype::get_builtin<int32_t>());
    const int32_t* data = reinterpret_cast<const int32_t*>(labels.get_data());
    std::vector<int> vPrediction(data, data + labels.shape(0));
    return torch::tensor(vPrediction, torch::kInt32);
}
/// Converts X and y to NumPy arrays and returns the score the wrapper
/// computes for this object's id.
double PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{
    auto arrays = tensors2numpy(X, y);
    // Each CPyObject receives its own reference to the array object
    CPyObject Xp = bp::incref(bp::object(arrays.first).ptr());
    CPyObject yp = bp::incref(bp::object(arrays.second).ptr());
    return pyWrap->score(id, Xp, yp);
}
/// Default implementation: no key is accepted, so any key present in
/// `hyperparameters` is rejected by checkHyperparameters; otherwise the
/// value is stored.
void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters)
{
    // Empty list of accepted keys
    const std::vector<std::string> acceptedKeys;
    checkHyperparameters(acceptedKeys, hyperparameters);
    this->hyperparameters = hyperparameters;
}
/// Verifies that every key in `hyperparameters` appears in `validKeys`.
/// @param validKeys       keys the caller accepts
/// @param hyperparameters JSON value whose items are checked
/// @throws std::invalid_argument naming the first key not found in validKeys
void PyClassifier::checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters)
{
    for (const auto& item : hyperparameters.items()) {
        // Qualified call: does not depend on argument-dependent lookup finding std::find
        const bool known = std::find(validKeys.begin(), validKeys.end(), item.key()) != validKeys.end();
        if (!known) {
            throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid");
        }
    }
}
} /* namespace pywrap */

View File

@ -0,0 +1,39 @@
#ifndef PYCLASSIFIER_H
#define PYCLASSIFIER_H
#include "boost/python/detail/wrap_python.hpp"
#include <boost/python/numpy.hpp>
#include <nlohmann/json.hpp>
#include <string>
#include <map>
#include <vector>
#include <utility>
#include <torch/torch.h>
#include "PyWrap.h"
#include "Classifier.h"
#include "TypeId.h"
namespace pywrap {
// Classifier implementation that delegates its work to a Python object reached through PyWrap.
class PyClassifier : public Classifier {
public:
// module and className name the Python module and class to use.
PyClassifier(const std::string& module, const std::string& className);
virtual ~PyClassifier();
// Overload that also receives feature names, class name and states.
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
// Trains on X and y and returns this object.
PyClassifier& fit(torch::Tensor& X, torch::Tensor& y) override;
// Returns a tensor with the predictions for X.
torch::Tensor predict(torch::Tensor& X) override;
// Returns the score for X and y.
double score(torch::Tensor& X, torch::Tensor& y) override;
std::string version() override;
std::string sklearnVersion() override;
// Calls the named method through the wrapper and returns its result as a string.
std::string callMethodString(const std::string& method);
void setHyperparameters(const nlohmann::json& hyperparameters) override;
protected:
// Checks the keys of hyperparameters against validKeys.
void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters) override;
// Hyperparameters stored by setHyperparameters.
nlohmann::json hyperparameters;
private:
// Pointer to the PyWrap object used for every Python call.
PyWrap* pyWrap;
// Module and class names received by the constructor.
std::string module;
std::string className;
// Identifier passed to every PyWrap call made by this object.
clfId_t id;
// Whether fit has been run on this object.
bool fitted;
};
} /* namespace pywrap */
#endif /* PYCLASSIFIER_H */

View File

@ -0,0 +1,87 @@
#ifndef PYHELPER_HPP
#define PYHELPER_HPP
#pragma once
// Code taken and adapted from
// https://www.codeproject.com/Articles/820116/Embedding-Python-program-in-a-C-Cplusplus-code
#include "boost/python/detail/wrap_python.hpp"
#include <boost/python/numpy.hpp>
#include <iostream>
namespace pywrap {
namespace p = boost::python;
namespace np = boost::python::numpy;
/// Ties the lifetime of the embedded Python interpreter to an object:
/// construction calls Py_Initialize and np::initialize, destruction calls
/// Py_Finalize.
///
/// Copying is disabled: a copy would run Py_Finalize a second time when it
/// is destroyed.
class CPyInstance {
public:
    CPyInstance()
    {
        Py_Initialize();
        np::initialize();
    }
    CPyInstance(const CPyInstance&) = delete;
    CPyInstance& operator=(const CPyInstance&) = delete;
    ~CPyInstance()
    {
        Py_Finalize();
    }
};
class CPyObject {
private:
PyObject* p;
public:
CPyObject() : p(NULL)
{
}
CPyObject(PyObject* _p) : p(_p)
{
}
~CPyObject()
{
Release();
}
PyObject* getObject()
{
return p;
}
PyObject* setObject(PyObject* _p)
{
return (p = _p);
}
PyObject* AddRef()
{
if (p) {
Py_INCREF(p);
}
return p;
}
void Release()
{
if (p) {
Py_XDECREF(p);
}
p = NULL;
}
PyObject* operator ->()
{
return p;
}
bool is()
{
return p ? true : false;
}
operator PyObject* ()
{
return p;
}
PyObject* operator = (PyObject* pp)
{
p = pp;
return p;
}
operator bool()
{
return p ? true : false;
}
};
} /* namespace pywrap */
#endif

188
src/PyClassifiers/PyWrap.cc Normal file
View File

@ -0,0 +1,188 @@
#define PY_SSIZE_T_CLEAN
#include <stdexcept>
#include "PyWrap.h"
#include <string>
#include <map>
#include <sstream>
#include <boost/python/numpy.hpp>
namespace pywrap {
namespace np = boost::python::numpy;
// Definitions of PyWrap's static members: the singleton pointer, the mutex and the interpreter holder.
PyWrap* PyWrap::wrapper = nullptr;
std::mutex PyWrap::mutex;
CPyInstance* PyWrap::pyInstance = nullptr;
// NOTE(review): namespace-scope map keyed by a pair of strings. The member functions in this file look
// entries up by clfId_t, so they appear to use a different map — confirm whether this one is still needed.
auto moduleClassMap = std::map<std::pair<std::string, std::string>, std::tuple<PyObject*, PyObject*, PyObject*>>();
/// Returns the singleton, creating it (and the interpreter holder) on first
/// use. The static mutex is held for the duration of the call.
PyWrap* PyWrap::GetInstance()
{
    std::lock_guard<std::mutex> guard(mutex);
    if (wrapper != nullptr) {
        return wrapper;
    }
    wrapper = new PyWrap();
    pyInstance = new CPyInstance();
    return wrapper;
}
/// Destroys the interpreter holder and then the singleton, resetting both
/// static pointers. Does nothing when no singleton exists.
void PyWrap::RemoveInstance()
{
    if (wrapper == nullptr) {
        return;
    }
    // delete on a null pointer is a no-op, so no separate null checks are needed
    delete pyInstance;
    pyInstance = nullptr;
    delete wrapper;
    wrapper = nullptr;
}
void PyWrap::importClass(const clfId_t id, const std::string& moduleName, const std::string& className)
{
std::lock_guard<std::mutex> lock(mutex);
auto result = moduleClassMap.find(id);
if (result != moduleClassMap.end()) {
return;
}
PyObject* module = PyImport_ImportModule(moduleName.c_str());
if (PyErr_Occurred()) {
errorAbort("Couldn't import module " + moduleName);
}
PyObject* classObject = PyObject_GetAttrString(module, className.c_str());
if (PyErr_Occurred()) {
errorAbort("Couldn't find class " + className);
}
PyObject* instance = PyObject_CallObject(classObject, NULL);
if (PyErr_Occurred()) {
errorAbort("Couldn't create instance of class " + className);
}
moduleClassMap.insert({ id, { module, classObject, instance } });
}
/// Drops the references stored under `id` and removes the entry; when the
/// map becomes empty the singleton and the interpreter holder are removed.
/// Unknown ids are ignored.
void PyWrap::clean(const clfId_t id)
{
    std::lock_guard<std::mutex> guard(mutex);
    auto entry = moduleClassMap.find(id);
    if (entry == moduleClassMap.end()) {
        return;
    }
    auto& [pyModule, pyClass, object] = entry->second;
    Py_DECREF(pyModule);
    Py_DECREF(pyClass);
    Py_DECREF(object);
    moduleClassMap.erase(entry);
    if (PyErr_Occurred()) {
        PyErr_Print();
        errorAbort("Error cleaning module ");
    }
    // Remove Python interpreter if no more modules imported left
    if (moduleClassMap.empty()) {
        RemoveInstance();
    }
}
/// Writes `message` to standard error, prints the pending Python error,
/// removes the singleton and terminates the process with exit status 1.
void PyWrap::errorAbort(const std::string& message)
{
    std::cerr << message << '\n' << std::flush;
    PyErr_Print();
    RemoveInstance();
    exit(1);
}
/// Returns the third object stored under `id` (the instance created by
/// importClass). An unknown id ends in errorAbort.
PyObject* PyWrap::getClass(const clfId_t id)
{
    const auto entry = moduleClassMap.find(id);
    const bool missing = entry == moduleClassMap.end();
    if (missing) {
        errorAbort("Module not found");
    }
    return std::get<2>(entry->second);
}
/// Calls `method` without arguments on the instance stored under `id` and
/// returns the result as a string.
///
/// A failed call ends in errorAbort. A result that cannot be obtained as
/// UTF-8 text (PyUnicode_AsUTF8 returning NULL) now also ends in errorAbort
/// instead of being used to construct a std::string.
std::string PyWrap::callMethodString(const clfId_t id, const std::string& method)
{
    PyObject* instance = getClass(id);
    PyObject* result = nullptr;
    try {
        result = PyObject_CallMethod(instance, method.c_str(), NULL);
        if (result == nullptr) {
            errorAbort("Couldn't call method " + method);
        }
    }
    catch (const std::exception& e) {
        errorAbort(e.what());
    }
    const char* text = PyUnicode_AsUTF8(result);
    if (text == nullptr) {
        Py_XDECREF(result);
        errorAbort("Method " + method + " didn't return a string");
    }
    // Copy the text before dropping the reference that keeps the buffer alive
    std::string value = text;
    Py_XDECREF(result);
    return value;
}
// Returns the scikit-learn version string.
// NOTE(review): currently a stub that always returns the literal "1.0"; the commented-out lines
// sketch a query of sklearn.__version__ that is not active.
std::string PyWrap::sklearnVersion()
{
return "1.0";
// CPyObject data = PyRun_SimpleString("import sklearn;return sklearn.__version__");
// std::string result = PyUnicode_AsUTF8(data);
// return result;
}
/// Returns the string produced by calling `version` on the instance stored under `id`.
std::string PyWrap::version(const clfId_t id)
{
    const std::string methodName = "version";
    return callMethodString(id, methodName);
}
void PyWrap::setHyperparameters(const clfId_t id, const json& hyperparameters)
{
// Set hyperparameters as attributes of the class
PyObject* pValue;
PyObject* instance = getClass(id);
for (const auto& [key, value] : hyperparameters.items()) {
std::stringstream oss;
oss << value.type_name();
if (oss.str() == "string") {
pValue = Py_BuildValue("s", value.get<std::string>().c_str());
} else {
if (value.is_number_integer()) {
pValue = Py_BuildValue("i", value.get<int>());
} else {
pValue = Py_BuildValue("f", value.get<double>());
}
}
int res = PyObject_SetAttrString(instance, key.c_str(), pValue);
if (res == -1 && PyErr_Occurred()) {
Py_XDECREF(pValue);
errorAbort("Couldn't set attribute " + key + "=" + value.dump());
}
Py_XDECREF(pValue);
}
}
/// Calls `fit(X, y)` on the instance stored under `id`. A failed call ends
/// in errorAbort; the call's result is held only until the function returns.
void PyWrap::fit(const clfId_t id, CPyObject& X, CPyObject& y)
{
    PyObject* instance = getClass(id);
    CPyObject result;
    CPyObject method = PyUnicode_FromString("fit");
    try {
        result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL);
        if (!result.is()) {
            errorAbort("Couldn't call method fit");
        }
    }
    catch (const std::exception& e) {
        errorAbort(e.what());
    }
}
/// Calls `predict(X)` on the instance stored under `id` and returns the
/// result after adding one more reference to it. A failed call ends in
/// errorAbort.
/// @return result object; the caller must free it
PyObject* PyWrap::predict(const clfId_t id, CPyObject& X)
{
    PyObject* instance = getClass(id);
    PyObject* result = nullptr;
    CPyObject method = PyUnicode_FromString("predict");
    try {
        result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), NULL);
        if (result == nullptr) {
            errorAbort("Couldn't call method predict");
        }
    }
    catch (const std::exception& e) {
        errorAbort(e.what());
    }
    Py_INCREF(result);
    return result; // Caller must free this object
}
/// Calls `score(X, y)` on the instance stored under `id` and converts the
/// result with PyFloat_AsDouble. A failed call ends in errorAbort.
double PyWrap::score(const clfId_t id, CPyObject& X, CPyObject& y)
{
    PyObject* instance = getClass(id);
    CPyObject result;
    CPyObject method = PyUnicode_FromString("score");
    try {
        result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), y.getObject(), NULL);
        if (!result.is()) {
            errorAbort("Couldn't call method score");
        }
    }
    catch (const std::exception& e) {
        errorAbort(e.what());
    }
    return PyFloat_AsDouble(result.getObject());
}
}

View File

@ -0,0 +1,47 @@
#ifndef PYWRAP_H
#define PYWRAP_H
#include "boost/python/detail/wrap_python.hpp"
#include <string>
#include <map>
#include <tuple>
#include <mutex>
#include <nlohmann/json.hpp>
#include "PyHelper.hpp"
#include "TypeId.h"
#pragma once
namespace pywrap {
/*
Singleton class to handle Python/numpy interpreter.
*/
using json = nlohmann::json;
// Access point to the embedded Python interpreter; obtained through GetInstance().
// Keeps, per classifier id, the Python objects created by importClass.
class PyWrap {
public:
PyWrap() = default;
// Copy construction and assignment are disabled.
PyWrap(PyWrap& other) = delete;
// Returns the shared instance.
static PyWrap* GetInstance();
void operator=(const PyWrap&) = delete;
~PyWrap() = default;
// Calls the named method on the object registered under id and returns its result as a string.
std::string callMethodString(const clfId_t id, const std::string& method);
std::string sklearnVersion();
std::string version(const clfId_t id);
// Sets the given hyperparameters on the object registered under id.
void setHyperparameters(const clfId_t id, const json& hyperparameters);
void fit(const clfId_t id, CPyObject& X, CPyObject& y);
// Returns a raw PyObject pointer with the prediction result.
PyObject* predict(const clfId_t id, CPyObject& X);
double score(const clfId_t id, CPyObject& X, CPyObject& y);
// Releases what is registered under id.
void clean(const clfId_t id);
// Registers moduleName/className under id.
void importClass(const clfId_t id, const std::string& moduleName, const std::string& className);
// Returns a raw PyObject pointer for the entry registered under id.
PyObject* getClass(const clfId_t id);
private:
// Only call RemoveInstance from clean method
static void RemoveInstance();
// Reports message as a fatal error.
void errorAbort(const std::string& message);
// No need to use static map here, since this class is a singleton
std::map<clfId_t, std::tuple<PyObject*, PyObject*, PyObject*>> moduleClassMap;
// Holder of the interpreter lifetime, the shared instance, and the mutex used by the member functions.
static CPyInstance* pyInstance;
static PyWrap* wrapper;
static std::mutex mutex;
};
} /* namespace pywrap */
#endif /* PYWRAP_H */

View File

@ -0,0 +1,8 @@
#include "RandomForest.h"
namespace pywrap {
/// Reports the value of sklearnVersion() as this classifier's version.
std::string RandomForest::version()
{
    std::string reported = sklearnVersion();
    return reported;
}
} /* namespace pywrap */

View File

@ -0,0 +1,13 @@
#ifndef RANDOMFOREST_H
#define RANDOMFOREST_H
#include "PyClassifier.h"
namespace pywrap {
class RandomForest : public PyClassifier {
public:
RandomForest() : PyClassifier("sklearn.ensemble", "RandomForestClassifier") {};
~RandomForest() = default;
std::string version();
};
} /* namespace pywrap */
#endif /* RANDOMFOREST_H */

View File

@ -0,0 +1,15 @@
#include "STree.h"
namespace pywrap {
/// Returns the string produced by calling the wrapped object's `graph` method.
std::string STree::graph()
{
    const std::string methodName = "graph";
    return callMethodString(methodName);
}
/// Stores the hyperparameters after passing them, together with the list of
/// keys this classifier accepts, to checkHyperparameters.
void STree::setHyperparameters(const nlohmann::json& hyperparameters)
{
    // Keys accepted by this classifier
    const std::vector<std::string> acceptedKeys = { "C", "n_jobs", "kernel", "max_iter", "max_depth", "random_state", "multiclass_strategy" };
    checkHyperparameters(acceptedKeys, hyperparameters);
    // Stored only if the check above returned normally
    this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */

15
src/PyClassifiers/STree.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef STREE_H
#define STREE_H
#include "nlohmann/json.hpp"
#include "PyClassifier.h"
namespace pywrap {
/// PyClassifier bound to module "stree" and class "Stree".
class STree : public PyClassifier {
public:
    STree() : PyClassifier("stree", "Stree")
    {
    }
    ~STree() = default;
    /// Returns the string produced by the wrapped object's `graph` method.
    std::string graph();
    /// Validates the keys against this classifier's accepted list and stores the hyperparameters.
    void setHyperparameters(const nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* STREE_H */

15
src/PyClassifiers/SVC.cc Normal file
View File

@ -0,0 +1,15 @@
#include "SVC.h"
namespace pywrap {
/// Reports the value of sklearnVersion() as this classifier's version.
std::string SVC::version()
{
    std::string reported = sklearnVersion();
    return reported;
}
/// Stores the hyperparameters after passing them, together with the list of
/// keys this classifier accepts, to checkHyperparameters.
void SVC::setHyperparameters(const nlohmann::json& hyperparameters)
{
    // Keys accepted by this classifier
    const std::vector<std::string> acceptedKeys = { "C", "gamma", "kernel", "random_state" };
    checkHyperparameters(acceptedKeys, hyperparameters);
    // Stored only if the check above returned normally
    this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */

15
src/PyClassifiers/SVC.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef SVC_H
#define SVC_H
#include "PyClassifier.h"
namespace pywrap {
class SVC : public PyClassifier {
public:
SVC() : PyClassifier("sklearn.svm", "SVC") {};
~SVC() = default;
std::string version();
void setHyperparameters(const nlohmann::json& hyperparameters) override;
};
} /* namespace pywrap */
#endif /* SVC_H */

View File

@ -0,0 +1,6 @@
#ifndef TYPEDEF_H
#define TYPEDEF_H
// <cstdint> is included here so the header compiles no matter what was included before it.
#include <cstdint>
namespace pywrap {
    /// Identifier type used to key classifier instances (64-bit unsigned).
    using clfId_t = std::uint64_t;
}
#endif /* TYPEDEF_H */

View File

@ -0,0 +1,18 @@
#include "XGBoost.h"
// See https://stackoverflow.com/questions/36071672/using-xgboost-in-c
namespace pywrap {
// Returns the result of callMethodString("1.0").
// NOTE(review): "1.0" is passed as the name of the method to call, which looks unintended — the other
// classifiers in this project either return sklearnVersion() or call "version"; confirm the intent.
std::string XGBoost::version()
{
return callMethodString("1.0");
}
} /* namespace pywrap */

View File

@ -0,0 +1,13 @@
#ifndef XGBOOST_H
#define XGBOOST_H
#include "PyClassifier.h"
namespace pywrap {
class XGBoost : public PyClassifier {
public:
XGBoost() : PyClassifier("xgboost", "XGBClassifier") {};
~XGBoost() = default;
std::string version();
};
} /* namespace pywrap */
#endif /* XGBOOST_H */