Implement hyperparameters
This commit is contained in:
@@ -1,9 +1,8 @@
|
||||
include_directories(${PyWrap_SOURCE_DIR}/lib/Files)
|
||||
include_directories(${PyWrap_SOURCE_DIR}/lib/json/include)
|
||||
include_directories(${Python3_INCLUDE_DIRS})
|
||||
include_directories(${TORCH_INCLUDE_DIRS})
|
||||
|
||||
add_executable(main main.cc STree.cc SVC.cc RandomForest.cc PyClassifier.cc PyWrap.cc)
|
||||
add_executable(example example.cpp PyWrap.cc)
|
||||
|
||||
target_link_libraries(main ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles)
|
||||
target_link_libraries(example ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles)
|
||||
|
13
src/Classifier.h
Normal file
13
src/Classifier.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#ifndef CLASSIFER_H
|
||||
#define CLASSIFER_H
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace pywrap {
|
||||
class Classifier {
|
||||
public:
|
||||
Classifier() = default;
|
||||
virtual ~Classifier() = default;
|
||||
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
|
||||
};
|
||||
} /* namespace pywrap */
|
||||
#endif /* CLASSIFER_H */
|
@@ -1,9 +1,10 @@
|
||||
#include "PyClassifier.h"
#include <algorithm>
#include <iostream>
#include <stdexcept>
|
||||
|
||||
namespace pywrap {
|
||||
namespace bp = boost::python;
|
||||
namespace np = boost::python::numpy;
|
||||
PyClassifier::PyClassifier(const std::string& module, const std::string& className) : module(module), className(className)
|
||||
PyClassifier::PyClassifier(const std::string& module, const std::string& className) : module(module), className(className), fitted(false)
|
||||
{
|
||||
pyWrap = PyWrap::GetInstance();
|
||||
pyWrap->importClass(module, className);
|
||||
@@ -36,10 +37,14 @@ namespace pywrap {
|
||||
}
|
||||
/**
 * Fit the wrapped Python estimator.
 *
 * X and y are converted with tensors2numpy, wrapped as Python objects and handed to
 * PyWrap::fit. The features, className and states arguments are not read by this function.
 * When the model has not been fitted yet and hyperparameters are stored, a notice is printed;
 * the hyperparameters themselves are not forwarded anywhere here.
 *
 * @return reference to this classifier, with fitted set to true
 */
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
    const bool hyperparametersPending = !fitted && hyperparameters.size() > 0;
    if (hyperparametersPending) {
        std::cout << "Setting hyperparameters" << std::endl;
    }
    auto [dataArray, labelArray] = tensors2numpy(X, y);
    // incref: the raw pointers outlive the temporary bp::object wrappers
    CPyObject Xp = bp::incref(bp::object(dataArray).ptr());
    CPyObject yp = bp::incref(bp::object(labelArray).ptr());
    // this->className is the member; the parameter with the same name shadows it
    pyWrap->fit(module, this->className, Xp, yp);
    fitted = true;
    return *this;
}
|
||||
torch::Tensor PyClassifier::predict(torch::Tensor& X)
|
||||
@@ -69,4 +74,19 @@ namespace pywrap {
|
||||
auto result = pyWrap->score(module, className, Xp, yp);
|
||||
return result;
|
||||
}
|
||||
/**
 * Default hyperparameter handling: the list of accepted keys is empty, so any key present
 * in the JSON makes checkHyperparameters throw. The value is stored only if the check passes.
 */
void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters)
{
    // No key is accepted by default
    checkHyperparameters(std::vector<std::string>{}, hyperparameters);
    this->hyperparameters = hyperparameters;
}
|
||||
/**
 * Verify that every key of the given JSON value appears in validKeys.
 *
 * @param validKeys        keys the caller accepts
 * @param hyperparameters  JSON value whose items are checked
 * @throws std::invalid_argument naming the first key that is not in validKeys
 */
void PyClassifier::checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters)
{
    for (const auto& item : hyperparameters.items()) {
        // Qualified std::find: do not depend on argument-dependent lookup to locate the algorithm
        const bool accepted = std::find(validKeys.begin(), validKeys.end(), item.key()) != validKeys.end();
        if (!accepted) {
            throw std::invalid_argument("Hyperparameter " + item.key() + " is not valid");
        }
    }
}
|
||||
} /* namespace pywrap */
|
@@ -2,15 +2,17 @@
|
||||
#define PYCLASSIFER_H
|
||||
#include "boost/python/detail/wrap_python.hpp"
|
||||
#include <boost/python/numpy.hpp>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <torch/torch.h>
|
||||
#include "PyWrap.h"
|
||||
#include "Classifier.h"
|
||||
|
||||
namespace pywrap {
|
||||
class PyClassifier {
|
||||
class PyClassifier : public Classifier {
|
||||
public:
|
||||
PyClassifier(const std::string& module, const std::string& className);
|
||||
virtual ~PyClassifier();
|
||||
@@ -19,11 +21,15 @@ namespace pywrap {
|
||||
double score(torch::Tensor& X, torch::Tensor& y);
|
||||
std::string version();
|
||||
std::string callMethodString(const std::string& method);
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
protected:
|
||||
void checkHyperparameters(const std::vector<std::string>& validKeys, const nlohmann::json& hyperparameters);
|
||||
nlohmann::json hyperparameters;
|
||||
private:
|
||||
PyWrap* pyWrap;
|
||||
std::string module;
|
||||
std::string className;
|
||||
bool fitted;
|
||||
};
|
||||
|
||||
} /* namespace pywrap */
|
||||
#endif /* PYCLASSIFER_H */
|
@@ -5,4 +5,11 @@ namespace pywrap {
|
||||
{
|
||||
return callMethodString("graph");
|
||||
}
|
||||
/**
 * Store the hyperparameters for the STree wrapper after checking that every key belongs to
 * the list of names accepted below; checkHyperparameters throws on an unknown key.
 */
void STree::setHyperparameters(const nlohmann::json& hyperparameters)
{
    // Keys accepted by this wrapper
    const std::vector<std::string> accepted{
        "C", "n_jobs", "kernel", "max_iter", "max_depth", "random_state", "multiclass_strategy"
    };
    checkHyperparameters(accepted, hyperparameters);
    this->hyperparameters = hyperparameters;
}
|
||||
} /* namespace pywrap */
|
@@ -1,5 +1,6 @@
|
||||
#ifndef STREE_H
|
||||
#define STREE_H
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "PyClassifier.h"
|
||||
|
||||
namespace pywrap {
|
||||
@@ -8,6 +9,7 @@ namespace pywrap {
|
||||
STree() : PyClassifier("stree", "Stree") {};
|
||||
~STree() = default;
|
||||
std::string graph();
|
||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
||||
};
|
||||
} /* namespace pywrap */
|
||||
#endif /* STREE_H */
|
257
src/example.cpp
257
src/example.cpp
@@ -1,257 +0,0 @@
|
||||
#include "boost/python/detail/wrap_python.hpp"
|
||||
#include <boost/python/numpy.hpp>
|
||||
#include <torch/torch.h>
|
||||
#include <torch/csrc/utils/tensor_numpy.h>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "ArffFiles.h"
|
||||
#include "PyHelper.hpp"
|
||||
#include "PyWrap.h"
|
||||
|
||||
|
||||
void errorAbort(const std::string& message)
|
||||
{
|
||||
std::cerr << message << std::endl;
|
||||
PyErr_Print();
|
||||
exit(1);
|
||||
}
|
||||
/**
 * Print a heading followed by the Python str() rendering of the array to stdout.
 */
void print_array(pywrap::np::ndarray& array)
{
    std::cout << "Array: " << std::endl;
    // Keep the str object in a named local so the extracted character pointer stays valid while printing
    const pywrap::p::str rendered(array);
    const char* text = pywrap::p::extract<char const*>(rendered);
    std::cout << text << std::endl;
}
|
||||
// np::ndarray to_numpy_matrix(torch::Tensor& input_data, np::dtype numpy_dtype)
|
||||
// {
|
||||
// p::tuple shape = p::make_tuple(input_data.size(0), input_data.size(1));
|
||||
// auto tensor_dtype = input_data.dtype();
|
||||
// p::tuple stride = p::make_tuple(sizeof(tensor_dtype) * input_data.size(1), sizeof(tensor_dtype));
|
||||
// auto dito = input_data.transpose(1, 0);
|
||||
// np::ndarray result = np::from_data(dito.data_ptr(), numpy_dtype, shape, stride, p::object());
|
||||
// return result;
|
||||
// }
|
||||
// np::ndarray to_numpy_vector(torch::Tensor& input_data, np::dtype numpy_dtype)
|
||||
// {
|
||||
// p::tuple shape = p::make_tuple(input_data.size(0));
|
||||
// auto tensor_dtype = input_data.dtype();
|
||||
// p::tuple stride = p::make_tuple(sizeof(tensor_dtype), sizeof(tensor_dtype));
|
||||
// np::ndarray result = np::from_data(input_data.data_ptr(), numpy_dtype, shape, stride, p::object());
|
||||
// return result;
|
||||
// }
|
||||
|
||||
/**
 * Central place for the file-system locations used by the example.
 */
class Paths {
public:
    /// Directory (relative, with trailing slash) where the .arff datasets are looked up.
    /// std::string is spelled out so the class does not rely on a using-directive from another header.
    static std::string datasets()
    {
        return "../discretizbench/datasets/";
    }
};
|
||||
|
||||
tuple<torch::Tensor, torch::Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(const string& name, bool class_last)
|
||||
{
|
||||
auto handler = ArffFiles();
|
||||
handler.load(Paths::datasets() + static_cast<string>(name) + ".arff", class_last);
|
||||
// Get Dataset X, y
|
||||
vector<vector<float>> X = handler.getX();
|
||||
vector<int> y = handler.getY();
|
||||
// Get className & Features
|
||||
auto className = handler.getClassName();
|
||||
vector<string> features;
|
||||
auto attributes = handler.getAttributes();
|
||||
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
|
||||
torch::Tensor Xd;
|
||||
auto states = map<string, vector<int>>();
|
||||
Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
|
||||
for (int i = 0; i < features.size(); ++i) {
|
||||
Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kFloat32));
|
||||
}
|
||||
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
|
||||
}
|
||||
|
||||
using namespace pywrap;
|
||||
/**
 * Wrap the memory of a 2-D tensor as a NumPy float array and return its transpose.
 *
 * The array is created over X.data_ptr() without copying and without an owner object, so it is
 * only valid while X keeps its storage alive.
 * Assumes X holds contiguous float32 data — TODO confirm against callers.
 *
 * @return array of shape (X.size(1), X.size(0)) viewing the data of X
 */
np::ndarray tensor2numpy(torch::Tensor& X)
{
    const int m = X.size(0);
    const int n = X.size(1);
    // Strides are expressed in bytes of the declared element type (float). The previous
    // sizeof(X.dtype()) * 2 measured the dtype descriptor object, not an element.
    const auto rowStride = sizeof(float) * n;
    const auto colStride = sizeof(float);
    auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin<float>(), p::make_tuple(m, n), p::make_tuple(rowStride, colStride), p::object());
    Xn = Xn.transpose();
    return Xn;
}
|
||||
pair<np::ndarray, np::ndarray> tensors2numpy(torch::Tensor& X, torch::Tensor& y)
|
||||
{
|
||||
int n = X.size(1);
|
||||
auto yn = np::from_data(y.data_ptr(), np::dtype::get_builtin<int32_t>(), p::make_tuple(n), p::make_tuple(sizeof(y.dtype()) * 2), p::object());
|
||||
return { tensor2numpy(X), yn };
|
||||
}
|
||||
pair<np::ndarray, np::ndarray> getData(const string& dataset)
|
||||
{
|
||||
auto [X, y, featuresx, classNamex, statesx] = loadDataset(dataset, true);
|
||||
auto [Xn, yn] = tensors2numpy(X, y);
|
||||
auto Xn_shapes = Xn.get_shape();
|
||||
auto yn_shapes = yn.get_shape();
|
||||
cout << "Xn_shapes: " << Xn_shapes[0] << ", " << Xn_shapes[1] << endl;
|
||||
cout << "yn_shapes: " << yn_shapes[0] << endl;
|
||||
cout << "X shapes: " << X.sizes() << endl;
|
||||
cout << "y shapes: " << y.sizes() << endl;
|
||||
assert(Xn_shapes[0] == X.sizes()[0]);
|
||||
assert(Xn_shapes[1] == X.sizes()[1]);
|
||||
assert(yn_shapes[0] == y.sizes()[0]);
|
||||
|
||||
return { Xn, yn };
|
||||
}
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
cout << "* Begin." << endl;
|
||||
{
|
||||
PyWrap* wrapper = PyWrap::GetInstance();
|
||||
string dataset = "iris";
|
||||
// Convert Tensor to numpy array
|
||||
// auto [Xn, yn] = tensors2numpy(X, y);
|
||||
// cout << "Numpy array data: " << endl;
|
||||
// print_array(Xn);
|
||||
// cout << "Numpy array labels: " << endl;
|
||||
// print_array(yn);
|
||||
// Import module
|
||||
string moduleName = "stree";
|
||||
string className = "Stree";
|
||||
// Import
|
||||
{
|
||||
cout << "--Import Phase--" << endl;
|
||||
wrapper->importClass(moduleName, className);
|
||||
cout << "--Import Phase end--" << endl;
|
||||
}
|
||||
// Version
|
||||
{
|
||||
cout << "--Version Phase--" << endl;
|
||||
auto version = wrapper->version(moduleName, className);
|
||||
cout << "Version: " << version << endl;
|
||||
cout << "--Version Phase end--" << endl;
|
||||
}
|
||||
// Fit
|
||||
{
|
||||
cout << "--Fit Phase--" << endl;
|
||||
auto [Xn, yn] = getData(dataset);
|
||||
auto Xn_shapes = Xn.get_shape();
|
||||
auto yn_shapes = yn.get_shape();
|
||||
CPyObject Xp = boost::python::incref(boost::python::object(Xn).ptr());
|
||||
CPyObject yp = boost::python::incref(boost::python::object(yn).ptr());
|
||||
//print_array(yn);
|
||||
// Call fit
|
||||
cout << "Calling fit" << endl;
|
||||
wrapper->fit(moduleName, className, Xp, yp);
|
||||
cout << "--Fit Phase end--" << endl;
|
||||
}
|
||||
// Score
|
||||
{
|
||||
cout << "--Score Phase--" << endl;
|
||||
auto [Xn, yn] = getData(dataset);
|
||||
auto Xn_shapes = Xn.get_shape();
|
||||
auto yn_shapes = yn.get_shape();
|
||||
CPyObject Xp = boost::python::incref(boost::python::object(Xn).ptr());
|
||||
CPyObject yp = boost::python::incref(boost::python::object(yn).ptr());
|
||||
//print_array(yn);
|
||||
// Call score
|
||||
cout << "Calling score" << endl;
|
||||
auto result = wrapper->score(moduleName, className, Xp, yp);
|
||||
cout << "Score: " << result << endl;
|
||||
cout << "--Score Phase end--" << endl;
|
||||
}
|
||||
// Call score
|
||||
// {
|
||||
// np::initialize();
|
||||
// cout << "--Score Phase--" << endl;
|
||||
// auto [X, y, featuresx, classNamex, statesx] = loadDataset(dataset, true);
|
||||
// auto [Xn, yn] = tensors2numpy(X, y);
|
||||
// auto Xn_shapes = Xn.get_shape();
|
||||
// auto yn_shapes = yn.get_shape();
|
||||
// cout << "Xn_shapes: " << Xn_shapes[0] << ", " << Xn_shapes[1] << endl;
|
||||
// cout << "yn_shapes: " << yn_shapes[0] << endl;
|
||||
// cout << "X shapes: " << X.sizes() << endl;
|
||||
// cout << "y shapes: " << y.sizes() << endl;
|
||||
// assert(Xn_shapes[0] == X.sizes()[0]);
|
||||
// assert(Xn_shapes[1] == X.sizes()[1]);
|
||||
// assert(yn_shapes[0] == y.sizes()[0]);
|
||||
// CPyObject Xp = Xn.ptr();
|
||||
// CPyObject yp = yn.ptr();
|
||||
// print_array(yn);
|
||||
// cout << "Calling score" << endl;
|
||||
// auto instance = wrapper->getClass(moduleName, className);
|
||||
// CPyObject result;
|
||||
// if (!(result = PyObject_CallMethod(instance, "score", "OO", Xp.getObject(), yp.getObject())))
|
||||
// errorAbort("Couldn't call method score");
|
||||
// auto score = PyFloat_AsDouble(result);
|
||||
// //auto score = wrapper->score(moduleName, className, Xp, yp);
|
||||
// cout << "Score: " << score << endl;
|
||||
// cout << "--Score Phase end--" << endl;
|
||||
// }
|
||||
// Clean module
|
||||
{
|
||||
cout << "--Clean Phase--" << endl;
|
||||
wrapper->clean(moduleName, className);
|
||||
cout << "--Clean Phase end--" << endl;
|
||||
}
|
||||
}
|
||||
cout << "* End." << endl;
|
||||
}
|
||||
// int main(int argc, char** argv)
|
||||
// {
|
||||
// auto [data_tensor, y_label, featuresx, classNamex, statesx] = loadDataset("iris", true);
|
||||
// // CPyInstance pInstance;
|
||||
// // auto wrapper = PyWrap();
|
||||
// PyWrap* wrapper = PyWrap::GetInstance();
|
||||
// // PyWrap* wrapper = PyWrap::GetInstance();
|
||||
// int m = data_tensor.size(0);
|
||||
// int n = data_tensor.size(1);
|
||||
// auto data_numpy = np::from_data(data_tensor.data_ptr(), np::dtype::get_builtin<float>(), p::make_tuple(m, n), p::make_tuple(sizeof(data_tensor.dtype()) * 2 * n, sizeof(data_tensor.dtype()) * 2), p::object());
|
||||
// data_numpy = data_numpy.transpose();
|
||||
// auto y_numpy = np::from_data(y_label.data_ptr(), np::dtype::get_builtin<int32_t>(), p::make_tuple(n), p::make_tuple(sizeof(y_label.dtype()) * 2), p::object());
|
||||
// cout << "Numpy array data: " << endl;
|
||||
// print_array(data_numpy);
|
||||
// cout << "Numpy array labels: " << endl;
|
||||
// print_array(y_numpy);
|
||||
// cout << "primero" << endl;
|
||||
// CPyObject p = data_numpy.ptr();
|
||||
// CPyObject yp = y_numpy.ptr();
|
||||
// string moduleName = "sklearn.svm";
|
||||
// string className = "SVC";
|
||||
// string method = "_repr_html_";
|
||||
// // CPyObject module = PyImport_ImportModule(moduleName.c_str());
|
||||
// // if (PyErr_Occurred()) {
|
||||
// // errorAbort("Could't import module " + moduleName);
|
||||
// // }
|
||||
// // CPyObject classObject = PyObject_GetAttrString(module, className.c_str());
|
||||
// // if (PyErr_Occurred()) {
|
||||
// // errorAbort("Couldn't find class " + className);
|
||||
// // }
|
||||
// // CPyObject instance = PyObject_CallObject(classObject, NULL);
|
||||
// // if (PyErr_Occurred()) {
|
||||
// // errorAbort("Couldn't create instance of class " + className);
|
||||
// // }
|
||||
// // wrapper.moduleClassMap.insert({ { moduleName, className }, { module, classObject, instance } });
|
||||
// wrapper->importClass(moduleName, className);
|
||||
// PyObject* instance = wrapper->getClass(moduleName, className);
|
||||
// CPyObject result;
|
||||
// if (!(result = PyObject_CallMethod(instance, method.c_str(), NULL)))
|
||||
// errorAbort("Couldn't call method " + method);
|
||||
// std::string value = PyUnicode_AsUTF8(result);
|
||||
// cout << "Version: " << value << endl;
|
||||
// cout << "Calling fit" << endl;
|
||||
// p.AddRef();
|
||||
// yp.AddRef();
|
||||
// method = "fit";
|
||||
// wrapper->fit(moduleName, className, p, yp);
|
||||
// // PyObject* instance2 = wrapper->getClass(moduleName, className);
|
||||
// // if (!(result = PyObject_CallMethodObjArgs(instance2, PyUnicode_FromString(method.c_str()), p.getObject(), yp.getObject(), NULL)))
|
||||
// // errorAbort("Couldn't call method fit");
|
||||
// // method = "fit";
|
||||
// // if (!(result = PyObject_CallMethodObjArgs(instance, PyUnicode_FromString(method.c_str()), p.getObject(), yp.getObject(), NULL)))
|
||||
// // errorAbort("Couldn't call method fit");
|
||||
// cout << "Calling score" << endl;
|
||||
// // method = "score";
|
||||
// // if (!(result = PyObject_CallMethodObjArgs(instance, PyUnicode_FromString(method.c_str()), p.getObject(), yp.getObject(), NULL)))
|
||||
// // errorAbort("Couldn't call method score");
|
||||
// // float score = PyFloat_AsDouble(result);
|
||||
// auto score = wrapper->score(moduleName, className, p, yp);
|
||||
// cout << "Score: " << score << endl;
|
||||
// wrapper->clean(moduleName, className);
|
||||
// return 0;
|
||||
// }
|
@@ -52,6 +52,8 @@ int main(int argc, char* argv[])
|
||||
cout << "X: " << X.sizes() << endl;
|
||||
cout << "y: " << y.sizes() << endl;
|
||||
auto clf = pywrap::STree();
|
||||
auto hyperparameters = nlohmann::json({ "max_depth": 3, "C" : 0.7 });
|
||||
clf.setHyperparameters(hyperparameters);
|
||||
cout << "STree Version: " << clf.version() << endl;
|
||||
auto svc = pywrap::SVC();
|
||||
svc.fit(X, y, features, className, states);
|
||||
|
Reference in New Issue
Block a user