refactor importClass and valgrind

This commit is contained in:
2023-11-12 00:02:49 +01:00
parent a3bf97e501
commit c7372b7fc7
9 changed files with 1596906 additions and 44 deletions

View File

@@ -4,7 +4,7 @@ SHELL := /bin/bash
f_release = build_release f_release = build_release
f_debug = build_debug f_debug = build_debug
app_targets = example app_targets = example test
test_targets = unit_tests_pywrap test_targets = unit_tests_pywrap
n_procs = -j 16 n_procs = -j 16

1061282
callgrind.out.18795 Normal file

File diff suppressed because one or more lines are too long

513167
callgrind.out.29922 Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -6,3 +6,6 @@ include_directories(${TORCH_INCLUDE_DIRS})
add_executable(example example.cc) add_executable(example example.cc)
target_link_libraries(example PyWrap) target_link_libraries(example PyWrap)
add_executable(test test.cpp)
target_link_libraries(test ${Python3_LIBRARIES})

View File

@@ -1,10 +1,10 @@
#include <torch/torch.h> #include <torch/torch.h>
#include "ArffFiles.h" #include "ArffFiles.h"
#include <random>
#include <algorithm>
#include <vector> #include <vector>
#include <string> #include <string>
#include <iostream> #include <iostream>
#include <map>
#include <tuple>
#include "STree.h" #include "STree.h"
#include "SVC.h" #include "SVC.h"
#include "RandomForest.h" #include "RandomForest.h"
@@ -36,21 +36,42 @@ tuple<Tensor, Tensor> loadDataset(const string& name, bool class_last)
return { Xd, torch::tensor(y, torch::kInt32) }; return { Xd, torch::tensor(y, torch::kInt32) };
} }
// Builds a reproducible shuffled train/test partition of the indices [0, size).
//
// The indices 0..size-1 are shuffled with a std::default_random_engine seeded
// with a fixed value (17). The first static_cast<int>(size * 0.7f) shuffled
// indices become the training set and the remaining ones the test set.
//
// @param size Number of samples to partition; expected to be non-negative
//             (a negative value makes the std::vector constructor fail).
// @return {train_indices, test_indices}, both 1-D torch::kInt32 tensors.
//
// NOTE(review): std::iota is declared in <numeric>; confirm the translation
// unit includes it rather than relying on a transitive include.
pair<torch::Tensor, torch::Tensor> get_train_test_indices(int size)
{
    const int seed = 17;
    const float train_size_p = 0.7;
    const int train_size = static_cast<int>(size * train_size_p);

    std::vector<int> indices(size);
    std::iota(indices.begin(), indices.end(), 0);
    std::shuffle(indices.begin(), indices.end(), std::default_random_engine(seed));

    // Build each tensor in a single call from its slice of the shuffled
    // vector instead of assigning element by element through tensor indexing.
    const auto split = indices.begin() + train_size;
    auto train_indices = torch::tensor(std::vector<int>(indices.begin(), split), torch::kInt32);
    auto test_indices = torch::tensor(std::vector<int>(split, indices.end()), torch::kInt32);
    return { train_indices, test_indices };
}
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
using json = nlohmann::json; using json = nlohmann::json;
cout << "* Begin." << endl; cout << "* Begin." << endl;
{ {
using namespace torch::indexing; using namespace torch::indexing;
auto datasetName = "iris"; auto datasetName = "wine";
bool class_last = true; bool class_last = true;
auto [X, y] = loadDataset(datasetName, class_last); auto [X, y] = loadDataset(datasetName, class_last);
auto m = y.size(0); // Split train/test
int train_split = m * .7; auto [train_indices, test_indices] = get_train_test_indices(X.size(1));
auto Xtrain = X.index({ "...", Slice(0, train_split) }); auto Xtrain = X.index({ "...", train_indices });
auto ytrain = y.index({ Slice(0, train_split) }); auto ytrain = y.index({ train_indices });
auto Xtest = X.index({ "...", Slice(train_split, m) }); auto Xtest = X.index({ "...", test_indices });
auto ytest = y.index({ Slice(train_split, m) }); auto ytest = y.index({ test_indices });
cout << "Dataset: " << datasetName << endl; cout << "Dataset: " << datasetName << endl;
cout << "X: " << X.sizes() << endl; cout << "X: " << X.sizes() << endl;
cout << "y: " << y.sizes() << endl; cout << "y: " << y.sizes() << endl;
@@ -62,36 +83,36 @@ int main(int argc, char* argv[])
// STree // STree
// //
auto clf = pywrap::STree(); auto clf = pywrap::STree();
clf.fit(X, y); clf.fit(Xtrain, ytest);
double clf_score = clf.score(X, y); double clf_score = clf.score(Xtest, ytest);
auto stree = pywrap::STree(); // auto stree = pywrap::STree();
auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}"); // auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}");
stree.setHyperparameters(hyperparameters); // stree.setHyperparameters(hyperparameters);
cout << "STree Version: " << clf.version() << endl; // cout << "STree Version: " << clf.version() << endl;
auto prediction = clf.predict(X); // auto prediction = clf.predict(X);
cout << "Prediction: " << endl << "{"; // cout << "Prediction: " << endl << "{";
for (int i = 0; i < prediction.size(0); ++i) { // for (int i = 0; i < prediction.size(0); ++i) {
cout << prediction[i].item<int>() << ", "; // cout << prediction[i].item<int>() << ", ";
} // }
cout << "}" << endl; // cout << "}" << endl;
// //
// SVC // SVC
// //
auto svc = pywrap::SVC(); // auto svc = pywrap::SVC();
cout << "SVC with hyperparameters" << endl; // cout << "SVC with hyperparameters" << endl;
svc.fit(X, y); // svc.fit(Xtrain, ytrain);
// //
// Random Forest // Random Forest
// //
cout << "Building Random Forest" << endl; // cout << "Building Random Forest" << endl;
auto rf = pywrap::RandomForest(); // auto rf = pywrap::RandomForest();
rf.fit(Xtrain, ytrain); // rf.fit(Xtrain, ytrain);
// //
// XGBoost // XGBoost
// //
cout << "Building XGBoost" << endl; // cout << "Building XGBoost" << endl;
auto xg = pywrap::XGBoost(); // auto xg = pywrap::XGBoost();
cout << "Fitting XGBoost" << endl; // cout << "Fitting XGBoost" << endl;
// xg.fit(Xtrain, ytrain); // xg.fit(Xtrain, ytrain);
// double xg_score = xg.score(Xtest, ytest); // double xg_score = xg.score(Xtest, ytest);
// //
@@ -100,10 +121,10 @@ int main(int argc, char* argv[])
cout << "Scoring dataset: " << datasetName << endl; cout << "Scoring dataset: " << datasetName << endl;
cout << "Scores:" << endl; cout << "Scores:" << endl;
cout << "STree Score ......: " << clf_score << endl; cout << "STree Score ......: " << clf_score << endl;
cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl; // cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl; // cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl; // cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl;
cout << "SVC Score ........: " << svc.score(X, y) << endl; // cout << "SVC Score ........: " << svc.score(Xtest, ytest) << endl;
// cout << "XGBoost Score ....: " << xg_score << endl; // cout << "XGBoost Score ....: " << xg_score << endl;
} }
cout << "* End." << endl; cout << "* End." << endl;

85
example/test.cpp Normal file
View File

@@ -0,0 +1,85 @@
#include <Python.h>
#include <iostream>
#include <string>
class Test {
public:
Test() = default;
~Test()
{
Py_DECREF(test);
Py_DECREF(object);
}
void create()
{
test = Py_BuildValue("s", "Hello World from a method in Embedded Python!!!");
std::cout << "test " << test->ob_refcnt << std::endl;
std::string moduleName = "sklearn.svm.SVC";
PyObject* pyModuleName = PyUnicode_FromString(moduleName.c_str());
std::cout << "py_module_name " << pyModuleName->ob_refcnt << std::endl;
object = PyImport_AddModuleObject(pyModuleName);
if (PyErr_Occurred()) {
std::cerr << "Couldn't import module " + moduleName;
Py_XDECREF(test);
Py_XDECREF(object);
Py_XDECREF(pyModuleName);
exit(1);
}
std::cout << "module " << object->ob_refcnt << std::endl;
Py_XDECREF(pyModuleName);
}
PyObject* test;
PyObject* object;
};
void print(PyObject* object, std::string name)
{
std::cout << name << " " << object->ob_refcnt << std::endl;
}
int main(int argc, char* argv[])
{
Py_Initialize();
std::string moduleName = "sklearn.svm";
std::cout << "1a" << std::endl;
PyObject* module = PyImport_ImportModule(moduleName.c_str());
if (PyErr_Occurred()) {
std::cerr << "Couldn't import module " << moduleName;
exit(1);
}
print(module, "module");
std::cout << "1b" << std::endl;
std::string className = "SVC";
PyObject* classObject = PyObject_GetAttrString(module, className.c_str());
if (PyErr_Occurred()) {
std::cerr << "Couldn't find class " << className;
exit(1);
}
print(classObject, "classObject");
std::cout << "1c" << std::endl;
PyObject* instance = PyObject_CallObject(classObject, NULL);
if (PyErr_Occurred()) {
std::cerr << "Couldn't create instance of class " << className;
exit(1);
}
print(instance, "instance");
std::cout << std::string(40, '-') << std::endl;
print(module, "module");
print(classObject, "classObject");
print(instance, "instance");
std::cout << "decref instance" << std::endl;
Py_DECREF(instance);
std::cout << "decref classobject" << std::endl;
Py_DECREF(classObject);
std::cout << "decref module" << std::endl;
Py_DECREF(module);
std::cout << std::string(40, '-') << std::endl;
print(module, "after decref module");
print(classObject, "after decref classObject");
print(instance, "after decref instance");
std::cout << "**call finalize" << std::endl;
Py_Finalize();
std::cout << "**after finalize" << std::endl;
return 0;
}

22296
res.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -37,27 +37,25 @@ namespace pywrap {
} }
void PyWrap::importClass(const clfId_t id, const std::string& moduleName, const std::string& className) void PyWrap::importClass(const clfId_t id, const std::string& moduleName, const std::string& className)
{ {
std::lock_guard<std::mutex> lock(mutex);
auto result = moduleClassMap.find(id); auto result = moduleClassMap.find(id);
if (result != moduleClassMap.end()) { if (result != moduleClassMap.end()) {
return; return;
} }
CPyObject module = PyImport_ImportModule(moduleName.c_str()); std::cout << "1a" << std::endl;
PyObject* module = PyImport_ImportModule(moduleName.c_str());
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
errorAbort("Couldn't import module " + moduleName); errorAbort("Couldn't import module " + moduleName);
} }
CPyObject classObject = PyObject_GetAttrString(module, className.c_str()); PyObject* classObject = PyObject_GetAttrString(module, className.c_str());
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
errorAbort("Couldn't find class " + className); errorAbort("Couldn't find class " + className);
} }
CPyObject instance = PyObject_CallObject(classObject, NULL); PyObject* instance = PyObject_CallObject(classObject, NULL);
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
errorAbort("Couldn't create instance of class " + className); errorAbort("Couldn't create instance of class " + className);
} }
std::lock_guard<std::mutex> lock(mutex); moduleClassMap.insert({ id, { module, classObject, instance } });
module.AddRef();
classObject.AddRef();
instance.AddRef();
moduleClassMap.insert({ id, { module.getObject(), classObject.getObject(), instance.getObject() } });
} }
void PyWrap::clean(const clfId_t id) void PyWrap::clean(const clfId_t id)
{ {

View File

@@ -1,5 +1,15 @@
#include "XGBoost.h" #include "XGBoost.h"
See https://stackoverflow.com/questions/36071672/using-xgboost-in-c
namespace pywrap { namespace pywrap {
std::string XGBoost::version() std::string XGBoost::version()
{ {