refactor importClass and valgrind

This commit is contained in:
2023-11-12 00:02:49 +01:00
parent a3bf97e501
commit c7372b7fc7
9 changed files with 1596906 additions and 44 deletions

View File

@@ -4,7 +4,7 @@ SHELL := /bin/bash
f_release = build_release
f_debug = build_debug
app_targets = example
app_targets = example test
test_targets = unit_tests_pywrap
n_procs = -j 16

1061282
callgrind.out.18795 Normal file

File diff suppressed because one or more lines are too long

513167
callgrind.out.29922 Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -5,4 +5,7 @@ include_directories(${PyWrap_SOURCE_DIR}/src)
include_directories(${TORCH_INCLUDE_DIRS})
add_executable(example example.cc)
target_link_libraries(example PyWrap)
target_link_libraries(example PyWrap)
add_executable(test test.cpp)
target_link_libraries(test ${Python3_LIBRARIES})

View File

@@ -1,10 +1,10 @@
#include <torch/torch.h>
#include "ArffFiles.h"
#include <random>
#include <algorithm>
#include <vector>
#include <string>
#include <iostream>
#include <map>
#include <tuple>
#include "STree.h"
#include "SVC.h"
#include "RandomForest.h"
@@ -36,21 +36,42 @@ tuple<Tensor, Tensor> loadDataset(const string& name, bool class_last)
return { Xd, torch::tensor(y, torch::kInt32) };
}
/**
 * Build a seeded 70/30 train/test split of the indices [0, size).
 *
 * The indices 0..size-1 are shuffled with a fixed seed (17); the first
 * floor(size * 0.7) shuffled indices form the train set and the remainder
 * the test set.
 *
 * @param size number of samples to split
 * @return {train_indices, test_indices}, two 1-D kInt32 tensors
 *
 * NOTE: std::default_random_engine and the permutation produced by
 * std::shuffle are implementation-defined, so the split is repeatable on a
 * given toolchain but may differ between standard libraries.
 */
pair<torch::Tensor, torch::Tensor> get_train_test_indices(int size)
{
    const int seed = 17;
    const float train_size_p = 0.7f;
    const int train_size = static_cast<int>(size * train_size_p);
    std::vector<int> indices(size);
    std::iota(indices.begin(), indices.end(), 0);
    shuffle(indices.begin(), indices.end(), std::default_random_engine(seed));
    // Build each tensor in one shot from its slice of the shuffled vector
    // instead of writing the elements one by one (every `tensor[i] = v`
    // is a separate tensor operation).
    const auto split = indices.begin() + train_size;
    const std::vector<int> train_part(indices.begin(), split);
    const std::vector<int> test_part(split, indices.end());
    auto train_indices = torch::tensor(train_part, torch::kInt32);
    auto test_indices = torch::tensor(test_part, torch::kInt32);
    return { train_indices, test_indices };
}
int main(int argc, char* argv[])
{
using json = nlohmann::json;
cout << "* Begin." << endl;
{
using namespace torch::indexing;
auto datasetName = "iris";
auto datasetName = "wine";
bool class_last = true;
auto [X, y] = loadDataset(datasetName, class_last);
auto m = y.size(0);
int train_split = m * .7;
auto Xtrain = X.index({ "...", Slice(0, train_split) });
auto ytrain = y.index({ Slice(0, train_split) });
auto Xtest = X.index({ "...", Slice(train_split, m) });
auto ytest = y.index({ Slice(train_split, m) });
// Split train/test
auto [train_indices, test_indices] = get_train_test_indices(X.size(1));
auto Xtrain = X.index({ "...", train_indices });
auto ytrain = y.index({ train_indices });
auto Xtest = X.index({ "...", test_indices });
auto ytest = y.index({ test_indices });
cout << "Dataset: " << datasetName << endl;
cout << "X: " << X.sizes() << endl;
cout << "y: " << y.sizes() << endl;
@@ -62,36 +83,36 @@ int main(int argc, char* argv[])
// STree
//
auto clf = pywrap::STree();
clf.fit(X, y);
double clf_score = clf.score(X, y);
auto stree = pywrap::STree();
auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}");
stree.setHyperparameters(hyperparameters);
cout << "STree Version: " << clf.version() << endl;
auto prediction = clf.predict(X);
cout << "Prediction: " << endl << "{";
for (int i = 0; i < prediction.size(0); ++i) {
cout << prediction[i].item<int>() << ", ";
}
cout << "}" << endl;
// Train on the labels that belong to Xtrain; ytest holds the held-out
// samples' labels and does not line up with the training rows.
clf.fit(Xtrain, ytrain);
double clf_score = clf.score(Xtest, ytest);
// auto stree = pywrap::STree();
// auto hyperparameters = json::parse("{\"C\": 0.7, \"max_iter\": 10000, \"kernel\": \"rbf\", \"random_state\": 17}");
// stree.setHyperparameters(hyperparameters);
// cout << "STree Version: " << clf.version() << endl;
// auto prediction = clf.predict(X);
// cout << "Prediction: " << endl << "{";
// for (int i = 0; i < prediction.size(0); ++i) {
// cout << prediction[i].item<int>() << ", ";
// }
// cout << "}" << endl;
//
// SVC
//
auto svc = pywrap::SVC();
cout << "SVC with hyperparameters" << endl;
svc.fit(X, y);
// auto svc = pywrap::SVC();
// cout << "SVC with hyperparameters" << endl;
// svc.fit(Xtrain, ytrain);
//
// Random Forest
//
cout << "Building Random Forest" << endl;
auto rf = pywrap::RandomForest();
rf.fit(Xtrain, ytrain);
// cout << "Building Random Forest" << endl;
// auto rf = pywrap::RandomForest();
// rf.fit(Xtrain, ytrain);
//
// XGBoost
//
cout << "Building XGBoost" << endl;
auto xg = pywrap::XGBoost();
cout << "Fitting XGBoost" << endl;
// cout << "Building XGBoost" << endl;
// auto xg = pywrap::XGBoost();
// cout << "Fitting XGBoost" << endl;
// xg.fit(Xtrain, ytrain);
// double xg_score = xg.score(Xtest, ytest);
//
@@ -100,10 +121,10 @@ int main(int argc, char* argv[])
cout << "Scoring dataset: " << datasetName << endl;
cout << "Scores:" << endl;
cout << "STree Score ......: " << clf_score << endl;
cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl;
cout << "SVC Score ........: " << svc.score(X, y) << endl;
// cout << "STree train/test .: " << clf.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
// cout << "STree hyper score : " << stree.fit(Xtrain, ytrain).score(Xtest, ytest) << endl;
// cout << "RandomForest Score: " << rf.score(Xtest, ytest) << endl;
// cout << "SVC Score ........: " << svc.score(Xtest, ytest) << endl;
// cout << "XGBoost Score ....: " << xg_score << endl;
}
cout << "* End." << endl;

85
example/test.cpp Normal file
View File

@@ -0,0 +1,85 @@
#include <Python.h>
#include <iostream>
#include <string>
class Test {
public:
Test() = default;
~Test()
{
Py_DECREF(test);
Py_DECREF(object);
}
void create()
{
test = Py_BuildValue("s", "Hello World from a method in Embedded Python!!!");
std::cout << "test " << test->ob_refcnt << std::endl;
std::string moduleName = "sklearn.svm.SVC";
PyObject* pyModuleName = PyUnicode_FromString(moduleName.c_str());
std::cout << "py_module_name " << pyModuleName->ob_refcnt << std::endl;
object = PyImport_AddModuleObject(pyModuleName);
if (PyErr_Occurred()) {
std::cerr << "Couldn't import module " + moduleName;
Py_XDECREF(test);
Py_XDECREF(object);
Py_XDECREF(pyModuleName);
exit(1);
}
std::cout << "module " << object->ob_refcnt << std::endl;
Py_XDECREF(pyModuleName);
}
PyObject* test;
PyObject* object;
};
void print(PyObject* object, std::string name)
{
std::cout << name << " " << object->ob_refcnt << std::endl;
}
int main(int argc, char* argv[])
{
Py_Initialize();
std::string moduleName = "sklearn.svm";
std::cout << "1a" << std::endl;
PyObject* module = PyImport_ImportModule(moduleName.c_str());
if (PyErr_Occurred()) {
std::cerr << "Couldn't import module " << moduleName;
exit(1);
}
print(module, "module");
std::cout << "1b" << std::endl;
std::string className = "SVC";
PyObject* classObject = PyObject_GetAttrString(module, className.c_str());
if (PyErr_Occurred()) {
std::cerr << "Couldn't find class " << className;
exit(1);
}
print(classObject, "classObject");
std::cout << "1c" << std::endl;
PyObject* instance = PyObject_CallObject(classObject, NULL);
if (PyErr_Occurred()) {
std::cerr << "Couldn't create instance of class " << className;
exit(1);
}
print(instance, "instance");
std::cout << std::string(40, '-') << std::endl;
print(module, "module");
print(classObject, "classObject");
print(instance, "instance");
std::cout << "decref instance" << std::endl;
Py_DECREF(instance);
std::cout << "decref classobject" << std::endl;
Py_DECREF(classObject);
std::cout << "decref module" << std::endl;
Py_DECREF(module);
std::cout << std::string(40, '-') << std::endl;
print(module, "after decref module");
print(classObject, "after decref classObject");
print(instance, "after decref instance");
std::cout << "**call finalize" << std::endl;
Py_Finalize();
std::cout << "**after finalize" << std::endl;
return 0;
}

22296
res.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -37,27 +37,25 @@ namespace pywrap {
}
void PyWrap::importClass(const clfId_t id, const std::string& moduleName, const std::string& className)
{
std::lock_guard<std::mutex> lock(mutex);
auto result = moduleClassMap.find(id);
if (result != moduleClassMap.end()) {
return;
}
CPyObject module = PyImport_ImportModule(moduleName.c_str());
std::cout << "1a" << std::endl;
PyObject* module = PyImport_ImportModule(moduleName.c_str());
if (PyErr_Occurred()) {
errorAbort("Couldn't import module " + moduleName);
}
CPyObject classObject = PyObject_GetAttrString(module, className.c_str());
PyObject* classObject = PyObject_GetAttrString(module, className.c_str());
if (PyErr_Occurred()) {
errorAbort("Couldn't find class " + className);
}
CPyObject instance = PyObject_CallObject(classObject, NULL);
PyObject* instance = PyObject_CallObject(classObject, NULL);
if (PyErr_Occurred()) {
errorAbort("Couldn't create instance of class " + className);
}
std::lock_guard<std::mutex> lock(mutex);
module.AddRef();
classObject.AddRef();
instance.AddRef();
moduleClassMap.insert({ id, { module.getObject(), classObject.getObject(), instance.getObject() } });
moduleClassMap.insert({ id, { module, classObject, instance } });
}
void PyWrap::clean(const clfId_t id)
{

View File

@@ -1,5 +1,15 @@
#include "XGBoost.h"
// See https://stackoverflow.com/questions/36071672/using-xgboost-in-c
namespace pywrap {
std::string XGBoost::version()
{