From bec04bc3a6a4ae91546d67130fa53d59dcf1736c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?=
 <rmontanana@gmail.com>
Date: Fri, 3 Nov 2023 22:00:46 +0100
Subject: [PATCH] Passing numpy working partially

---
 CMakeLists.txt      |   1 +
 Makefile            |   2 +-
 src/CMakeLists.txt  |   5 +-
 src/PyClassifier.cc |  56 ++++++++++++---
 src/PyHelper.hpp    |  94 ++++++++++++++++++++++++++
 src/PyWrap.cc       |  38 ++++++-----
 src/example.cpp     | 161 ++++++++++++++++++++++++++++++--------------
 src/main.cc         |   6 +-
 8 files changed, 283 insertions(+), 80 deletions(-)
 create mode 100644 src/PyHelper.hpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e16f89..383b926 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,6 +6,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 find_package(Python3 3.11...3.11.9 COMPONENTS Interpreter Development REQUIRED)
 find_package(Torch REQUIRED)
+find_package(Boost REQUIRED COMPONENTS python3 numpy3)
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 
diff --git a/Makefile b/Makefile
index 4e4c080..76c9b31 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ SHELL := /bin/bash
 
 f_release = build_release
 f_debug = build_debug
-app_targets = main
+app_targets = main example
 test_targets = unit_tests_bayesnet unit_tests_platform
 n_procs = -j 16
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c0588f4..eb23469 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,8 +1,9 @@
 include_directories(${PyWrap_SOURCE_DIR}/lib/Files)
 include_directories(${Python3_INCLUDE_DIRS})
+include_directories(${TORCH_INCLUDE_DIRS})
 
 add_executable(main main.cc STree.cc SVC.cc PyClassifier.cc PyWrap.cc)
 add_executable(example example.cpp)
 
-target_link_libraries(main ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} ArffFiles)
-target_link_libraries(example "${TORCH_LIBRARIES}" ArffFiles)
+target_link_libraries(main ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles)
+target_link_libraries(example ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" Boost::boost Boost::python Boost::numpy ArffFiles)
diff --git a/src/PyClassifier.cc b/src/PyClassifier.cc
index cef9bdb..831c780 100644
--- a/src/PyClassifier.cc
+++ b/src/PyClassifier.cc
@@ -1,10 +1,13 @@
 #include "PyClassifier.h"
+#include <boost/python/numpy.hpp>
 #include <torch/csrc/autograd/python_variable.h>
 #include <torch/csrc/utils/tensor_numpy.h>
+//#include "tensorflow/python/lib/core/py_func.h"
 #include <iostream>
 
 namespace pywrap {
-
+    namespace p = boost::python;
+    namespace np = boost::python::numpy;
     PyClassifier::PyClassifier(const std::string& module, const std::string& className) : module(module), className(className)
     {
         pyWrap = PyWrap::GetInstance();
@@ -15,11 +18,23 @@ namespace pywrap {
     {
         pyWrap->clean(module, className);
     }
-    PyObject* PyClassifier::toPyObject(torch::Tensor& tensor)
+    PyObject* PyClassifier::toPyObject(torch::Tensor& data_tensor)
     {
-        return torch::utils::tensor_to_numpy(tensor);
-        //return THPVariable_Wrap(tensor);
+
+        // return torch::utils::tensor_to_numpy(data_tensor);
+        return THPVariable_Wrap(data_tensor);
+        //auto data_numpy = np::from_data(data_tensor.data_ptr(), np::dtype::get_builtin<float>(), p::make_tuple(m, n), p::make_tuple(sizeof(data_tensor.dtype()) * 2 * n, sizeof(data_tensor.dtype()) * 2), p::object());
+        // PyObject* numpyObject = data_numpy.ptr();
+
+        // return numpyObject;
     }
+    // PyObject* PyClassifier::toPyObjecty(torch::Tensor& data_tensor)
+    // {
+    //     //return THPVariable_Wrap(tensor);
+    //     auto y_numpy = np::from_data(data_tensor.data_ptr(), np::dtype::get_builtin<int32_t>(), p::make_tuple(m), p::make_tuple(sizeof(data_tensor.dtype()) * 2), p::object());
+    //     PyObject* numpyObject = y_numpy.ptr();
+
+    // }
     std::string PyClassifier::version()
     {
         return pyWrap->version(module, className);
@@ -29,16 +44,28 @@ namespace pywrap {
     {
         return pyWrap->callMethodString(module, className, method);
     }
+    void print_array(np::ndarray& array) // Debug helper: writes "Array: " and then the Python str() rendering of `array` to std::cout.
+    {
+        std::cout << "Array: " << std::endl;
+        std::cout << p::extract<char const*>(p::str(array)) << std::endl; // the p::str temporary stays alive until the end of this statement
+    }
     PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
     {
         std::cout << "Converting X to PyObject" << std::endl;
         std::cout << "X.defined() = " << X.defined() << std::endl;
         //std::cout << "X.pyobj() = " << X.pyobj() << std::endl;
-        PyObject* Xp = toPyObject(X);
+        //PyObject* Xp = torch::utils::tensor_to_numpy(X);
+        auto XX = X.transpose(0, 1);
+        int m = XX.size(0);
+        int n = XX.size(1);
+        auto data_numpy = np::from_data(XX.data_ptr(), np::dtype::get_builtin<float>(), p::make_tuple(m, n), p::make_tuple(sizeof(XX.dtype()) * 2 * n, sizeof(XX.dtype()) * 2), p::object());
+        print_array(data_numpy);
+        PyObject* Xp = data_numpy.ptr();
         std::cout << "Converting y to PyObject" << std::endl;
-        PyObject* yp = toPyObject(y);
+        auto y_numpy = np::from_data(y.data_ptr(), np::dtype::get_builtin<int32_t>(), p::make_tuple(m), p::make_tuple(sizeof(y.dtype()) * 2), p::object());
+        PyObject* yp = y_numpy.ptr();
         std::cout << "Calling fit" << std::endl;
-        pyWrap->fit(module, className, Xp, yp);
+        pyWrap->fit(module, this->className, Xp, yp);
         Py_DECREF(Xp);
         Py_DECREF(yp);
         return *this;
@@ -54,8 +81,19 @@ namespace pywrap {
     }
     double PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
     {
-        PyObject* Xp = toPyObject(X);
-        PyObject* yp = toPyObject(y);
+        std::cout << "Converting X to PyObject" << std::endl;
+        std::cout << "X.defined() = " << X.defined() << std::endl;
+        //std::cout << "X.pyobj() = " << X.pyobj() << std::endl;
+        //PyObject* Xp = torch::utils::tensor_to_numpy(X);
+        auto XX = X.transpose(0, 1);
+        int m = XX.size(0);
+        int n = XX.size(1);
+        auto data_numpy = np::from_data(XX.data_ptr(), np::dtype::get_builtin<float>(), p::make_tuple(m, n), p::make_tuple(sizeof(XX.dtype()) * 2 * n, sizeof(XX.dtype()) * 2), p::object());
+        print_array(data_numpy);
+        PyObject* Xp = data_numpy.ptr();
+        std::cout << "Converting y to PyObject" << std::endl;
+        auto y_numpy = np::from_data(y.data_ptr(), np::dtype::get_builtin<int32_t>(), p::make_tuple(m), p::make_tuple(sizeof(y.dtype()) * 2), p::object());
+        PyObject* yp = y_numpy.ptr();
         auto result = pyWrap->score(module, className, Xp, yp);
         Py_DECREF(Xp);
         Py_DECREF(yp);
diff --git a/src/PyHelper.hpp b/src/PyHelper.hpp
new file mode 100644
index 0000000..64fc150
--- /dev/null
+++ b/src/PyHelper.hpp
@@ -0,0 +1,94 @@
+#ifndef PYHELPER_HPP
+#define PYHELPER_HPP
+#pragma once
+
+#include <Python.h>
+
+// Scoped owner of the embedded Python interpreter: the constructor calls
+// Py_Initialize() and the destructor calls Py_Finalize().
+class CPyInstance {
+public:
+    CPyInstance() { Py_Initialize(); }
+    ~CPyInstance() { Py_Finalize(); }
+
+    // Not copyable (and therefore not movable): a duplicate going out of scope
+    // would finalize the interpreter while the original is still in use.
+    CPyInstance(const CPyInstance&) = delete;
+    CPyInstance& operator=(const CPyInstance&) = delete;
+};
+
+
+// Owning handle for a PyObject*: the destructor drops the one reference the
+// handle holds. A null handle is valid and owns nothing.
+class CPyObject {
+private:
+    PyObject* p;
+public:
+    CPyObject() : p(nullptr) {}
+
+    // Adopts the reference carried by _p without incrementing it. Left
+    // implicit so `CPyObject o = PyFoo_New();` keeps compiling.
+    CPyObject(PyObject* _p) : p(_p) {}
+
+    // A copy takes its own reference. The compiler-generated copy shared a
+    // single reference between two handles, so it was decremented twice.
+    CPyObject(const CPyObject& other) : p(other.p) { Py_XINCREF(p); }
+
+    // Moving transfers the reference and leaves the source empty.
+    CPyObject(CPyObject&& other) noexcept : p(other.p) { other.p = nullptr; }
+
+    // Copy and move assignment: `other` already owns its reference, so the
+    // pointers are swapped and the old one is released by other's destructor.
+    CPyObject& operator = (CPyObject other) noexcept
+    {
+        PyObject* previous = p;
+        p = other.p;
+        other.p = previous;
+        return *this;
+    }
+
+    ~CPyObject() { Release(); }
+
+    PyObject* getObject() const { return p; }
+
+    // Raw setter kept as-is: stores _p WITHOUT releasing the reference held
+    // before; call Release() first when the handle is not empty.
+    PyObject* setObject(PyObject* _p) { return (p = _p); }
+
+    // Takes an extra reference on behalf of the caller; returns the raw pointer.
+    PyObject* AddRef()
+    {
+        Py_XINCREF(p);
+        return p;
+    }
+
+    // Drops the held reference, if any; the member is cleared before the decrement.
+    void Release()
+    {
+        PyObject* released = p;
+        p = nullptr;
+        Py_XDECREF(released);
+    }
+
+    PyObject* operator ->() const { return p; }
+
+    bool is() const { return p != nullptr; }
+
+    operator PyObject* () const { return p; }
+
+    // Adopts pp. The reference held before is released first (it used to leak);
+    // assigning the pointer already held is a no-op.
+    PyObject* operator = (PyObject* pp)
+    {
+        if (pp != p) {
+            Release();
+            p = pp;
+        }
+        return p;
+    }
+
+    operator bool() const { return p != nullptr; }
+};
+
+
+#endif
\ No newline at end of file
diff --git a/src/PyWrap.cc b/src/PyWrap.cc
index 86ec928..43082a3 100644
--- a/src/PyWrap.cc
+++ b/src/PyWrap.cc
@@ -4,8 +4,11 @@
 #include <iostream>
 #include <string>
 #include <map>
+#include <boost/python/numpy.hpp>
+#include "PyHelper.hpp"
 
 namespace pywrap {
+    namespace np = boost::python::numpy;
     PyWrap* PyWrap::wrapper = nullptr;
     std::mutex PyWrap::mutex;
 
@@ -26,7 +29,17 @@ namespace pywrap {
         if (PyStatus_Exception(status)) {
             throw std::runtime_error("Error initializing Python");
         }
+        np::initialize();
+    }
 
+    PyWrap::~PyWrap()
+    {
+        for (const auto& item : moduleClassMap) {
+            Py_DECREF(std::get<0>(item.second));
+            Py_DECREF(std::get<1>(item.second));
+            Py_DECREF(std::get<2>(item.second));
+        }
+        Py_Finalize();
     }
 
     void PyWrap::importClass(const std::string& moduleName, const std::string& className)
@@ -68,18 +81,10 @@ namespace pywrap {
         std::cout << "Limpieza terminada" << std::endl;
     }
 
-    PyWrap::~PyWrap()
-    {
-        for (const auto& item : moduleClassMap) {
-            Py_DECREF(std::get<0>(item.second));
-            Py_DECREF(std::get<1>(item.second));
-            Py_DECREF(std::get<2>(item.second));
-        }
-        Py_Finalize();
-    }
+
     void PyWrap::errorAbort(const std::string& message)
     {
-        std::cerr << message << std::endl;
+        std::cout << message << std::endl;
         PyErr_Print();
         exit(1);
     }
@@ -115,9 +120,8 @@ namespace pywrap {
         std::cout << "Llamando método fit" << std::endl;
         PyObject* instance = getClass(moduleName, className);
         PyObject* result;
-        const char method[] = "fit";
-
-        if (!(result = PyObject_CallMethodObjArgs(instance, PyBytes_FromString(method), X, y, NULL)))
+        std::string method = "fit";
+        if (!(result = PyObject_CallMethodObjArgs(instance, PyUnicode_FromString(method.c_str()), X, y, NULL)))
             errorAbort("Couldn't call method fit");
         Py_DECREF(result);
     }
@@ -126,8 +130,8 @@ namespace pywrap {
         std::cout << "Llamando método predict" << std::endl;
         PyObject* instance = getClass(moduleName, className);
         PyObject* result;
-        const char method[] = "predict";
-        if (!(result = PyObject_CallMethodObjArgs(instance, PyBytes_FromString(method), X, NULL)))
+        std::string method = "predict";
+        if (!(result = PyObject_CallMethodObjArgs(instance, PyUnicode_FromString(method.c_str()), X, NULL)))
             errorAbort("Couldn't call method predict");
         return result; // The caller has to decref the result
     }
@@ -136,8 +140,8 @@ namespace pywrap {
         std::cout << "Llamando método score" << std::endl;
         PyObject* instance = getClass(moduleName, className);
         PyObject* result;
-        const char method[] = "score";
-        if (!(result = PyObject_CallMethodObjArgs(instance, PyBytes_FromString(method), X, y, NULL)))
+        std::string method = "score";
+        if (!(result = PyObject_CallMethodObjArgs(instance, PyUnicode_FromString(method.c_str()), X, y, NULL)))
             errorAbort("Couldn't call method score");
         return PyFloat_AsDouble(result);
     }
diff --git a/src/example.cpp b/src/example.cpp
index eb4803f..c39d233 100644
--- a/src/example.cpp
+++ b/src/example.cpp
@@ -1,60 +1,123 @@
+#include <boost/python/numpy.hpp>
+#include <string>
+#include <iostream>
 #include <torch/torch.h>
-#include "ArffFiles.h"
-#include<string>
-#include<iostream>
 
+namespace p = boost::python;
+namespace np = boost::python::numpy;
 using namespace std;
-using namespace torch;
-class Test {
-public:
-    Test(const string& c) : c(c) {};
-    ~Test() { std::cout << "Destructor" << std::endl; };
 
-    template<typename T>
-    T callMethod(const T& parameter)
-    {
-        std::cout << "Llamando a metodo" << std::endl;
-        return parameter;
-    }
-private:
-    string c;
-};
-
-tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(const string& name, bool class_last)
+void errorAbort(const std::string& message)
 {
-    auto handler = ArffFiles();
-    handler.load(static_cast<string>(name) + ".arff", class_last);
-    // Get Dataset X, y
-    vector<vector<float>> X = handler.getX();
-    vector<int> y = handler.getY();
-    // // Get className & Features
-    auto className = handler.getClassName();
-    vector<string> features;
-    auto attributes = handler.getAttributes();
-    transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
-    torch::Tensor Xd;
-    auto states = map<string, vector<int>>();
-    auto yt = torch::tensor(y, torch::kInt32);
-    Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
-    for (int i = 0; i < features.size(); ++i) {
-        Xd.index_put_({ i, "..." }, torch::tensor(X[i], torch::kFloat32));
-    }
-    return make_tuple(Xd, yt, features, className, states);
+    std::cerr << message << std::endl;
+    PyErr_Print();
+    exit(1);
 }
 
-int main()
+void print_array(np::ndarray& array)
 {
-    Test t("hola");
-    cout << t.callMethod<string>("hola") << endl;
-    cout << t.callMethod<int>(1) << endl;
-    cout << t.callMethod<double>(7.3) << endl;
-    vector<vector<float>> X;
-    vector<int> y = { 1, 2, 3 };
-    X.push_back({ 1.1, 2.2, 3.3 });
-    vector<float> v = { 1.1, 2.2, 3.3 };
-    torch::Tensor matrix = torch::tensor(X[0], torch::kFloat32);
-    cout << "X:" << matrix << endl;
-    cout << "y:" << torch::tensor(y, torch::kInt32) << endl;
+    std::cout << "Array: " << std::endl;
+    std::cout << p::extract<char const*>(p::str(array)) << std::endl;
+}
+
+np::ndarray to_numpy_matrix(torch::Tensor& input_data, np::dtype numpy_dtype)
+{
+    // Zero-copy 2-D view of input_data. Byte strides are taken from the tensor itself; sizeof(input_data.dtype())
+    // is the size of the dtype descriptor, not of an element. numpy_dtype is assumed to match the tensor's dtype.
+    const int64_t item_bytes = static_cast<int64_t>(input_data.element_size());
+    p::tuple shape = p::make_tuple(input_data.size(0), input_data.size(1));
+    p::tuple stride = p::make_tuple(input_data.stride(0) * item_bytes, input_data.stride(1) * item_bytes);
+    return np::from_data(input_data.data_ptr(), numpy_dtype, shape, stride, p::object()); // no owner: tensor must outlive the array
+}
+np::ndarray to_numpy_vector(torch::Tensor& input_data, np::dtype numpy_dtype)
+{
+    // Zero-copy 1-D view of input_data. Shape and strides must both have exactly one entry for a
+    // 1-D array; the byte stride comes from the tensor. numpy_dtype is assumed to match the tensor's dtype.
+    p::tuple shape = p::make_tuple(input_data.size(0));
+    p::tuple stride = p::make_tuple(input_data.stride(0) * static_cast<int64_t>(input_data.element_size()));
+    return np::from_data(input_data.data_ptr(), numpy_dtype, shape, stride, p::object()); // no owner: tensor must outlive the array
+}
+void flat() // Scratch helper: only declares two local fixtures; the code that used them is commented out below.
+{ // NOTE(review): the disabled code refers to m and n, which are not declared inside this function.
+    double data[][4] = { {0.1, 0.2, 0.3, 0.4} , { 0.5, 0.6, 0.7, 0.8 }, { 0.9, 0.11, 0.12, 0.13 }, { 0.14, 0.15, 0.16, 0.17 }, { 0.18, 0.19, 0.21, 0.22 }, { 0.23, 0.24, 0.25, 0.26 }, { 0.27, 0.28, 0.29, 0.31 } }; // 7 rows x 4 columns
+    int labels[] = { 0, 1, 0, 1 , 0, 0, 1 }; // 7 labels, presumably one per row of data (TODO confirm)
+    // cout << "Array data: (" << m << ", " << n << ") " << endl;
+    // for (int i = 0; i < m; ++i) {
+    //     cout << "[ ";
+    //     for (int j = 0; j < n; ++j) {
+    //         cout << setw(4) << std::setprecision(2) << fixed << data[i][j] << " ";
+    //     }
+    //     cout << "]" << endl;
+    // }
+    // cout << "Array labels: " << endl;
+    // for (int i = 0; i < m; ++i) {
+    //     cout << labels[i] << " ";
+    // }
+    // cout << endl;
+    // auto data_numpy = np::from_data(data, np::dtype::get_builtin<double>(), p::make_tuple(m, n), p::make_tuple(sizeof(double) * n, sizeof(double)), p::object());
+    // auto y_numpy = np::from_data(labels, np::dtype::get_builtin<int>(), p::make_tuple(m), p::make_tuple(sizeof(int)), p::object());
+}
+
+int main(int argc, char** argv)
+{
+    // Smoke test: expose two torch tensors to Python as numpy views, then fit and score stree.Stree.
+    Py_Initialize();
+    np::initialize();
+    torch::Tensor data_tensor = torch::tensor({ {0.1, 0.2, 0.3, 0.4} , { 0.5, 0.6, 0.7, 0.8 }, { 0.9, 0.11, 0.12, 0.13 }, { 0.14, 0.15, 0.16, 0.17 }, { 0.18, 0.19, 0.21, 0.22 }, { 0.23, 0.24, 0.25, 0.26 }, { 0.27, 0.28, 0.29, 0.31 } }, torch::kFloat32);
+    torch::Tensor y_label = torch::tensor({ 17, 18, 19, 20 , 21, 22, 23 }, torch::kInt32);
+    cout << "Tensor data: (" << data_tensor.size(0) << ", " << data_tensor.size(1) << ") " << endl << data_tensor << endl;
+    cout << "Tensor data sizes: " << data_tensor.sizes() << endl;
+    cout << "Tensor labels: " << y_label << endl;
+    cout << "Tensor labels sizes: " << y_label.sizes() << endl;
+    // Shapes and byte strides come from the tensors; sizeof(tensor.dtype()) is the size of the dtype descriptor, not of an element.
+    auto data_numpy = np::from_data(data_tensor.data_ptr(), np::dtype::get_builtin<float>(), p::make_tuple(data_tensor.size(0), data_tensor.size(1)),
+        p::make_tuple(data_tensor.stride(0) * data_tensor.element_size(), data_tensor.stride(1) * data_tensor.element_size()), p::object());
+    auto y_numpy = np::from_data(y_label.data_ptr(), np::dtype::get_builtin<int32_t>(), p::make_tuple(y_label.size(0)), p::make_tuple(y_label.stride(0) * y_label.element_size()), p::object());
+    cout << "Numpy array data: " << endl;
+    print_array(data_numpy);
+    cout << "Numpy array labels: " << endl;
+    print_array(y_numpy);
+    cout << "primero" << endl;
+    // Borrowed pointers: data_numpy / y_numpy keep owning these references, so they must not be Py_DECREF'd here.
+    PyObject* xp = data_numpy.ptr();
+    PyObject* yp = y_numpy.ptr();
+    cout << "segundo" << endl;
+    string moduleName = "stree";
+    string className = "Stree";
+    string method = "version";
+    PyObject* module = PyImport_ImportModule(moduleName.c_str());
+    if (PyErr_Occurred()) {
+        errorAbort("Could't import module " + moduleName);
+    }
+    PyObject* classObject = PyObject_GetAttrString(module, className.c_str());
+    if (PyErr_Occurred()) {
+        errorAbort("Couldn't find class " + className);
+    }
+    PyObject* instance = PyObject_CallObject(classObject, NULL);
+    if (PyErr_Occurred()) {
+        errorAbort("Couldn't create instance of class " + className);
+    }
+    Py_DECREF(classObject); // the instance holds its own reference to its type
+    PyObject* result;
+    if (!(result = PyObject_CallMethod(instance, method.c_str(), NULL)))
+        errorAbort("Couldn't call method " + method);
+    const char* version_text = PyUnicode_AsUTF8(result); // NULL (with an exception set) when result is not a str
+    if (!version_text)
+        errorAbort("Method " + method + " didn't return a string");
+    std::string value = version_text; // copied before the object owning the buffer is released
+    Py_DECREF(result);
+    cout << "Version: " << value << endl;
+    if (!(result = PyObject_CallMethod(instance, "fit", "OO", xp, yp))) // format string avoids leaking a method-name object
+        errorAbort("Couldn't call method fit");
+    Py_DECREF(result); // fit's return value is not used
+    if (!(result = PyObject_CallMethod(instance, "score", "OO", xp, yp)))
+        errorAbort("Couldn't call method score");
+    double score = PyFloat_AsDouble(result);
+    cout << "Score: " << score << endl;
+    Py_DECREF(result);
+    Py_DECREF(instance);
+    Py_DECREF(module);
+    cout << "tercero" << endl;
 
     return 0;
 }
\ No newline at end of file
diff --git a/src/main.cc b/src/main.cc
index 50e2461..6595a0b 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -51,7 +51,9 @@ int main(int argc, char* argv[])
     cout << string(80, '-') << endl;
     cout << "X: " << X.sizes() << endl;
     cout << "y: " << y.sizes() << endl;
-    auto result = svc.fit(X, y, features, className, states).score(X, y);
-    cout << "SVC score " << result << endl;
+    auto result = stree.fit(X, y, features, className, states);
+    cout << "Now calling score" << endl;
+    auto result2 = stree.score(X, y);
+    cout << "SVC score " << result2 << endl;
     return 0;
 }
\ No newline at end of file