Fix memory management vulnerabilities
This commit is contained in:
@@ -15,25 +15,91 @@ namespace pywrap {
|
||||
}
|
||||
// Convert a 2-D float32 torch tensor into a numpy ndarray that shares the
// tensor's memory (zero-copy). The caller must keep X alive for as long as
// the returned ndarray is in use, since the ndarray's base object does not
// own the buffer.
// Throws std::runtime_error when X is not a 2-D float32 tensor.
np::ndarray tensor2numpy(torch::Tensor& X)
{
    // Validate before touching data_ptr(): calling size(1) or from_data on a
    // tensor of the wrong rank/dtype would read garbage or out-of-bounds.
    if (X.dim() != 2) {
        throw std::runtime_error("tensor2numpy: Expected 2D tensor, got " + std::to_string(X.dim()) + "D");
    }

    // Ensure a dense layout so the stride arithmetic below matches memory.
    X = X.contiguous();

    if (X.dtype() != torch::kFloat32) {
        throw std::runtime_error("tensor2numpy: Expected float32 tensor");
    }

    int64_t m = X.size(0);
    int64_t n = X.size(1);

    // np::from_data expects strides in BYTES; derive them from the tensor's
    // element strides times the element size (NOT sizeof(X.dtype()), which
    // is the size of the TypeMeta descriptor, not of an element).
    int64_t element_size = X.element_size();
    int64_t stride0 = X.stride(0) * element_size;
    int64_t stride1 = X.stride(1) * element_size;

    auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin<float>(),
                            bp::make_tuple(m, n),
                            bp::make_tuple(stride0, stride1),
                            bp::object());
    // No transpose: the tensor is already in [samples, features] order.
    return Xn;
}
|
||||
// Convert a 2-D int32 torch tensor into a numpy ndarray that shares the
// tensor's memory (zero-copy). The caller must keep X alive for as long as
// the returned ndarray is in use.
// Throws std::runtime_error when X is not a 2-D int32 tensor.
np::ndarray tensorInt2numpy(torch::Tensor& X)
{
    // Validate before touching data_ptr(); see tensor2numpy for rationale.
    if (X.dim() != 2) {
        throw std::runtime_error("tensorInt2numpy: Expected 2D tensor, got " + std::to_string(X.dim()) + "D");
    }

    // Ensure a dense layout so the stride arithmetic below matches memory.
    X = X.contiguous();

    if (X.dtype() != torch::kInt32) {
        throw std::runtime_error("tensorInt2numpy: Expected int32 tensor");
    }

    int64_t m = X.size(0);
    int64_t n = X.size(1);

    // Strides for np::from_data are expressed in bytes.
    int64_t element_size = X.element_size();
    int64_t stride0 = X.stride(0) * element_size;
    int64_t stride1 = X.stride(1) * element_size;

    auto Xn = np::from_data(X.data_ptr(), np::dtype::get_builtin<int>(),
                            bp::make_tuple(m, n),
                            bp::make_tuple(stride0, stride1),
                            bp::object());
    // No transpose: the tensor is already in [samples, features] order.
    return Xn;
}
|
||||
std::pair<np::ndarray, np::ndarray> tensors2numpy(torch::Tensor& X, torch::Tensor& y)
|
||||
{
|
||||
int n = X.size(1);
|
||||
auto yn = np::from_data(y.data_ptr(), np::dtype::get_builtin<int32_t>(), bp::make_tuple(n), bp::make_tuple(sizeof(y.dtype()) * 2), bp::object());
|
||||
// Validate y tensor dimensions
|
||||
if (y.dim() != 1) {
|
||||
throw std::runtime_error("tensors2numpy: Expected 1D y tensor, got " + std::to_string(y.dim()) + "D");
|
||||
}
|
||||
|
||||
// Validate dimensions match
|
||||
if (X.size(0) != y.size(0)) {
|
||||
throw std::runtime_error("tensors2numpy: X and y dimension mismatch: X[" +
|
||||
std::to_string(X.size(0)) + "], y[" + std::to_string(y.size(0)) + "]");
|
||||
}
|
||||
|
||||
// Ensure y tensor is contiguous
|
||||
y = y.contiguous();
|
||||
|
||||
if (y.dtype() != torch::kInt32) {
|
||||
throw std::runtime_error("tensors2numpy: Expected int32 y tensor");
|
||||
}
|
||||
|
||||
int64_t n = y.size(0);
|
||||
int64_t element_size = y.element_size();
|
||||
int64_t stride = y.stride(0) * element_size;
|
||||
|
||||
auto yn = np::from_data(y.data_ptr(), np::dtype::get_builtin<int32_t>(),
|
||||
bp::make_tuple(n),
|
||||
bp::make_tuple(stride),
|
||||
bp::object());
|
||||
|
||||
if (X.dtype() == torch::kInt32) {
|
||||
return { tensorInt2numpy(X), yn };
|
||||
}
|
||||
@@ -63,12 +129,21 @@ namespace pywrap {
|
||||
if (!fitted && hyperparameters.size() > 0) {
|
||||
pyWrap->setHyperparameters(id, hyperparameters);
|
||||
}
|
||||
auto [Xn, yn] = tensors2numpy(X, y);
|
||||
CPyObject Xp = bp::incref(bp::object(Xn).ptr());
|
||||
CPyObject yp = bp::incref(bp::object(yn).ptr());
|
||||
pyWrap->fit(id, Xp, yp);
|
||||
fitted = true;
|
||||
return *this;
|
||||
try {
|
||||
auto [Xn, yn] = tensors2numpy(X, y);
|
||||
CPyObject Xp = bp::incref(bp::object(Xn).ptr());
|
||||
CPyObject yp = bp::incref(bp::object(yn).ptr());
|
||||
pyWrap->fit(id, Xp, yp);
|
||||
fitted = true;
|
||||
return *this;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
// Clear any Python errors before re-throwing
|
||||
if (PyErr_Occurred()) {
|
||||
PyErr_Clear();
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
PyClassifier& PyClassifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing)
|
||||
{
|
||||
@@ -76,76 +151,148 @@ namespace pywrap {
|
||||
}
|
||||
// Run the wrapped Python classifier's predict() on X and return the class
// labels as a 1-D int32 tensor.
// X may be int32 (discrete features) or float32; it is converted to a numpy
// array without copying. Throws std::runtime_error on a NULL result, numpy
// conversion failure, wrong result rank, or unexpected result dtype.
torch::Tensor PyClassifier::predict(torch::Tensor& X)
{
    try {
        CPyObject Xp;
        if (X.dtype() == torch::kInt32) {
            auto Xn = tensorInt2numpy(X);
            Xp = bp::incref(bp::object(Xn).ptr());
        } else {
            auto Xn = tensor2numpy(X);
            Xp = bp::incref(bp::object(Xn).ptr());
        }

        // RAII guard: the reference is released even if an exception is
        // thrown below (the old code leaked it on every error path).
        PyObjectGuard incoming(pyWrap->predict(id, Xp));
        if (!incoming) {
            throw std::runtime_error("predict() returned NULL for " + module + ":" + className);
        }

        bp::handle<> handle(incoming.release()); // transfer ownership to boost
        bp::object object(handle);
        np::ndarray prediction = np::from_object(object);

        if (PyErr_Occurred()) {
            PyErr_Clear(); // clear the pending Python error before throwing C++-side
            throw std::runtime_error("Error creating numpy object for predict in " + module + ":" + className);
        }

        // predict() must yield one label per sample.
        if (prediction.get_nd() != 1) {
            throw std::runtime_error("Expected 1D prediction array, got " + std::to_string(prediction.get_nd()) + "D");
        }

        // Validate the dtype before reinterpreting the raw buffer.
        std::vector<int> vPrediction;
        if (xgboost) {
            // XGBoost returns long labels; narrow each one to int explicitly.
            if (prediction.get_dtype() == np::dtype::get_builtin<long>()) {
                long* data = reinterpret_cast<long*>(prediction.get_data());
                vPrediction.reserve(prediction.shape(0));
                for (int i = 0; i < prediction.shape(0); ++i) {
                    vPrediction.push_back(static_cast<int>(data[i]));
                }
            } else {
                throw std::runtime_error("XGBoost prediction: unexpected data type");
            }
        } else {
            // Other classifiers return int labels; bulk-copy the buffer.
            if (prediction.get_dtype() == np::dtype::get_builtin<int>()) {
                int* data = reinterpret_cast<int*>(prediction.get_data());
                vPrediction.assign(data, data + prediction.shape(0));
            } else {
                throw std::runtime_error("Prediction: unexpected data type");
            }
        }

        return torch::tensor(vPrediction, torch::kInt32);
    }
    catch (const std::exception& e) {
        // Clear any pending Python error so it cannot poison later calls,
        // then propagate the C++ exception unchanged.
        if (PyErr_Occurred()) {
            PyErr_Clear();
        }
        throw;
    }
}
|
||||
// Run the wrapped Python classifier's predict_proba() on X and return the
// class-probability matrix as a 2-D tensor ([samples, classes]).
// XGBoost yields float32 probabilities, other classifiers float64; the
// returned tensor dtype matches the source data (the old code mislabeled
// XGBoost's float buffer as kFloat64). Throws std::runtime_error on a NULL
// result, numpy conversion failure, wrong rank, or unexpected dtype.
torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
{
    try {
        CPyObject Xp;
        if (X.dtype() == torch::kInt32) {
            auto Xn = tensorInt2numpy(X);
            Xp = bp::incref(bp::object(Xn).ptr());
        } else {
            auto Xn = tensor2numpy(X);
            Xp = bp::incref(bp::object(Xn).ptr());
        }

        // RAII guard: the reference is released even on exception paths
        // (the old code leaked it whenever an error was thrown).
        PyObjectGuard incoming(pyWrap->predict_proba(id, Xp));
        if (!incoming) {
            throw std::runtime_error("predict_proba() returned NULL for " + module + ":" + className);
        }

        bp::handle<> handle(incoming.release()); // transfer ownership to boost
        bp::object object(handle);
        np::ndarray prediction = np::from_object(object);

        if (PyErr_Occurred()) {
            PyErr_Clear(); // clear pending Python error before throwing C++-side
            throw std::runtime_error("Error creating numpy object for predict_proba in " + module + ":" + className);
        }

        // predict_proba() must yield a [samples, classes] matrix.
        if (prediction.get_nd() != 2) {
            throw std::runtime_error("Expected 2D probability array, got " + std::to_string(prediction.get_nd()) + "D");
        }

        int64_t rows = prediction.shape(0);
        int64_t cols = prediction.shape(1);

        // Validate the dtype before reinterpreting the raw buffer.
        if (xgboost) {
            // XGBoost returns float32 probabilities.
            if (prediction.get_dtype() == np::dtype::get_builtin<float>()) {
                float* data = reinterpret_cast<float*>(prediction.get_data());
                std::vector<float> vPrediction(data, data + rows * cols);
                return torch::tensor(vPrediction, torch::kFloat32).reshape({rows, cols});
            } else {
                throw std::runtime_error("XGBoost predict_proba: unexpected data type");
            }
        } else {
            // Other classifiers return float64 probabilities.
            if (prediction.get_dtype() == np::dtype::get_builtin<double>()) {
                double* data = reinterpret_cast<double*>(prediction.get_data());
                std::vector<double> vPrediction(data, data + rows * cols);
                return torch::tensor(vPrediction, torch::kFloat64).reshape({rows, cols});
            } else {
                throw std::runtime_error("predict_proba: unexpected data type");
            }
        }
    }
    catch (const std::exception& e) {
        // Clear any pending Python error so it cannot poison later calls,
        // then propagate the C++ exception unchanged.
        if (PyErr_Occurred()) {
            PyErr_Clear();
        }
        throw;
    }
}
|
||||
// Score the wrapped Python classifier on (X, y) and return its accuracy.
// Conversion/validation errors from tensors2numpy propagate to the caller;
// any pending Python error state is cleared first so later calls into the
// interpreter are not poisoned.
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{
    try {
        auto [Xn, yn] = tensors2numpy(X, y);
        CPyObject Xp = bp::incref(bp::object(Xn).ptr());
        CPyObject yp = bp::incref(bp::object(yn).ptr());
        return pyWrap->score(id, Xp, yp);
    }
    catch (const std::exception& e) {
        // Clear any pending Python error before re-throwing.
        if (PyErr_Occurred()) {
            PyErr_Clear();
        }
        throw;
    }
}
|
||||
void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters)
|
||||
{
|
||||
|
@@ -27,13 +27,28 @@ namespace pywrap {
|
||||
private:
|
||||
PyObject* p;
|
||||
public:
|
||||
CPyObject() : p(NULL)
|
||||
CPyObject() : p(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
CPyObject(PyObject* _p) : p(_p)
|
||||
{
|
||||
}
|
||||
|
||||
// Copy constructor
|
||||
CPyObject(const CPyObject& other) : p(other.p)
|
||||
{
|
||||
if (p) {
|
||||
Py_INCREF(p);
|
||||
}
|
||||
}
|
||||
|
||||
// Move constructor
|
||||
CPyObject(CPyObject&& other) noexcept : p(other.p)
|
||||
{
|
||||
other.p = nullptr;
|
||||
}
|
||||
|
||||
~CPyObject()
|
||||
{
|
||||
Release();
|
||||
@@ -44,7 +59,11 @@ namespace pywrap {
|
||||
}
|
||||
// Replace the held pointer with _p, taking ownership of _p (no INCREF).
// Releases the previously held reference first — the old implementation
// (`return (p = _p);`) leaked it. Self-assignment is a no-op.
PyObject* setObject(PyObject* _p)
{
    if (p != _p) {
        Release(); // drop the old reference to avoid a leak
        p = _p;
    }
    return p;
}
|
||||
PyObject* AddRef()
|
||||
{
|
||||
@@ -57,31 +76,157 @@ namespace pywrap {
|
||||
{
    // Drop our reference (if any) and reset to null; safe to call more
    // than once. Py_XDECREF tolerates nullptr, but the guard avoids
    // redundant work and makes the idempotence explicit.
    if (p) {
        Py_XDECREF(p);
        p = nullptr;
    }
}
|
||||
// Raw pointer access for direct Python C API calls; ownership is not
// transferred to the caller.
PyObject* operator ->()
{
    return p;
}
|
||||
bool is()
|
||||
bool is() const
|
||||
{
|
||||
return p ? true : false;
|
||||
return p != nullptr;
|
||||
}
|
||||
|
||||
// True when a Python object is held (synonym of is()).
bool isValid() const
{
    return p != nullptr;
}
|
||||
// Implicit view as the raw PyObject* for C API calls; ownership is not
// transferred.
operator PyObject* ()
{
    return p;
}
|
||||
PyObject* operator = (PyObject* pp)
|
||||
// Copy assignment operator
|
||||
CPyObject& operator=(const CPyObject& other)
|
||||
{
|
||||
p = pp;
|
||||
if (this != &other) {
|
||||
Release(); // Release current reference
|
||||
p = other.p;
|
||||
if (p) {
|
||||
Py_INCREF(p); // Add reference to new object
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Move assignment operator
|
||||
CPyObject& operator=(CPyObject&& other) noexcept
|
||||
{
|
||||
if (this != &other) {
|
||||
Release(); // Release current reference
|
||||
p = other.p;
|
||||
other.p = nullptr;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Assignment from PyObject* - DEPRECATED, use setObject() instead
|
||||
PyObject* operator=(PyObject* pp)
|
||||
{
|
||||
setObject(pp);
|
||||
return p;
|
||||
}
|
||||
operator bool()
|
||||
explicit operator bool() const
|
||||
{
|
||||
return p ? true : false;
|
||||
return p != nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
// RAII guard for PyObject* - safer alternative to manual reference management
|
||||
class PyObjectGuard {
|
||||
private:
|
||||
PyObject* obj_;
|
||||
bool owns_reference_;
|
||||
|
||||
public:
|
||||
// Constructor takes ownership of a new reference
|
||||
explicit PyObjectGuard(PyObject* obj = nullptr) : obj_(obj), owns_reference_(true) {}
|
||||
|
||||
// Constructor for borrowed references
|
||||
PyObjectGuard(PyObject* obj, bool borrow) : obj_(obj), owns_reference_(!borrow) {
|
||||
if (borrow && obj_) {
|
||||
Py_INCREF(obj_);
|
||||
owns_reference_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Non-copyable to prevent accidental reference issues
|
||||
PyObjectGuard(const PyObjectGuard&) = delete;
|
||||
PyObjectGuard& operator=(const PyObjectGuard&) = delete;
|
||||
|
||||
// Movable
|
||||
PyObjectGuard(PyObjectGuard&& other) noexcept
|
||||
: obj_(other.obj_), owns_reference_(other.owns_reference_) {
|
||||
other.obj_ = nullptr;
|
||||
other.owns_reference_ = false;
|
||||
}
|
||||
|
||||
PyObjectGuard& operator=(PyObjectGuard&& other) noexcept {
|
||||
if (this != &other) {
|
||||
reset();
|
||||
obj_ = other.obj_;
|
||||
owns_reference_ = other.owns_reference_;
|
||||
other.obj_ = nullptr;
|
||||
other.owns_reference_ = false;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
~PyObjectGuard() {
|
||||
reset();
|
||||
}
|
||||
|
||||
// Reset to nullptr, releasing current reference if owned
|
||||
void reset(PyObject* new_obj = nullptr) {
|
||||
if (owns_reference_ && obj_) {
|
||||
Py_DECREF(obj_);
|
||||
}
|
||||
obj_ = new_obj;
|
||||
owns_reference_ = (new_obj != nullptr);
|
||||
}
|
||||
|
||||
// Release ownership and return the object
|
||||
PyObject* release() {
|
||||
PyObject* result = obj_;
|
||||
obj_ = nullptr;
|
||||
owns_reference_ = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Get the raw pointer (does not transfer ownership)
|
||||
PyObject* get() const {
|
||||
return obj_;
|
||||
}
|
||||
|
||||
// Check if valid
|
||||
bool isValid() const {
|
||||
return obj_ != nullptr;
|
||||
}
|
||||
|
||||
explicit operator bool() const {
|
||||
return obj_ != nullptr;
|
||||
}
|
||||
|
||||
// Access operators
|
||||
PyObject* operator->() const {
|
||||
return obj_;
|
||||
}
|
||||
|
||||
// Implicit conversion to PyObject* for API calls (does not transfer ownership)
|
||||
operator PyObject*() const {
|
||||
return obj_;
|
||||
}
|
||||
};
|
||||
|
||||
// Helper function to create a PyObjectGuard from a borrowed reference
|
||||
inline PyObjectGuard borrowReference(PyObject* obj) {
|
||||
return PyObjectGuard(obj, true);
|
||||
}
|
||||
|
||||
// Helper function to create a PyObjectGuard from a new reference
|
||||
inline PyObjectGuard newReference(PyObject* obj) {
|
||||
return PyObjectGuard(obj);
|
||||
}
|
||||
} /* namespace pywrap */
|
||||
#endif
|
@@ -237,12 +237,12 @@ namespace pywrap {
|
||||
CPyObject method = PyUnicode_FromString(name.c_str());
|
||||
try {
|
||||
if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), NULL)))
|
||||
errorAbort("Couldn't call method predict");
|
||||
errorAbort("Couldn't call method " + name);
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
errorAbort(e.what());
|
||||
}
|
||||
Py_INCREF(result);
|
||||
// PyObject_CallMethodObjArgs already returns a new reference, no need for Py_INCREF
|
||||
return result; // Caller must free this object
|
||||
}
|
||||
double PyWrap::score(const clfId_t id, CPyObject& X, CPyObject& y)
|
||||
|
Reference in New Issue
Block a user