Add predict_proba with tensors

This commit is contained in:
2024-07-12 12:54:30 +02:00
parent c5ff1a0b2b
commit 37716a57f4
6 changed files with 66 additions and 9 deletions

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20)
project(PyClassifiers
VERSION 1.0.2
VERSION 1.0.3
DESCRIPTION "Python Classifiers Wrapper."
HOMEPAGE_URL "https://github.com/rmontanana/pyclassifiers"
LANGUAGES CXX

View File

@@ -99,13 +99,37 @@ namespace pywrap {
Py_XDECREF(incoming);
return resultTensor;
}
/**
 * Asks the wrapped Python classifier for class probabilities of the samples
 * in X and returns them as a 2-D float64 tensor.
 *
 * @param X input samples; kInt32 tensors go through tensorInt2numpy, every
 *          other dtype through tensor2numpy.
 * @return tensor of dtype kFloat64 with the same two dimensions as the array
 *         returned by the Python predict_proba call.
 * @throws std::runtime_error if a Python error is pending after the result
 *         has been converted to an ndarray.
 */
torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
{
    CPyObject Xp;
    if (X.dtype() == torch::kInt32) {
        auto Xn = tensorInt2numpy(X);
        Xp = bp::incref(bp::object(Xn).ptr());
    } else {
        auto Xn = tensor2numpy(X);
        Xp = bp::incref(bp::object(Xn).ptr());
    }
    PyObject* incoming = pyWrap->predict_proba(id, Xp);
    // NOTE(review): bp::handle<> adopts the reference it is given and the
    // Py_XDECREF below releases one more; this is balanced only if the wrapper
    // hands back an extra reference -- confirm against PyWrap::predict_method.
    bp::handle<> handle(incoming);
    bp::object object(handle);
    // Request a 2-D, C-contiguous, aligned float64 array. The buffer is read
    // below as a flat run of doubles, which is only valid with that layout;
    // NumPy converts (e.g. from float32) when the result is stored differently.
    np::ndarray prediction = np::from_object(object, np::dtype::get_builtin<double>(), 2, 2, np::ndarray::CARRAY_RO);
    if (PyErr_Occurred()) {
        PyErr_Print();
        throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
    }
    const auto rows = static_cast<int64_t>(prediction.shape(0));
    const auto cols = static_cast<int64_t>(prediction.shape(1));
    const double* data = reinterpret_cast<const double*>(prediction.get_data());
    // Copy out of the NumPy buffer so the tensor owns its storage.
    std::vector<double> vPrediction(data, data + rows * cols);
    auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ rows, cols });
    Py_XDECREF(incoming);
    return resultTensor;
}
/**
 * Scores the wrapped Python classifier on samples X against labels y.
 *
 * @param X input samples.
 * @param y expected labels for X.
 * @return the value produced by the wrapper's score call, narrowed to float.
 */
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{
    auto [Xn, yn] = tensors2numpy(X, y);
    CPyObject Xp = bp::incref(bp::object(Xn).ptr());
    CPyObject yp = bp::incref(bp::object(yn).ptr());
    // The wrapper's score is declared as returning double; make the
    // narrowing to this function's float return type explicit.
    return static_cast<float>(pyWrap->score(id, Xp, yp));
}
void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters)
{

View File

@@ -25,7 +25,7 @@ namespace pywrap {
// Overload taking a single dataset tensor plus states/weights/smoothing:
// does no work and simply returns *this.
PyClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override { return *this; };
// Tensor prediction; only declared here, the body lives out of line.
torch::Tensor predict(torch::Tensor& X) override;
// Vector overload is a stub that always returns an empty vector.
std::vector<int> predict(std::vector<std::vector<int >>& X) override { return std::vector<int>(); }; // Not implemented
// Tensor class-probability prediction; only declared here, the body lives
// out of line as PyClassifier::predict_proba.
torch::Tensor predict_proba(torch::Tensor& X) override;
// Vector overload is a stub that always returns an empty container.
std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int >>& X) override { return std::vector<std::vector<double>>(); }; // Not implemented
// Vector overload is a stub that always returns 0.0.
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override { return 0.0; }; // Not implemented
// Tensor scoring; only declared here, the body lives out of line.
float score(torch::Tensor& X, torch::Tensor& y) override;

View File

@@ -222,11 +222,19 @@ namespace pywrap {
errorAbort(e.what());
}
}
// Runs the Python-side "predict_proba" method of classifier `id` on X by
// delegating to the shared predict_method helper; returns whatever it returns.
PyObject* PyWrap::predict_proba(const clfId_t id, CPyObject& X)
{
    const std::string pythonMethod{ "predict_proba" };
    return predict_method(pythonMethod, id, X);
}
// Runs the Python-side "predict" method of classifier `id` on X by
// delegating to the shared predict_method helper; returns whatever it returns.
PyObject* PyWrap::predict(const clfId_t id, CPyObject& X)
{
    const std::string pythonMethod{ "predict" };
    return predict_method(pythonMethod, id, X);
}
PyObject* PyWrap::predict_method(const std::string name, const clfId_t id, CPyObject& X)
{
PyObject* instance = getClass(id);
PyObject* result;
CPyObject method = PyUnicode_FromString("predict");
CPyObject method = PyUnicode_FromString(name.c_str());
try {
if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), NULL)))
errorAbort("Couldn't call method predict");

View File

@@ -31,6 +31,7 @@ namespace pywrap {
// Public operations, each addressed to the classifier identified by `id`.
void setHyperparameters(const clfId_t id, const json& hyperparameters);
void fit(const clfId_t id, CPyObject& X, CPyObject& y);
// Calls the Python "predict" method on X and returns the raw result object.
PyObject* predict(const clfId_t id, CPyObject& X);
// Calls the Python "predict_proba" method on X and returns the raw result object.
PyObject* predict_proba(const clfId_t id, CPyObject& X);
double score(const clfId_t id, CPyObject& X, CPyObject& y);
void clean(const clfId_t id);
void importClass(const clfId_t id, const std::string& moduleName, const std::string& className);
@@ -38,6 +39,7 @@ namespace pywrap {
private:
// Only call RemoveInstance from clean method
static void RemoveInstance();
// Shared implementation behind predict and predict_proba: `name` is the
// Python method name invoked on the classifier identified by `id`.
PyObject* predict_method(const std::string name, const clfId_t id, CPyObject& X);
void errorAbort(const std::string& message);
// No need to use static map here, since this class is a singleton
// NOTE(review): the three PyObject* per id are presumably module, class and
// instance -- confirm against importClass/getClass.
std::map<clfId_t, std::tuple<PyObject*, PyObject*, PyObject*>> moduleClassMap;

View File

@@ -13,8 +13,6 @@
#include "pyclfs/ODTE.h"
#include "TestUtils.h"
const std::string ACTUAL_VERSION = "1.0.5";
TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{
map <pair<std::string, std::string>, float> scores = {
@@ -37,15 +35,17 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
map<std::string, std::string> versions = {
{"ODTE", "0.3.6"},
{"STree", "1.3.2"},
{"SVC", "1.3.2"},
{"RandomForest", "1.3.2"}
{"SVC", "1.5.0"},
{"RandomForest", "1.5.0"}
};
auto clf = models[name];
SECTION("Test Python Classifier " + name + " score ")
{
auto random_state = nlohmann::json::parse("{ \"random_state\": 0 }");
for (std::string file_name : { "glass", "iris", "ecoli", "diabetes" }) {
auto raw = RawDatasets(file_name, false);
clf->setHyperparameters(random_state);
clf->fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf->score(raw.Xt, raw.yt);
INFO("File: " + file_name + " Classifier: " + name + " Score: " + to_string(score));
@@ -82,6 +82,29 @@ TEST_CASE("Classifier with discretized dataset", "[PyClassifiers]")
auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.96667f).epsilon(raw.epsilon));
}
// Fits STree on the iris dataset and checks that the class chosen by predict
// matches, sample by sample, the arg-max column of predict_proba.
TEST_CASE("Predict with non_discretized dataset and comparing to predict_proba", "[PyClassifiers]")
{
    auto raw = RawDatasets("iris", false);
    auto clf = pywrap::STree();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    auto predictions = clf.predict(raw.Xt);
    auto probabilities = clf.predict_proba(raw.Xt);
    // Most probable class per sample.
    auto preds = probabilities.argmax(1);
    auto classNumStates = torch::max(raw.yt).item<int>() + 1;
    REQUIRE(predictions.size(0) == probabilities.size(0));
    REQUIRE(predictions.size(0) == preds.size(0));
    REQUIRE(probabilities.size(1) == classNumStates);
    // size(0) is signed; keep the index signed too so the loop condition does
    // not mix signed and unsigned operands.
    const int64_t samples = predictions.size(0);
    int right = 0;
    for (int64_t i = 0; i < samples; ++i) {
        const int predicted = predictions[i].item<int>();
        const int mostLikely = preds[i].item<int>();
        if (predicted == mostLikely) {
            right++;
        }
        REQUIRE(predicted == mostLikely);
    }
    auto accuracy = right / static_cast<float>(samples);
    REQUIRE(accuracy == Catch::Approx(1.0f).epsilon(raw.epsilon));
}
// TEST_CASE("XGBoost", "[PyClassifiers]")
// {
// auto raw = RawDatasets("iris", true);