diff --git a/CMakeLists.txt b/CMakeLists.txt
index d0409b2..d9a56a9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.20)
 
 project(PyClassifiers
-  VERSION 1.0.2
+  VERSION 1.0.3
   DESCRIPTION "Python Classifiers Wrapper."
   HOMEPAGE_URL "https://github.com/rmontanana/pyclassifiers"
   LANGUAGES CXX
diff --git a/pyclfs/PyClassifier.cc b/pyclfs/PyClassifier.cc
index 47f1185..359cae1 100644
--- a/pyclfs/PyClassifier.cc
+++ b/pyclfs/PyClassifier.cc
@@ -99,13 +99,46 @@ namespace pywrap {
         Py_XDECREF(incoming);
         return resultTensor;
     }
+    // Returns the class probabilities computed by the wrapped Python classifier
+    // for the samples in X, as a kFloat64 tensor shaped like the 2-D array that
+    // the Python predict_proba call produced. Throws std::runtime_error when a
+    // Python error is pending after the conversion to ndarray.
+    torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
+    {
+        CPyObject Xp;
+        if (X.dtype() == torch::kInt32) {
+            auto Xn = tensorInt2numpy(X);
+            Xp = bp::incref(bp::object(Xn).ptr());
+        } else {
+            auto Xn = tensor2numpy(X);
+            Xp = bp::incref(bp::object(Xn).ptr());
+        }
+        PyObject* incoming = pyWrap->predict_proba(id, Xp);
+        bp::handle<> handle(incoming);
+        bp::object object(handle);
+        // Ask NumPy for a float64, C-contiguous, 2-D array so the buffer can be
+        // read below as rows * cols consecutive doubles.
+        np::ndarray prediction = np::from_object(object, np::dtype::get_builtin<double>(), 2, 2, np::ndarray::C_CONTIGUOUS);
+        if (PyErr_Occurred()) {
+            PyErr_Print();
+            throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
+        }
+        const auto rows = prediction.shape(0);
+        const auto cols = prediction.shape(1);
+        const double* data = reinterpret_cast<const double*>(prediction.get_data());
+        std::vector<double> vPrediction(data, data + rows * cols);
+        auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ rows, cols });
+        // NOTE(review): same pattern as predict() above; confirm PyWrap returns an
+        // extra reference, because handle also releases one on destruction.
+        Py_XDECREF(incoming);
+        return resultTensor;
+    }
     float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
     {
         auto [Xn, yn] = tensors2numpy(X, y);
         CPyObject Xp = bp::incref(bp::object(Xn).ptr());
         CPyObject yp = bp::incref(bp::object(yn).ptr());
-        float result = pyWrap->score(id, Xp, yp);
-        return result;
+        return static_cast<float>(pyWrap->score(id, Xp, yp));
     }
     void PyClassifier::setHyperparameters(const nlohmann::json& hyperparameters)
     {
diff --git a/pyclfs/PyClassifier.h b/pyclfs/PyClassifier.h
index fbe4005..ebd6a27 100644
--- a/pyclfs/PyClassifier.h
+++ b/pyclfs/PyClassifier.h
@@ -25,7 +25,7 @@ namespace pywrap {
         PyClassifier& fit(torch::Tensor& dataset, const std::vector& features, const std::string& className, std::map>& states, const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override { return *this; };
         torch::Tensor predict(torch::Tensor& X) override;
         std::vector predict(std::vector>& X) override { return std::vector(); }; // Not implemented
-        torch::Tensor predict_proba(torch::Tensor& X) override { return torch::zeros({ 0, 0 }); } // Not implemented
+        torch::Tensor predict_proba(torch::Tensor& X) override;
         std::vector> predict_proba(std::vector>& X) override { return std::vector>(); }; // Not implemented
         float score(std::vector>& X, std::vector& y) override { return 0.0; }; // Not implemented
         float score(torch::Tensor& X, torch::Tensor& y) override;
diff --git a/pyclfs/PyWrap.cc b/pyclfs/PyWrap.cc
index 88a5a9c..79c8b68 100644
--- a/pyclfs/PyWrap.cc
+++ b/pyclfs/PyWrap.cc
@@ -222,11 +222,22 @@ namespace pywrap {
             errorAbort(e.what());
         }
     }
+    PyObject* PyWrap::predict_proba(const clfId_t id, CPyObject& X)
+    {
+        return predict_method("predict_proba", id, X);
+    }
     PyObject* PyWrap::predict(const clfId_t id, CPyObject& X)
+    {
+        return predict_method("predict", id, X);
+    }
+    // Common body of predict / predict_proba: invokes the Python method `name`
+    // on the object returned by getClass(id), passing X as its only argument.
+    // NOTE(review): the errorAbort text below stays "Couldn't call method predict" for any `name`.
+    PyObject* PyWrap::predict_method(const std::string& name, const clfId_t id, CPyObject& X)
     {
         PyObject* instance = getClass(id);
         PyObject* result;
-        CPyObject method = PyUnicode_FromString("predict");
+        CPyObject method = PyUnicode_FromString(name.c_str());
         try {
             if (!(result = PyObject_CallMethodObjArgs(instance, method.getObject(), X.getObject(), NULL)))
                 errorAbort("Couldn't call method predict");
diff --git a/pyclfs/PyWrap.h b/pyclfs/PyWrap.h
index b953ff0..205acb5 100644
--- a/pyclfs/PyWrap.h
+++ b/pyclfs/PyWrap.h
@@ -31,6 +31,7 @@ namespace pywrap {
         void setHyperparameters(const clfId_t id, const json& hyperparameters);
         void fit(const clfId_t id, CPyObject& X, CPyObject& y);
         PyObject* predict(const clfId_t id, CPyObject& X);
+        PyObject* predict_proba(const clfId_t id, CPyObject& X);
         double score(const clfId_t id, CPyObject& X, CPyObject& y);
         void clean(const clfId_t id);
         void importClass(const clfId_t id, const std::string& moduleName, const std::string& className);
@@ -38,6 +39,7 @@ namespace pywrap {
     private:
         // Only call RemoveInstance from clean method
         static void RemoveInstance();
+        PyObject* predict_method(const std::string& name, const clfId_t id, CPyObject& X);
         void errorAbort(const std::string& message);
         // No need to use static map here, since this class is a singleton
         std::map> moduleClassMap;
diff --git a/tests/TestPythonClassifiers.cc b/tests/TestPythonClassifiers.cc
index b1ea6c2..f2079d9 100644
--- a/tests/TestPythonClassifiers.cc
+++ b/tests/TestPythonClassifiers.cc
@@ -13,8 +13,6 @@
 #include "pyclfs/ODTE.h"
 #include "TestUtils.h"
 
-const std::string ACTUAL_VERSION = "1.0.5";
-
 TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
 {
     map , float> scores = {
@@ -37,15 +35,17 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
     map versions = {
         {"ODTE", "0.3.6"},
         {"STree", "1.3.2"},
-        {"SVC", "1.3.2"},
-        {"RandomForest", "1.3.2"}
+        {"SVC", "1.5.0"},
+        {"RandomForest", "1.5.0"}
     };
     auto clf = models[name];
 
     SECTION("Test Python Classifier " + name + " score ")
     {
+        auto random_state = nlohmann::json::parse("{ \"random_state\": 0 }");
         for (std::string file_name : { "glass", "iris", "ecoli", "diabetes" }) {
             auto raw = RawDatasets(file_name, false);
+            clf->setHyperparameters(random_state);
             clf->fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
             auto score = clf->score(raw.Xt, raw.yt);
             INFO("File: " + file_name + " Classifier: " + name + " Score: " + to_string(score));
@@ -82,6 +82,30 @@ TEST_CASE("Classifier with discretized dataset", "[PyClassifiers]")
     auto score = clf.score(raw.Xt, raw.yt);
     REQUIRE(score == Catch::Approx(0.96667f).epsilon(raw.epsilon));
 }
+TEST_CASE("Predict with non_discretized dataset and comparing to predict_proba", "[PyClassifiers]")
+{
+    auto raw = RawDatasets("iris", false);
+    auto clf = pywrap::STree();
+    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
+    auto predictions = clf.predict(raw.Xt);
+    auto probabilities = clf.predict_proba(raw.Xt);
+    auto preds = probabilities.argmax(1);
+    auto classNumStates = torch::max(raw.yt).item<int>() + 1;
+
+    REQUIRE(predictions.size(0) == probabilities.size(0));
+    REQUIRE(predictions.size(0) == preds.size(0));
+    REQUIRE(probabilities.size(1) == classNumStates);
+    // predict() has to agree with the arg-max of predict_proba() on every sample.
+    int right = 0;
+    for (int64_t i = 0; i < predictions.size(0); ++i) {
+        if (predictions[i].item<int>() == preds[i].item<int>()) {
+            right++;
+        }
+        REQUIRE(predictions[i].item<int>() == preds[i].item<int>());
+    }
+    auto accuracy = right / static_cast<float>(predictions.size(0));
+    REQUIRE(accuracy == Catch::Approx(1.0f).epsilon(raw.epsilon));
+}
 // TEST_CASE("XGBoost", "[PyClassifiers]")
 // {
 //     auto raw = RawDatasets("iris", true);