Fix xgboost error in predict/predict_proba

2025-04-12 17:48:23 +02:00
parent 761f57be6c
commit 830265d91b
4 changed files with 55 additions and 30 deletions
--- a/pyclfs/PyClassifier.cc
+++ b/pyclfs/PyClassifier.cc
@@ -93,11 +93,19 @@ namespace pywrap {
            PyErr_Print();
            throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
        }
-        int* data = reinterpret_cast<int*>(prediction.get_data());
-        std::vector<int> vPrediction(data, data + prediction.shape(0));
-        auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
-        Py_XDECREF(incoming);
-        return resultTensor;
+        if (xgboost) {
+            long* data = reinterpret_cast<long*>(prediction.get_data());
+            std::vector<int> vPrediction(data, data + prediction.shape(0));
+            auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
+            Py_XDECREF(incoming);
+            return resultTensor;
+        } else {
+            int* data = reinterpret_cast<int*>(prediction.get_data());
+            std::vector<int> vPrediction(data, data + prediction.shape(0));
+            auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
+            Py_XDECREF(incoming);
+            return resultTensor;
+        }
    }
    torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
    {
@@ -118,11 +126,19 @@ namespace pywrap {
            PyErr_Print();
            throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
        }
-        double* data = reinterpret_cast<double*>(prediction.get_data());
-        std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
-        auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
-        Py_XDECREF(incoming);
-        return resultTensor;
+        if (xgboost) {
+            float* data = reinterpret_cast<float*>(prediction.get_data());
+            std::vector<float> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
+            auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
+            Py_XDECREF(incoming);
+            return resultTensor;
+        } else {
+            double* data = reinterpret_cast<double*>(prediction.get_data());
+            std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
+            auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
+            Py_XDECREF(incoming);
+            return resultTensor;
+        }
    }
    float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
    {
@@ -135,4 +151,4 @@ namespace pywrap {
    {
        this->hyperparameters = hyperparameters;
    }
-} /* namespace pywrap */
+} /* namespace pywrap */
--- a/pyclfs/PyClassifier.h
+++ b/pyclfs/PyClassifier.h
@@ -49,6 +49,7 @@ namespace pywrap {
        nlohmann::json hyperparameters;
        void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {};
        std::vector<std::string> notes;
+        bool xgboost = false;
    private:
        PyWrap* pyWrap;
        std::string module;
--- a/pyclfs/XGBoost.cc
+++ b/pyclfs/XGBoost.cc
@@ -5,5 +5,6 @@ namespace pywrap {
    XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true)
    {
        validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" };
+        xgboost = true;
    }
 } /* namespace pywrap */
--- a/tests/TestPythonClassifiers.cc
+++ b/tests/TestPythonClassifiers.cc
@@ -116,23 +116,30 @@ TEST_CASE("XGBoost", "[PyClassifiers]")
    clf.setHyperparameters(hyperparameters);
    auto score = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
+    std::cout << "XGBoost score: " << score << std::endl;
 }
-// TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
-// {
-//     auto raw = RawDatasets("iris", true);
-//     auto clf = pywrap::XGBoost();
-//     clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
-//     // nlohmann::json hyperparameters = { "n_jobs=1" };
-//     // clf.setHyperparameters(hyperparameters);
-//     auto predict = clf.predict(raw.Xt);
-//     for (int row = 0; row < predict.size(0); row++) {
-//         auto sum = 0.0;
-//         for (int col = 0; col < predict.size(1); col++) {
-//             std::cout << std::setw(12) << std::setprecision(10) << predict[row][col].item<double>() << " ";
-//             sum += predict[row][col].item<int>();
-//         }
-//         std::cout << std::endl;
-//         // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
-//     }
-//     std::cout << predict << std::endl;
-// }
+TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
+{
+    auto raw = RawDatasets("iris", true);
+    auto clf = pywrap::XGBoost();
+    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
+    // nlohmann::json hyperparameters = { "n_jobs=1" };
+    // clf.setHyperparameters(hyperparameters);
+    auto predict_proba = clf.predict_proba(raw.Xt);
+    auto predict = clf.predict(raw.Xt);
+    // std::cout << "Predict proba: " << predict_proba << std::endl;
+    // std::cout << "Predict proba size: " << predict_proba.sizes() << std::endl;
+    // assert(predict.size(0) == predict_proba.size(0));
+    for (int row = 0; row < predict_proba.size(0); row++) {
+        // auto sum = 0.0;
+        // std::cout << "Row " << std::setw(3) << row << ": ";
+        // for (int col = 0; col < predict_proba.size(1); col++) {
+        //     std::cout << std::setw(9) << std::fixed << std::setprecision(7) << predict_proba[row][col].item<double>() << " ";
+        //     sum += predict_proba[row][col].item<double>();
+        // }
+        // std::cout << " -> " << std::setw(9) << std::fixed << std::setprecision(7) << sum << " -> " << torch::argmax(predict_proba[row]).item<int>() << " = " << predict[row].item<int>() << std::endl;
+        //     // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
+        REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
+        REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
+    }
+}