Fix xgboost error in predict/predict_proba

Update tests
2025-04-12 17:48:23 +02:00 · 2025-01-09 11:25:19 +01:00
7 changed files with 67 additions and 37 deletions
--- a/cmake/modules/CodeCoverage.cmake
+++ b/cmake/modules/CodeCoverage.cmake
@@ -137,7 +137,7 @@
 include(CMakeParseArguments)
-option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE)
+option(CODE_COVERAGE_VERBOSE "Verbose information" TRUE)
 # Check prereqs
 find_program( GCOV_PATH gcov )
@@ -160,8 +160,12 @@ foreach(LANG ${LANGUAGES})
    endif()
  elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU"
         AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang")
         if ("${LANG}" MATCHES "CUDA")
             message(STATUS "Ignoring CUDA")
        else()
            message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
        endif()
  endif()
 endforeach()
 set(COVERAGE_COMPILER_FLAGS "-g --coverage"
--- a/lib/json
+++ b/lib/json
--- a/pyclfs/PyClassifier.cc
+++ b/pyclfs/PyClassifier.cc
@@ -93,12 +93,20 @@ namespace pywrap {
            PyErr_Print();
            throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
        }
        if (xgboost) {
            long* data = reinterpret_cast<long*>(prediction.get_data());
            std::vector<int> vPrediction(data, data + prediction.shape(0));
            auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
            Py_XDECREF(incoming);
            return resultTensor;
        } else {
            int* data = reinterpret_cast<int*>(prediction.get_data());
            std::vector<int> vPrediction(data, data + prediction.shape(0));
            auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
            Py_XDECREF(incoming);
            return resultTensor;
        }
    }
    torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
    {
        int dimension = X.size(1);
@@ -118,12 +126,20 @@ namespace pywrap {
            PyErr_Print();
            throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
        }
        if (xgboost) {
            float* data = reinterpret_cast<float*>(prediction.get_data());
            std::vector<float> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
            auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
            Py_XDECREF(incoming);
            return resultTensor;
        } else {
            double* data = reinterpret_cast<double*>(prediction.get_data());
            std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
            auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
            Py_XDECREF(incoming);
            return resultTensor;
        }
    }
    float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
    {
        auto [Xn, yn] = tensors2numpy(X, y);
--- a/pyclfs/PyClassifier.h
+++ b/pyclfs/PyClassifier.h
@@ -49,6 +49,7 @@ namespace pywrap {
        nlohmann::json hyperparameters;
        void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {};
        std::vector<std::string> notes;
        // Switches predict()/predict_proba() to their XGBoost-specific result-buffer
        // handling; defaults to false and is set to true by the XGBoost wrapper's constructor.
        bool xgboost = false;
    private:
        PyWrap* pyWrap;
        std::string module;
--- a/pyclfs/XGBoost.cc
+++ b/pyclfs/XGBoost.cc
@@ -5,5 +5,6 @@ namespace pywrap {
    /// Constructs the wrapper for the Python class XGBClassifier of module xgboost.
    XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true)
    {
        // Mark this instance as XGBoost so the base-class prediction code takes
        // its XGBoost branch when reading the returned buffers.
        xgboost = true;
        // Hyperparameter names registered for this classifier.
        validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" };
    }
 } /* namespace pywrap */
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -3,14 +3,15 @@ if(ENABLE_TESTING)
    include_directories(
        ${PyClassifiers_SOURCE_DIR}
        ${PyClassifiers_SOURCE_DIR}/lib/Files
-        ${PyClassifiers_SOURCE_DIR}/lib/mdlp
+        ${PyClassifiers_SOURCE_DIR}/lib/mdlp/src
        ${PyClassifiers_SOURCE_DIR}/lib/json/include
        ${Python3_INCLUDE_DIRS}
        ${TORCH_INCLUDE_DIRS}
        ${CMAKE_BINARY_DIR}/configured_files/include
        /usr/local/include
    )
    file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc")
    set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES})
    add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS})
-    target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles mdlp Catch2::Catch2WithMain)
+    target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles fimdlp Catch2::Catch2WithMain)
 endif(ENABLE_TESTING)
--- a/tests/TestPythonClassifiers.cc
+++ b/tests/TestPythonClassifiers.cc
@@ -33,8 +33,8 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
        {"RandomForest", new pywrap::RandomForest()}
    };
    map<std::string, std::string> versions = {
-        {"ODTE", "1.0.0"},
+        {"ODTE", "1.0.0-1"},
-        {"STree", "1.3.2"},
+        {"STree", "1.4.0"},
        {"SVC", "1.5.1"},
        {"RandomForest", "1.5.1"}
    };
@@ -116,23 +116,30 @@ TEST_CASE("XGBoost", "[PyClassifiers]")
    clf.setHyperparameters(hyperparameters);
    auto score = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
    std::cout << "XGBoost score: " << score << std::endl;
 }
 TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
 {
    // Fit an XGBoost wrapper on the "iris" raw dataset with default hyperparameters.
    auto raw = RawDatasets("iris", true);
    auto clf = pywrap::XGBoost();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);

    // Query class probabilities first, then hard labels, on the training data.
    auto predict_proba = clf.predict_proba(raw.Xt);
    auto predict = clf.predict(raw.Xt);

    // Row-wise consistency checks between both outputs.
    const auto n_rows = predict_proba.size(0);
    for (int row = 0; row < n_rows; ++row) {
        // The most probable class must be the label returned by predict().
        REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
        // Each row of probabilities must add up to 1 within the dataset tolerance.
        REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
    }
 }
 // TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
 // {
 //     auto raw = RawDatasets("iris", true);
 //     auto clf = pywrap::XGBoost();
 //     clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
 //     // nlohmann::json hyperparameters = { "n_jobs=1" };
 //     // clf.setHyperparameters(hyperparameters);
 //     auto predict = clf.predict(raw.Xt);
 //     for (int row = 0; row < predict.size(0); row++) {
 //         auto sum = 0.0;
 //         for (int col = 0; col < predict.size(1); col++) {
 //             std::cout << std::setw(12) << std::setprecision(10) << predict[row][col].item<double>() << " ";
 //             sum += predict[row][col].item<int>();
 //         }
 //         std::cout << std::endl;
 //         // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
 //     }
 //     std::cout << predict << std::endl;
 // }
Author	SHA1	Message	Date
Ricardo Montañana Gómez	830265d91b	Fix xgboost error in predict/predict_proba	2025-04-12 17:48:23 +02:00
Ricardo Montañana Gómez	761f57be6c	Update tests	2025-01-09 11:25:19 +01:00