Compare commits

...

2 Commits

Author SHA1 Message Date
830265d91b Fix xgboost error in predict/predict_proba 2025-04-12 17:48:23 +02:00
761f57be6c Update tests 2025-01-09 11:25:19 +01:00
7 changed files with 67 additions and 37 deletions

View File

@@ -137,7 +137,7 @@
include(CMakeParseArguments) include(CMakeParseArguments)
option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE) option(CODE_COVERAGE_VERBOSE "Verbose information" TRUE)
# Check prereqs # Check prereqs
find_program( GCOV_PATH gcov ) find_program( GCOV_PATH gcov )
@@ -160,8 +160,12 @@ foreach(LANG ${LANGUAGES})
endif() endif()
elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU" elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU"
AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang") AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang")
if ("${LANG}" MATCHES "CUDA")
message(STATUS "Ignoring CUDA")
else()
message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...") message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
endif() endif()
endif()
endforeach() endforeach()
set(COVERAGE_COMPILER_FLAGS "-g --coverage" set(COVERAGE_COMPILER_FLAGS "-g --coverage"

View File

@@ -93,12 +93,20 @@ namespace pywrap {
PyErr_Print(); PyErr_Print();
throw std::runtime_error("Error creating object for predict in " + module + " and class " + className); throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
} }
if (xgboost) {
long* data = reinterpret_cast<long*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
} else {
int* data = reinterpret_cast<int*>(prediction.get_data()); int* data = reinterpret_cast<int*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0)); std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32); auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming); Py_XDECREF(incoming);
return resultTensor; return resultTensor;
} }
}
torch::Tensor PyClassifier::predict_proba(torch::Tensor& X) torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
{ {
int dimension = X.size(1); int dimension = X.size(1);
@@ -118,12 +126,20 @@ namespace pywrap {
PyErr_Print(); PyErr_Print();
throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className); throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
} }
if (xgboost) {
float* data = reinterpret_cast<float*>(prediction.get_data());
std::vector<float> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
} else {
double* data = reinterpret_cast<double*>(prediction.get_data()); double* data = reinterpret_cast<double*>(prediction.get_data());
std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1)); std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) }); auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming); Py_XDECREF(incoming);
return resultTensor; return resultTensor;
} }
}
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y) float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{ {
auto [Xn, yn] = tensors2numpy(X, y); auto [Xn, yn] = tensors2numpy(X, y);

View File

@@ -49,6 +49,7 @@ namespace pywrap {
nlohmann::json hyperparameters; nlohmann::json hyperparameters;
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {}; void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override {};
std::vector<std::string> notes; std::vector<std::string> notes;
bool xgboost = false;
private: private:
PyWrap* pyWrap; PyWrap* pyWrap;
std::string module; std::string module;

View File

@@ -5,5 +5,6 @@ namespace pywrap {
XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true) XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true)
{ {
validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" }; validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" };
xgboost = true;
} }
} /* namespace pywrap */ } /* namespace pywrap */

View File

@@ -3,14 +3,15 @@ if(ENABLE_TESTING)
include_directories( include_directories(
${PyClassifiers_SOURCE_DIR} ${PyClassifiers_SOURCE_DIR}
${PyClassifiers_SOURCE_DIR}/lib/Files ${PyClassifiers_SOURCE_DIR}/lib/Files
${PyClassifiers_SOURCE_DIR}/lib/mdlp ${PyClassifiers_SOURCE_DIR}/lib/mdlp/src
${PyClassifiers_SOURCE_DIR}/lib/json/include ${PyClassifiers_SOURCE_DIR}/lib/json/include
${Python3_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS}
${TORCH_INCLUDE_DIRS} ${TORCH_INCLUDE_DIRS}
${CMAKE_BINARY_DIR}/configured_files/include
/usr/local/include /usr/local/include
) )
file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc") file(GLOB_RECURSE PyClassifiers_SOURCES "${PyClassifiers_SOURCE_DIR}/pyclfs/*.cc")
set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES}) set(TEST_SOURCES_PYCLASSIFIERS TestPythonClassifiers.cc TestUtils.cc ${PyClassifiers_SOURCES})
add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS}) add_executable(${TEST_PYCLASSIFIERS} ${TEST_SOURCES_PYCLASSIFIERS})
target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles mdlp Catch2::Catch2WithMain) target_link_libraries(${TEST_PYCLASSIFIERS} PUBLIC "${TORCH_LIBRARIES}" ${Python3_LIBRARIES} ${LIBTORCH_PYTHON} Boost::boost Boost::python Boost::numpy ArffFiles fimdlp Catch2::Catch2WithMain)
endif(ENABLE_TESTING) endif(ENABLE_TESTING)

View File

@@ -33,8 +33,8 @@ TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{"RandomForest", new pywrap::RandomForest()} {"RandomForest", new pywrap::RandomForest()}
}; };
map<std::string, std::string> versions = { map<std::string, std::string> versions = {
{"ODTE", "1.0.0"}, {"ODTE", "1.0.0-1"},
{"STree", "1.3.2"}, {"STree", "1.4.0"},
{"SVC", "1.5.1"}, {"SVC", "1.5.1"},
{"RandomForest", "1.5.1"} {"RandomForest", "1.5.1"}
}; };
@@ -116,23 +116,30 @@ TEST_CASE("XGBoost", "[PyClassifiers]")
clf.setHyperparameters(hyperparameters); clf.setHyperparameters(hyperparameters);
auto score = clf.score(raw.Xt, raw.yt); auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon)); REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
std::cout << "XGBoost score: " << score << std::endl;
}
TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
{
auto raw = RawDatasets("iris", true);
auto clf = pywrap::XGBoost();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// nlohmann::json hyperparameters = { "n_jobs=1" };
// clf.setHyperparameters(hyperparameters);
auto predict_proba = clf.predict_proba(raw.Xt);
auto predict = clf.predict(raw.Xt);
// std::cout << "Predict proba: " << predict_proba << std::endl;
// std::cout << "Predict proba size: " << predict_proba.sizes() << std::endl;
// assert(predict.size(0) == predict_proba.size(0));
for (int row = 0; row < predict_proba.size(0); row++) {
// auto sum = 0.0;
// std::cout << "Row " << std::setw(3) << row << ": ";
// for (int col = 0; col < predict_proba.size(1); col++) {
// std::cout << std::setw(9) << std::fixed << std::setprecision(7) << predict_proba[row][col].item<double>() << " ";
// sum += predict_proba[row][col].item<double>();
// }
// std::cout << " -> " << std::setw(9) << std::fixed << std::setprecision(7) << sum << " -> " << torch::argmax(predict_proba[row]).item<int>() << " = " << predict[row].item<int>() << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
}
} }
// TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
// {
// auto raw = RawDatasets("iris", true);
// auto clf = pywrap::XGBoost();
// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// // nlohmann::json hyperparameters = { "n_jobs=1" };
// // clf.setHyperparameters(hyperparameters);
// auto predict = clf.predict(raw.Xt);
// for (int row = 0; row < predict.size(0); row++) {
// auto sum = 0.0;
// for (int col = 0; col < predict.size(1); col++) {
// std::cout << std::setw(12) << std::setprecision(10) << predict[row][col].item<double>() << " ";
// sum += predict[row][col].item<int>();
// }
// std::cout << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
// }
// std::cout << predict << std::endl;
// }