Fix xgboost error in predict/predict_proba

This commit is contained in:
2025-04-12 17:48:23 +02:00
parent 761f57be6c
commit 830265d91b
4 changed files with 55 additions and 30 deletions

View File

@@ -93,11 +93,19 @@ namespace pywrap {
PyErr_Print();
throw std::runtime_error("Error creating object for predict in " + module + " and class " + className);
}
int* data = reinterpret_cast<int*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
if (xgboost) {
long* data = reinterpret_cast<long*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
} else {
int* data = reinterpret_cast<int*>(prediction.get_data());
std::vector<int> vPrediction(data, data + prediction.shape(0));
auto resultTensor = torch::tensor(vPrediction, torch::kInt32);
Py_XDECREF(incoming);
return resultTensor;
}
}
torch::Tensor PyClassifier::predict_proba(torch::Tensor& X)
{
@@ -118,11 +126,19 @@ namespace pywrap {
PyErr_Print();
throw std::runtime_error("Error creating object for predict_proba in " + module + " and class " + className);
}
double* data = reinterpret_cast<double*>(prediction.get_data());
std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
if (xgboost) {
float* data = reinterpret_cast<float*>(prediction.get_data());
std::vector<float> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
} else {
double* data = reinterpret_cast<double*>(prediction.get_data());
std::vector<double> vPrediction(data, data + prediction.shape(0) * prediction.shape(1));
auto resultTensor = torch::tensor(vPrediction, torch::kFloat64).reshape({ prediction.shape(0), prediction.shape(1) });
Py_XDECREF(incoming);
return resultTensor;
}
}
float PyClassifier::score(torch::Tensor& X, torch::Tensor& y)
{
@@ -135,4 +151,4 @@ namespace pywrap {
{
this->hyperparameters = hyperparameters;
}
} /* namespace pywrap */
} /* namespace pywrap */

View File

@@ -49,6 +49,7 @@ namespace pywrap {
nlohmann::json hyperparameters;
// No-op override: the body is empty, so calling trainModel does nothing here.
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing = bayesnet::Smoothing_t::NONE) override
{
}
std::vector<std::string> notes;
bool xgboost = false;
private:
PyWrap* pyWrap;
std::string module;

View File

@@ -5,5 +5,6 @@ namespace pywrap {
// Constructs the XGBoost wrapper on top of PyClassifier.
// NOTE(review): the first two base-constructor arguments look like the Python
// module name and class name; the meaning of the trailing `true` is not
// visible from here — confirm against PyClassifier's constructor.
XGBoost::XGBoost() : PyClassifier("xgboost", "XGBClassifier", true)
{
    // Flag read by PyClassifier::predict / predict_proba to select the
    // XGBoost-specific interpretation of the returned prediction buffer.
    this->xgboost = true;
    // Hyperparameter names accepted by this classifier.
    validHyperparameters = { "tree_method", "early_stopping_rounds", "n_jobs" };
}
} /* namespace pywrap */

View File

@@ -116,23 +116,30 @@ TEST_CASE("XGBoost", "[PyClassifiers]")
clf.setHyperparameters(hyperparameters);
auto score = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
std::cout << "XGBoost score: " << score << std::endl;
}
// TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
// {
// auto raw = RawDatasets("iris", true);
// auto clf = pywrap::XGBoost();
// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// // nlohmann::json hyperparameters = { "n_jobs=1" };
// // clf.setHyperparameters(hyperparameters);
// auto predict = clf.predict(raw.Xt);
// for (int row = 0; row < predict.size(0); row++) {
// auto sum = 0.0;
// for (int col = 0; col < predict.size(1); col++) {
// std::cout << std::setw(12) << std::setprecision(10) << predict[row][col].item<double>() << " ";
// sum += predict[row][col].item<int>();
// }
// std::cout << std::endl;
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
// }
// std::cout << predict << std::endl;
// }
// Checks that XGBoost's predict_proba output agrees with predict on the iris
// dataset: for every row, the most probable class must equal the predicted
// class, and the row's probabilities must sum to 1 within raw.epsilon.
TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
{
    auto raw = RawDatasets("iris", true);
    auto clf = pywrap::XGBoost();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    auto predict_proba = clf.predict_proba(raw.Xt);
    auto predict = clf.predict(raw.Xt);
    // Both outputs must cover the same number of samples. Without this check a
    // longer predict would leave rows unverified and a shorter one would fail
    // with an indexing error instead of a readable assertion.
    REQUIRE(predict.size(0) == predict_proba.size(0));
    for (int row = 0; row < predict_proba.size(0); row++) {
        // The class with the highest probability must be the predicted class.
        REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
        // Each row of probabilities must sum to one.
        REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
    }
}