Files
Pyclassifiers/tests/TestPythonClassifiers.cc

156 lines
6.8 KiB
C++

#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do this in one cpp file
#include <vector>
#include <map>
#include <string>
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <nlohmann/json.hpp>
#include "pyclfs/STree.h"
#include "pyclfs/SVC.h"
#include "pyclfs/RandomForest.h"
#include "pyclfs/XGBoost.h"
#include "pyclfs/AdaBoostPy.h"
#include "pyclfs/ODTE.h"
#include "TestUtils.h"
#include <iostream>
/// Checks each wrapped Python classifier against reference scores obtained on
/// the training data of four datasets, and checks the version string reported
/// by the underlying library.
///
/// GENERATE makes Catch2 repeat the test case once per classifier name.
TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{
    // Reference scores keyed by (dataset, classifier).
    const std::map<std::pair<std::string, std::string>, float> scores = {
        // Diabetes
        {{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.856770813f}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
        // Ecoli
        {{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.875}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0},
        // Glass
        {{"glass", "STree"}, 0.57009}, {{"glass", "ODTE"}, 0.76168227}, {{"glass", "SVC"}, 0.35514}, {{"glass", "RandomForest"}, 1.0},
        // Iris
        {{"iris", "STree"}, 0.99333}, {{"iris", "ODTE"}, 0.98667}, {{"iris", "SVC"}, 0.97333}, {{"iris", "RandomForest"}, 1.0},
    };
    const std::string name = GENERATE("ODTE", "STree", "SVC", "RandomForest");

    // The classifiers live in automatic storage so they are released on every
    // exit path (a failing REQUIRE leaves the test case by throwing). The map
    // below only holds non-owning pointers to them.
    pywrap::ODTE odte{};
    pywrap::STree stree{};
    pywrap::SVC svc{};
    pywrap::RandomForest randomForest{};
    const std::map<std::string, pywrap::PyClassifier*> models = {
        {"ODTE", &odte},
        {"STree", &stree},
        {"SVC", &svc},
        {"RandomForest", &randomForest}
    };
    // Version string each classifier is expected to report.
    const std::map<std::string, std::string> versions = {
        {"ODTE", "1.0.0-1"},
        {"STree", "1.4.0"},
        {"SVC", "1.5.2"},
        {"RandomForest", "1.5.2"}
    };
    // .at() throws on an unknown key instead of silently inserting a default
    // (a null pointer here, or a 0 score / empty version further down).
    auto clf = models.at(name);
    SECTION("Test Python Classifier " + name + " score ")
    {
        auto random_state = nlohmann::json::parse("{ \"random_state\": 0 }");
        for (const std::string file_name : { "glass", "iris", "ecoli", "diabetes" }) {
            auto raw = RawDatasets(file_name, false);
            clf->setHyperparameters(random_state);
            clf->fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
            // Scored on the same tensors that were used for fitting.
            auto score = clf->score(raw.Xt, raw.yt);
            INFO("File: " + file_name + " Classifier: " + name + " Score: " + std::to_string(score));
            REQUIRE(score == Catch::Approx(scores.at({ file_name, name })).epsilon(raw.epsilon));
        }
    }
    SECTION("Library check version")
    {
        INFO("Checking version of " + name + " classifier");
        REQUIRE(clf->getVersion() == versions.at(name));
    }
}
// Fits AdaBoostPy on the non-discretized iris data and compares the score on
// that same data with a reference value.
TEST_CASE("AdaBoostClassifier", "[PyClassifiers]")
{
    RawDatasets iris("iris", false);
    pywrap::AdaBoostPy booster{};
    booster.fit(iris.Xt, iris.yt, iris.featurest, iris.classNamet, iris.statest);
    // NOTE(review): the hyperparameters are applied after fit(); whether they
    // influence the score below depends on the wrapper - confirm this order is intended.
    booster.setHyperparameters(nlohmann::json::parse("{ \"n_estimators\": 100 }"));
    const auto accuracy = booster.score(iris.Xt, iris.yt);
    REQUIRE(accuracy == Catch::Approx(0.9599999f).epsilon(iris.epsilon));
}
// An STree fitted on the non-discretized iris data must report the expected
// number of nodes and edges.
TEST_CASE("Classifiers features", "[PyClassifiers]")
{
    RawDatasets iris("iris", false);
    pywrap::STree tree{};
    tree.fit(iris.Xt, iris.yt, iris.featurest, iris.classNamet, iris.statest);

    const auto nodes = tree.getNumberOfNodes();
    REQUIRE(nodes == 5);

    const auto edges = tree.getNumberOfEdges();
    REQUIRE(edges == 3);
}
// An ODTE configured with n_estimators = 10 and fitted on the non-discretized
// iris data must report the expected number of nodes and edges.
TEST_CASE("Get num features & num edges", "[PyClassifiers]")
{
    auto ensembleSize = nlohmann::json::parse("{ \"n_estimators\": 10 }");
    RawDatasets iris("iris", false);
    pywrap::ODTE ensemble{};
    // Hyperparameters are set before fitting.
    ensemble.setHyperparameters(ensembleSize);
    ensemble.fit(iris.Xt, iris.yt, iris.featurest, iris.classNamet, iris.statest);

    const auto nodes = ensemble.getNumberOfNodes();
    REQUIRE(nodes == 50);

    const auto edges = ensemble.getNumberOfEdges();
    REQUIRE(edges == 30);
}
// Fits an SVC on the iris data loaded with the discretize flag set and
// compares the score on that same data with a reference value.
TEST_CASE("Classifier with discretized dataset", "[PyClassifiers]")
{
    RawDatasets iris("iris", true);
    pywrap::SVC svm{};
    svm.fit(iris.Xt, iris.yt, iris.featurest, iris.classNamet, iris.statest);
    const auto accuracy = svm.score(iris.Xt, iris.yt);
    REQUIRE(accuracy == Catch::Approx(0.96667f).epsilon(iris.epsilon));
}
// Verifies that STree::predict agrees, sample by sample, with the arg-max of
// STree::predict_proba on the non-discretized iris data, and that the
// probability tensor has one column per class state.
TEST_CASE("Predict with non_discretized dataset and comparing to predict_proba", "[PyClassifiers]")
{
    auto raw = RawDatasets("iris", false);
    auto clf = pywrap::STree();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    auto predictions = clf.predict(raw.Xt);
    auto probabilities = clf.predict_proba(raw.Xt);
    // Most probable class of every row.
    auto preds = probabilities.argmax(1);
    // Number of class states derived from the largest label value plus one.
    auto classNumStates = torch::max(raw.yt).item<int>() + 1;
    const auto samples = predictions.size(0);
    REQUIRE(samples == probabilities.size(0));
    REQUIRE(samples == preds.size(0));
    REQUIRE(probabilities.size(1) == classNumStates);
    int right = 0;
    // Signed index, as in the other row loop of this file: comparing an
    // unsigned std::size_t with the signed extent returned by size(0) mixes
    // signedness.
    for (int i = 0; i < samples; ++i) {
        // Extract each scalar once instead of once per comparison.
        const int predicted = predictions[i].item<int>();
        const int mostProbable = preds[i].item<int>();
        if (predicted == mostProbable) {
            right++;
        }
        REQUIRE(predicted == mostProbable);
    }
    auto accuracy = right / static_cast<float>(samples);
    REQUIRE(accuracy == Catch::Approx(1.0f).epsilon(raw.epsilon));
}
// Fits XGBoost on the iris data loaded with the discretize flag set, compares
// the score on that same data with a reference value and prints it.
TEST_CASE("XGBoost", "[PyClassifiers]")
{
    RawDatasets iris("iris", true);
    pywrap::XGBoost booster{};
    booster.fit(iris.Xt, iris.yt, iris.featurest, iris.classNamet, iris.statest);
    // NOTE(review): this brace form looks like it builds a JSON array holding
    // the single string "n_jobs=1" rather than an object {"n_jobs": 1}, and it
    // is applied after fit() - confirm both are intended.
    nlohmann::json hyperparameters = { "n_jobs=1" };
    booster.setHyperparameters(hyperparameters);
    const auto accuracy = booster.score(iris.Xt, iris.yt);
    REQUIRE(accuracy == Catch::Approx(0.98).epsilon(iris.epsilon));
    std::cout << "XGBoost score: " << accuracy << std::endl;
}
// Fits XGBoost (no hyperparameters set in this test) on the iris data loaded
// with the discretize flag set, then checks for every row that the arg-max of
// predict_proba equals the label from predict and that the row's
// probabilities add up to 1.
TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
{
    RawDatasets iris("iris", true);
    pywrap::XGBoost booster{};
    booster.fit(iris.Xt, iris.yt, iris.featurest, iris.classNamet, iris.statest);
    // predict_proba is requested before predict, as in the original sequence.
    auto probabilities = booster.predict_proba(iris.Xt);
    auto labels = booster.predict(iris.Xt);
    const auto rows = probabilities.size(0);
    for (int row = 0; row < rows; ++row) {
        const auto rowProbabilities = probabilities[row];
        REQUIRE(torch::argmax(rowProbabilities).item<int>() == labels[row].item<int>());
        REQUIRE(torch::sum(rowProbabilities).item<double>() == Catch::Approx(1.0).epsilon(iris.epsilon));
    }
}