156 lines
6.8 KiB
C++
156 lines
6.8 KiB
C++
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do
|
|
#include <vector>
|
|
#include <map>
|
|
#include <string>
|
|
#include <catch2/catch_test_macros.hpp>
|
|
#include <catch2/catch_approx.hpp>
|
|
#include <catch2/generators/catch_generators.hpp>
|
|
#include <nlohmann/json.hpp>
|
|
#include "pyclfs/STree.h"
|
|
#include "pyclfs/SVC.h"
|
|
#include "pyclfs/RandomForest.h"
|
|
#include "pyclfs/XGBoost.h"
|
|
#include "pyclfs/AdaBoostPy.h"
|
|
#include "pyclfs/ODTE.h"
|
|
#include "TestUtils.h"
|
|
#include <iostream>
|
|
|
|
TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
|
|
{
|
|
map <pair<std::string, std::string>, float> scores = {
|
|
// Diabetes
|
|
{{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.856770813f}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
|
|
// Ecoli
|
|
{{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.875}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0},
|
|
// Glass
|
|
{{"glass", "STree"}, 0.57009}, {{"glass", "ODTE"}, 0.76168227}, {{"glass", "SVC"}, 0.35514}, {{"glass", "RandomForest"}, 1.0},
|
|
// Iris
|
|
{{"iris", "STree"}, 0.99333}, {{"iris", "ODTE"}, 0.98667}, {{"iris", "SVC"}, 0.97333}, {{"iris", "RandomForest"}, 1.0},
|
|
};
|
|
std::string name = GENERATE("ODTE", "STree", "SVC", "RandomForest");
|
|
map<std::string, pywrap::PyClassifier*> models = {
|
|
{"ODTE", new pywrap::ODTE()},
|
|
{"STree", new pywrap::STree()},
|
|
{"SVC", new pywrap::SVC()},
|
|
{"RandomForest", new pywrap::RandomForest()}
|
|
};
|
|
map<std::string, std::string> versions = {
|
|
{"ODTE", "1.0.0-1"},
|
|
{"STree", "1.4.0"},
|
|
{"SVC", "1.5.2"},
|
|
{"RandomForest", "1.5.2"}
|
|
};
|
|
auto clf = models[name];
|
|
|
|
SECTION("Test Python Classifier " + name + " score ")
|
|
{
|
|
auto random_state = nlohmann::json::parse("{ \"random_state\": 0 }");
|
|
for (std::string file_name : { "glass", "iris", "ecoli", "diabetes" }) {
|
|
auto raw = RawDatasets(file_name, false);
|
|
clf->setHyperparameters(random_state);
|
|
clf->fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
|
auto score = clf->score(raw.Xt, raw.yt);
|
|
INFO("File: " + file_name + " Classifier: " + name + " Score: " + to_string(score));
|
|
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
|
|
}
|
|
}
|
|
SECTION("Library check version")
|
|
{
|
|
INFO("Checking version of " + name + " classifier");
|
|
REQUIRE(clf->getVersion() == versions[name]);
|
|
}
|
|
}
|
|
TEST_CASE("AdaBoostClassifier", "[PyClassifiers]")
|
|
{
|
|
auto raw = RawDatasets("iris", false);
|
|
auto clf = pywrap::AdaBoostPy();
|
|
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
|
clf.setHyperparameters(nlohmann::json::parse("{ \"n_estimators\": 100 }"));
|
|
auto score = clf.score(raw.Xt, raw.yt);
|
|
REQUIRE(score == Catch::Approx(0.9599999f).epsilon(raw.epsilon));
|
|
}
|
|
TEST_CASE("Classifiers features", "[PyClassifiers]")
|
|
{
|
|
auto raw = RawDatasets("iris", false);
|
|
auto clf = pywrap::STree();
|
|
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
|
REQUIRE(clf.getNumberOfNodes() == 5);
|
|
REQUIRE(clf.getNumberOfEdges() == 3);
|
|
}
|
|
TEST_CASE("Get num features & num edges", "[PyClassifiers]")
|
|
{
|
|
auto estimators = nlohmann::json::parse("{ \"n_estimators\": 10 }");
|
|
auto raw = RawDatasets("iris", false);
|
|
auto clf = pywrap::ODTE();
|
|
clf.setHyperparameters(estimators);
|
|
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
|
REQUIRE(clf.getNumberOfNodes() == 50);
|
|
REQUIRE(clf.getNumberOfEdges() == 30);
|
|
}
|
|
TEST_CASE("Classifier with discretized dataset", "[PyClassifiers]")
|
|
{
|
|
auto raw = RawDatasets("iris", true);
|
|
auto clf = pywrap::SVC();
|
|
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
|
auto score = clf.score(raw.Xt, raw.yt);
|
|
REQUIRE(score == Catch::Approx(0.96667f).epsilon(raw.epsilon));
|
|
}
|
|
TEST_CASE("Predict with non_discretized dataset and comparing to predict_proba", "[PyClassifiers]")
|
|
{
|
|
auto raw = RawDatasets("iris", false);
|
|
auto clf = pywrap::STree();
|
|
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
|
auto predictions = clf.predict(raw.Xt);
|
|
auto probabilities = clf.predict_proba(raw.Xt);
|
|
auto preds = probabilities.argmax(1);
|
|
auto classNumStates = torch::max(raw.yt).item<int>() + 1;
|
|
|
|
REQUIRE(predictions.size(0) == probabilities.size(0));
|
|
REQUIRE(predictions.size(0) == preds.size(0));
|
|
REQUIRE(probabilities.size(1) == classNumStates);
|
|
int right = 0;
|
|
for (std::size_t i = 0; i < predictions.size(0); ++i) {
|
|
if (predictions[i].item<int>() == preds[i].item<int>()) {
|
|
right++;
|
|
}
|
|
REQUIRE(predictions[i].item<int>() == preds[i].item<int>());
|
|
}
|
|
auto accuracy = right / static_cast<float>(predictions.size(0));
|
|
REQUIRE(accuracy == Catch::Approx(1.0f).epsilon(raw.epsilon));
|
|
}
|
|
TEST_CASE("XGBoost", "[PyClassifiers]")
|
|
{
|
|
auto raw = RawDatasets("iris", true);
|
|
auto clf = pywrap::XGBoost();
|
|
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
|
nlohmann::json hyperparameters = { "n_jobs=1" };
|
|
clf.setHyperparameters(hyperparameters);
|
|
auto score = clf.score(raw.Xt, raw.yt);
|
|
REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
|
|
std::cout << "XGBoost score: " << score << std::endl;
|
|
}
|
|
TEST_CASE("XGBoost predict proba", "[PyClassifiers]")
|
|
{
|
|
auto raw = RawDatasets("iris", true);
|
|
auto clf = pywrap::XGBoost();
|
|
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
|
|
// nlohmann::json hyperparameters = { "n_jobs=1" };
|
|
// clf.setHyperparameters(hyperparameters);
|
|
auto predict_proba = clf.predict_proba(raw.Xt);
|
|
auto predict = clf.predict(raw.Xt);
|
|
// std::cout << "Predict proba: " << predict_proba << std::endl;
|
|
// std::cout << "Predict proba size: " << predict_proba.sizes() << std::endl;
|
|
// assert(predict.size(0) == predict_proba.size(0));
|
|
for (int row = 0; row < predict_proba.size(0); row++) {
|
|
// auto sum = 0.0;
|
|
// std::cout << "Row " << std::setw(3) << row << ": ";
|
|
// for (int col = 0; col < predict_proba.size(1); col++) {
|
|
// std::cout << std::setw(9) << std::fixed << std::setprecision(7) << predict_proba[row][col].item<double>() << " ";
|
|
// sum += predict_proba[row][col].item<double>();
|
|
// }
|
|
// std::cout << " -> " << std::setw(9) << std::fixed << std::setprecision(7) << sum << " -> " << torch::argmax(predict_proba[row]).item<int>() << " = " << predict[row].item<int>() << std::endl;
|
|
// // REQUIRE(sum == Catch::Approx(1.0).epsilon(raw.epsilon));
|
|
REQUIRE(torch::argmax(predict_proba[row]).item<int>() == predict[row].item<int>());
|
|
REQUIRE(torch::sum(predict_proba[row]).item<double>() == Catch::Approx(1.0).epsilon(raw.epsilon));
|
|
}
|
|
} |