#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do this in one cpp file
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <string>
#include <map>
#include <utility>
#include <nlohmann/json.hpp>
#include <torch/torch.h>
#include "pyclfs/STree.h"
#include "pyclfs/SVC.h"
#include "pyclfs/RandomForest.h"
#include "pyclfs/XGBoost.h"
#include "pyclfs/ODTE.h"
#include "TestUtils.h"

TEST_CASE("Test Python Classifiers score", "[PyClassifiers]")
{
    // Expected score per (dataset, classifier) pair
    std::map<std::pair<std::string, std::string>, float> scores = {
        // Diabetes
        {{"diabetes", "STree"}, 0.81641}, {{"diabetes", "ODTE"}, 0.84635}, {{"diabetes", "SVC"}, 0.76823}, {{"diabetes", "RandomForest"}, 1.0},
        // Ecoli
        {{"ecoli", "STree"}, 0.8125}, {{"ecoli", "ODTE"}, 0.84821}, {{"ecoli", "SVC"}, 0.89583}, {{"ecoli", "RandomForest"}, 1.0},
        // Glass
        {{"glass", "STree"}, 0.57009}, {{"glass", "ODTE"}, 0.77103}, {{"glass", "SVC"}, 0.35514}, {{"glass", "RandomForest"}, 1.0},
        // Iris
        {{"iris", "STree"}, 0.99333}, {{"iris", "ODTE"}, 0.98667}, {{"iris", "SVC"}, 0.97333}, {{"iris", "RandomForest"}, 1.0},
    };
    // One test run is generated per classifier name
    std::string name = GENERATE("ODTE", "STree", "SVC", "RandomForest");
    std::map<std::string, pywrap::PyClassifier*> models = {
        {"ODTE", new pywrap::ODTE()}, {"STree", new pywrap::STree()},
        {"SVC", new pywrap::SVC()}, {"RandomForest", new pywrap::RandomForest()}
    };
    std::map<std::string, std::string> versions = {
        {"ODTE", "0.3.6"}, {"STree", "1.3.2"},
        {"SVC", "1.5.0"}, {"RandomForest", "1.5.0"}
    };
    auto clf = models[name];
    SECTION("Test Python Classifier " + name + " score")
    {
        auto random_state = nlohmann::json::parse("{ \"random_state\": 0 }");
        for (std::string file_name : { "glass", "iris", "ecoli", "diabetes" }) {
            auto raw = RawDatasets(file_name, false);
            clf->setHyperparameters(random_state);
            clf->fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
            auto score = clf->score(raw.Xt, raw.yt);
            INFO("File: " + file_name + " Classifier: " + name + " Score: " + std::to_string(score));
            REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
        }
    }
    SECTION("Library check version")
    {
        INFO("Checking version of " + name + " classifier");
        REQUIRE(clf->getVersion() == versions[name]);
    }
}
TEST_CASE("Classifiers features", "[PyClassifiers]")
{
    auto raw = RawDatasets("iris", false);
    auto clf = pywrap::STree();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    REQUIRE(clf.getNumberOfNodes() == 5);
    REQUIRE(clf.getNumberOfEdges() == 3);
}
TEST_CASE("Get num features & num edges", "[PyClassifiers]")
{
    auto raw = RawDatasets("iris", false);
    auto clf = pywrap::ODTE();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    REQUIRE(clf.getNumberOfNodes() == 50);
    REQUIRE(clf.getNumberOfEdges() == 30);
}
TEST_CASE("Classifier with discretized dataset", "[PyClassifiers]")
{
    auto raw = RawDatasets("iris", true);
    auto clf = pywrap::SVC();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    auto score = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(0.96667f).epsilon(raw.epsilon));
}
TEST_CASE("Predict with non_discretized dataset and comparing to predict_proba", "[PyClassifiers]")
{
    auto raw = RawDatasets("iris", false);
    auto clf = pywrap::STree();
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    auto predictions = clf.predict(raw.Xt);
    auto probabilities = clf.predict_proba(raw.Xt);
    // The argmax of predict_proba must match the class returned by predict()
    auto preds = probabilities.argmax(1);
    auto classNumStates = torch::max(raw.yt).item<int>() + 1;
    REQUIRE(predictions.size(0) == probabilities.size(0));
    REQUIRE(predictions.size(0) == preds.size(0));
    REQUIRE(probabilities.size(1) == classNumStates);
    int right = 0;
    for (int64_t i = 0; i < predictions.size(0); ++i) {
        if (predictions[i].item<int>() == preds[i].item<int>()) {
            right++;
        }
        REQUIRE(predictions[i].item<int>() == preds[i].item<int>());
    }
    auto accuracy = right / static_cast<float>(predictions.size(0));
    REQUIRE(accuracy == Catch::Approx(1.0f).epsilon(raw.epsilon));
}
TEST_CASE("XGBoost", "[PyClassifiers]")
{
    auto raw = RawDatasets("iris", true);
    auto clf = pywrap::XGBoost();
    // Set hyperparameters before fitting so that they are actually applied to the model
    nlohmann::json hyperparameters = { "n_jobs=1" };
    clf.setHyperparameters(hyperparameters);
    clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
    auto score = clf.score(raw.Xt, raw.yt);
    REQUIRE(score == Catch::Approx(0.98).epsilon(raw.epsilon));
}