Add DecisionTree with tests

2025-06-17 13:48:11 +02:00
parent 8c413a1eb0
commit 023d5613b4
16 changed files with 1272 additions and 81 deletions
--- a/tests/TestScores.cpp
+++ b/tests/TestScores.cpp
@@ -14,38 +14,40 @@
 using json = nlohmann::ordered_json;
 auto epsilon = 1e-4;

-void make_test_bin(int TP, int TN, int FP, int FN, std::vector<int>& y_test, std::vector<int>& y_pred)
+void make_test_bin(int TP, int TN, int FP, int FN, std::vector<int>& y_test, torch::Tensor& y_pred)
 {
-    // TP
+    std::vector<std::array<double, 2>> probs;
+    // TP: true positive (label 1, predicted 1)
    for (int i = 0; i < TP; i++) {
        y_test.push_back(1);
-        y_pred.push_back(1);
+        probs.push_back({ 0.0, 1.0 }); // P(class 0)=0, P(class 1)=1
    }
-    // TN
+    // TN: true negative (label 0, predicted 0)
    for (int i = 0; i < TN; i++) {
        y_test.push_back(0);
-        y_pred.push_back(0);
+        probs.push_back({ 1.0, 0.0 }); // P(class 0)=1, P(class 1)=0
    }
-    // FP
+    // FP: false positive (label 0, predicted 1)
    for (int i = 0; i < FP; i++) {
        y_test.push_back(0);
-        y_pred.push_back(1);
+        probs.push_back({ 0.0, 1.0 }); // P(class 0)=0, P(class 1)=1
    }
-    // FN
+    // FN: false negative (label 1, predicted 0)
    for (int i = 0; i < FN; i++) {
        y_test.push_back(1);
-        y_pred.push_back(0);
+        probs.push_back({ 1.0, 0.0 }); // P(class 0)=1, P(class 1)=0
    }
+    // Convert to torch::Tensor of double, shape [N,2]
+    y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 2 }, torch::kFloat64).clone();
 }

 TEST_CASE("Scores binary", "[Scores]")
 {
    std::vector<int> y_test;
-    std::vector<int> y_pred;
+    torch::Tensor y_pred;
    make_test_bin(197, 210, 52, 41, y_test, y_pred);
    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
-    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
-    platform::Scores scores(y_test_tensor, y_pred_tensor, 2);
+    platform::Scores scores(y_test_tensor, y_pred, 2);
    REQUIRE(scores.accuracy() == Catch::Approx(0.814).epsilon(epsilon));
    REQUIRE(scores.f1_score(0) == Catch::Approx(0.818713));
    REQUIRE(scores.f1_score(1) == Catch::Approx(0.809035));
@@ -64,10 +66,23 @@ TEST_CASE("Scores binary", "[Scores]")
 TEST_CASE("Scores multiclass", "[Scores]")
 {
    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
-    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
+    // Refactor y_pred to a tensor of shape [10, 3] with probabilities
+    std::vector<std::array<double, 3>> probs = {
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
+    };
+    torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
+    // Convert y_test to a tensor
    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
-    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
-    platform::Scores scores(y_test_tensor, y_pred_tensor, 3);
+    platform::Scores scores(y_test_tensor, y_pred, 3);
    REQUIRE(scores.accuracy() == Catch::Approx(0.6).epsilon(epsilon));
    REQUIRE(scores.f1_score(0) == Catch::Approx(0.666667));
    REQUIRE(scores.f1_score(1) == Catch::Approx(0.4));
@@ -84,10 +99,21 @@ TEST_CASE("Scores multiclass", "[Scores]")
 TEST_CASE("Test Confusion Matrix Values", "[Scores]")
 {
    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
-    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
+    std::vector<std::array<double, 3>> probs = {
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
+    };
+    torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
-    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
-    platform::Scores scores(y_test_tensor, y_pred_tensor, 3);
+    platform::Scores scores(y_test_tensor, y_pred, 3);
    auto confusion_matrix = scores.get_confusion_matrix();
    REQUIRE(confusion_matrix[0][0].item<int>() == 2);
    REQUIRE(confusion_matrix[0][1].item<int>() == 1);
@@ -102,11 +128,22 @@ TEST_CASE("Test Confusion Matrix Values", "[Scores]")
 TEST_CASE("Confusion Matrix JSON", "[Scores]")
 {
    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
-    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
+    std::vector<std::array<double, 3>> probs = {
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
+    };
+    torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
-    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
    std::vector<std::string> labels = { "Aeroplane", "Boat", "Car" };
-    platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
+    platform::Scores scores(y_test_tensor, y_pred, 3, labels);
    auto res_json_int = scores.get_confusion_matrix_json();
    REQUIRE(res_json_int[0][0] == 2);
    REQUIRE(res_json_int[0][1] == 1);
@@ -131,11 +168,22 @@ TEST_CASE("Confusion Matrix JSON", "[Scores]")
 TEST_CASE("Classification Report", "[Scores]")
 {
    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
-    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
+    std::vector<std::array<double, 3>> probs = {
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
+    };
+    torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
-    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
    std::vector<std::string> labels = { "Aeroplane", "Boat", "Car" };
-    platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
+    platform::Scores scores(y_test_tensor, y_pred, 3, labels);
    auto report = scores.classification_report(Colors::BLUE(), "train");
    auto json_matrix = scores.get_confusion_matrix_json(true);
    platform::Scores scores2(json_matrix);
@@ -144,11 +192,22 @@ TEST_CASE("Classification Report", "[Scores]")
 TEST_CASE("JSON constructor", "[Scores]")
 {
    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
-    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
+    std::vector<std::array<double, 3>> probs = {
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
+    };
+    torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
-    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
    std::vector<std::string> labels = { "Car", "Boat", "Aeroplane" };
-    platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
+    platform::Scores scores(y_test_tensor, y_pred, 3, labels);
    auto res_json_int = scores.get_confusion_matrix_json();
    platform::Scores scores2(res_json_int);
    REQUIRE(scores.accuracy() == scores2.accuracy());
@@ -173,17 +232,14 @@ TEST_CASE("JSON constructor", "[Scores]")
 TEST_CASE("Aggregate", "[Scores]")
 {
    std::vector<int> y_test;
-    std::vector<int> y_pred;
+    torch::Tensor y_pred;
    make_test_bin(197, 210, 52, 41, y_test, y_pred);
    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
-    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
-    platform::Scores scores(y_test_tensor, y_pred_tensor, 2);
+    platform::Scores scores(y_test_tensor, y_pred, 2);
    y_test.clear();
-    y_pred.clear();
    make_test_bin(227, 187, 39, 47, y_test, y_pred);
    auto y_test_tensor2 = torch::tensor(y_test, torch::kInt32);
-    auto y_pred_tensor2 = torch::tensor(y_pred, torch::kInt32);
-    platform::Scores scores2(y_test_tensor2, y_pred_tensor2, 2);
+    platform::Scores scores2(y_test_tensor2, y_pred, 2);
    scores.aggregate(scores2);
    REQUIRE(scores.accuracy() == Catch::Approx(0.821).epsilon(epsilon));
    REQUIRE(scores.f1_score(0) == Catch::Approx(0.8160329));
@@ -195,11 +251,9 @@ TEST_CASE("Aggregate", "[Scores]")
    REQUIRE(scores.f1_weighted() == Catch::Approx(0.8209856));
    REQUIRE(scores.f1_macro() == Catch::Approx(0.8208694));
    y_test.clear();
-    y_pred.clear();
    make_test_bin(197 + 227, 210 + 187, 52 + 39, 41 + 47, y_test, y_pred);
    y_test_tensor = torch::tensor(y_test, torch::kInt32);
-    y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
-    platform::Scores scores3(y_test_tensor, y_pred_tensor, 2);
+    platform::Scores scores3(y_test_tensor, y_pred, 2);
    for (int i = 0; i < 2; ++i) {
        REQUIRE(scores3.f1_score(i) == scores.f1_score(i));
        REQUIRE(scores3.precision(i) == scores.precision(i));
@@ -212,11 +266,22 @@ TEST_CASE("Aggregate", "[Scores]")
 TEST_CASE("Order of keys", "[Scores]")
 {
    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
-    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
+    std::vector<std::array<double, 3>> probs = {
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
+        { 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
+    };
+    torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
-    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
    std::vector<std::string> labels = { "Car", "Boat", "Aeroplane" };
-    platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
+    platform::Scores scores(y_test_tensor, y_pred, 3, labels);
    auto res_json_int = scores.get_confusion_matrix_json(true);
    // Make a temp file and store the json
    std::string filename = "temp.json";