Add DecisionTree with tests

This commit is contained in:
2025-06-17 13:48:11 +02:00
parent 8c413a1eb0
commit 023d5613b4
16 changed files with 1272 additions and 81 deletions

View File

@@ -14,38 +14,40 @@
using json = nlohmann::ordered_json;
auto epsilon = 1e-4;
void make_test_bin(int TP, int TN, int FP, int FN, std::vector<int>& y_test, std::vector<int>& y_pred)
void make_test_bin(int TP, int TN, int FP, int FN, std::vector<int>& y_test, torch::Tensor& y_pred)
{
// TP
std::vector<std::array<double, 2>> probs;
// TP: true positive (label 1, predicted 1)
for (int i = 0; i < TP; i++) {
y_test.push_back(1);
y_pred.push_back(1);
probs.push_back({ 0.0, 1.0 }); // P(class 0)=0, P(class 1)=1
}
// TN
// TN: true negative (label 0, predicted 0)
for (int i = 0; i < TN; i++) {
y_test.push_back(0);
y_pred.push_back(0);
probs.push_back({ 1.0, 0.0 }); // P(class 0)=1, P(class 1)=0
}
// FP
// FP: false positive (label 0, predicted 1)
for (int i = 0; i < FP; i++) {
y_test.push_back(0);
y_pred.push_back(1);
probs.push_back({ 0.0, 1.0 }); // P(class 0)=0, P(class 1)=1
}
// FN
// FN: false negative (label 1, predicted 0)
for (int i = 0; i < FN; i++) {
y_test.push_back(1);
y_pred.push_back(0);
probs.push_back({ 1.0, 0.0 }); // P(class 0)=1, P(class 1)=0
}
// Convert to torch::Tensor of double, shape [N,2]
y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 2 }, torch::kFloat64).clone();
}
TEST_CASE("Scores binary", "[Scores]")
{
std::vector<int> y_test;
std::vector<int> y_pred;
torch::Tensor y_pred;
make_test_bin(197, 210, 52, 41, y_test, y_pred);
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
platform::Scores scores(y_test_tensor, y_pred_tensor, 2);
platform::Scores scores(y_test_tensor, y_pred, 2);
REQUIRE(scores.accuracy() == Catch::Approx(0.814).epsilon(epsilon));
REQUIRE(scores.f1_score(0) == Catch::Approx(0.818713));
REQUIRE(scores.f1_score(1) == Catch::Approx(0.809035));
@@ -64,10 +66,23 @@ TEST_CASE("Scores binary", "[Scores]")
TEST_CASE("Scores multiclass", "[Scores]")
{
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
// Refactor y_pred to a tensor of shape [10, 3] with probabilities
std::vector<std::array<double, 3>> probs = {
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
};
torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
// Convert y_test to a tensor
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
platform::Scores scores(y_test_tensor, y_pred_tensor, 3);
platform::Scores scores(y_test_tensor, y_pred, 3);
REQUIRE(scores.accuracy() == Catch::Approx(0.6).epsilon(epsilon));
REQUIRE(scores.f1_score(0) == Catch::Approx(0.666667));
REQUIRE(scores.f1_score(1) == Catch::Approx(0.4));
@@ -84,10 +99,21 @@ TEST_CASE("Scores multiclass", "[Scores]")
TEST_CASE("Test Confusion Matrix Values", "[Scores]")
{
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
std::vector<std::array<double, 3>> probs = {
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
};
torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
platform::Scores scores(y_test_tensor, y_pred_tensor, 3);
platform::Scores scores(y_test_tensor, y_pred, 3);
auto confusion_matrix = scores.get_confusion_matrix();
REQUIRE(confusion_matrix[0][0].item<int>() == 2);
REQUIRE(confusion_matrix[0][1].item<int>() == 1);
@@ -102,11 +128,22 @@ TEST_CASE("Test Confusion Matrix Values", "[Scores]")
TEST_CASE("Confusion Matrix JSON", "[Scores]")
{
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
std::vector<std::array<double, 3>> probs = {
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
};
torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
std::vector<std::string> labels = { "Aeroplane", "Boat", "Car" };
platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
platform::Scores scores(y_test_tensor, y_pred, 3, labels);
auto res_json_int = scores.get_confusion_matrix_json();
REQUIRE(res_json_int[0][0] == 2);
REQUIRE(res_json_int[0][1] == 1);
@@ -131,11 +168,22 @@ TEST_CASE("Confusion Matrix JSON", "[Scores]")
TEST_CASE("Classification Report", "[Scores]")
{
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
std::vector<std::array<double, 3>> probs = {
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
};
torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
std::vector<std::string> labels = { "Aeroplane", "Boat", "Car" };
platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
platform::Scores scores(y_test_tensor, y_pred, 3, labels);
auto report = scores.classification_report(Colors::BLUE(), "train");
auto json_matrix = scores.get_confusion_matrix_json(true);
platform::Scores scores2(json_matrix);
@@ -144,11 +192,22 @@ TEST_CASE("Classification Report", "[Scores]")
TEST_CASE("JSON constructor", "[Scores]")
{
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
std::vector<std::array<double, 3>> probs = {
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
};
torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
std::vector<std::string> labels = { "Car", "Boat", "Aeroplane" };
platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
platform::Scores scores(y_test_tensor, y_pred, 3, labels);
auto res_json_int = scores.get_confusion_matrix_json();
platform::Scores scores2(res_json_int);
REQUIRE(scores.accuracy() == scores2.accuracy());
@@ -173,17 +232,14 @@ TEST_CASE("JSON constructor", "[Scores]")
TEST_CASE("Aggregate", "[Scores]")
{
std::vector<int> y_test;
std::vector<int> y_pred;
torch::Tensor y_pred;
make_test_bin(197, 210, 52, 41, y_test, y_pred);
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
platform::Scores scores(y_test_tensor, y_pred_tensor, 2);
platform::Scores scores(y_test_tensor, y_pred, 2);
y_test.clear();
y_pred.clear();
make_test_bin(227, 187, 39, 47, y_test, y_pred);
auto y_test_tensor2 = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor2 = torch::tensor(y_pred, torch::kInt32);
platform::Scores scores2(y_test_tensor2, y_pred_tensor2, 2);
platform::Scores scores2(y_test_tensor2, y_pred, 2);
scores.aggregate(scores2);
REQUIRE(scores.accuracy() == Catch::Approx(0.821).epsilon(epsilon));
REQUIRE(scores.f1_score(0) == Catch::Approx(0.8160329));
@@ -195,11 +251,9 @@ TEST_CASE("Aggregate", "[Scores]")
REQUIRE(scores.f1_weighted() == Catch::Approx(0.8209856));
REQUIRE(scores.f1_macro() == Catch::Approx(0.8208694));
y_test.clear();
y_pred.clear();
make_test_bin(197 + 227, 210 + 187, 52 + 39, 41 + 47, y_test, y_pred);
y_test_tensor = torch::tensor(y_test, torch::kInt32);
y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
platform::Scores scores3(y_test_tensor, y_pred_tensor, 2);
platform::Scores scores3(y_test_tensor, y_pred, 2);
for (int i = 0; i < 2; ++i) {
REQUIRE(scores3.f1_score(i) == scores.f1_score(i));
REQUIRE(scores3.precision(i) == scores.precision(i));
@@ -212,11 +266,22 @@ TEST_CASE("Aggregate", "[Scores]")
TEST_CASE("Order of keys", "[Scores]")
{
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
std::vector<std::array<double, 3>> probs = {
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 0.0, 1.0 }, // P(class 0)=0, P(class 1)=0, P(class 2)=1
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 0.0, 1.0, 0.0 }, // P(class 0)=0, P(class 1)=1, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 1.0, 0.0, 0.0 }, // P(class 0)=1, P(class 1)=0, P(class 2)=0
{ 0.0, 0.0, 1.0 } // P(class 0)=0, P(class 1)=0, P(class 2)=1
};
torch::Tensor y_pred = torch::from_blob(probs.data(), { (long)probs.size(), 3 }, torch::kFloat64).clone();
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
std::vector<std::string> labels = { "Car", "Boat", "Aeroplane" };
platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
platform::Scores scores(y_test_tensor, y_pred, 3, labels);
auto res_json_int = scores.get_confusion_matrix_json(true);
// Make a temp file and store the json
std::string filename = "temp.json";