From 009ed037b8680c86fc04afb1b4c97eb2e9f28959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Fri, 10 May 2024 00:51:21 +0200 Subject: [PATCH] Add Scores class and TestsScores --- src/CMakeLists.txt | 2 +- src/main/Models.h | 9 ++- src/main/Scores.cpp | 123 +++++++++++++++++++++++++++++++++++ src/main/Scores.h | 33 ++++++++++ tests/CMakeLists.txt | 6 +- tests/TestScores.cpp | 150 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 316 insertions(+), 7 deletions(-) create mode 100644 src/main/Scores.cpp create mode 100644 src/main/Scores.h create mode 100644 tests/TestScores.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b6d2ead..7d23ba3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -47,7 +47,7 @@ add_executable(b_list commands/b_list.cpp target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}") # b_main -set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp) +set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp) list(TRANSFORM main_sources PREPEND main/) add_executable(b_main commands/b_main.cpp ${main_sources} common/Datasets.cpp common/Dataset.cpp diff --git a/src/main/Models.h b/src/main/Models.h index 4746803..e9b51b7 100644 --- a/src/main/Models.h +++ b/src/main/Models.h @@ -20,10 +20,6 @@ #include namespace platform { class Models { - private: - map> functionRegistry; - static Models* factory; //singleton - Models() {}; public: Models(Models&) = delete; void operator=(const Models&) = delete; @@ -34,7 +30,10 @@ namespace platform { function classFactoryFunction); std::vector getNames(); std::string toString(); - + private: + map> functionRegistry; + static Models* factory; //singleton + Models() {}; }; class Registrar { public: diff --git a/src/main/Scores.cpp b/src/main/Scores.cpp new file mode 100644 index 0000000..9417e54 
--- /dev/null
+++ b/src/main/Scores.cpp
@@ -0,0 +1,123 @@
+#include <sstream>
+#include "Scores.h"
+namespace platform {
+    Scores::Scores(torch::Tensor& y_test, torch::Tensor& y_pred, int num_classes, std::vector<std::string> labels) : num_classes(num_classes), labels(labels)
+    {
+        if (labels.size() == 0) {
+            for (int i = 0; i < num_classes; i++) {
+                this->labels.push_back("Class " + std::to_string(i));
+            }
+        }
+        total = y_test.size(0);
+        accuracy_value = (y_pred == y_test).sum().item<float>() / total;
+        confusion_matrix = torch::zeros({ num_classes, num_classes }, torch::kInt32);
+        for (int i = 0; i < total; i++) {
+            int actual = y_test[i].item<int>();
+            int predicted = y_pred[i].item<int>();
+            confusion_matrix[actual][predicted] += 1;
+        }
+    }
+    float Scores::accuracy()
+    {
+        return accuracy_value;
+    }
+    float Scores::f1_score(int num_class)
+    {
+        // Compute f1_score in a one vs rest fashion
+        auto precision_value = precision(num_class);
+        auto recall_value = recall(num_class);
+        return 2 * precision_value * recall_value / (precision_value + recall_value);
+    }
+    float Scores::f1_weighted()
+    {
+        float f1_weighted = 0;
+        for (int i = 0; i < num_classes; i++) {
+            f1_weighted += confusion_matrix[i].sum().item<int>() * f1_score(i);
+        }
+        return f1_weighted / total;
+    }
+    float Scores::f1_macro()
+    {
+        float f1_macro = 0;
+        for (int i = 0; i < num_classes; i++) {
+            f1_macro += f1_score(i);
+        }
+        return f1_macro / num_classes;
+    }
+    float Scores::precision(int num_class)
+    {
+        int tp = confusion_matrix[num_class][num_class].item<int>();
+        int fp = confusion_matrix.index({ "...", num_class }).sum().item<int>() - tp;
+        int fn = confusion_matrix[num_class].sum().item<int>() - tp;
+        return float(tp) / (tp + fp);
+    }
+    float Scores::recall(int num_class)
+    {
+        int tp = confusion_matrix[num_class][num_class].item<int>();
+        int fp = confusion_matrix.index({ "...", num_class }).sum().item<int>() - tp;
+        int fn = confusion_matrix[num_class].sum().item<int>() - tp;
+        return float(tp) / (tp + fn);
+    }
+    std::string Scores::classification_report_line(std::string label, float precision, float recall, float f1_score, int support)
+    {
+        std::stringstream oss;
+        oss << std::right << std::setw(label_len) << label << " ";
+        if (precision == 0) {
+            oss << std::string(dlen, ' ') << " ";
+        } else {
+            oss << std::setw(dlen) << std::setprecision(ndec) << std::fixed << precision << " ";
+        }
+        if (recall == 0) {
+            oss << std::string(dlen, ' ') << " ";
+        } else {
+            oss << std::setw(dlen) << std::setprecision(ndec) << std::fixed << recall << " ";
+        }
+        oss << std::setw(dlen) << std::setprecision(ndec) << std::fixed << f1_score << " "
+            << std::setw(dlen) << std::right << support << std::endl;
+        return oss.str();
+    }
+    std::string Scores::classification_report()
+    {
+        std::stringstream oss;
+        oss << "Classification Report" << std::endl;
+        oss << "=====================" << std::endl;
+        oss << std::string(label_len, ' ') << " precision    recall  f1-score   support" << std::endl;
+        oss << std::string(label_len, ' ') << " ========= ========= ========= =========" << std::endl;
+        for (int i = 0; i < num_classes; i++) {
+            oss << classification_report_line(labels[i], precision(i), recall(i), f1_score(i), confusion_matrix[i].sum().item<int>());
+        }
+        oss << std::endl;
+        oss << classification_report_line("accuracy", 0, 0, accuracy(), total);
+        float precision_avg = 0;
+        float recall_avg = 0;
+        float precision_wavg = 0;
+        float recall_wavg = 0;
+        for (int i = 0; i < num_classes; i++) {
+            int support = confusion_matrix[i].sum().item<int>();
+            precision_avg += precision(i);
+            precision_wavg += precision(i) * support;
+            recall_avg += recall(i);
+            recall_wavg += recall(i) * support;
+        }
+        precision_wavg /= total;
+        recall_wavg /= total;
+        precision_avg /= num_classes;
+        recall_avg /= num_classes;
+        oss << classification_report_line("macro avg", precision_avg, recall_avg, f1_macro(), total);
+        oss << classification_report_line("weighted avg", precision_wavg, recall_wavg, f1_weighted(), total);
+        return oss.str();
+    }
+    json Scores::get_confusion_matrix_json(bool labels_as_keys)
+    {
+        json j;
+        for (int i = 0; i < num_classes; i++) {
+            auto r_ptr = confusion_matrix[i].data_ptr<int>();
+            if (labels_as_keys) {
+                j[labels[i]] = std::vector<int>(r_ptr, r_ptr + num_classes);
+            } else {
+                j[i] = std::vector<int>(r_ptr, r_ptr + num_classes);
+            }
+        }
+        return j;
+    }
+}
\ No newline at end of file
diff --git a/src/main/Scores.h b/src/main/Scores.h
new file mode 100644
index 0000000..9097fd0
--- /dev/null
+++ b/src/main/Scores.h
@@ -0,0 +1,33 @@
+#ifndef SCORES_H
+#define SCORES_H
+#include <string>
+#include <vector>
+#include <torch/torch.h>
+#include <nlohmann/json.hpp>
+namespace platform {
+    using json = nlohmann::json;
+    class Scores {
+    public:
+        Scores(torch::Tensor& y_test, torch::Tensor& y_pred, int num_classes, std::vector<std::string> labels = {});
+        float accuracy();
+        float f1_score(int num_class);
+        float f1_weighted();
+        float f1_macro();
+        float precision(int num_class);
+        float recall(int num_class);
+        torch::Tensor get_confusion_matrix() { return confusion_matrix; }
+        std::string classification_report();
+        json get_confusion_matrix_json(bool labels_as_keys = false);
+    private:
+        std::string classification_report_line(std::string label, float precision, float recall, float f1_score, int support);
+        int num_classes;
+        float accuracy_value;
+        int total;
+        std::vector<std::string> labels;
+        torch::Tensor confusion_matrix; // Rows are actual, columns are predicted
+        int label_len = 12;
+        int dlen = 9;
+        int ndec = 7;
+    };
+}
+#endif
\ No newline at end of file
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index e76bc54..7074b4a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -11,7 +11,11 @@ if(ENABLE_TESTING)
         ${PyClassifiers_INCLUDE_DIRS}
         ${Bayesnet_INCLUDE_DIRS}
     )
-    set(TEST_SOURCES_PLATFORM TestUtils.cpp TestPlatform.cpp TestResult.cpp ${Platform_SOURCE_DIR}/src/common/Datasets.cpp ${Platform_SOURCE_DIR}/src/common/Dataset.cpp)
+    set(TEST_SOURCES_PLATFORM
+        TestUtils.cpp TestPlatform.cpp TestResult.cpp TestScores.cpp
+        ${Platform_SOURCE_DIR}/src/common/Datasets.cpp ${Platform_SOURCE_DIR}/src/common/Dataset.cpp
+        ${Platform_SOURCE_DIR}/src/main/Scores.cpp
+    )
     add_executable(${TEST_PLATFORM} ${TEST_SOURCES_PLATFORM})
     target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain BayesNet)
     add_test(NAME ${TEST_PLATFORM} COMMAND ${TEST_PLATFORM})
diff --git a/tests/TestScores.cpp b/tests/TestScores.cpp
new file mode 100644
index 0000000..03654f9
--- /dev/null
+++ b/tests/TestScores.cpp
@@ -0,0 +1,150 @@
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <catch2/matchers/catch_matchers.hpp>
+#include <torch/torch.h>
+#include "TestUtils.h"
+#include "results/Result.h"
+#include "common/DotEnv.h"
+#include "common/Datasets.h"
+#include "common/Paths.h"
+#include "main/Scores.h"
+#include "config.h"
+
+auto epsilon = 1e-4;
+
+void make_test_bin(int TP, int TN, int FP, int FN, std::vector<int>& y_test, std::vector<int>& y_pred)
+{
+    // TP
+    for (int i = 0; i < TP; i++) {
+        y_test.push_back(1);
+        y_pred.push_back(1);
+    }
+    // TN
+    for (int i = 0; i < TN; i++) {
+        y_test.push_back(0);
+        y_pred.push_back(0);
+    }
+    // FP
+    for (int i = 0; i < FP; i++) {
+        y_test.push_back(0);
+        y_pred.push_back(1);
+    }
+    // FN
+    for (int i = 0; i < FN; i++) {
+        y_test.push_back(1);
+        y_pred.push_back(0);
+    }
+}
+
+TEST_CASE("TestScores binary", "[Scores]")
+{
+    std::vector<int> y_test;
+    std::vector<int> y_pred;
+    make_test_bin(197, 210, 52, 41, y_test, y_pred);
+    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
+    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
+    platform::Scores scores(y_test_tensor, y_pred_tensor, 2);
+    REQUIRE(scores.accuracy() == Catch::Approx(0.814).epsilon(epsilon));
+    REQUIRE(scores.f1_score(0) == Catch::Approx(0.818713));
+    REQUIRE(scores.f1_score(1) == Catch::Approx(0.809035));
+    REQUIRE(scores.precision(0) == Catch::Approx(0.836653));
+    REQUIRE(scores.precision(1) == Catch::Approx(0.791165));
+    REQUIRE(scores.recall(0) == Catch::Approx(0.801527));
+    REQUIRE(scores.recall(1) == Catch::Approx(0.827731));
+    REQUIRE(scores.f1_weighted() == Catch::Approx(0.814106));
+    REQUIRE(scores.f1_macro() == Catch::Approx(0.813874));
+    auto confusion_matrix = scores.get_confusion_matrix();
+    REQUIRE(confusion_matrix[0][0].item<int>() == 210);
+    REQUIRE(confusion_matrix[0][1].item<int>() == 52);
+    REQUIRE(confusion_matrix[1][0].item<int>() == 41);
+    REQUIRE(confusion_matrix[1][1].item<int>() == 197);
+}
+TEST_CASE("TestScores multiclass", "[Scores]")
+{
+    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
+    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
+    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
+    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
+    platform::Scores scores(y_test_tensor, y_pred_tensor, 3);
+    REQUIRE(scores.accuracy() == Catch::Approx(0.6).epsilon(epsilon));
+    REQUIRE(scores.f1_score(0) == Catch::Approx(0.666667));
+    REQUIRE(scores.f1_score(1) == Catch::Approx(0.4));
+    REQUIRE(scores.f1_score(2) == Catch::Approx(0.666667));
+    REQUIRE(scores.precision(0) == Catch::Approx(0.666667));
+    REQUIRE(scores.precision(1) == Catch::Approx(0.25));
+    REQUIRE(scores.precision(2) == Catch::Approx(1.0));
+    REQUIRE(scores.recall(0) == Catch::Approx(0.666667));
+    REQUIRE(scores.recall(1) == Catch::Approx(1.0));
+    REQUIRE(scores.recall(2) == Catch::Approx(0.5));
+    REQUIRE(scores.f1_weighted() == Catch::Approx(0.64));
+    REQUIRE(scores.f1_macro() == Catch::Approx(0.577778));
+}
+TEST_CASE("Test Confusion Matrix Values", "[Scores]")
+{
+    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
+    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
+    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
+    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
+    platform::Scores scores(y_test_tensor, y_pred_tensor, 3);
+    auto confusion_matrix = scores.get_confusion_matrix();
+    REQUIRE(confusion_matrix[0][0].item<int>() == 2);
+    REQUIRE(confusion_matrix[0][1].item<int>() == 1);
+    REQUIRE(confusion_matrix[0][2].item<int>() == 0);
+    REQUIRE(confusion_matrix[1][0].item<int>() == 0);
+    REQUIRE(confusion_matrix[1][1].item<int>() == 1);
+    REQUIRE(confusion_matrix[1][2].item<int>() == 0);
+    REQUIRE(confusion_matrix[2][0].item<int>() == 1);
+    REQUIRE(confusion_matrix[2][1].item<int>() == 2);
+    REQUIRE(confusion_matrix[2][2].item<int>() == 3);
+}
+TEST_CASE("Confusion Matrix JSON", "[Scores]")
+{
+    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
+    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
+    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
+    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
+    std::vector<std::string> labels = { "Aeroplane", "Boat", "Car" };
+    platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
+    auto res_json_int = scores.get_confusion_matrix_json();
+    REQUIRE(res_json_int[0][0] == 2);
+    REQUIRE(res_json_int[0][1] == 1);
+    REQUIRE(res_json_int[0][2] == 0);
+    REQUIRE(res_json_int[1][0] == 0);
+    REQUIRE(res_json_int[1][1] == 1);
+    REQUIRE(res_json_int[1][2] == 0);
+    REQUIRE(res_json_int[2][0] == 1);
+    REQUIRE(res_json_int[2][1] == 2);
+    REQUIRE(res_json_int[2][2] == 3);
+    auto res_json_str = scores.get_confusion_matrix_json(true);
+    REQUIRE(res_json_str["Aeroplane"][0] == 2);
+    REQUIRE(res_json_str["Aeroplane"][1] == 1);
+    REQUIRE(res_json_str["Aeroplane"][2] == 0);
+    REQUIRE(res_json_str["Boat"][0] == 0);
+    REQUIRE(res_json_str["Boat"][1] == 1);
+    REQUIRE(res_json_str["Boat"][2] == 0);
+    REQUIRE(res_json_str["Car"][0] == 1);
+    REQUIRE(res_json_str["Car"][1] == 2);
+    REQUIRE(res_json_str["Car"][2] == 3);
+}
+TEST_CASE("Classification Report", "[Scores]")
+{
+    std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
+    std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
+    auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
+    auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
+    std::vector<std::string> labels = { "Aeroplane", "Boat", "Car" };
+    platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
+    std::string expected = R"(Classification Report
+=====================
+             precision    recall  f1-score   support
+             ========= ========= ========= =========
+   Aeroplane 0.6666667 0.6666667 0.6666667         3
+        Boat 0.2500000 1.0000000 0.4000000         1
+         Car 1.0000000 0.5000000 0.6666667         6
+
+    accuracy                     0.6000000        10
+   macro avg 0.6388889 0.7222223 0.5777778        10
+weighted avg 0.8250000 0.6000000 0.6400000        10
+)";
+    REQUIRE(scores.classification_report() == expected);
+}
\ No newline at end of file