Add Scores class and TestScores

commit 009ed037b8
parent 6d1b78ada7
2024-05-10 00:51:21 +02:00
6 changed files with 316 additions and 7 deletions


@@ -47,7 +47,7 @@ add_executable(b_list commands/b_list.cpp
 target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" ArffFiles mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
 # b_main
-set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp)
+set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
 list(TRANSFORM main_sources PREPEND main/)
 add_executable(b_main commands/b_main.cpp ${main_sources}
 common/Datasets.cpp common/Dataset.cpp


@@ -20,10 +20,6 @@
 #include <pyclassifiers/RandomForest.h>
 namespace platform {
 class Models {
-private:
-map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
-static Models* factory; //singleton
-Models() {};
 public:
 Models(Models&) = delete;
 void operator=(const Models&) = delete;
@@ -34,7 +30,10 @@ namespace platform {
 function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
 std::vector<string> getNames();
 std::string toString();
+private:
+map<std::string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
+static Models* factory; //singleton
+Models() {};
 };
 class Registrar {
 public:

src/main/Scores.cpp (new file, 123 lines)

@@ -0,0 +1,123 @@
#include <sstream>
#include <iomanip> // std::setw, std::setprecision, std::fixed used below
#include "Scores.h"
namespace platform {
Scores::Scores(torch::Tensor& y_test, torch::Tensor& y_pred, int num_classes, std::vector<std::string> labels) : num_classes(num_classes), labels(labels)
{
if (labels.size() == 0) {
for (int i = 0; i < num_classes; i++) {
this->labels.push_back("Class " + std::to_string(i));
}
}
total = y_test.size(0);
accuracy_value = (y_pred == y_test).sum().item<float>() / total;
confusion_matrix = torch::zeros({ num_classes, num_classes }, torch::kInt32);
for (int i = 0; i < total; i++) {
int actual = y_test[i].item<int>();
int predicted = y_pred[i].item<int>();
confusion_matrix[actual][predicted] += 1;
}
}
float Scores::accuracy()
{
return accuracy_value;
}
float Scores::f1_score(int num_class)
{
// Compute f1_score in a one vs rest fashion
auto precision_value = precision(num_class);
auto recall_value = recall(num_class);
return 2 * precision_value * recall_value / (precision_value + recall_value);
}
float Scores::f1_weighted()
{
float f1_weighted = 0;
for (int i = 0; i < num_classes; i++) {
f1_weighted += confusion_matrix[i].sum().item<int>() * f1_score(i);
}
return f1_weighted / total;
}
float Scores::f1_macro()
{
float f1_macro = 0;
for (int i = 0; i < num_classes; i++) {
f1_macro += f1_score(i);
}
return f1_macro / num_classes;
}
float Scores::precision(int num_class)
{
int tp = confusion_matrix[num_class][num_class].item<int>();
int fp = confusion_matrix.index({ "...", num_class }).sum().item<int>() - tp;
return float(tp) / (tp + fp);
}
float Scores::recall(int num_class)
{
int tp = confusion_matrix[num_class][num_class].item<int>();
int fn = confusion_matrix[num_class].sum().item<int>() - tp;
return float(tp) / (tp + fn);
}
std::string Scores::classification_report_line(std::string label, float precision, float recall, float f1_score, int support)
{
std::stringstream oss;
oss << std::right << std::setw(label_len) << label << " ";
if (precision == 0) {
oss << std::string(dlen, ' ') << " ";
} else {
oss << std::setw(dlen) << std::setprecision(ndec) << std::fixed << precision << " ";
}
if (recall == 0) {
oss << std::string(dlen, ' ') << " ";
} else {
oss << std::setw(dlen) << std::setprecision(ndec) << std::fixed << recall << " ";
}
oss << std::setw(dlen) << std::setprecision(ndec) << std::fixed << f1_score << " "
<< std::setw(dlen) << std::right << support << std::endl;
return oss.str();
}
std::string Scores::classification_report()
{
std::stringstream oss;
oss << "Classification Report" << std::endl;
oss << "=====================" << std::endl;
oss << std::string(label_len, ' ') << " precision    recall  f1-score   support" << std::endl;
oss << std::string(label_len, ' ') << " ========= ========= ========= =========" << std::endl;
for (int i = 0; i < num_classes; i++) {
oss << classification_report_line(labels[i], precision(i), recall(i), f1_score(i), confusion_matrix[i].sum().item<int>());
}
oss << std::endl;
oss << classification_report_line("accuracy", 0, 0, accuracy(), total);
float precision_avg = 0;
float recall_avg = 0;
float precision_wavg = 0;
float recall_wavg = 0;
for (int i = 0; i < num_classes; i++) {
int support = confusion_matrix[i].sum().item<int>();
precision_avg += precision(i);
precision_wavg += precision(i) * support;
recall_avg += recall(i);
recall_wavg += recall(i) * support;
}
precision_wavg /= total;
recall_wavg /= total;
precision_avg /= num_classes;
recall_avg /= num_classes;
oss << classification_report_line("macro avg", precision_avg, recall_avg, f1_macro(), total);
oss << classification_report_line("weighted avg", precision_wavg, recall_wavg, f1_weighted(), total);
return oss.str();
}
json Scores::get_confusion_matrix_json(bool labels_as_keys)
{
json j;
for (int i = 0; i < num_classes; i++) {
auto r_ptr = confusion_matrix[i].data_ptr<int>();
if (labels_as_keys) {
j[labels[i]] = std::vector<int>(r_ptr, r_ptr + num_classes);
} else {
j[i] = std::vector<int>(r_ptr, r_ptr + num_classes);
}
}
return j;
}
}
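
For reference, the metrics implemented above follow the usual one-vs-rest definitions. With confusion matrix C (rows actual, columns predicted), support_i the i-th row sum, n the number of classes and N the total number of samples:

\mathrm{precision}_i = \frac{C_{ii}}{\sum_k C_{ki}}, \quad
\mathrm{recall}_i = \frac{C_{ii}}{\sum_k C_{ik}}, \quad
F1_i = \frac{2\,\mathrm{precision}_i\,\mathrm{recall}_i}{\mathrm{precision}_i + \mathrm{recall}_i}

F1_{\mathrm{macro}} = \frac{1}{n} \sum_{i=1}^{n} F1_i, \qquad
F1_{\mathrm{weighted}} = \frac{1}{N} \sum_{i=1}^{n} \mathrm{support}_i \cdot F1_i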

src/main/Scores.h (new file, 33 lines)

@@ -0,0 +1,33 @@
#ifndef SCORES_H
#define SCORES_H
#include <vector>
#include <string>
#include <torch/torch.h>
#include <nlohmann/json.hpp>
namespace platform {
using json = nlohmann::json;
class Scores {
public:
Scores(torch::Tensor& y_test, torch::Tensor& y_pred, int num_classes, std::vector<std::string> labels = {});
float accuracy();
float f1_score(int num_class);
float f1_weighted();
float f1_macro();
float precision(int num_class);
float recall(int num_class);
torch::Tensor get_confusion_matrix() { return confusion_matrix; }
std::string classification_report();
json get_confusion_matrix_json(bool labels_as_keys = false);
private:
std::string classification_report_line(std::string label, float precision, float recall, float f1_score, int support);
int num_classes;
float accuracy_value;
int total;
std::vector<std::string> labels;
torch::Tensor confusion_matrix; // Rows are actual, columns are predicted
int label_len = 12;
int dlen = 9;
int ndec = 7;
};
}
#endif
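
A minimal usage sketch of the interface declared above (the data is illustrative; it assumes libtorch and nlohmann::json are available, as the header already requires):

#include <iostream>
#include <vector>
#include <torch/torch.h>
#include "Scores.h"

int main() {
    // Ground truth and predictions as int32 tensors, as the constructor expects
    auto y_test = torch::tensor(std::vector<int>{ 0, 1, 1, 0, 1 }, torch::kInt32);
    auto y_pred = torch::tensor(std::vector<int>{ 0, 1, 0, 0, 1 }, torch::kInt32);
    platform::Scores scores(y_test, y_pred, 2, { "neg", "pos" });
    std::cout << scores.classification_report();
    // Confusion matrix keyed by the class labels
    std::cout << scores.get_confusion_matrix_json(true).dump(2) << std::endl;
}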


@@ -11,7 +11,11 @@ if(ENABLE_TESTING)
 ${PyClassifiers_INCLUDE_DIRS}
 ${Bayesnet_INCLUDE_DIRS}
 )
-set(TEST_SOURCES_PLATFORM TestUtils.cpp TestPlatform.cpp TestResult.cpp ${Platform_SOURCE_DIR}/src/common/Datasets.cpp ${Platform_SOURCE_DIR}/src/common/Dataset.cpp)
+set(TEST_SOURCES_PLATFORM
+TestUtils.cpp TestPlatform.cpp TestResult.cpp TestScores.cpp
+${Platform_SOURCE_DIR}/src/common/Datasets.cpp ${Platform_SOURCE_DIR}/src/common/Dataset.cpp
+${Platform_SOURCE_DIR}/src/main/Scores.cpp
+)
 add_executable(${TEST_PLATFORM} ${TEST_SOURCES_PLATFORM})
 target_link_libraries(${TEST_PLATFORM} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain BayesNet)
 add_test(NAME ${TEST_PLATFORM} COMMAND ${TEST_PLATFORM})
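
With this wired into the test target, the new cases run through the usual flow; a sketch, assuming a configured build directory named build (Catch2 binaries also accept a tag filter such as "[Scores]" when run directly):

cmake --build build
cd build && ctest --output-on-failure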

tests/TestScores.cpp (new file, 150 lines)

@@ -0,0 +1,150 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <vector>
#include <string>
#include "TestUtils.h"
#include "results/Result.h"
#include "common/DotEnv.h"
#include "common/Datasets.h"
#include "common/Paths.h"
#include "main/Scores.h"
#include "config.h"
auto epsilon = 1e-4;
void make_test_bin(int TP, int TN, int FP, int FN, std::vector<int>& y_test, std::vector<int>& y_pred)
{
// TP
for (int i = 0; i < TP; i++) {
y_test.push_back(1);
y_pred.push_back(1);
}
// TN
for (int i = 0; i < TN; i++) {
y_test.push_back(0);
y_pred.push_back(0);
}
// FP
for (int i = 0; i < FP; i++) {
y_test.push_back(0);
y_pred.push_back(1);
}
// FN
for (int i = 0; i < FN; i++) {
y_test.push_back(1);
y_pred.push_back(0);
}
}
TEST_CASE("TestScores binary", "[Scores]")
{
std::vector<int> y_test;
std::vector<int> y_pred;
make_test_bin(197, 210, 52, 41, y_test, y_pred);
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
platform::Scores scores(y_test_tensor, y_pred_tensor, 2);
REQUIRE(scores.accuracy() == Catch::Approx(0.814).epsilon(epsilon));
REQUIRE(scores.f1_score(0) == Catch::Approx(0.818713));
REQUIRE(scores.f1_score(1) == Catch::Approx(0.809035));
REQUIRE(scores.precision(0) == Catch::Approx(0.836653));
REQUIRE(scores.precision(1) == Catch::Approx(0.791165));
REQUIRE(scores.recall(0) == Catch::Approx(0.801527));
REQUIRE(scores.recall(1) == Catch::Approx(0.827731));
REQUIRE(scores.f1_weighted() == Catch::Approx(0.814106));
REQUIRE(scores.f1_macro() == Catch::Approx(0.813874));
auto confusion_matrix = scores.get_confusion_matrix();
REQUIRE(confusion_matrix[0][0].item<int>() == 210);
REQUIRE(confusion_matrix[0][1].item<int>() == 52);
REQUIRE(confusion_matrix[1][0].item<int>() == 41);
REQUIRE(confusion_matrix[1][1].item<int>() == 197);
}
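
A quick sanity check of the expected values, with TP=197, TN=210, FP=52, FN=41 (N=500) and class 1 as positive:

accuracy     = (197 + 210) / 500 = 0.814
precision(1) = 197 / (197 + 52) = 0.791165    recall(1) = 197 / (197 + 41) = 0.827731
precision(0) = 210 / (210 + 41) = 0.836653    recall(0) = 210 / (210 + 52) = 0.801527
f1_macro     = (0.818713 + 0.809035) / 2 = 0.813874
f1_weighted  = (262 * 0.818713 + 238 * 0.809035) / 500 = 0.814106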
TEST_CASE("TestScores multiclass", "[Scores]")
{
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
platform::Scores scores(y_test_tensor, y_pred_tensor, 3);
REQUIRE(scores.accuracy() == Catch::Approx(0.6).epsilon(epsilon));
REQUIRE(scores.f1_score(0) == Catch::Approx(0.666667));
REQUIRE(scores.f1_score(1) == Catch::Approx(0.4));
REQUIRE(scores.f1_score(2) == Catch::Approx(0.666667));
REQUIRE(scores.precision(0) == Catch::Approx(0.666667));
REQUIRE(scores.precision(1) == Catch::Approx(0.25));
REQUIRE(scores.precision(2) == Catch::Approx(1.0));
REQUIRE(scores.recall(0) == Catch::Approx(0.666667));
REQUIRE(scores.recall(1) == Catch::Approx(1.0));
REQUIRE(scores.recall(2) == Catch::Approx(0.5));
REQUIRE(scores.f1_weighted() == Catch::Approx(0.64));
REQUIRE(scores.f1_macro() == Catch::Approx(0.577778));
}
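
These expectations follow from the confusion matrix verified in the next test case (rows actual, columns predicted), with class supports 3, 1 and 6:

f1_weighted = (3 * 0.666667 + 1 * 0.4 + 6 * 0.666667) / 10 = 0.64
f1_macro    = (0.666667 + 0.4 + 0.666667) / 3 = 0.577778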
TEST_CASE("Test Confusion Matrix Values", "[Scores]")
{
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
platform::Scores scores(y_test_tensor, y_pred_tensor, 3);
auto confusion_matrix = scores.get_confusion_matrix();
REQUIRE(confusion_matrix[0][0].item<int>() == 2);
REQUIRE(confusion_matrix[0][1].item<int>() == 1);
REQUIRE(confusion_matrix[0][2].item<int>() == 0);
REQUIRE(confusion_matrix[1][0].item<int>() == 0);
REQUIRE(confusion_matrix[1][1].item<int>() == 1);
REQUIRE(confusion_matrix[1][2].item<int>() == 0);
REQUIRE(confusion_matrix[2][0].item<int>() == 1);
REQUIRE(confusion_matrix[2][1].item<int>() == 2);
REQUIRE(confusion_matrix[2][2].item<int>() == 3);
}
TEST_CASE("Confusion Matrix JSON", "[Scores]")
{
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
std::vector<std::string> labels = { "Aeroplane", "Boat", "Car" };
platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
auto res_json_int = scores.get_confusion_matrix_json();
REQUIRE(res_json_int[0][0] == 2);
REQUIRE(res_json_int[0][1] == 1);
REQUIRE(res_json_int[0][2] == 0);
REQUIRE(res_json_int[1][0] == 0);
REQUIRE(res_json_int[1][1] == 1);
REQUIRE(res_json_int[1][2] == 0);
REQUIRE(res_json_int[2][0] == 1);
REQUIRE(res_json_int[2][1] == 2);
REQUIRE(res_json_int[2][2] == 3);
auto res_json_str = scores.get_confusion_matrix_json(true);
REQUIRE(res_json_str["Aeroplane"][0] == 2);
REQUIRE(res_json_str["Aeroplane"][1] == 1);
REQUIRE(res_json_str["Aeroplane"][2] == 0);
REQUIRE(res_json_str["Boat"][0] == 0);
REQUIRE(res_json_str["Boat"][1] == 1);
REQUIRE(res_json_str["Boat"][2] == 0);
REQUIRE(res_json_str["Car"][0] == 1);
REQUIRE(res_json_str["Car"][1] == 2);
REQUIRE(res_json_str["Car"][2] == 3);
}
TEST_CASE("Classification Report", "[Scores]")
{
std::vector<int> y_test = { 0, 2, 2, 2, 2, 0, 1, 2, 0, 2 };
std::vector<int> y_pred = { 0, 1, 2, 2, 1, 1, 1, 0, 0, 2 };
auto y_test_tensor = torch::tensor(y_test, torch::kInt32);
auto y_pred_tensor = torch::tensor(y_pred, torch::kInt32);
std::vector<std::string> labels = { "Aeroplane", "Boat", "Car" };
platform::Scores scores(y_test_tensor, y_pred_tensor, 3, labels);
std::string expected = R"(Classification Report
=====================
precision recall f1-score support
========= ========= ========= =========
Aeroplane 0.6666667 0.6666667 0.6666667 3
Boat 0.2500000 1.0000000 0.4000000 1
Car 1.0000000 0.5000000 0.6666667 6
accuracy 0.6000000 10
macro avg 0.6388889 0.7222223 0.5777778 10
weighted avg 0.8250000 0.6000000 0.6400000 10
)";
REQUIRE(scores.classification_report() == expected);
}