Add AUC computing in Experiment and store in result
This commit is contained in:
2
lib/json
2
lib/json
Submodule lib/json updated: 8c391e04fe...960b763ecd
@@ -47,7 +47,7 @@ add_executable(b_list commands/b_list.cpp
|
|||||||
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
target_link_libraries(b_list "${PyClassifiers}" "${BayesNet}" mdlp ${Python3_LIBRARIES} "${TORCH_LIBRARIES}" ${LIBTORCH_PYTHON} Boost::python Boost::numpy "${XLSXWRITER_LIB}")
|
||||||
|
|
||||||
# b_main
|
# b_main
|
||||||
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp)
|
set(main_sources Experiment.cpp Models.cpp HyperParameters.cpp Scores.cpp RocAuc.cpp)
|
||||||
list(TRANSFORM main_sources PREPEND main/)
|
list(TRANSFORM main_sources PREPEND main/)
|
||||||
add_executable(b_main commands/b_main.cpp ${main_sources}
|
add_executable(b_main commands/b_main.cpp ${main_sources}
|
||||||
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
|
common/Datasets.cpp common/Dataset.cpp common/Discretization.cpp
|
||||||
|
@@ -3,6 +3,7 @@
|
|||||||
#include "common/Paths.h"
|
#include "common/Paths.h"
|
||||||
#include "Models.h"
|
#include "Models.h"
|
||||||
#include "Scores.h"
|
#include "Scores.h"
|
||||||
|
#include "RocAuc.h"
|
||||||
#include "Experiment.h"
|
#include "Experiment.h"
|
||||||
namespace platform {
|
namespace platform {
|
||||||
using json = nlohmann::ordered_json;
|
using json = nlohmann::ordered_json;
|
||||||
@@ -160,6 +161,8 @@ namespace platform {
|
|||||||
int nResults = nfolds * static_cast<int>(randomSeeds.size());
|
int nResults = nfolds * static_cast<int>(randomSeeds.size());
|
||||||
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
|
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
|
||||||
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
|
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
|
||||||
|
auto auc_test = torch::zeros({ nResults }, torch::kFloat64);
|
||||||
|
auto auc_train = torch::zeros({ nResults }, torch::kFloat64);
|
||||||
auto train_time = torch::zeros({ nResults }, torch::kFloat64);
|
auto train_time = torch::zeros({ nResults }, torch::kFloat64);
|
||||||
auto test_time = torch::zeros({ nResults }, torch::kFloat64);
|
auto test_time = torch::zeros({ nResults }, torch::kFloat64);
|
||||||
auto nodes = torch::zeros({ nResults }, torch::kFloat64);
|
auto nodes = torch::zeros({ nResults }, torch::kFloat64);
|
||||||
@@ -228,10 +231,13 @@ namespace platform {
|
|||||||
//
|
//
|
||||||
// Score train
|
// Score train
|
||||||
//
|
//
|
||||||
|
double auc_train_value = 0;
|
||||||
if (!no_train_score) {
|
if (!no_train_score) {
|
||||||
auto y_predict = clf->predict(X_train);
|
auto roc_auc = RocAuc();
|
||||||
Scores scores(y_train, y_predict, num_classes, labels);
|
auto y_proba_train = clf->predict_proba(X_train);
|
||||||
|
Scores scores(y_train, y_proba_train, num_classes, labels);
|
||||||
accuracy_train_value = scores.accuracy();
|
accuracy_train_value = scores.accuracy();
|
||||||
|
auc_train_value = roc_auc.compute(y_proba_train, y_train);
|
||||||
confusion_matrices_train.push_back(scores.get_confusion_matrix_json(true));
|
confusion_matrices_train.push_back(scores.get_confusion_matrix_json(true));
|
||||||
}
|
}
|
||||||
//
|
//
|
||||||
@@ -240,10 +246,15 @@ namespace platform {
|
|||||||
if (!quiet)
|
if (!quiet)
|
||||||
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
|
showProgress(nfold + 1, getColor(clf->getStatus()), "c");
|
||||||
test_timer.start();
|
test_timer.start();
|
||||||
auto y_predict = clf->predict(X_test);
|
// auto y_predict = clf->predict(X_test);
|
||||||
Scores scores(y_test, y_predict, num_classes, labels);
|
auto y_proba_test = clf->predict_proba(X_test);
|
||||||
|
Scores scores(y_test, y_proba_test, num_classes, labels);
|
||||||
auto accuracy_test_value = scores.accuracy();
|
auto accuracy_test_value = scores.accuracy();
|
||||||
|
auto roc_auc = RocAuc();
|
||||||
|
double auc_test_value = roc_auc.compute(y_proba_test, y_test);
|
||||||
test_time[item] = test_timer.getDuration();
|
test_time[item] = test_timer.getDuration();
|
||||||
|
auc_train[item] = auc_train_value;
|
||||||
|
auc_test[item] = auc_test_value;
|
||||||
accuracy_train[item] = accuracy_train_value;
|
accuracy_train[item] = accuracy_train_value;
|
||||||
accuracy_test[item] = accuracy_test_value;
|
accuracy_test[item] = accuracy_test_value;
|
||||||
confusion_matrices.push_back(scores.get_confusion_matrix_json(true));
|
confusion_matrices.push_back(scores.get_confusion_matrix_json(true));
|
||||||
@@ -252,6 +263,8 @@ namespace platform {
|
|||||||
//
|
//
|
||||||
// Store results and times in std::vector
|
// Store results and times in std::vector
|
||||||
//
|
//
|
||||||
|
partial_result.addAucTrain(auc_train_value);
|
||||||
|
partial_result.addAucTest(auc_test_value);
|
||||||
partial_result.addScoreTrain(accuracy_train_value);
|
partial_result.addScoreTrain(accuracy_train_value);
|
||||||
partial_result.addScoreTest(accuracy_test_value);
|
partial_result.addScoreTest(accuracy_test_value);
|
||||||
partial_result.addTimeTrain(train_time[item].item<double>());
|
partial_result.addTimeTrain(train_time[item].item<double>());
|
||||||
@@ -275,6 +288,8 @@ namespace platform {
|
|||||||
partial_result.setGraph(graphs);
|
partial_result.setGraph(graphs);
|
||||||
partial_result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
|
partial_result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
|
||||||
partial_result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
|
partial_result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
|
||||||
|
partial_result.setAucTest(torch::mean(auc_test).item<double>()).setAucTrain(torch::mean(auc_train).item<double>());
|
||||||
|
partial_result.setAucTestStd(torch::std(auc_test).item<double>()).setAucTrainStd(torch::std(auc_train).item<double>());
|
||||||
partial_result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
|
partial_result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
|
||||||
partial_result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
|
partial_result.setTestTimeStd(torch::std(test_time).item<double>()).setTrainTimeStd(torch::std(train_time).item<double>());
|
||||||
partial_result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
|
partial_result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
|
||||||
|
@@ -44,6 +44,10 @@ namespace platform {
|
|||||||
PartialResult& setScoreTrainStd(double score_std) { data["score_train_std"] = score_std; return *this; }
|
PartialResult& setScoreTrainStd(double score_std) { data["score_train_std"] = score_std; return *this; }
|
||||||
PartialResult& setScoreTest(double score) { data["score"] = score; return *this; }
|
PartialResult& setScoreTest(double score) { data["score"] = score; return *this; }
|
||||||
PartialResult& setScoreTestStd(double score_std) { data["score_std"] = score_std; return *this; }
|
PartialResult& setScoreTestStd(double score_std) { data["score_std"] = score_std; return *this; }
|
||||||
|
PartialResult& setAucTrain(double score) { data["auc_train"] = score; return *this; }
|
||||||
|
PartialResult& setAucTrainStd(double score_std) { data["auc_train_std"] = score_std; return *this; }
|
||||||
|
PartialResult& setAucTest(double score) { data["auc"] = score; return *this; }
|
||||||
|
PartialResult& setAucTestStd(double score_std) { data["auc_std"] = score_std; return *this; }
|
||||||
PartialResult& setTrainTime(double train_time)
|
PartialResult& setTrainTime(double train_time)
|
||||||
{
|
{
|
||||||
data["train_time"] = train_time;
|
data["train_time"] = train_time;
|
||||||
@@ -71,6 +75,8 @@ namespace platform {
|
|||||||
PartialResult& setNodes(float nodes) { data["nodes"] = nodes; return *this; }
|
PartialResult& setNodes(float nodes) { data["nodes"] = nodes; return *this; }
|
||||||
PartialResult& setLeaves(float leaves) { data["leaves"] = leaves; return *this; }
|
PartialResult& setLeaves(float leaves) { data["leaves"] = leaves; return *this; }
|
||||||
PartialResult& setDepth(float depth) { data["depth"] = depth; return *this; }
|
PartialResult& setDepth(float depth) { data["depth"] = depth; return *this; }
|
||||||
|
PartialResult& addAucTrain(double score) { data["aucs_train"].push_back(score); return *this; }
|
||||||
|
PartialResult& addAucTest(double score) { data["aucs_test"].push_back(score); return *this; }
|
||||||
PartialResult& addScoreTrain(double score) { data["scores_train"].push_back(score); return *this; }
|
PartialResult& addScoreTrain(double score) { data["scores_train"].push_back(score); return *this; }
|
||||||
PartialResult& addScoreTest(double score) { data["scores_test"].push_back(score); return *this; }
|
PartialResult& addScoreTest(double score) { data["scores_test"].push_back(score); return *this; }
|
||||||
PartialResult& addTimeTrain(double time) { data["times_train"].push_back(time); return *this; }
|
PartialResult& addTimeTrain(double time) { data["times_train"].push_back(time); return *this; }
|
||||||
|
84
src/main/RocAuc.cpp
Normal file
84
src/main/RocAuc.cpp
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
#include <sstream>
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <numeric>
|
||||||
|
#include <utility>
|
||||||
|
#include "common/Colors.h"
|
||||||
|
#include "RocAuc.h"
|
||||||
|
namespace platform {
|
||||||
|
/// Copy every element of an int32 tensor into a std::vector<int>.
///
/// @param tensor tensor whose dtype must be torch::kInt32.
/// @return the tensor's numel() elements, in the order they are laid out after contiguous().
/// @throws std::runtime_error when the dtype is anything other than kInt32.
std::vector<int> tensorToVector(const torch::Tensor& tensor)
{
    // Reject anything that cannot be read back as int32_t below.
    if (tensor.dtype() != torch::kInt32) {
        throw std::runtime_error("Tensor must be of type kInt32");
    }

    // The elements are walked linearly through a raw pointer, so go through contiguous() first.
    torch::Tensor flat = tensor.contiguous();
    const int32_t* first = flat.data_ptr<int32_t>();
    const int32_t* last = first + flat.numel();

    return std::vector<int>(first, last);
}
|
||||||
|
/// One-vs-rest ROC AUC averaged over the classes, for tensor inputs.
///
/// @param y_proba scores indexed as y_proba[sample][class]; size(0) is taken as the number of
///                samples and size(1) as the number of classes. Each score is read with
///                item<float>().
/// @param labels  int32 tensor holding one class index per sample (converted with tensorToVector,
///                which throws std::runtime_error for any other dtype).
/// @return the arithmetic mean of the per-class values returned by compute_common.
/// @throws std::runtime_error when labels does not hold exactly one element per sample.
double RocAuc::compute(const torch::Tensor& y_proba, const torch::Tensor& labels)
{
    const size_t nClasses = y_proba.size(1);
    const size_t nSamples = y_proba.size(0);

    // Validate against the incoming labels. The y_test member cannot be used here: at this point
    // it still holds whatever the previous call stored. A real runtime check (instead of an
    // assert) also keeps release builds from indexing past the end of y_test below.
    if (static_cast<size_t>(labels.numel()) != nSamples) {
        throw std::runtime_error("RocAuc::compute: y_proba and labels must hold the same number of samples");
    }
    y_test = tensorToVector(labels);

    std::vector<double> aucScores(nClasses, 0.0);
    for (size_t classIdx = 0; classIdx < nClasses; ++classIdx) {
        // Compare labels as int to avoid a signed/unsigned comparison.
        const int positiveLabel = static_cast<int>(classIdx);
        scoresAndLabels.clear();
        scoresAndLabels.reserve(nSamples);
        for (size_t i = 0; i < nSamples; ++i) {
            // 1 marks a sample of the current class, 0 a sample of any other class.
            scoresAndLabels.emplace_back(y_proba[i][classIdx].item<float>(), y_test[i] == positiveLabel ? 1 : 0);
        }
        aucScores[classIdx] = compute_common(nSamples, classIdx);
    }
    return std::accumulate(aucScores.begin(), aucScores.end(), 0.0) / nClasses;
}
|
||||||
|
double RocAuc::compute(const std::vector<std::vector<double>>& y_proba, const std::vector<int>& labels)
|
||||||
|
{
|
||||||
|
y_test = labels;
|
||||||
|
size_t nClasses = y_proba[0].size();
|
||||||
|
size_t nSamples = y_proba.size();
|
||||||
|
std::vector<double> aucScores(nClasses, 0.0);
|
||||||
|
for (size_t classIdx = 0; classIdx < nClasses; ++classIdx) {
|
||||||
|
scoresAndLabels.clear();
|
||||||
|
for (size_t i = 0; i < nSamples; ++i) {
|
||||||
|
scoresAndLabels.emplace_back(y_proba[i][classIdx], labels[i] == classIdx ? 1 : 0);
|
||||||
|
}
|
||||||
|
aucScores[classIdx] = compute_common(nSamples, classIdx);
|
||||||
|
}
|
||||||
|
return std::accumulate(aucScores.begin(), aucScores.end(), 0.0) / nClasses;
|
||||||
|
}
|
||||||
|
double RocAuc::compute_common(size_t nSamples, size_t classIdx)
|
||||||
|
{
|
||||||
|
std::sort(scoresAndLabels.begin(), scoresAndLabels.end(), std::greater<>());
|
||||||
|
std::vector<double> tpr, fpr;
|
||||||
|
double tp = 0, fp = 0;
|
||||||
|
double totalPos = std::count(y_test.begin(), y_test.end(), classIdx);
|
||||||
|
double totalNeg = nSamples - totalPos;
|
||||||
|
|
||||||
|
for (const auto& [score, label] : scoresAndLabels) {
|
||||||
|
if (label == 1) {
|
||||||
|
tp += 1;
|
||||||
|
} else {
|
||||||
|
fp += 1;
|
||||||
|
}
|
||||||
|
tpr.push_back(tp / totalPos);
|
||||||
|
fpr.push_back(fp / totalNeg);
|
||||||
|
}
|
||||||
|
double auc = 0.0;
|
||||||
|
for (size_t i = 1; i < tpr.size(); ++i) {
|
||||||
|
auc += 0.5 * (fpr[i] - fpr[i - 1]) * (tpr[i] + tpr[i - 1]);
|
||||||
|
}
|
||||||
|
return auc;
|
||||||
|
}
|
||||||
|
}
|
21
src/main/RocAuc.h
Normal file
21
src/main/RocAuc.h
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#ifndef ROCAUC_H
|
||||||
|
#define ROCAUC_H
|
||||||
|
#include <torch/torch.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <nlohmann/json.hpp>
|
||||||
|
|
||||||
|
namespace platform {
|
||||||
|
using json = nlohmann::ordered_json;
|
||||||
|
class RocAuc {
|
||||||
|
public:
|
||||||
|
RocAuc() = default;
|
||||||
|
double compute(const std::vector<std::vector<double>>& y_proba, const std::vector<int>& y_test);
|
||||||
|
double compute(const torch::Tensor& y_proba, const torch::Tensor& y_test);
|
||||||
|
private:
|
||||||
|
double compute_common(size_t nSamples, size_t classIdx);
|
||||||
|
std::vector<std::pair<double, int>> scoresAndLabels;
|
||||||
|
std::vector<int> y_test;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
@@ -2,12 +2,13 @@
|
|||||||
#include "Scores.h"
|
#include "Scores.h"
|
||||||
#include "common/Colors.h"
|
#include "common/Colors.h"
|
||||||
namespace platform {
|
namespace platform {
|
||||||
Scores::Scores(torch::Tensor& y_test, torch::Tensor& y_pred, int num_classes, std::vector<std::string> labels) : num_classes(num_classes), labels(labels)
|
Scores::Scores(torch::Tensor& y_test, torch::Tensor& y_proba, int num_classes, std::vector<std::string> labels) : num_classes(num_classes), labels(labels)
|
||||||
{
|
{
|
||||||
if (labels.size() == 0) {
|
if (labels.size() == 0) {
|
||||||
init_default_labels();
|
init_default_labels();
|
||||||
}
|
}
|
||||||
total = y_test.size(0);
|
total = y_test.size(0);
|
||||||
|
auto y_pred = y_proba.argmax(1);
|
||||||
accuracy_value = (y_pred == y_test).sum().item<float>() / total;
|
accuracy_value = (y_pred == y_test).sum().item<float>() / total;
|
||||||
init_confusion_matrix();
|
init_confusion_matrix();
|
||||||
for (int i = 0; i < total; i++) {
|
for (int i = 0; i < total; i++) {
|
||||||
|
Reference in New Issue
Block a user