Complete predict & predict_proba with voting & probabilities

This commit is contained in:
Ricardo Montañana Gómez 2024-02-23 23:11:14 +01:00
parent 52abd2d670
commit 8477698d8d
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
5 changed files with 161 additions and 283 deletions

View File

@ -51,32 +51,6 @@ namespace bayesnet {
result /= sum;
return result;
}
/**
 * Weighted vote aggregation over the class predicted by every model.
 *
 * votes is indexed as votes[sample][model]: each row holds, for one sample, the
 * class value predicted by each of the n_models models. For every sample the
 * significance of each model is added to the class that model voted for, and the
 * per-class totals are divided by the sum of all significances.
 *
 * @param votes one row per sample, with at least n_models class values per row
 * @return one row per sample holding states.at(className).size() vote shares
 * @throws std::out_of_range (from the checked .at() accesses) when a row has fewer
 *         than n_models entries or a vote is not a valid class index; the
 *         previous unchecked indexing was undefined behaviour in those cases
 *
 * This routine writes nothing to stdout: the banner and per-sample tallies that used
 * to be printed were debugging traces.
 */
std::vector<std::vector<double>> Ensemble::voting(std::vector<std::vector<int>>& votes)
{
    const auto numClasses = states.at(className).size();
    // Total significance; if it is zero the shares below are not finite (unchanged behaviour)
    const auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
    std::vector<std::vector<double>> y_pred_final;
    y_pred_final.reserve(votes.size());
    for (const auto& sampleVotes : votes) {
        // n_votes[c] accumulates the significance of every model that predicted class c
        auto& n_votes = y_pred_final.emplace_back(numClasses, 0.0);
        for (unsigned j = 0; j < n_models; ++j) {
            // A negative vote converts to a huge index, so .at() rejects it as well
            n_votes.at(sampleVotes.at(j)) += significanceModels.at(j);
        }
        // Normalise once per class, after accumulating, to keep precision
        for (auto& share : n_votes) {
            share = share / sum;
        }
    }
    return y_pred_final;
}
std::vector<std::vector<double>> Ensemble::predict_proba(std::vector<std::vector<int>>& X)
{
if (!fitted) {
@ -94,7 +68,6 @@ namespace bayesnet {
/**
 * Predict the class of every sample in X.
 *
 * Obtains the per-class values from predict_proba(X) and hands them to
 * compute_arg_max, whose result is returned unchanged.
 *
 * @param X samples, forwarded as-is to predict_proba
 * @return the value produced by compute_arg_max for the predict_proba result
 */
std::vector<int> Ensemble::predict(std::vector<std::vector<int>>& X)
{
    // No diagnostic output here: the former trace line read res[0] unconditionally,
    // which is undefined behaviour whenever predict_proba returns an empty result.
    auto res = predict_proba(X);
    return compute_arg_max(res);
}
torch::Tensor Ensemble::predict(torch::Tensor& X)
@ -151,6 +124,13 @@ namespace bayesnet {
}
return y_pred;
}
std::vector<std::vector<double>> Ensemble::predict_average_voting(std::vector<std::vector<int>>& X)
{
torch::Tensor Xt = bayesnet::vectorToTensor(X, false);
auto y_pred = predict_average_voting(Xt);
std::vector<std::vector<double>> result = tensorToVectorDouble(y_pred);
return result;
}
torch::Tensor Ensemble::predict_average_voting(torch::Tensor& X)
{
// Build a m x n_models tensor with the predictions of each model
@ -169,21 +149,6 @@ namespace bayesnet {
}
return voting(y_pred);
}
// Earlier std::vector overload of predict_average_voting: converts X to a tensor,
// calls the tensor overload, applies voting to its result and copies the outcome
// column by column into nested vectors.
// NOTE(review): the same signature is also defined earlier in this listing; confirm
// which of the two definitions is meant to remain.
std::vector<std::vector<double>> Ensemble::predict_average_voting(std::vector<std::vector<int>>& X)
{
auto Xt = vectorToTensor(X);
auto y_pred = predict_average_voting(Xt);
// NOTE(review): the tensor overload visible in this listing ends with
// `return voting(y_pred);`, so this looks like voting applied a second time — confirm.
auto res = voting(y_pred);
std::vector<std::vector<double>> result;
// One output vector per column of res (res.size(1) of them), each filled with
// res.size(0) values read through data_ptr<double>()
// NOTE(review): presumably assumes the column slice is stored contiguously and
// holds doubles — verify against the tensor that voting returns.
for (int i = 0; i < res.size(1); ++i) {
auto col_tensor = res.index({ "...", i });
auto col = std::vector<double>(col_tensor.data_ptr<double>(), col_tensor.data_ptr<double>() + res.size(0));
result.push_back(col);
}
return result;
//return tensorToVector<double>(res);
}
float Ensemble::score(torch::Tensor& X, torch::Tensor& y)
{
auto y_pred = predict(X);

View File

@ -36,7 +36,6 @@ namespace bayesnet {
torch::Tensor compute_arg_max(torch::Tensor& X);
std::vector<int> compute_arg_max(std::vector<std::vector<double>>& X);
torch::Tensor voting(torch::Tensor& votes);
std::vector<std::vector<double>> voting(std::vector<std::vector<int>>& votes);
unsigned n_models;
std::vector<std::unique_ptr<Classifier>> models;
std::vector<double> significanceModels;

View File

@ -10,28 +10,39 @@ namespace bayesnet {
sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
return indices;
}
// Copies a 2-D tensor into nested std::vectors, one inner vector per tensor column:
// dtensor.size(1) vectors, each filled with dtensor.size(0) values read through data_ptr.
// NOTE(review): the signature, the `result` declaration and the `col` construction
// each appear twice here (a templated form and an int-only form); confirm which
// variant belongs in the final source before building.
// NOTE(review): data_ptr is taken from a column slice; presumably this relies on the
// slice's elements being contiguous and of the requested type — confirm.
template<typename T>
std::vector<std::vector<T>> tensorToVector(torch::Tensor& dtensor)
std::vector<std::vector<int>> tensorToVector(torch::Tensor& dtensor)
{
// convert mxn tensor to nxm std::vector
std::vector<std::vector<T>> result;
std::vector<std::vector<int>> result;
// Iterate over cols
for (int i = 0; i < dtensor.size(1); ++i) {
auto col_tensor = dtensor.index({ "...", i });
auto col = std::vector<T>(col_tensor.data_ptr<T>(), col_tensor.data_ptr<T>() + dtensor.size(0));
auto col = std::vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + dtensor.size(0));
result.push_back(col);
}
return result;
}
torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector)
// Copies a 2-D tensor into nested std::vectors of double with the same layout:
// dtensor.size(0) inner vectors of dtensor.size(1) values each.
// Elements are read through data_ptr<float>() and widened to double on copy.
// NOTE(review): presumably callers always pass a float tensor — confirm.
std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor)
{
// NOTE(review): the next three lines refer to `vector`, which is not a parameter of
// this function; they look like leftovers from vectorToTensor — confirm and drop.
// convert nxm std::vector to mxn tensor
long int m = vector[0].size();
long int n = vector.size();
// convert mxn tensor to mxn std::vector
std::vector<std::vector<double>> result;
// Iterate over rows: i runs over dtensor.size(0) and index({ i, "..." }) selects row i
for (int i = 0; i < dtensor.size(0); ++i) {
auto col_tensor = dtensor.index({ i, "..." });
auto col = std::vector<double>(col_tensor.data_ptr<float>(), col_tensor.data_ptr<float>() + dtensor.size(1));
result.push_back(col);
}
return result;
}
torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector, bool transpose)
{
// convert nxm std::vector to mxn tensor if transpose
long int m = transpose ? vector[0].size() : vector.size();
long int n = transpose ? vector.size() : vector[0].size();
auto tensor = torch::zeros({ m, n }, torch::kInt32);
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
tensor[i][j] = vector[j][i];
tensor[i][j] = transpose ? vector[j][i] : vector[i][j];
}
}
return tensor;

View File

@ -4,8 +4,8 @@
#include <vector>
// Conversion helpers between nested std::vectors and torch tensors.
// NOTE(review): tensorToVector and vectorToTensor are each declared twice below
// (a templated / one-argument form and an int-only / two-argument form); confirm
// which prototypes are meant to remain.
namespace bayesnet {
// Returns indices into nums, sorted so that the referenced values are in descending order
std::vector<int> argsort(std::vector<double>& nums);
// Column-wise copy of a 2-D tensor into nested vectors of T
template<typename T>
std::vector<std::vector<T>> tensorToVector(torch::Tensor& dtensor);
torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector);
// Column-wise copy of a 2-D tensor into nested vectors of int
std::vector<std::vector<int>> tensorToVector(torch::Tensor& dtensor);
// Row-wise copy of a 2-D tensor into nested vectors of double
std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor);
// Builds a kInt32 tensor from nested vectors; with transpose (the default) element
// [i][j] of the tensor is vector[j][i], otherwise it is vector[i][j]
torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector, bool transpose = true);
}
#endif //BAYESNET_UTILS_H

View File

@ -21,104 +21,104 @@ TEST_CASE("Library check version", "[BayesNet]")
auto clf = bayesnet::KDB(2);
REQUIRE(clf.getVersion() == "1.0.2");
}
// TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
// {
// map <pair<std::string, std::string>, float> scores = {
// // Diabetes
// {{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
// {{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f},
// // Ecoli
// {{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
// {{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f},
// // Glass
// {{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
// {{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f},
// // Iris
// {{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
// {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f}
// };
// For each dataset produced by GENERATE, fits every classifier on the dataset and
// checks the score obtained on that same data against the reference value stored in
// `scores`, within raw.epsilon.
// NOTE(review): the body still carries a commented-out copy of the same statements
// next to the live ones; confirm it can be deleted.
TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
{
// Reference scores keyed by {dataset, classifier}
map <pair<std::string, std::string>, float> scores = {
// Diabetes
{{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
{{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f},
// Ecoli
{{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
{{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f},
// Glass
{{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
{{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f},
// Iris
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
{{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f}
};
// std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
// auto raw = RawDatasets(file_name, false);
// The test case body is executed once per dataset name listed in GENERATE
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
auto raw = RawDatasets(file_name, false);
// SECTION("Test TAN classifier (" + file_name + ")")
// {
// auto clf = bayesnet::TAN();
// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
// auto score = clf.score(raw.Xv, raw.yv);
// //scores[{file_name, "TAN"}] = score;
// REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon));
// }
// SECTION("Test TANLd classifier (" + file_name + ")")
// {
// auto clf = bayesnet::TANLd();
// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// auto score = clf.score(raw.Xt, raw.yt);
// //scores[{file_name, "TANLd"}] = score;
// REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon));
// }
// SECTION("Test KDB classifier (" + file_name + ")")
// {
// auto clf = bayesnet::KDB(2);
// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
// auto score = clf.score(raw.Xv, raw.yv);
// //scores[{file_name, "KDB"}] = score;
// REQUIRE(score == Catch::Approx(scores[{file_name, "KDB"
// }]).epsilon(raw.epsilon));
// }
// SECTION("Test KDBLd classifier (" + file_name + ")")
// {
// auto clf = bayesnet::KDBLd(2);
// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// auto score = clf.score(raw.Xt, raw.yt);
// //scores[{file_name, "KDBLd"}] = score;
// REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd"
// }]).epsilon(raw.epsilon));
// }
// SECTION("Test SPODE classifier (" + file_name + ")")
// {
// auto clf = bayesnet::SPODE(1);
// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
// auto score = clf.score(raw.Xv, raw.yv);
// // scores[{file_name, "SPODE"}] = score;
// REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon));
// }
// SECTION("Test SPODELd classifier (" + file_name + ")")
// {
// auto clf = bayesnet::SPODELd(1);
// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// auto score = clf.score(raw.Xt, raw.yt);
// // scores[{file_name, "SPODELd"}] = score;
// REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(raw.epsilon));
// }
// SECTION("Test AODE classifier (" + file_name + ")")
// {
// auto clf = bayesnet::AODE();
// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
// auto score = clf.score(raw.Xv, raw.yv);
// // scores[{file_name, "AODE"}] = score;
// REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon));
// }
// SECTION("Test AODELd classifier (" + file_name + ")")
// {
// auto clf = bayesnet::AODELd();
// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
// auto score = clf.score(raw.Xt, raw.yt);
// // scores[{file_name, "AODELd"}] = score;
// REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon));
// }
// SECTION("Test BoostAODE classifier (" + file_name + ")")
// {
// auto clf = bayesnet::BoostAODE();
// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
// auto score = clf.score(raw.Xv, raw.yv);
// // scores[{file_name, "BoostAODE"}] = score;
// REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon));
// }
// // for (auto scores : scores) {
// // std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, ";
// // }
// }
// Classifiers without the Ld suffix are fitted and scored with the raw.*v members
SECTION("Test TAN classifier (" + file_name + ")")
{
auto clf = bayesnet::TAN();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
//scores[{file_name, "TAN"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon));
}
// The Ld classifiers are fitted and scored with the raw.*t members instead
SECTION("Test TANLd classifier (" + file_name + ")")
{
auto clf = bayesnet::TANLd();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
//scores[{file_name, "TANLd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon));
}
SECTION("Test KDB classifier (" + file_name + ")")
{
auto clf = bayesnet::KDB(2);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
//scores[{file_name, "KDB"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDB"
}]).epsilon(raw.epsilon));
}
SECTION("Test KDBLd classifier (" + file_name + ")")
{
auto clf = bayesnet::KDBLd(2);
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
//scores[{file_name, "KDBLd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd"
}]).epsilon(raw.epsilon));
}
SECTION("Test SPODE classifier (" + file_name + ")")
{
auto clf = bayesnet::SPODE(1);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
// scores[{file_name, "SPODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon));
}
SECTION("Test SPODELd classifier (" + file_name + ")")
{
auto clf = bayesnet::SPODELd(1);
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
// scores[{file_name, "SPODELd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(raw.epsilon));
}
SECTION("Test AODE classifier (" + file_name + ")")
{
auto clf = bayesnet::AODE();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
// scores[{file_name, "AODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon));
}
SECTION("Test AODELd classifier (" + file_name + ")")
{
auto clf = bayesnet::AODELd();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
// scores[{file_name, "AODELd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon));
}
SECTION("Test BoostAODE classifier (" + file_name + ")")
{
// BoostAODE is constructed with `true` here; the "Model predict_proba" test case
// labels that configuration "BoostAODEvoting"
auto clf = bayesnet::BoostAODE(true);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
// scores[{file_name, "BoostAODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon));
}
// Disabled helper that prints the scores map as initializer-list entries
// for (auto scores : scores) {
// std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, ";
// }
}
TEST_CASE("Models features", "[BayesNet]")
{
auto graph = std::vector<std::string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
@ -158,35 +158,31 @@ TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]")
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
// TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]")
// {
// auto raw = RawDatasets("diabetes", true);
// auto clf = bayesnet::BoostAODE();
// clf.setHyperparameters({
// {"ascending",true},
// {"convergence", true},
// {"repeatSparent",true},
// {"select_features","CFS"},
// });
// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
// REQUIRE(clf.getNumberOfNodes() == 72);
// REQUIRE(clf.getNumberOfEdges() == 120);
// REQUIRE(clf.getNotes().size() == 3);
// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
// REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8");
// REQUIRE(clf.getNotes()[2] == "Number of models: 8");
// auto score = clf.score(raw.Xv, raw.yv);
// auto scoret = clf.score(raw.Xt, raw.yt);
// REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon));
// REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon));
// }
// Fits BoostAODE(true) on the diabetes dataset with CFS feature selection and checks
// the resulting node/edge counts, the three notes the classifier reports, and that the
// vector-based and tensor-based scores both match the reference value.
TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]")
{
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostAODE(true);
clf.setHyperparameters({
{"ascending",true},
{"convergence", true},
{"repeatSparent",true},
{"select_features","CFS"},
});
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
// Expected structure of the fitted ensemble
REQUIRE(clf.getNumberOfNodes() == 72);
REQUIRE(clf.getNumberOfEdges() == 120);
// Expected notes, in the order they are reported
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8");
REQUIRE(clf.getNotes()[2] == "Number of models: 8");
// The same fitted model is scored through the vector (Xv/yv) and tensor (Xt/yt) inputs
auto score = clf.score(raw.Xv, raw.yv);
auto scoret = clf.score(raw.Xt, raw.yt);
REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon));
REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon));
}
TEST_CASE("Model predict_proba", "[BayesNet]")
{
// std::string model = GENERATE("TAN", "SPODE", "BoostAODEprobabilities", "BoostAODEvoting");
std::string model = GENERATE("TAN", "SPODE");
std::cout << string(100, '*') << std::endl;
std::cout << "************************************* CHANGE MODEL GENERATE ****************************************" << std::endl;
std::cout << string(100, '*') << std::endl;
std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting");
auto res_prob_tan = std::vector<std::vector<double>>({
{ 0.00375671, 0.994457, 0.00178621 },
{ 0.00137462, 0.992734, 0.00589123 },
@ -220,7 +216,18 @@ TEST_CASE("Model predict_proba", "[BayesNet]")
{0.0204803, 0.844276, 0.135244},
{0.00576313, 0.961665, 0.0325716},
});
std::map<std::string, std::vector<std::vector<double>>> res_prob = { {"TAN", res_prob_tan}, {"SPODE", res_prob_spode} , {"BoostAODEproba", res_prob_baode }, {"BoostAODEvoting", res_prob_baode } };
auto res_prob_voting = std::vector<std::vector<double>>({
{0, 1, 0},
{0, 1, 0},
{0, 1, 0},
{0, 1, 0},
{0, 1, 0},
{0, 0.447909, 0.552091},
{0, 0.811482, 0.188517},
{0, 1, 0},
{0, 1, 0}
});
std::map<std::string, std::vector<std::vector<double>>> res_prob = { {"TAN", res_prob_tan}, {"SPODE", res_prob_spode} , {"BoostAODEproba", res_prob_baode }, {"BoostAODEvoting", res_prob_voting } };
std::map<std::string, bayesnet::BaseClassifier*> models = { {"TAN", new bayesnet::TAN()}, {"SPODE", new bayesnet::SPODE(0)}, {"BoostAODEproba", new bayesnet::BoostAODE(false)}, {"BoostAODEvoting", new bayesnet::BoostAODE(true)} };
int init_index = 78;
auto raw = RawDatasets("iris", true);
@ -257,107 +264,3 @@ TEST_CASE("Model predict_proba", "[BayesNet]")
delete clf;
}
}
// Fits BoostAODE(false) on iris and checks that predict and predict_proba agree with
// each other for vector and tensor inputs, and that the probabilities of the nine
// samples starting at init_index match the reference table.
TEST_CASE("BoostAODE predict_proba proba", "[BayesNet]")
{
// Reference probabilities for samples init_index .. init_index + 8
auto res_prob = std::vector<std::vector<double>>({
{0.00803291, 0.9676, 0.0243672},
{0.00398714, 0.945126, 0.050887},
{0.00398714, 0.945126, 0.050887},
{0.00398714, 0.945126, 0.050887},
{0.00189227, 0.859575, 0.138533},
{0.0118341, 0.442149, 0.546017},
{0.0216135, 0.785781, 0.192605},
{0.0204803, 0.844276, 0.135244},
{0.00576313, 0.961665, 0.0325716},
});
int init_index = 78;
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::BoostAODE(false);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
// y_* come from the vector inputs, yt_* from the tensor inputs
auto y_pred_proba = clf.predict_proba(raw.Xv);
auto y_pred = clf.predict(raw.Xv);
auto yt_pred = clf.predict(raw.Xt);
auto yt_pred_proba = clf.predict_proba(raw.Xt);
std::cout << "yt_pred_proba proba sizes " << yt_pred_proba.sizes() << std::endl;
// Shape checks: one prediction per sample, three values per sample
REQUIRE(y_pred.size() == yt_pred.size(0));
REQUIRE(y_pred.size() == y_pred_proba.size());
REQUIRE(y_pred.size() == yt_pred_proba.size(0));
REQUIRE(y_pred.size() == raw.yv.size());
REQUIRE(y_pred_proba[0].size() == 3);
REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size());
for (int i = 0; i < y_pred_proba.size(); ++i) {
// Check predict is coherent with predict_proba
auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end());
int predictedClass = distance(y_pred_proba[i].begin(), maxElem);
REQUIRE(predictedClass == y_pred[i]);
REQUIRE(yt_pred_proba[i].argmax().item<int>() == y_pred[i]);
}
// Check predict_proba values for vectors and tensors
for (int i = 0; i < res_prob.size(); i++) {
// Labels are compared at index i, probabilities at index i + init_index
REQUIRE(y_pred[i] == yt_pred[i].item<int>());
for (int j = 0; j < 3; j++) {
REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon));
REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item<double>()).epsilon(raw.epsilon));
}
}
// Disabled helper that prints the probabilities compared above
// for (int i = 0; i < res_prob.size(); i++) {
// for (int j = 0; j < 3; j++) {
// std::cout << y_pred_proba[i + init_index][j] << " ";
// }
// std::cout << std::endl;
// }
}
// Fits BoostAODE(true) on iris and checks that predict and predict_proba agree with
// each other for vector and tensor inputs, and that the values of the nine samples
// starting at init_index match the reference table.
// NOTE(review): res_prob below is identical to the table used by the
// "BoostAODE predict_proba proba" test case, whereas the res_prob_voting table in
// "Model predict_proba" lists different values for BoostAODE(true) — confirm which
// expectations apply here.
TEST_CASE("BoostAODE predict_proba voting", "[BayesNet]")
{
// Reference values for samples init_index .. init_index + 8
auto res_prob = std::vector<std::vector<double>>({
{0.00803291, 0.9676, 0.0243672},
{0.00398714, 0.945126, 0.050887},
{0.00398714, 0.945126, 0.050887},
{0.00398714, 0.945126, 0.050887},
{0.00189227, 0.859575, 0.138533},
{0.0118341, 0.442149, 0.546017},
{0.0216135, 0.785781, 0.192605},
{0.0204803, 0.844276, 0.135244},
{0.00576313, 0.961665, 0.0325716},
});
int init_index = 78;
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::BoostAODE(true);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
// y_* come from the vector inputs, yt_* from the tensor inputs
auto y_pred_proba = clf.predict_proba(raw.Xv);
auto y_pred = clf.predict(raw.Xv);
auto yt_pred = clf.predict(raw.Xt);
auto yt_pred_proba = clf.predict_proba(raw.Xt);
std::cout << "yt_pred_proba proba sizes " << yt_pred_proba.sizes() << std::endl;
// Shape checks: one prediction per sample, three values per sample
REQUIRE(y_pred.size() == yt_pred.size(0));
REQUIRE(y_pred.size() == y_pred_proba.size());
REQUIRE(y_pred.size() == yt_pred_proba.size(0));
REQUIRE(y_pred.size() == raw.yv.size());
REQUIRE(y_pred_proba[0].size() == 3);
REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size());
for (int i = 0; i < y_pred_proba.size(); ++i) {
auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end());
int predictedClass = distance(y_pred_proba[i].begin(), maxElem);
REQUIRE(predictedClass == y_pred[i]);
// Check predict is coherent with predict_proba
// Trace output: prints every tensor row followed by the predicted label
for (int k = 0; k < yt_pred_proba[i].size(0); k++) {
std::cout << yt_pred_proba[i][k].item<double>() << " ";
}
std::cout << "-> " << y_pred[i] << std::endl;
REQUIRE(yt_pred_proba[i].argmax().item<int>() == y_pred[i]);
}
// Check predict_proba values for vectors and tensors
for (int i = 0; i < res_prob.size(); i++) {
// Labels are compared at index i, probabilities at index i + init_index
REQUIRE(y_pred[i] == yt_pred[i].item<int>());
for (int j = 0; j < 3; j++) {
REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon));
REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item<double>()).epsilon(raw.epsilon));
}
}
// Disabled helper that prints the values compared above
// for (int i = 0; i < res_prob.size(); i++) {
// for (int j = 0; j < 3; j++) {
// std::cout << y_pred_proba[i + init_index][j] << " ";
// }
// std::cout << std::endl;
// }
}