From 8477698d8dc04410b3983c99bc86943c4bbcd255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Fri, 23 Feb 2024 23:11:14 +0100 Subject: [PATCH] Complete predict & predict_proba with voting & probabilities --- src/Ensemble.cc | 49 +----- src/Ensemble.h | 1 - src/bayesnetUtils.cc | 29 +++- src/bayesnetUtils.h | 6 +- tests/TestBayesModels.cc | 359 ++++++++++++++------------------------- 5 files changed, 161 insertions(+), 283 deletions(-) diff --git a/src/Ensemble.cc b/src/Ensemble.cc index cebcb9a..b0b8d9c 100644 --- a/src/Ensemble.cc +++ b/src/Ensemble.cc @@ -51,32 +51,6 @@ namespace bayesnet { result /= sum; return result; } - std::vector> Ensemble::voting(std::vector>& votes) - { - // Convert n_models x m matrix to a m x n_class_states matrix - std::vector> y_pred_final; - int numClasses = states.at(className).size(); - auto sum = std::reduce(significanceModels.begin(), significanceModels.end()); - // y_pred is m x n_models with the prediction of every model for each sample - std::cout << std::string(80, '*') << std::endl; - for (int i = 0; i < votes.size(); ++i) { - // n_votes store in each index (value of class) the significance added by each model - // i.e. n_votes[0] contains how much value has the value 0 of class. That value is generated by the models predictions - std::vector n_votes(numClasses, 0.0); - for (int j = 0; j < n_models; ++j) { - n_votes[votes[i][j]] += significanceModels.at(j); - } - for (auto& x : n_votes) { - std::cout << x << " "; - } - std::cout << std::endl; - // To only do one division per result and gain precision - std::transform(n_votes.begin(), n_votes.end(), n_votes.begin(), [sum](double x) { return x / sum; }); - y_pred_final.push_back(n_votes); - } - std::cout << std::string(80, '*') << std::endl; - return y_pred_final; - } std::vector> Ensemble::predict_proba(std::vector>& X) { if (!fitted) { @@ -94,7 +68,6 @@ namespace bayesnet { std::vector Ensemble::predict(std::vector>& X) { auto res = predict_proba(X); - std::cout << "res: " << res.size() << ", " << res[0].size() << std::endl; return compute_arg_max(res); } torch::Tensor Ensemble::predict(torch::Tensor& X) @@ -151,6 +124,13 @@ namespace bayesnet { } return y_pred; } + std::vector> Ensemble::predict_average_voting(std::vector>& X) + { + torch::Tensor Xt = bayesnet::vectorToTensor(X, false); + auto y_pred = predict_average_voting(Xt); + std::vector> result = tensorToVectorDouble(y_pred); + return result; + } torch::Tensor Ensemble::predict_average_voting(torch::Tensor& X) { // Build a m x n_models tensor with the predictions of each model @@ -169,21 +149,6 @@ namespace bayesnet { } return voting(y_pred); } - std::vector> Ensemble::predict_average_voting(std::vector>& X) - { - auto Xt = vectorToTensor(X); - auto y_pred = predict_average_voting(Xt); - auto res = voting(y_pred); - std::vector> result; - // Iterate over cols - for (int i = 0; i < res.size(1); ++i) { - auto col_tensor = res.index({ "...", i }); - auto col = std::vector(col_tensor.data_ptr(), col_tensor.data_ptr() + res.size(0)); - result.push_back(col); - } - return result; - //return tensorToVector(res); - } float Ensemble::score(torch::Tensor& X, torch::Tensor& y) { auto y_pred = predict(X); diff --git a/src/Ensemble.h b/src/Ensemble.h index dd14046..cd42cee 100644 --- a/src/Ensemble.h +++ b/src/Ensemble.h @@ -36,7 +36,6 @@ namespace bayesnet { torch::Tensor compute_arg_max(torch::Tensor& X); std::vector compute_arg_max(std::vector>& X); torch::Tensor voting(torch::Tensor& votes); - std::vector> voting(std::vector>& votes); unsigned n_models; std::vector> models; std::vector significanceModels; diff --git a/src/bayesnetUtils.cc b/src/bayesnetUtils.cc index 4b4e3c2..f620983 100644 --- a/src/bayesnetUtils.cc +++ b/src/bayesnetUtils.cc @@ -10,28 +10,39 @@ namespace bayesnet { sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];}); return indices; } - template - std::vector> tensorToVector(torch::Tensor& dtensor) + std::vector> tensorToVector(torch::Tensor& dtensor) { // convert mxn tensor to nxm std::vector - std::vector> result; + std::vector> result; // Iterate over cols for (int i = 0; i < dtensor.size(1); ++i) { auto col_tensor = dtensor.index({ "...", i }); - auto col = std::vector(col_tensor.data_ptr(), col_tensor.data_ptr() + dtensor.size(0)); + auto col = std::vector(col_tensor.data_ptr(), col_tensor.data_ptr() + dtensor.size(0)); result.push_back(col); } return result; } - torch::Tensor vectorToTensor(std::vector>& vector) + std::vector> tensorToVectorDouble(torch::Tensor& dtensor) { - // convert nxm std::vector to mxn tensor - long int m = vector[0].size(); - long int n = vector.size(); + // convert mxn tensor to mxn std::vector + std::vector> result; + // Iterate over cols + for (int i = 0; i < dtensor.size(0); ++i) { + auto col_tensor = dtensor.index({ i, "..." }); + auto col = std::vector(col_tensor.data_ptr(), col_tensor.data_ptr() + dtensor.size(1)); + result.push_back(col); + } + return result; + } + torch::Tensor vectorToTensor(std::vector>& vector, bool transpose) + { + // convert nxm std::vector to mxn tensor if transpose + long int m = transpose ? vector[0].size() : vector.size(); + long int n = transpose ? vector.size() : vector[0].size(); auto tensor = torch::zeros({ m, n }, torch::kInt32); for (int i = 0; i < m; ++i) { for (int j = 0; j < n; ++j) { - tensor[i][j] = vector[j][i]; + tensor[i][j] = transpose ? vector[j][i] : vector[i][j]; } } return tensor; diff --git a/src/bayesnetUtils.h b/src/bayesnetUtils.h index 2790d16..0e741be 100644 --- a/src/bayesnetUtils.h +++ b/src/bayesnetUtils.h @@ -4,8 +4,8 @@ #include namespace bayesnet { std::vector argsort(std::vector& nums); - template - std::vector> tensorToVector(torch::Tensor& dtensor); - torch::Tensor vectorToTensor(std::vector>& vector); + std::vector> tensorToVector(torch::Tensor& dtensor); + std::vector> tensorToVectorDouble(torch::Tensor& dtensor); + torch::Tensor vectorToTensor(std::vector>& vector, bool transpose = true); } #endif //BAYESNET_UTILS_H \ No newline at end of file diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc index 3ecf4f3..f8ae718 100644 --- a/tests/TestBayesModels.cc +++ b/tests/TestBayesModels.cc @@ -21,104 +21,104 @@ TEST_CASE("Library check version", "[BayesNet]") auto clf = bayesnet::KDB(2); REQUIRE(clf.getVersion() == "1.0.2"); } -// TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") -// { -// map , float> scores = { -// // Diabetes -// {{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, -// {{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f}, -// // Ecoli -// {{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857}, -// {{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f}, -// // Glass -// {{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103}, -// {{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f}, -// // Iris -// {{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333}, -// {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f} -// }; +TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]") +{ + map , float> scores = { + // Diabetes + {{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, + {{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f}, + // Ecoli + {{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857}, + {{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f}, + // Glass + {{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103}, + {{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f}, + // Iris + {{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333}, + {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f} + }; -// std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); -// auto raw = RawDatasets(file_name, false); + std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); + auto raw = RawDatasets(file_name, false); -// SECTION("Test TAN classifier (" + file_name + ")") -// { -// auto clf = bayesnet::TAN(); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto score = clf.score(raw.Xv, raw.yv); -// //scores[{file_name, "TAN"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test TANLd classifier (" + file_name + ")") -// { -// auto clf = bayesnet::TANLd(); -// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); -// auto score = clf.score(raw.Xt, raw.yt); -// //scores[{file_name, "TANLd"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test KDB classifier (" + file_name + ")") -// { -// auto clf = bayesnet::KDB(2); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto score = clf.score(raw.Xv, raw.yv); -// //scores[{file_name, "KDB"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "KDB" -// }]).epsilon(raw.epsilon)); -// } -// SECTION("Test KDBLd classifier (" + file_name + ")") -// { -// auto clf = bayesnet::KDBLd(2); -// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); -// auto score = clf.score(raw.Xt, raw.yt); -// //scores[{file_name, "KDBLd"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd" -// }]).epsilon(raw.epsilon)); -// } -// SECTION("Test SPODE classifier (" + file_name + ")") -// { -// auto clf = bayesnet::SPODE(1); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto score = clf.score(raw.Xv, raw.yv); -// // scores[{file_name, "SPODE"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test SPODELd classifier (" + file_name + ")") -// { -// auto clf = bayesnet::SPODELd(1); -// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); -// auto score = clf.score(raw.Xt, raw.yt); -// // scores[{file_name, "SPODELd"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test AODE classifier (" + file_name + ")") -// { -// auto clf = bayesnet::AODE(); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto score = clf.score(raw.Xv, raw.yv); -// // scores[{file_name, "AODE"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test AODELd classifier (" + file_name + ")") -// { -// auto clf = bayesnet::AODELd(); -// clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); -// auto score = clf.score(raw.Xt, raw.yt); -// // scores[{file_name, "AODELd"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon)); -// } -// SECTION("Test BoostAODE classifier (" + file_name + ")") -// { -// auto clf = bayesnet::BoostAODE(); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// auto score = clf.score(raw.Xv, raw.yv); -// // scores[{file_name, "BoostAODE"}] = score; -// REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon)); -// } -// // for (auto scores : scores) { -// // std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, "; -// // } -// } + SECTION("Test TAN classifier (" + file_name + ")") + { + auto clf = bayesnet::TAN(); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score = clf.score(raw.Xv, raw.yv); + //scores[{file_name, "TAN"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon)); + } + SECTION("Test TANLd classifier (" + file_name + ")") + { + auto clf = bayesnet::TANLd(); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + auto score = clf.score(raw.Xt, raw.yt); + //scores[{file_name, "TANLd"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon)); + } + SECTION("Test KDB classifier (" + file_name + ")") + { + auto clf = bayesnet::KDB(2); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score = clf.score(raw.Xv, raw.yv); + //scores[{file_name, "KDB"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "KDB" + }]).epsilon(raw.epsilon)); + } + SECTION("Test KDBLd classifier (" + file_name + ")") + { + auto clf = bayesnet::KDBLd(2); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + auto score = clf.score(raw.Xt, raw.yt); + //scores[{file_name, "KDBLd"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd" + }]).epsilon(raw.epsilon)); + } + SECTION("Test SPODE classifier (" + file_name + ")") + { + auto clf = bayesnet::SPODE(1); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score = clf.score(raw.Xv, raw.yv); + // scores[{file_name, "SPODE"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon)); + } + SECTION("Test SPODELd classifier (" + file_name + ")") + { + auto clf = bayesnet::SPODELd(1); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + auto score = clf.score(raw.Xt, raw.yt); + // scores[{file_name, "SPODELd"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(raw.epsilon)); + } + SECTION("Test AODE classifier (" + file_name + ")") + { + auto clf = bayesnet::AODE(); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score = clf.score(raw.Xv, raw.yv); + // scores[{file_name, "AODE"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon)); + } + SECTION("Test AODELd classifier (" + file_name + ")") + { + auto clf = bayesnet::AODELd(); + clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest); + auto score = clf.score(raw.Xt, raw.yt); + // scores[{file_name, "AODELd"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon)); + } + SECTION("Test BoostAODE classifier (" + file_name + ")") + { + auto clf = bayesnet::BoostAODE(true); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + auto score = clf.score(raw.Xv, raw.yv); + // scores[{file_name, "BoostAODE"}] = score; + REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon)); + } + // for (auto scores : scores) { + // std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, "; + // } +} TEST_CASE("Models features", "[BayesNet]") { auto graph = std::vector({ "digraph BayesNet {\nlabel=\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n", @@ -158,35 +158,31 @@ TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]") REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS"); REQUIRE(clf.getNotes()[1] == "Number of models: 9"); } -// TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]") -// { -// auto raw = RawDatasets("diabetes", true); -// auto clf = bayesnet::BoostAODE(); -// clf.setHyperparameters({ -// {"ascending",true}, -// {"convergence", true}, -// {"repeatSparent",true}, -// {"select_features","CFS"}, -// }); -// clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); -// REQUIRE(clf.getNumberOfNodes() == 72); -// REQUIRE(clf.getNumberOfEdges() == 120); -// REQUIRE(clf.getNotes().size() == 3); -// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); -// REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8"); -// REQUIRE(clf.getNotes()[2] == "Number of models: 8"); -// auto score = clf.score(raw.Xv, raw.yv); -// auto scoret = clf.score(raw.Xt, raw.yt); -// REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon)); -// REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon)); -// } +TEST_CASE("BoostAODE test used features in train note and score", "[BayesNet]") +{ + auto raw = RawDatasets("diabetes", true); + auto clf = bayesnet::BoostAODE(true); + clf.setHyperparameters({ + {"ascending",true}, + {"convergence", true}, + {"repeatSparent",true}, + {"select_features","CFS"}, + }); + clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); + REQUIRE(clf.getNumberOfNodes() == 72); + REQUIRE(clf.getNumberOfEdges() == 120); + REQUIRE(clf.getNotes().size() == 3); + REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS"); + REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8"); + REQUIRE(clf.getNotes()[2] == "Number of models: 8"); + auto score = clf.score(raw.Xv, raw.yv); + auto scoret = clf.score(raw.Xt, raw.yt); + REQUIRE(score == Catch::Approx(0.8138).epsilon(raw.epsilon)); + REQUIRE(scoret == Catch::Approx(0.8138).epsilon(raw.epsilon)); +} TEST_CASE("Model predict_proba", "[BayesNet]") { - // std::string model = GENERATE("TAN", "SPODE", "BoostAODEprobabilities", "BoostAODEvoting"); - std::string model = GENERATE("TAN", "SPODE"); - std::cout << string(100, '*') << std::endl; - std::cout << "************************************* CHANGE MODEL GENERATE ****************************************" << std::endl; - std::cout << string(100, '*') << std::endl; + std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting"); auto res_prob_tan = std::vector>({ { 0.00375671, 0.994457, 0.00178621 }, { 0.00137462, 0.992734, 0.00589123 }, @@ -220,7 +216,18 @@ TEST_CASE("Model predict_proba", "[BayesNet]") {0.0204803, 0.844276, 0.135244}, {0.00576313, 0.961665, 0.0325716}, }); - std::map>> res_prob = { {"TAN", res_prob_tan}, {"SPODE", res_prob_spode} , {"BoostAODEproba", res_prob_baode }, {"BoostAODEvoting", res_prob_baode } }; + auto res_prob_voting = std::vector>({ + {0, 1, 0}, + {0, 1, 0}, + {0, 1, 0}, + {0, 1, 0}, + {0, 1, 0}, + {0, 0.447909, 0.552091}, + {0, 0.811482, 0.188517}, + {0, 1, 0}, + {0, 1, 0} + }); + std::map>> res_prob = { {"TAN", res_prob_tan}, {"SPODE", res_prob_spode} , {"BoostAODEproba", res_prob_baode }, {"BoostAODEvoting", res_prob_voting } }; std::map models = { {"TAN", new bayesnet::TAN()}, {"SPODE", new bayesnet::SPODE(0)}, {"BoostAODEproba", new bayesnet::BoostAODE(false)}, {"BoostAODEvoting", new bayesnet::BoostAODE(true)} }; int init_index = 78; auto raw = RawDatasets("iris", true); @@ -257,107 +264,3 @@ TEST_CASE("Model predict_proba", "[BayesNet]") delete clf; } } -TEST_CASE("BoostAODE predict_proba proba", "[BayesNet]") -{ - auto res_prob = std::vector>({ - {0.00803291, 0.9676, 0.0243672}, - {0.00398714, 0.945126, 0.050887}, - {0.00398714, 0.945126, 0.050887}, - {0.00398714, 0.945126, 0.050887}, - {0.00189227, 0.859575, 0.138533}, - {0.0118341, 0.442149, 0.546017}, - {0.0216135, 0.785781, 0.192605}, - {0.0204803, 0.844276, 0.135244}, - {0.00576313, 0.961665, 0.0325716}, - }); - int init_index = 78; - auto raw = RawDatasets("iris", true); - auto clf = bayesnet::BoostAODE(false); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto y_pred_proba = clf.predict_proba(raw.Xv); - auto y_pred = clf.predict(raw.Xv); - auto yt_pred = clf.predict(raw.Xt); - auto yt_pred_proba = clf.predict_proba(raw.Xt); - std::cout << "yt_pred_proba proba sizes " << yt_pred_proba.sizes() << std::endl; - REQUIRE(y_pred.size() == yt_pred.size(0)); - REQUIRE(y_pred.size() == y_pred_proba.size()); - REQUIRE(y_pred.size() == yt_pred_proba.size(0)); - REQUIRE(y_pred.size() == raw.yv.size()); - REQUIRE(y_pred_proba[0].size() == 3); - REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size()); - for (int i = 0; i < y_pred_proba.size(); ++i) { - // Check predict is coherent with predict_proba - auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); - int predictedClass = distance(y_pred_proba[i].begin(), maxElem); - REQUIRE(predictedClass == y_pred[i]); - REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); - } - // Check predict_proba values for vectors and tensors - for (int i = 0; i < res_prob.size(); i++) { - REQUIRE(y_pred[i] == yt_pred[i].item()); - for (int j = 0; j < 3; j++) { - REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); - REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); - } - } - // for (int i = 0; i < res_prob.size(); i++) { - // for (int j = 0; j < 3; j++) { - // std::cout << y_pred_proba[i + init_index][j] << " "; - // } - // std::cout << std::endl; - // } -} -TEST_CASE("BoostAODE predict_proba voting", "[BayesNet]") -{ - auto res_prob = std::vector>({ - {0.00803291, 0.9676, 0.0243672}, - {0.00398714, 0.945126, 0.050887}, - {0.00398714, 0.945126, 0.050887}, - {0.00398714, 0.945126, 0.050887}, - {0.00189227, 0.859575, 0.138533}, - {0.0118341, 0.442149, 0.546017}, - {0.0216135, 0.785781, 0.192605}, - {0.0204803, 0.844276, 0.135244}, - {0.00576313, 0.961665, 0.0325716}, - }); - int init_index = 78; - auto raw = RawDatasets("iris", true); - auto clf = bayesnet::BoostAODE(true); - clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv); - auto y_pred_proba = clf.predict_proba(raw.Xv); - auto y_pred = clf.predict(raw.Xv); - auto yt_pred = clf.predict(raw.Xt); - auto yt_pred_proba = clf.predict_proba(raw.Xt); - std::cout << "yt_pred_proba proba sizes " << yt_pred_proba.sizes() << std::endl; - REQUIRE(y_pred.size() == yt_pred.size(0)); - REQUIRE(y_pred.size() == y_pred_proba.size()); - REQUIRE(y_pred.size() == yt_pred_proba.size(0)); - REQUIRE(y_pred.size() == raw.yv.size()); - REQUIRE(y_pred_proba[0].size() == 3); - REQUIRE(yt_pred_proba.size(1) == y_pred_proba[0].size()); - for (int i = 0; i < y_pred_proba.size(); ++i) { - auto maxElem = max_element(y_pred_proba[i].begin(), y_pred_proba[i].end()); - int predictedClass = distance(y_pred_proba[i].begin(), maxElem); - REQUIRE(predictedClass == y_pred[i]); - // Check predict is coherent with predict_proba - for (int k = 0; k < yt_pred_proba[i].size(0); k++) { - std::cout << yt_pred_proba[i][k].item() << " "; - } - std::cout << "-> " << y_pred[i] << std::endl; - REQUIRE(yt_pred_proba[i].argmax().item() == y_pred[i]); - } - // Check predict_proba values for vectors and tensors - for (int i = 0; i < res_prob.size(); i++) { - REQUIRE(y_pred[i] == yt_pred[i].item()); - for (int j = 0; j < 3; j++) { - REQUIRE(res_prob[i][j] == Catch::Approx(y_pred_proba[i + init_index][j]).epsilon(raw.epsilon)); - REQUIRE(res_prob[i][j] == Catch::Approx(yt_pred_proba[i + init_index][j].item()).epsilon(raw.epsilon)); - } - } - // for (int i = 0; i < res_prob.size(); i++) { - // for (int j = 0; j < 3; j++) { - // std::cout << y_pred_proba[i + init_index][j] << " "; - // } - // std::cout << std::endl; - // } -}