Add First BayesMetrics Tests

This commit is contained in:
Ricardo Montañana Gómez 2023-10-05 01:14:16 +02:00
parent 3448fb1299
commit 5f0676691c
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
4 changed files with 64 additions and 62 deletions

View File

@@ -1,7 +1,7 @@
#include "BayesMetrics.h" #include "BayesMetrics.h"
#include "Mst.h" #include "Mst.h"
namespace bayesnet { namespace bayesnet {
//samples is nxm tensor used to fit the model //samples is n+1xm tensor used to fit the model
Metrics::Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates) Metrics::Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates)
: samples(samples) : samples(samples)
, features(features) , features(features)

View File

@@ -2,54 +2,55 @@
#include <catch2/catch_approx.hpp> #include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp> #include <catch2/generators/catch_generators.hpp>
#include "BayesMetrics.h" #include "BayesMetrics.h"
#include "TestUtils.h"
using namespace std; using namespace std;
TEST_CASE("Metrics Test", "[Metrics]") TEST_CASE("Metrics Test", "[Metrics]")
{ {
string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
map<string, pair<int, vector<int>>> results = {
{"glass", {7, { 3, 2, 0, 1, 6, 7, 5 }}},
{"iris", {3, { 1, 0, 2 }} },
{"ecoli", {6, { 2, 3, 1, 0, 4, 5 }}},
{"diabetes", {2, { 2, 0 }}}
};
auto [XDisc, yDisc, featuresDisc, classNameDisc, statesDisc] = loadDataset(file_name, true, true);
int classNumStates = statesDisc.at(classNameDisc).size();
auto yresized = torch::transpose(yDisc.view({ yDisc.size(0), 1 }), 0, 1);
torch::Tensor dataset = torch::cat({ XDisc, yresized }, 0);
int nSamples = dataset.size(1);
SECTION("Test Constructor") SECTION("Test Constructor")
{ {
torch::Tensor samples = torch::rand({ 10, 5 }); bayesnet::Metrics metrics(XDisc, featuresDisc, classNameDisc, classNumStates);
vector<string> features = { "feature1", "feature2", "feature3", "feature4", "feature5" }; REQUIRE(metrics.getScoresKBest().size() == 0);
string className = "class1";
int classNumStates = 2;
bayesnet::Metrics obj(samples, features, className, classNumStates);
REQUIRE(obj.getScoresKBest().size() == 0);
} }
SECTION("Test SelectKBestWeighted") SECTION("Test SelectKBestWeighted")
{ {
torch::Tensor samples = torch::rand({ 10, 5 }); bayesnet::Metrics metrics(XDisc, featuresDisc, classNameDisc, classNumStates);
vector<string> features = { "feature1", "feature2", "feature3", "feature4", "feature5" }; torch::Tensor weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble);
string className = "class1"; vector<int> kBest = metrics.SelectKBestWeighted(weights, true, results.at(file_name).first);
int classNumStates = 2; REQUIRE(kBest.size() == results.at(file_name).first);
REQUIRE(kBest == results.at(file_name).second);
bayesnet::Metrics obj(samples, features, className, classNumStates);
torch::Tensor weights = torch::ones({ 5 });
vector<int> kBest = obj.SelectKBestWeighted(weights, true, 3);
REQUIRE(kBest.size() == 3);
} }
SECTION("Test mutualInformation") SECTION("Test mutualInformation")
{ {
torch::Tensor samples = torch::rand({ 10, 5 }); // torch::Tensor samples = torch::rand({ 10, 5 });
vector<string> features = { "feature1", "feature2", "feature3", "feature4", "feature5" }; // vector<string> features = { "feature1", "feature2", "feature3", "feature4", "feature5" };
string className = "class1"; // string className = "class1";
int classNumStates = 2; // int classNumStates = 2;
bayesnet::Metrics obj(samples, features, className, classNumStates); // bayesnet::Metrics obj(samples, features, className, classNumStates);
torch::Tensor firstFeature = samples.select(1, 0); // torch::Tensor firstFeature = samples.select(1, 0);
torch::Tensor secondFeature = samples.select(1, 1); // torch::Tensor secondFeature = samples.select(1, 1);
torch::Tensor weights = torch::ones({ 10 }); // torch::Tensor weights = torch::ones({ 10 });
double mi = obj.mutualInformation(firstFeature, secondFeature, weights); // double mi = obj.mutualInformation(firstFeature, secondFeature, weights);
REQUIRE(mi >= 0); // REQUIRE(mi >= 0);
} }
} }

View File

@@ -21,29 +21,30 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
map <pair<string, string>, float> scores = { map <pair<string, string>, float> scores = {
// Diabetes // Diabetes
{{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615}, {{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
{{"diabetes", "AODELd"}, 0.811198}, {{"diabetes", "KDBLd"}, 0.852865}, {{"diabetes", "SPODELd"}, 0.802083}, {{"diabetes", "TANLd"}, 0.821615}, {{"diabetes", "BoostAODE"}, 0.821615}, {{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f},
// Ecoli // Ecoli
{{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857}, {{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
{{"ecoli", "AODELd"}, 0.889881}, {{"ecoli", "KDBLd"}, 0.889881}, {{"ecoli", "SPODELd"}, 0.880952}, {{"ecoli", "TANLd"}, 0.892857}, {{"ecoli", "BoostAODE"}, 0.892857}, {{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f},
// Glass // Glass
{{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103}, {{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
{{"glass", "AODELd"}, 0.78972}, {{"glass", "KDBLd"}, 0.827103}, {{"glass", "SPODELd"}, 0.775701}, {{"glass", "TANLd"}, 0.827103}, {{"glass", "BoostAODE"}, 0.827103}, {{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f},
// Iris // Iris
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333}, {{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
{{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.973333}, {{"iris", "TANLd"}, 0.973333}, {{"iris", "BoostAODE"}, 0.973333} {{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f}
}; };
string file_name = GENERATE("glass", "iris", "ecoli", "diabetes"); string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
auto [XCont, yCont, featuresCont, classNameCont, statesCont] = loadDataset(file_name, true, false); auto [XCont, yCont, featuresCont, classNameCont, statesCont] = loadDataset(file_name, true, false);
auto [XDisc, yDisc, featuresDisc, className, statesDisc] = loadFile(file_name); auto [XDisc, yDisc, featuresDisc, classNameDisc, statesDisc] = loadFile(file_name);
double epsilon = 1e-5;
SECTION("Test TAN classifier (" + file_name + ")") SECTION("Test TAN classifier (" + file_name + ")")
{ {
auto clf = bayesnet::TAN(); auto clf = bayesnet::TAN();
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc); clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
auto score = clf.score(XDisc, yDisc); auto score = clf.score(XDisc, yDisc);
//scores[{file_name, "TAN"}] = score; //scores[{file_name, "TAN"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(1e-6)); REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(epsilon));
} }
SECTION("Test TANLd classifier (" + file_name + ")") SECTION("Test TANLd classifier (" + file_name + ")")
{ {
@@ -51,16 +52,16 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
clf.fit(XCont, yCont, featuresCont, classNameCont, statesCont); clf.fit(XCont, yCont, featuresCont, classNameCont, statesCont);
auto score = clf.score(XCont, yCont); auto score = clf.score(XCont, yCont);
//scores[{file_name, "TANLd"}] = score; //scores[{file_name, "TANLd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(1e-6)); REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(epsilon));
} }
SECTION("Test KDB classifier (" + file_name + ")") SECTION("Test KDB classifier (" + file_name + ")")
{ {
auto clf = bayesnet::KDB(2); auto clf = bayesnet::KDB(2);
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc); clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
auto score = clf.score(XDisc, yDisc); auto score = clf.score(XDisc, yDisc);
//scores[{file_name, "KDB"}] = score; //scores[{file_name, "KDB"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDB" REQUIRE(score == Catch::Approx(scores[{file_name, "KDB"
}]).epsilon(1e-6)); }]).epsilon(epsilon));
} }
SECTION("Test KDBLd classifier (" + file_name + ")") SECTION("Test KDBLd classifier (" + file_name + ")")
{ {
@@ -69,15 +70,15 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
auto score = clf.score(XCont, yCont); auto score = clf.score(XCont, yCont);
//scores[{file_name, "KDBLd"}] = score; //scores[{file_name, "KDBLd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd" REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd"
}]).epsilon(1e-6)); }]).epsilon(epsilon));
} }
SECTION("Test SPODE classifier (" + file_name + ")") SECTION("Test SPODE classifier (" + file_name + ")")
{ {
auto clf = bayesnet::SPODE(1); auto clf = bayesnet::SPODE(1);
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc); clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
auto score = clf.score(XDisc, yDisc); auto score = clf.score(XDisc, yDisc);
// scores[{file_name, "SPODE"}] = score; // scores[{file_name, "SPODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(1e-6)); REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(epsilon));
} }
SECTION("Test SPODELd classifier (" + file_name + ")") SECTION("Test SPODELd classifier (" + file_name + ")")
{ {
@@ -85,31 +86,31 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
clf.fit(XCont, yCont, featuresCont, classNameCont, statesCont); clf.fit(XCont, yCont, featuresCont, classNameCont, statesCont);
auto score = clf.score(XCont, yCont); auto score = clf.score(XCont, yCont);
// scores[{file_name, "SPODELd"}] = score; // scores[{file_name, "SPODELd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(1e-6)); REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(epsilon));
} }
SECTION("Test AODE classifier (" + file_name + ")") SECTION("Test AODE classifier (" + file_name + ")")
{ {
auto clf = bayesnet::AODE(); auto clf = bayesnet::AODE();
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc); clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
auto score = clf.score(XDisc, yDisc); auto score = clf.score(XDisc, yDisc);
// scores[{file_name, "AODE"}] = score; // scores[{file_name, "AODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(1e-6)); REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(epsilon));
} }
SECTION("Test AODELd classifier (" + file_name + ")") SECTION("Test AODELd classifier (" + file_name + ")")
{ {
auto clf = bayesnet::AODE(); auto clf = bayesnet::AODELd();
clf.fit(XCont, yCont, featuresCont, classNameCont, statesCont); clf.fit(XCont, yCont, featuresCont, classNameCont, statesCont);
auto score = clf.score(XCont, yCont); auto score = clf.score(XCont, yCont);
// scores[{file_name, "AODELd"}] = score; // scores[{file_name, "AODELd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(1e-6)); REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(epsilon));
} }
SECTION("Test BoostAODE classifier (" + file_name + ")") SECTION("Test BoostAODE classifier (" + file_name + ")")
{ {
auto clf = bayesnet::BoostAODE(); auto clf = bayesnet::BoostAODE();
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc); clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
auto score = clf.score(XDisc, yDisc); auto score = clf.score(XDisc, yDisc);
// scores[{file_name, "BoostAODE"}] = score; // scores[{file_name, "BoostAODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(1e-6)); REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(epsilon));
} }
// for (auto scores : scores) { // for (auto scores : scores) {
// cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, "; // cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, ";
@@ -126,18 +127,18 @@ TEST_CASE("Models featuresDisc")
); );
auto clf = bayesnet::TAN(); auto clf = bayesnet::TAN();
auto [XDisc, yDisc, featuresDisc, className, statesDisc] = loadFile("iris"); auto [XDisc, yDisc, featuresDisc, classNameDisc, statesDisc] = loadFile("iris");
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc); clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfNodes() == 6);
REQUIRE(clf.getNumberOfEdges() == 7); REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.show() == vector<string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "}); REQUIRE(clf.show() == vector<string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
REQUIRE(clf.graph("Test") == graph); REQUIRE(clf.graph("Test") == graph);
} }
TEST_CASE("Get num featuresDisc & num edges") TEST_CASE("Get num featuresDisc & num edges")
{ {
auto [XDisc, yDisc, featuresDisc, className, statesDisc] = loadFile("iris"); auto [XDisc, yDisc, featuresDisc, classNameDisc, statesDisc] = loadFile("iris");
auto clf = bayesnet::KDB(2); auto clf = bayesnet::KDB(2);
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc); clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
REQUIRE(clf.getNumberOfNodes() == 5); REQUIRE(clf.getNumberOfNodes() == 6);
REQUIRE(clf.getNumberOfEdges() == 8); REQUIRE(clf.getNumberOfEdges() == 8);
} }

View File

@@ -6,7 +6,7 @@ class Paths {
public: public:
static string datasets() static string datasets()
{ {
return "../data/"; return "../../data/";
} }
}; };
@@ -62,19 +62,19 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
auto states = map<string, vector<int>>(); auto states = map<string, vector<int>>();
if (discretize_dataset) { if (discretize_dataset) {
auto Xr = discretizeDataset(X, y); auto Xr = discretizeDataset(X, y);
Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32); Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1); states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
auto item = states.at(features[i]); auto item = states.at(features[i]);
iota(begin(item), end(item), 0); iota(begin(item), end(item), 0);
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32)); Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
} }
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1); states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0); iota(begin(states.at(className)), end(states.at(className)), 0);
} else { } else {
Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32); Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) { for (int i = 0; i < features.size(); ++i) {
Xd.index_put_({ "...", i }, torch::tensor(X[i])); Xd.index_put_({ i, "..." }, torch::tensor(X[i]));
} }
} }
return { Xd, torch::tensor(y, torch::kInt32), features, className, states }; return { Xd, torch::tensor(y, torch::kInt32), features, className, states };