Add First BayesMetrics Tests

This commit is contained in:
Ricardo Montañana Gómez 2023-10-05 01:14:16 +02:00
parent 3448fb1299
commit 5f0676691c
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
4 changed files with 64 additions and 62 deletions

View File

@ -1,7 +1,7 @@
#include "BayesMetrics.h"
#include "Mst.h"
namespace bayesnet {
//samples is nxm tensor used to fit the model
//samples is n+1xm tensor used to fit the model
Metrics::Metrics(const torch::Tensor& samples, const vector<string>& features, const string& className, const int classNumStates)
: samples(samples)
, features(features)

View File

@ -2,54 +2,55 @@
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "BayesMetrics.h"
#include "TestUtils.h"
using namespace std;
TEST_CASE("Metrics Test", "[Metrics]")
{
string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
map<string, pair<int, vector<int>>> results = {
{"glass", {7, { 3, 2, 0, 1, 6, 7, 5 }}},
{"iris", {3, { 1, 0, 2 }} },
{"ecoli", {6, { 2, 3, 1, 0, 4, 5 }}},
{"diabetes", {2, { 2, 0 }}}
};
auto [XDisc, yDisc, featuresDisc, classNameDisc, statesDisc] = loadDataset(file_name, true, true);
int classNumStates = statesDisc.at(classNameDisc).size();
auto yresized = torch::transpose(yDisc.view({ yDisc.size(0), 1 }), 0, 1);
torch::Tensor dataset = torch::cat({ XDisc, yresized }, 0);
int nSamples = dataset.size(1);
SECTION("Test Constructor")
{
torch::Tensor samples = torch::rand({ 10, 5 });
vector<string> features = { "feature1", "feature2", "feature3", "feature4", "feature5" };
string className = "class1";
int classNumStates = 2;
bayesnet::Metrics obj(samples, features, className, classNumStates);
REQUIRE(obj.getScoresKBest().size() == 0);
bayesnet::Metrics metrics(XDisc, featuresDisc, classNameDisc, classNumStates);
REQUIRE(metrics.getScoresKBest().size() == 0);
}
SECTION("Test SelectKBestWeighted")
{
torch::Tensor samples = torch::rand({ 10, 5 });
vector<string> features = { "feature1", "feature2", "feature3", "feature4", "feature5" };
string className = "class1";
int classNumStates = 2;
bayesnet::Metrics obj(samples, features, className, classNumStates);
torch::Tensor weights = torch::ones({ 5 });
vector<int> kBest = obj.SelectKBestWeighted(weights, true, 3);
REQUIRE(kBest.size() == 3);
bayesnet::Metrics metrics(XDisc, featuresDisc, classNameDisc, classNumStates);
torch::Tensor weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble);
vector<int> kBest = metrics.SelectKBestWeighted(weights, true, results.at(file_name).first);
REQUIRE(kBest.size() == results.at(file_name).first);
REQUIRE(kBest == results.at(file_name).second);
}
SECTION("Test mutualInformation")
{
torch::Tensor samples = torch::rand({ 10, 5 });
vector<string> features = { "feature1", "feature2", "feature3", "feature4", "feature5" };
string className = "class1";
int classNumStates = 2;
// torch::Tensor samples = torch::rand({ 10, 5 });
// vector<string> features = { "feature1", "feature2", "feature3", "feature4", "feature5" };
// string className = "class1";
// int classNumStates = 2;
bayesnet::Metrics obj(samples, features, className, classNumStates);
// bayesnet::Metrics obj(samples, features, className, classNumStates);
torch::Tensor firstFeature = samples.select(1, 0);
torch::Tensor secondFeature = samples.select(1, 1);
torch::Tensor weights = torch::ones({ 10 });
// torch::Tensor firstFeature = samples.select(1, 0);
// torch::Tensor secondFeature = samples.select(1, 1);
// torch::Tensor weights = torch::ones({ 10 });
double mi = obj.mutualInformation(firstFeature, secondFeature, weights);
// double mi = obj.mutualInformation(firstFeature, secondFeature, weights);
REQUIRE(mi >= 0);
// REQUIRE(mi >= 0);
}
}

View File

@ -21,29 +21,30 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
map <pair<string, string>, float> scores = {
// Diabetes
{{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
{{"diabetes", "AODELd"}, 0.811198}, {{"diabetes", "KDBLd"}, 0.852865}, {{"diabetes", "SPODELd"}, 0.802083}, {{"diabetes", "TANLd"}, 0.821615}, {{"diabetes", "BoostAODE"}, 0.821615},
{{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f},
// Ecoli
{{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
{{"ecoli", "AODELd"}, 0.889881}, {{"ecoli", "KDBLd"}, 0.889881}, {{"ecoli", "SPODELd"}, 0.880952}, {{"ecoli", "TANLd"}, 0.892857}, {{"ecoli", "BoostAODE"}, 0.892857},
{{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f},
// Glass
{{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
{{"glass", "AODELd"}, 0.78972}, {{"glass", "KDBLd"}, 0.827103}, {{"glass", "SPODELd"}, 0.775701}, {{"glass", "TANLd"}, 0.827103}, {{"glass", "BoostAODE"}, 0.827103},
{{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f},
// Iris
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
{{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.973333}, {{"iris", "TANLd"}, 0.973333}, {{"iris", "BoostAODE"}, 0.973333}
{{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f}
};
string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
auto [XCont, yCont, featuresCont, classNameCont, statesCont] = loadDataset(file_name, true, false);
auto [XDisc, yDisc, featuresDisc, className, statesDisc] = loadFile(file_name);
auto [XDisc, yDisc, featuresDisc, classNameDisc, statesDisc] = loadFile(file_name);
double epsilon = 1e-5;
SECTION("Test TAN classifier (" + file_name + ")")
{
auto clf = bayesnet::TAN();
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc);
clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
auto score = clf.score(XDisc, yDisc);
//scores[{file_name, "TAN"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(1e-6));
REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(epsilon));
}
SECTION("Test TANLd classifier (" + file_name + ")")
{
@ -51,16 +52,16 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
clf.fit(XCont, yCont, featuresCont, classNameCont, statesCont);
auto score = clf.score(XCont, yCont);
//scores[{file_name, "TANLd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(1e-6));
REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(epsilon));
}
SECTION("Test KDB classifier (" + file_name + ")")
{
auto clf = bayesnet::KDB(2);
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc);
clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
auto score = clf.score(XDisc, yDisc);
//scores[{file_name, "KDB"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDB"
}]).epsilon(1e-6));
}]).epsilon(epsilon));
}
SECTION("Test KDBLd classifier (" + file_name + ")")
{
@ -69,15 +70,15 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
auto score = clf.score(XCont, yCont);
//scores[{file_name, "KDBLd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd"
}]).epsilon(1e-6));
}]).epsilon(epsilon));
}
SECTION("Test SPODE classifier (" + file_name + ")")
{
auto clf = bayesnet::SPODE(1);
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc);
clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
auto score = clf.score(XDisc, yDisc);
// scores[{file_name, "SPODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(1e-6));
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(epsilon));
}
SECTION("Test SPODELd classifier (" + file_name + ")")
{
@ -85,31 +86,31 @@ TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
clf.fit(XCont, yCont, featuresCont, classNameCont, statesCont);
auto score = clf.score(XCont, yCont);
// scores[{file_name, "SPODELd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(1e-6));
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(epsilon));
}
SECTION("Test AODE classifier (" + file_name + ")")
{
auto clf = bayesnet::AODE();
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc);
clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
auto score = clf.score(XDisc, yDisc);
// scores[{file_name, "AODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(1e-6));
REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(epsilon));
}
SECTION("Test AODELd classifier (" + file_name + ")")
{
auto clf = bayesnet::AODE();
auto clf = bayesnet::AODELd();
clf.fit(XCont, yCont, featuresCont, classNameCont, statesCont);
auto score = clf.score(XCont, yCont);
// scores[{file_name, "AODELd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(1e-6));
REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(epsilon));
}
SECTION("Test BoostAODE classifier (" + file_name + ")")
{
auto clf = bayesnet::BoostAODE();
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc);
clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
auto score = clf.score(XDisc, yDisc);
// scores[{file_name, "BoostAODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(1e-6));
REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(epsilon));
}
// for (auto scores : scores) {
// cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, ";
@ -126,18 +127,18 @@ TEST_CASE("Models featuresDisc")
);
auto clf = bayesnet::TAN();
auto [XDisc, yDisc, featuresDisc, className, statesDisc] = loadFile("iris");
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc);
REQUIRE(clf.getNumberOfNodes() == 5);
auto [XDisc, yDisc, featuresDisc, classNameDisc, statesDisc] = loadFile("iris");
clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
REQUIRE(clf.getNumberOfNodes() == 6);
REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.show() == vector<string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
REQUIRE(clf.graph("Test") == graph);
}
TEST_CASE("Get num featuresDisc & num edges")
{
auto [XDisc, yDisc, featuresDisc, className, statesDisc] = loadFile("iris");
auto [XDisc, yDisc, featuresDisc, classNameDisc, statesDisc] = loadFile("iris");
auto clf = bayesnet::KDB(2);
clf.fit(XDisc, yDisc, featuresDisc, className, statesDisc);
REQUIRE(clf.getNumberOfNodes() == 5);
clf.fit(XDisc, yDisc, featuresDisc, classNameDisc, statesDisc);
REQUIRE(clf.getNumberOfNodes() == 6);
REQUIRE(clf.getNumberOfEdges() == 8);
}

View File

@ -6,7 +6,7 @@ class Paths {
public:
static string datasets()
{
return "../data/";
return "../../data/";
}
};
@ -62,19 +62,19 @@ tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadData
auto states = map<string, vector<int>>();
if (discretize_dataset) {
auto Xr = discretizeDataset(X, y);
Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
}
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0);
} else {
Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) {
Xd.index_put_({ "...", i }, torch::tensor(X[i]));
Xd.index_put_({ i, "..." }, torch::tensor(X[i]));
}
}
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };