Complete implementation with tests
This commit is contained in:
@@ -8,7 +8,7 @@
|
|||||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
[](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||||
[](https://deepwiki.com/Doctorado-ML/BayesNet)
|
[](https://deepwiki.com/Doctorado-ML/BayesNet)
|
||||||

|

|
||||||
[](https://gitea.rmontanana.es/rmontanana/BayesNet)
|
[](https://gitea.rmontanana.es/rmontanana/BayesNet)
|
||||||
[](https://doi.org/10.5281/zenodo.14210344)
|
[](https://doi.org/10.5281/zenodo.14210344)
|
||||||
|
|
||||||
Bayesian Network Classifiers library
|
Bayesian Network Classifiers library
|
||||||
|
@@ -14,33 +14,29 @@ namespace bayesnet {
|
|||||||
validHyperparameters.push_back("k");
|
validHyperparameters.push_back("k");
|
||||||
validHyperparameters.push_back("theta");
|
validHyperparameters.push_back("theta");
|
||||||
}
|
}
|
||||||
// Old column only (removed by this commit): out-of-line KDBLd::setHyperparameters.
// Works on a copy of the incoming JSON, consumes the "k" and "theta" entries,
// and forwards whatever is left to Proposal::setHyperparameters.
void KDBLd::setHyperparameters(const nlohmann::json& hyperparameters_)
|
|
||||||
{
|
|
||||||
// Mutable copy: the parameter is a const reference, but entries are erased below.
auto hyperparameters = hyperparameters_;
|
|
||||||
if (hyperparameters.contains("k")) {
|
|
||||||
k = hyperparameters["k"];
|
|
||||||
hyperparameters.erase("k");
|
|
||||||
}
|
|
||||||
if (hyperparameters.contains("theta")) {
|
|
||||||
theta = hyperparameters["theta"];
|
|
||||||
hyperparameters.erase("theta");
|
|
||||||
}
|
|
||||||
// Everything not consumed above is handed to Proposal.
Proposal::setHyperparameters(hyperparameters);
|
|
||||||
}
|
|
||||||
// Side-by-side diff of the KDBLd fitting code.
// Old column: one fit(X_, y_, ...) that checks the input, stores features, className,
// Xf and y, runs iterativeLocalDiscretization and then KDB::fit.
// New column: fit(X_, y_, ...) and fit(dataset, ...) each fill Xf and y and delegate to
// commonFit, which stores features/className and performs the discretization + KDB::fit.
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||||
{
|
{
|
||||||
checkInput(X_, y_);
|
checkInput(X_, y_);
|
||||||
features = features_;
|
|
||||||
className = className_;
|
|
||||||
Xf = X_;
|
Xf = X_;
|
||||||
y = y_;
|
y = y_;
|
||||||
|
return commonFit(features_, className_, states_, smoothing);
|
||||||
// Use iterative local discretization instead of the two-phase approach
|
}
|
||||||
|
// New column only: overload taking a single tensor that holds features and labels together.
// Throws std::runtime_error when the tensor is not floating point.
KDBLd& KDBLd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||||
|
{
|
||||||
|
if (!torch::is_floating_point(dataset)) {
|
||||||
|
throw std::runtime_error("Dataset must be a floating point tensor");
|
||||||
|
}
|
||||||
|
// Xf: every index along dimension 0 except the last one (cloned).
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
|
||||||
|
// y: the last index along dimension 0, cloned and converted to kInt32.
y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
|
||||||
|
// NOTE(review): commonFit receives no tensor argument and refers to `dataset` itself;
// the `dataset` parameter of this overload is only used to fill Xf and y — confirm intended.
return commonFit(features_, className_, states_, smoothing);
|
||||||
|
}
|
||||||
|
|
||||||
|
// New column only: shared tail of both fit overloads (expects Xf and y to be set already).
KDBLd& KDBLd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||||
|
{
|
||||||
|
features = features_;
|
||||||
|
className = className_;
|
||||||
// Both columns: the map returned by iterativeLocalDiscretization replaces `states`.
states = iterativeLocalDiscretization(y, static_cast<KDB*>(this), dataset, features, className, states_, smoothing);
|
states = iterativeLocalDiscretization(y, static_cast<KDB*>(this), dataset, features, className, states_, smoothing);
|
||||||
|
|
||||||
// Final fit with converged discretization
|
|
||||||
KDB::fit(dataset, features, className, states, smoothing);
|
KDB::fit(dataset, features, className, states, smoothing);
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
torch::Tensor KDBLd::predict(torch::Tensor& X)
|
torch::Tensor KDBLd::predict(torch::Tensor& X)
|
||||||
|
@@ -15,8 +15,15 @@ namespace bayesnet {
|
|||||||
explicit KDBLd(int k);
|
explicit KDBLd(int k);
|
||||||
virtual ~KDBLd() = default;
|
virtual ~KDBLd() = default;
|
||||||
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||||
|
KDBLd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||||
|
KDBLd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||||
std::vector<std::string> graph(const std::string& name = "KDB") const override;
|
std::vector<std::string> graph(const std::string& name = "KDB") const override;
|
||||||
// Old column: declaration only (the definition lived in the .cc file).
// New column: inline definition. A mutable copy of the JSON is passed first to
// Proposal::setHyperparameters and then, the same object, to KDB::setHyperparameters.
// In the new column Proposal::setHyperparameters takes a non-const reference and erases
// the entries it consumes, so KDB only sees what Proposal left behind.
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
void setHyperparameters(const nlohmann::json& hyperparameters_) override
|
||||||
|
{
|
||||||
|
auto hyperparameters = hyperparameters_;
|
||||||
|
Proposal::setHyperparameters(hyperparameters);
|
||||||
|
KDB::setHyperparameters(hyperparameters);
|
||||||
|
}
|
||||||
torch::Tensor predict(torch::Tensor& X) override;
|
torch::Tensor predict(torch::Tensor& X) override;
|
||||||
torch::Tensor predict_proba(torch::Tensor& X) override;
|
torch::Tensor predict_proba(torch::Tensor& X) override;
|
||||||
static inline std::string version() { return "0.0.1"; };
|
static inline std::string version() { return "0.0.1"; };
|
||||||
|
@@ -11,6 +11,7 @@
|
|||||||
#include "Classifier.h"
|
#include "Classifier.h"
|
||||||
#include "KDB.h"
|
#include "KDB.h"
|
||||||
#include "TAN.h"
|
#include "TAN.h"
|
||||||
|
#include "SPODE.h"
|
||||||
#include "KDBLd.h"
|
#include "KDBLd.h"
|
||||||
#include "TANLd.h"
|
#include "TANLd.h"
|
||||||
|
|
||||||
@@ -18,9 +19,8 @@ namespace bayesnet {
|
|||||||
Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_)
|
Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
void Proposal::setHyperparameters(const nlohmann::json& hyperparameters_)
|
void Proposal::setHyperparameters(nlohmann::json& hyperparameters)
|
||||||
{
|
{
|
||||||
auto hyperparameters = hyperparameters_;
|
|
||||||
if (hyperparameters.contains("ld_proposed_cuts")) {
|
if (hyperparameters.contains("ld_proposed_cuts")) {
|
||||||
ld_params.proposed_cuts = hyperparameters["ld_proposed_cuts"];
|
ld_params.proposed_cuts = hyperparameters["ld_proposed_cuts"];
|
||||||
hyperparameters.erase("ld_proposed_cuts");
|
hyperparameters.erase("ld_proposed_cuts");
|
||||||
@@ -55,9 +55,6 @@ namespace bayesnet {
|
|||||||
convergence_params.verbose = hyperparameters["verbose_convergence"];
|
convergence_params.verbose = hyperparameters["verbose_convergence"];
|
||||||
hyperparameters.erase("verbose_convergence");
|
hyperparameters.erase("verbose_convergence");
|
||||||
}
|
}
|
||||||
if (!hyperparameters.empty()) {
|
|
||||||
throw std::invalid_argument("Invalid hyperparameters for Proposal: " + hyperparameters.dump());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y)
|
void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y)
|
||||||
@@ -209,7 +206,7 @@ namespace bayesnet {
|
|||||||
|
|
||||||
// Phase 2: Build model with current discretization
|
// Phase 2: Build model with current discretization
|
||||||
classifier->fit(dataset, features, className, currentStates, weights, smoothing);
|
classifier->fit(dataset, features, className, currentStates, weights, smoothing);
|
||||||
|
|
||||||
// Phase 3: Network-aware discretization refinement
|
// Phase 3: Network-aware discretization refinement
|
||||||
currentStates = localDiscretizationProposal(currentStates, classifier->getModel());
|
currentStates = localDiscretizationProposal(currentStates, classifier->getModel());
|
||||||
|
|
||||||
@@ -228,51 +225,15 @@ namespace bayesnet {
|
|||||||
return currentStates;
|
return currentStates;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Old column only (removed by this commit).
// Accumulates, over the first dimension of `dataset`, one log term per node of `model`:
//   - class node: CPT entry for the sample's class value divided by dataset.size(0),
//     floored at 1e-10 before taking the log;
//   - every other node: a constant placeholder probability of 0.1.
// Returns the accumulated sum as a double.
// NOTE(review): featureIdx and featureValue are computed but never feed into the result.
// NOTE(review): rows are treated as samples and the last column as the class here, while
// the Proposal header describes pDataset as "(n+1)xm" — confirm which layout was intended.
double Proposal::computeLogLikelihood(Network& model, const torch::Tensor& dataset)
|
|
||||||
{
|
|
||||||
double logLikelihood = 0.0;
|
|
||||||
int n_samples = dataset.size(0);
|
|
||||||
int n_features = dataset.size(1);
|
|
||||||
|
|
||||||
for (int i = 0; i < n_samples; ++i) {
|
|
||||||
double sampleLogLikelihood = 0.0;
|
|
||||||
|
|
||||||
// Get class value for this sample
|
|
||||||
int classValue = dataset[i][n_features - 1].item<int>();
|
|
||||||
|
|
||||||
// Compute log-likelihood for each feature given its parents and class
|
|
||||||
for (const auto& node : model.getNodes()) {
|
|
||||||
if (node.first == model.getClassName()) {
|
|
||||||
// For class node, add log P(class)
|
|
||||||
auto classCounts = node.second->getCPT();
|
|
||||||
double classProb = classCounts[classValue].item<double>() / dataset.size(0);
|
|
||||||
sampleLogLikelihood += std::log(std::max(classProb, 1e-10));
|
|
||||||
} else {
|
|
||||||
// For feature nodes, add log P(feature | parents, class)
|
|
||||||
// NOTE(review): begin()/end() come from separate getFeatures() calls; this is only
// valid if getFeatures() returns a reference rather than a fresh copy — confirm.
int featureIdx = std::distance(model.getFeatures().begin(),
|
|
||||||
std::find(model.getFeatures().begin(),
|
|
||||||
model.getFeatures().end(),
|
|
||||||
node.first));
|
|
||||||
int featureValue = dataset[i][featureIdx].item<int>();
|
|
||||||
|
|
||||||
// Simplified probability computation - in practice would need full CPT lookup
|
|
||||||
double featureProb = 0.1; // Placeholder - would compute from CPT
|
|
||||||
sampleLogLikelihood += std::log(std::max(featureProb, 1e-10));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
logLikelihood += sampleLogLikelihood;
|
|
||||||
}
|
|
||||||
|
|
||||||
return logLikelihood;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Explicit template instantiation for common classifier types
|
// Explicit template instantiation for common classifier types
|
||||||
// Explicit instantiations of Proposal::iterativeLocalDiscretization, one per classifier
// pointer type. KDB and TAN appear in both columns; the SPODE instantiation exists only
// in the new column.
template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization<KDB>(
|
template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization<KDB>(
|
||||||
const torch::Tensor&, KDB*, torch::Tensor&, const std::vector<std::string>&,
|
const torch::Tensor&, KDB*, torch::Tensor&, const std::vector<std::string>&,
|
||||||
const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
|
const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
|
||||||
|
|
||||||
template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization<TAN>(
|
template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization<TAN>(
|
||||||
const torch::Tensor&, TAN*, torch::Tensor&, const std::vector<std::string>&,
|
const torch::Tensor&, TAN*, torch::Tensor&, const std::vector<std::string>&,
|
||||||
const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
|
const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
|
||||||
|
// New column only: instantiation for SPODE.
template map<std::string, std::vector<int>> Proposal::iterativeLocalDiscretization<SPODE>(
|
||||||
|
const torch::Tensor&, SPODE*, torch::Tensor&, const std::vector<std::string>&,
|
||||||
|
const std::string&, const map<std::string, std::vector<int>>&, Smoothing_t);
|
||||||
}
|
}
|
||||||
|
@@ -19,7 +19,7 @@ namespace bayesnet {
|
|||||||
class Proposal {
|
class Proposal {
|
||||||
public:
|
public:
|
||||||
Proposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_);
|
Proposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_);
|
||||||
void setHyperparameters(const nlohmann::json& hyperparameters_);
|
void setHyperparameters(nlohmann::json& hyperparameters_);
|
||||||
protected:
|
protected:
|
||||||
void checkInput(const torch::Tensor& X, const torch::Tensor& y);
|
void checkInput(const torch::Tensor& X, const torch::Tensor& y);
|
||||||
torch::Tensor prepareX(torch::Tensor& X);
|
torch::Tensor prepareX(torch::Tensor& X);
|
||||||
@@ -61,7 +61,6 @@ namespace bayesnet {
|
|||||||
};
|
};
|
||||||
private:
|
private:
|
||||||
std::vector<int> factorize(const std::vector<std::string>& labels_t);
|
std::vector<int> factorize(const std::vector<std::string>& labels_t);
|
||||||
double computeLogLikelihood(Network& model, const torch::Tensor& dataset);
|
|
||||||
torch::Tensor& pDataset; // (n+1)xm tensor
|
torch::Tensor& pDataset; // (n+1)xm tensor
|
||||||
std::vector<std::string>& pFeatures;
|
std::vector<std::string>& pFeatures;
|
||||||
std::string& pClassName;
|
std::string& pClassName;
|
||||||
|
@@ -34,12 +34,8 @@ namespace bayesnet {
|
|||||||
{
|
{
|
||||||
features = features_;
|
features = features_;
|
||||||
className = className_;
|
className = className_;
|
||||||
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
|
states = iterativeLocalDiscretization(y, static_cast<SPODE*>(this), dataset, features, className, states_, smoothing);
|
||||||
states = fit_local_discretization(y);
|
|
||||||
// We have discretized the input data
|
|
||||||
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
|
|
||||||
SPODE::fit(dataset, features, className, states, smoothing);
|
SPODE::fit(dataset, features, className, states, smoothing);
|
||||||
states = localDiscretizationProposal(states, model);
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
torch::Tensor SPODELd::predict(torch::Tensor& X)
|
torch::Tensor SPODELd::predict(torch::Tensor& X)
|
||||||
|
@@ -18,6 +18,12 @@ namespace bayesnet {
|
|||||||
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||||
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||||
std::vector<std::string> graph(const std::string& name = "SPODELd") const override;
|
std::vector<std::string> graph(const std::string& name = "SPODELd") const override;
|
||||||
|
// New column only: inline override added by this commit.
// Makes a mutable copy of the JSON, passes it to Proposal::setHyperparameters and then
// passes the same object to SPODE::setHyperparameters. In the new column Proposal takes
// a non-const reference and erases the entries it consumes, so SPODE sees the remainder.
void setHyperparameters(const nlohmann::json& hyperparameters_) override
|
||||||
|
{
|
||||||
|
auto hyperparameters = hyperparameters_;
|
||||||
|
Proposal::setHyperparameters(hyperparameters);
|
||||||
|
SPODE::setHyperparameters(hyperparameters);
|
||||||
|
}
|
||||||
torch::Tensor predict(torch::Tensor& X) override;
|
torch::Tensor predict(torch::Tensor& X) override;
|
||||||
torch::Tensor predict_proba(torch::Tensor& X) override;
|
torch::Tensor predict_proba(torch::Tensor& X) override;
|
||||||
static inline std::string version() { return "0.0.1"; };
|
static inline std::string version() { return "0.0.1"; };
|
||||||
|
@@ -12,17 +12,26 @@ namespace bayesnet {
|
|||||||
// Side-by-side diff of the TANLd fitting code.
// Old column: one fit(X_, y_, ...) that checks the input, stores features, className,
// Xf and y, runs iterativeLocalDiscretization and then TAN::fit.
// New column: fit(X_, y_, ...) and fit(dataset, ...) each fill Xf and y and delegate to
// commonFit, which stores features/className and performs the discretization + TAN::fit.
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||||
{
|
{
|
||||||
checkInput(X_, y_);
|
checkInput(X_, y_);
|
||||||
features = features_;
|
|
||||||
className = className_;
|
|
||||||
Xf = X_;
|
Xf = X_;
|
||||||
y = y_;
|
y = y_;
|
||||||
|
return commonFit(features_, className_, states_, smoothing);
|
||||||
// Use iterative local discretization instead of the two-phase approach
|
}
|
||||||
|
// New column only: overload taking a single tensor that holds features and labels together.
// Throws std::runtime_error when the tensor is not floating point.
TANLd& TANLd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||||
|
{
|
||||||
|
if (!torch::is_floating_point(dataset)) {
|
||||||
|
throw std::runtime_error("Dataset must be a floating point tensor");
|
||||||
|
}
|
||||||
|
// Xf: every index along dimension 0 except the last one (cloned).
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
|
||||||
|
// y: the last index along dimension 0, cloned and converted to kInt32.
y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
|
||||||
|
// NOTE(review): commonFit receives no tensor argument and refers to `dataset` itself;
// the `dataset` parameter of this overload is only used to fill Xf and y — confirm intended.
return commonFit(features_, className_, states_, smoothing);
|
||||||
|
}
|
||||||
|
|
||||||
|
// New column only: shared tail of both fit overloads (expects Xf and y to be set already).
TANLd& TANLd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
|
||||||
|
{
|
||||||
|
features = features_;
|
||||||
|
className = className_;
|
||||||
// Both columns: the map returned by iterativeLocalDiscretization replaces `states`.
states = iterativeLocalDiscretization(y, static_cast<TAN*>(this), dataset, features, className, states_, smoothing);
|
states = iterativeLocalDiscretization(y, static_cast<TAN*>(this), dataset, features, className, states_, smoothing);
|
||||||
|
|
||||||
// Final fit with converged discretization
|
|
||||||
TAN::fit(dataset, features, className, states, smoothing);
|
TAN::fit(dataset, features, className, states, smoothing);
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
torch::Tensor TANLd::predict(torch::Tensor& X)
|
torch::Tensor TANLd::predict(torch::Tensor& X)
|
||||||
|
@@ -16,7 +16,15 @@ namespace bayesnet {
|
|||||||
TANLd();
|
TANLd();
|
||||||
virtual ~TANLd() = default;
|
virtual ~TANLd() = default;
|
||||||
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||||
|
TANLd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
|
||||||
|
TANLd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
|
||||||
std::vector<std::string> graph(const std::string& name = "TANLd") const override;
|
std::vector<std::string> graph(const std::string& name = "TANLd") const override;
|
||||||
|
// New column only: inline override added by this commit.
// Makes a mutable copy of the JSON, passes it to Proposal::setHyperparameters and then
// passes the same object to TAN::setHyperparameters. In the new column Proposal takes
// a non-const reference and erases the entries it consumes, so TAN sees the remainder.
void setHyperparameters(const nlohmann::json& hyperparameters_) override
|
||||||
|
{
|
||||||
|
auto hyperparameters = hyperparameters_;
|
||||||
|
Proposal::setHyperparameters(hyperparameters);
|
||||||
|
TAN::setHyperparameters(hyperparameters);
|
||||||
|
}
|
||||||
torch::Tensor predict(torch::Tensor& X) override;
|
torch::Tensor predict(torch::Tensor& X) override;
|
||||||
torch::Tensor predict_proba(torch::Tensor& X) override;
|
torch::Tensor predict_proba(torch::Tensor& X) override;
|
||||||
};
|
};
|
||||||
|
@@ -17,6 +17,10 @@ namespace bayesnet {
|
|||||||
virtual ~AODELd() = default;
|
virtual ~AODELd() = default;
|
||||||
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing) override;
|
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing) override;
|
||||||
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
|
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
|
||||||
|
// New column only: inline override added by this commit.
// Stores the incoming JSON in `hyperparameters` as-is; no key is read, validated or
// forwarded here. NOTE(review): where the stored value is consumed is not visible in
// this diff — confirm against the AODELd implementation.
void setHyperparameters(const nlohmann::json& hyperparameters_) override
|
||||||
|
{
|
||||||
|
hyperparameters = hyperparameters_;
|
||||||
|
}
|
||||||
protected:
|
protected:
|
||||||
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
|
||||||
void buildModel(const torch::Tensor& weights) override;
|
void buildModel(const torch::Tensor& weights) override;
|
||||||
|
@@ -31,9 +31,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
|||||||
{{"diabetes", "SPODE"}, 0.802083},
|
{{"diabetes", "SPODE"}, 0.802083},
|
||||||
{{"diabetes", "TAN"}, 0.821615},
|
{{"diabetes", "TAN"}, 0.821615},
|
||||||
{{"diabetes", "AODELd"}, 0.8125f},
|
{{"diabetes", "AODELd"}, 0.8125f},
|
||||||
{{"diabetes", "KDBLd"}, 0.80208f},
|
{{"diabetes", "KDBLd"}, 0.804688f},
|
||||||
{{"diabetes", "SPODELd"}, 0.7890625f},
|
{{"diabetes", "SPODELd"}, 0.7890625f},
|
||||||
{{"diabetes", "TANLd"}, 0.803385437f},
|
{{"diabetes", "TANLd"}, 0.8125f},
|
||||||
{{"diabetes", "BoostAODE"}, 0.83984f},
|
{{"diabetes", "BoostAODE"}, 0.83984f},
|
||||||
// Ecoli
|
// Ecoli
|
||||||
{{"ecoli", "AODE"}, 0.889881},
|
{{"ecoli", "AODE"}, 0.889881},
|
||||||
@@ -42,9 +42,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
|||||||
{{"ecoli", "SPODE"}, 0.880952},
|
{{"ecoli", "SPODE"}, 0.880952},
|
||||||
{{"ecoli", "TAN"}, 0.892857},
|
{{"ecoli", "TAN"}, 0.892857},
|
||||||
{{"ecoli", "AODELd"}, 0.875f},
|
{{"ecoli", "AODELd"}, 0.875f},
|
||||||
{{"ecoli", "KDBLd"}, 0.880952358f},
|
{{"ecoli", "KDBLd"}, 0.872024f},
|
||||||
{{"ecoli", "SPODELd"}, 0.839285731f},
|
{{"ecoli", "SPODELd"}, 0.839285731f},
|
||||||
{{"ecoli", "TANLd"}, 0.848214269f},
|
{{"ecoli", "TANLd"}, 0.869047642f},
|
||||||
{{"ecoli", "BoostAODE"}, 0.89583f},
|
{{"ecoli", "BoostAODE"}, 0.89583f},
|
||||||
// Glass
|
// Glass
|
||||||
{{"glass", "AODE"}, 0.79439},
|
{{"glass", "AODE"}, 0.79439},
|
||||||
@@ -53,9 +53,9 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
|||||||
{{"glass", "SPODE"}, 0.775701},
|
{{"glass", "SPODE"}, 0.775701},
|
||||||
{{"glass", "TAN"}, 0.827103},
|
{{"glass", "TAN"}, 0.827103},
|
||||||
{{"glass", "AODELd"}, 0.799065411f},
|
{{"glass", "AODELd"}, 0.799065411f},
|
||||||
{{"glass", "KDBLd"}, 0.82710278f},
|
{{"glass", "KDBLd"}, 0.864485979f},
|
||||||
{{"glass", "SPODELd"}, 0.780373812f},
|
{{"glass", "SPODELd"}, 0.780373812f},
|
||||||
{{"glass", "TANLd"}, 0.869158864f},
|
{{"glass", "TANLd"}, 0.831775725f},
|
||||||
{{"glass", "BoostAODE"}, 0.84579f},
|
{{"glass", "BoostAODE"}, 0.84579f},
|
||||||
// Iris
|
// Iris
|
||||||
{{"iris", "AODE"}, 0.973333},
|
{{"iris", "AODE"}, 0.973333},
|
||||||
@@ -68,29 +68,29 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
|||||||
{{"iris", "SPODELd"}, 0.96f},
|
{{"iris", "SPODELd"}, 0.96f},
|
||||||
{{"iris", "TANLd"}, 0.97333f},
|
{{"iris", "TANLd"}, 0.97333f},
|
||||||
{{"iris", "BoostAODE"}, 0.98f} };
|
{{"iris", "BoostAODE"}, 0.98f} };
|
||||||
std::map<std::string, bayesnet::BaseClassifier*> models{ {"AODE", new bayesnet::AODE()},
|
std::map<std::string, std::unique_ptr<bayesnet::BaseClassifier>> models;
|
||||||
{"AODELd", new bayesnet::AODELd()},
|
models["AODE"] = std::make_unique<bayesnet::AODE>();
|
||||||
{"BoostAODE", new bayesnet::BoostAODE()},
|
models["AODELd"] = std::make_unique<bayesnet::AODELd>();
|
||||||
{"KDB", new bayesnet::KDB(2)},
|
models["BoostAODE"] = std::make_unique<bayesnet::BoostAODE>();
|
||||||
{"KDBLd", new bayesnet::KDBLd(2)},
|
models["KDB"] = std::make_unique<bayesnet::KDB>(2);
|
||||||
{"XSPODE", new bayesnet::XSpode(1)},
|
models["KDBLd"] = std::make_unique<bayesnet::KDBLd>(2);
|
||||||
{"SPODE", new bayesnet::SPODE(1)},
|
models["XSPODE"] = std::make_unique<bayesnet::XSpode>(1);
|
||||||
{"SPODELd", new bayesnet::SPODELd(1)},
|
models["SPODE"] = std::make_unique<bayesnet::SPODE>(1);
|
||||||
{"TAN", new bayesnet::TAN()},
|
models["SPODELd"] = std::make_unique<bayesnet::SPODELd>(1);
|
||||||
{"TANLd", new bayesnet::TANLd()} };
|
models["TAN"] = std::make_unique<bayesnet::TAN>();
|
||||||
|
models["TANLd"] = std::make_unique<bayesnet::TANLd>();
|
||||||
std::string name = GENERATE("AODE", "AODELd", "KDB", "KDBLd", "SPODE", "XSPODE", "SPODELd", "TAN", "TANLd");
|
std::string name = GENERATE("AODE", "AODELd", "KDB", "KDBLd", "SPODE", "XSPODE", "SPODELd", "TAN", "TANLd");
|
||||||
auto clf = models[name];
|
auto clf = std::move(models[name]);
|
||||||
|
|
||||||
SECTION("Test " + name + " classifier")
|
SECTION("Test " + name + " classifier")
|
||||||
{
|
{
|
||||||
for (const std::string& file_name : { "glass", "iris", "ecoli", "diabetes" }) {
|
for (const std::string& file_name : { "glass", "iris", "ecoli", "diabetes" }) {
|
||||||
auto clf = models[name];
|
|
||||||
auto discretize = name.substr(name.length() - 2) != "Ld";
|
auto discretize = name.substr(name.length() - 2) != "Ld";
|
||||||
auto raw = RawDatasets(file_name, discretize);
|
auto raw = RawDatasets(file_name, discretize);
|
||||||
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||||
auto score = clf->score(raw.Xt, raw.yt);
|
auto score = clf->score(raw.Xt, raw.yt);
|
||||||
// std::cout << "Classifier: " << name << " File: " << file_name << " Score: " << score << " expected = " <<
|
// std::cout << "Classifier: " << name << " File: " << file_name << " Score: " << score << " expected = " <<
|
||||||
// scores[{file_name, name}] << std::endl;
|
// scores[{file_name, name}] << std::endl;
|
||||||
INFO("Classifier: " << name << " File: " << file_name);
|
INFO("Classifier: " << name << " File: " << file_name);
|
||||||
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
|
REQUIRE(score == Catch::Approx(scores[{file_name, name}]).epsilon(raw.epsilon));
|
||||||
REQUIRE(clf->getStatus() == bayesnet::NORMAL);
|
REQUIRE(clf->getStatus() == bayesnet::NORMAL);
|
||||||
@@ -101,7 +101,6 @@ TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
|||||||
INFO("Checking version of " << name << " classifier");
|
INFO("Checking version of " << name << " classifier");
|
||||||
REQUIRE(clf->getVersion() == ACTUAL_VERSION);
|
REQUIRE(clf->getVersion() == ACTUAL_VERSION);
|
||||||
}
|
}
|
||||||
delete clf;
|
|
||||||
}
|
}
|
||||||
TEST_CASE("Models features & Graph", "[Models]")
|
TEST_CASE("Models features & Graph", "[Models]")
|
||||||
{
|
{
|
||||||
@@ -133,7 +132,7 @@ TEST_CASE("Models features & Graph", "[Models]")
|
|||||||
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||||
REQUIRE(clf.getNumberOfEdges() == 7);
|
REQUIRE(clf.getNumberOfEdges() == 7);
|
||||||
REQUIRE(clf.getNumberOfStates() == 27);
|
REQUIRE(clf.getNumberOfStates() == 26);
|
||||||
REQUIRE(clf.getClassNumStates() == 3);
|
REQUIRE(clf.getClassNumStates() == 3);
|
||||||
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ",
|
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ",
|
||||||
"petallength -> sepallength, ", "petalwidth -> ",
|
"petallength -> sepallength, ", "petalwidth -> ",
|
||||||
@@ -149,7 +148,6 @@ TEST_CASE("Get num features & num edges", "[Models]")
|
|||||||
REQUIRE(clf.getNumberOfNodes() == 5);
|
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||||
REQUIRE(clf.getNumberOfEdges() == 8);
|
REQUIRE(clf.getNumberOfEdges() == 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Model predict_proba", "[Models]")
|
TEST_CASE("Model predict_proba", "[Models]")
|
||||||
{
|
{
|
||||||
std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting", "TANLd", "SPODELd", "KDBLd");
|
std::string model = GENERATE("TAN", "SPODE", "BoostAODEproba", "BoostAODEvoting", "TANLd", "SPODELd", "KDBLd");
|
||||||
@@ -180,15 +178,15 @@ TEST_CASE("Model predict_proba", "[Models]")
|
|||||||
{0.0284828, 0.770524, 0.200993},
|
{0.0284828, 0.770524, 0.200993},
|
||||||
{0.0213182, 0.857189, 0.121493},
|
{0.0213182, 0.857189, 0.121493},
|
||||||
{0.00868436, 0.949494, 0.0418215} });
|
{0.00868436, 0.949494, 0.0418215} });
|
||||||
auto res_prob_tanld = std::vector<std::vector<double>>({ {0.000544493, 0.995796, 0.00365992 },
|
auto res_prob_tanld = std::vector<std::vector<double>>({ {0.000597557, 0.9957, 0.00370254},
|
||||||
{0.000908092, 0.997268, 0.00182429 },
|
{0.000731377, 0.997914, 0.0013544},
|
||||||
{0.000908092, 0.997268, 0.00182429 },
|
{0.000731377, 0.997914, 0.0013544},
|
||||||
{0.000908092, 0.997268, 0.00182429 },
|
{0.000731377, 0.997914, 0.0013544},
|
||||||
{0.00228423, 0.994645, 0.00307078 },
|
{0.000838614, 0.998122, 0.00103923},
|
||||||
{0.00120539, 0.0666788, 0.932116 },
|
{0.00130852, 0.0659492, 0.932742},
|
||||||
{0.00361847, 0.979203, 0.017179 },
|
{0.00365946, 0.979412, 0.0169281},
|
||||||
{0.00483293, 0.985326, 0.00984064 },
|
{0.00435035, 0.986248, 0.00940212},
|
||||||
{0.000595606, 0.9977, 0.00170441 } });
|
{0.000583815, 0.997746, 0.00167066} });
|
||||||
auto res_prob_spodeld = std::vector<std::vector<double>>({ {0.000908024, 0.993742, 0.00535024 },
|
auto res_prob_spodeld = std::vector<std::vector<double>>({ {0.000908024, 0.993742, 0.00535024 },
|
||||||
{0.00187726, 0.99167, 0.00645308 },
|
{0.00187726, 0.99167, 0.00645308 },
|
||||||
{0.00187726, 0.99167, 0.00645308 },
|
{0.00187726, 0.99167, 0.00645308 },
|
||||||
@@ -216,29 +214,33 @@ TEST_CASE("Model predict_proba", "[Models]")
|
|||||||
{"TANLd", res_prob_tanld},
|
{"TANLd", res_prob_tanld},
|
||||||
{"SPODELd", res_prob_spodeld},
|
{"SPODELd", res_prob_spodeld},
|
||||||
{"KDBLd", res_prob_kdbld} };
|
{"KDBLd", res_prob_kdbld} };
|
||||||
std::map<std::string, bayesnet::BaseClassifier*> models{ {"TAN", new bayesnet::TAN()},
|
|
||||||
{"SPODE", new bayesnet::SPODE(0)},
|
std::map<std::string, std::unique_ptr<bayesnet::BaseClassifier>> models;
|
||||||
{"BoostAODEproba", new bayesnet::BoostAODE(false)},
|
models["TAN"] = std::make_unique<bayesnet::TAN>();
|
||||||
{"BoostAODEvoting", new bayesnet::BoostAODE(true)},
|
models["SPODE"] = std::make_unique<bayesnet::SPODE>(0);
|
||||||
{"TANLd", new bayesnet::TANLd()},
|
models["BoostAODEproba"] = std::make_unique<bayesnet::BoostAODE>(false);
|
||||||
{"SPODELd", new bayesnet::SPODELd(0)},
|
models["BoostAODEvoting"] = std::make_unique<bayesnet::BoostAODE>(true);
|
||||||
{"KDBLd", new bayesnet::KDBLd(2)} };
|
models["TANLd"] = std::make_unique<bayesnet::TANLd>();
|
||||||
|
models["SPODELd"] = std::make_unique<bayesnet::SPODELd>(0);
|
||||||
|
models["KDBLd"] = std::make_unique<bayesnet::KDBLd>(2);
|
||||||
|
|
||||||
int init_index = 78;
|
int init_index = 78;
|
||||||
|
|
||||||
SECTION("Test " + model + " predict_proba")
|
SECTION("Test " + model + " predict_proba")
|
||||||
{
|
{
|
||||||
|
INFO("Testing " << model << " predict_proba");
|
||||||
auto ld_model = model.substr(model.length() - 2) == "Ld";
|
auto ld_model = model.substr(model.length() - 2) == "Ld";
|
||||||
auto discretize = !ld_model;
|
auto discretize = !ld_model;
|
||||||
auto raw = RawDatasets("iris", discretize);
|
auto raw = RawDatasets("iris", discretize);
|
||||||
auto clf = models[model];
|
auto& clf = *models[model];
|
||||||
clf->fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||||
auto yt_pred_proba = clf->predict_proba(raw.Xt);
|
auto yt_pred_proba = clf.predict_proba(raw.Xt);
|
||||||
auto yt_pred = clf->predict(raw.Xt);
|
auto yt_pred = clf.predict(raw.Xt);
|
||||||
std::vector<int> y_pred;
|
std::vector<int> y_pred;
|
||||||
std::vector<std::vector<double>> y_pred_proba;
|
std::vector<std::vector<double>> y_pred_proba;
|
||||||
if (!ld_model) {
|
if (!ld_model) {
|
||||||
y_pred = clf->predict(raw.Xv);
|
y_pred = clf.predict(raw.Xv);
|
||||||
y_pred_proba = clf->predict_proba(raw.Xv);
|
y_pred_proba = clf.predict_proba(raw.Xv);
|
||||||
REQUIRE(y_pred.size() == y_pred_proba.size());
|
REQUIRE(y_pred.size() == y_pred_proba.size());
|
||||||
REQUIRE(y_pred.size() == yt_pred.size(0));
|
REQUIRE(y_pred.size() == yt_pred.size(0));
|
||||||
REQUIRE(y_pred.size() == yt_pred_proba.size(0));
|
REQUIRE(y_pred.size() == yt_pred_proba.size(0));
|
||||||
@@ -267,18 +269,20 @@ TEST_CASE("Model predict_proba", "[Models]")
|
|||||||
} else {
|
} else {
|
||||||
// Check predict_proba values for vectors and tensors
|
// Check predict_proba values for vectors and tensors
|
||||||
auto predictedClasses = yt_pred_proba.argmax(1);
|
auto predictedClasses = yt_pred_proba.argmax(1);
|
||||||
|
// std::cout << model << std::endl;
|
||||||
for (int i = 0; i < 9; i++) {
|
for (int i = 0; i < 9; i++) {
|
||||||
REQUIRE(predictedClasses[i].item<int>() == yt_pred[i].item<int>());
|
REQUIRE(predictedClasses[i].item<int>() == yt_pred[i].item<int>());
|
||||||
|
// std::cout << "{";
|
||||||
for (int j = 0; j < 3; j++) {
|
for (int j = 0; j < 3; j++) {
|
||||||
|
// std::cout << yt_pred_proba[i + init_index][j].item<double>() << ", ";
|
||||||
REQUIRE(res_prob[model][i][j] ==
|
REQUIRE(res_prob[model][i][j] ==
|
||||||
Catch::Approx(yt_pred_proba[i + init_index][j].item<double>()).epsilon(raw.epsilon));
|
Catch::Approx(yt_pred_proba[i + init_index][j].item<double>()).epsilon(raw.epsilon));
|
||||||
}
|
}
|
||||||
|
// std::cout << "\b\b}," << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
delete clf;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("AODE voting-proba", "[Models]")
|
TEST_CASE("AODE voting-proba", "[Models]")
|
||||||
{
|
{
|
||||||
auto raw = RawDatasets("glass", true);
|
auto raw = RawDatasets("glass", true);
|
||||||
@@ -324,11 +328,15 @@ TEST_CASE("KDB with hyperparameters", "[Models]")
|
|||||||
REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
|
REQUIRE(score == Catch::Approx(0.827103).epsilon(raw.epsilon));
|
||||||
REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
|
REQUIRE(scoret == Catch::Approx(0.761682).epsilon(raw.epsilon));
|
||||||
}
|
}
|
||||||
TEST_CASE("Incorrect type of data for SPODELd", "[Models]")
|
TEST_CASE("Incorrect type of data for Ld models", "[Models]")
|
||||||
{
|
{
|
||||||
auto raw = RawDatasets("iris", true);
|
auto raw = RawDatasets("iris", true);
|
||||||
auto clf = bayesnet::SPODELd(0);
|
auto clfs = bayesnet::SPODELd(0);
|
||||||
REQUIRE_THROWS_AS(clf.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
|
REQUIRE_THROWS_AS(clfs.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
|
||||||
|
auto clft = bayesnet::TANLd();
|
||||||
|
REQUIRE_THROWS_AS(clft.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
|
||||||
|
auto clfk = bayesnet::KDBLd(0);
|
||||||
|
REQUIRE_THROWS_AS(clfk.fit(raw.dataset, raw.features, raw.className, raw.states, raw.smoothing), std::runtime_error);
|
||||||
}
|
}
|
||||||
TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
|
TEST_CASE("Predict, predict_proba & score without fitting", "[Models]")
|
||||||
{
|
{
|
||||||
@@ -428,3 +436,49 @@ TEST_CASE("Check KDB loop detection", "[Models]")
|
|||||||
REQUIRE_NOTHROW(clf.test_add_m_edges(features, 0, S, weights));
|
REQUIRE_NOTHROW(clf.test_add_m_edges(features, 0, S, weights));
|
||||||
REQUIRE_NOTHROW(clf.test_add_m_edges(features, 1, S, weights));
|
REQUIRE_NOTHROW(clf.test_add_m_edges(features, 1, S, weights));
|
||||||
}
|
}
|
||||||
|
TEST_CASE("Local discretization hyperparameters", "[Models]")
|
||||||
|
{
|
||||||
|
auto raw = RawDatasets("iris", false);
|
||||||
|
auto clfs = bayesnet::SPODELd(0);
|
||||||
|
clfs.setHyperparameters({
|
||||||
|
{"max_iterations", 7},
|
||||||
|
{"verbose_convergence", true},
|
||||||
|
});
|
||||||
|
REQUIRE_NOTHROW(clfs.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
|
||||||
|
REQUIRE(clfs.getStatus() == bayesnet::NORMAL);
|
||||||
|
auto clfk = bayesnet::KDBLd(0);
|
||||||
|
clfk.setHyperparameters({
|
||||||
|
{"k", 3},
|
||||||
|
{"theta", 1e-4},
|
||||||
|
});
|
||||||
|
REQUIRE_NOTHROW(clfk.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
|
||||||
|
REQUIRE(clfk.getStatus() == bayesnet::NORMAL);
|
||||||
|
auto clfa = bayesnet::AODELd();
|
||||||
|
clfa.setHyperparameters({
|
||||||
|
{"ld_proposed_cuts", 9},
|
||||||
|
{"ld_algorithm", "BINQ"},
|
||||||
|
});
|
||||||
|
REQUIRE_NOTHROW(clfa.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
|
||||||
|
REQUIRE(clfa.getStatus() == bayesnet::NORMAL);
|
||||||
|
auto clft = bayesnet::TANLd();
|
||||||
|
clft.setHyperparameters({
|
||||||
|
{"ld_proposed_cuts", 7},
|
||||||
|
{"mdlp_max_depth", 5},
|
||||||
|
{"mdlp_min_length", 3},
|
||||||
|
{"ld_algorithm", "MDLP"},
|
||||||
|
});
|
||||||
|
REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
|
||||||
|
REQUIRE(clft.getStatus() == bayesnet::NORMAL);
|
||||||
|
clft.setHyperparameters({
|
||||||
|
{"ld_proposed_cuts", 9},
|
||||||
|
{"ld_algorithm", "BINQ"},
|
||||||
|
});
|
||||||
|
REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
|
||||||
|
REQUIRE(clft.getStatus() == bayesnet::NORMAL);
|
||||||
|
clft.setHyperparameters({
|
||||||
|
{"ld_proposed_cuts", 5},
|
||||||
|
{"ld_algorithm", "BINU"},
|
||||||
|
});
|
||||||
|
REQUIRE_NOTHROW(clft.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing));
|
||||||
|
REQUIRE(clft.getStatus() == bayesnet::NORMAL);
|
||||||
|
}
|
||||||
|
@@ -345,12 +345,12 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
|||||||
auto net1 = bayesnet::Network();
|
auto net1 = bayesnet::Network();
|
||||||
buildModel(net1, raw.features, raw.className);
|
buildModel(net1, raw.features, raw.className);
|
||||||
net1.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
net1.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||||
|
|
||||||
// Create empty network and assign
|
// Create empty network and assign
|
||||||
auto net2 = bayesnet::Network();
|
auto net2 = bayesnet::Network();
|
||||||
net2.addNode("TempNode"); // Add something to make sure it gets cleared
|
net2.addNode("TempNode"); // Add something to make sure it gets cleared
|
||||||
net2 = net1;
|
net2 = net1;
|
||||||
|
|
||||||
// Verify they are equal
|
// Verify they are equal
|
||||||
REQUIRE(net1.getFeatures() == net2.getFeatures());
|
REQUIRE(net1.getFeatures() == net2.getFeatures());
|
||||||
REQUIRE(net1.getEdges() == net2.getEdges());
|
REQUIRE(net1.getEdges() == net2.getEdges());
|
||||||
@@ -361,10 +361,10 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
|||||||
REQUIRE(net1.getSamples().size(0) == net2.getSamples().size(0));
|
REQUIRE(net1.getSamples().size(0) == net2.getSamples().size(0));
|
||||||
REQUIRE(net1.getSamples().size(1) == net2.getSamples().size(1));
|
REQUIRE(net1.getSamples().size(1) == net2.getSamples().size(1));
|
||||||
REQUIRE(net1.getNodes().size() == net2.getNodes().size());
|
REQUIRE(net1.getNodes().size() == net2.getNodes().size());
|
||||||
|
|
||||||
// Verify topology equality
|
// Verify topology equality
|
||||||
REQUIRE(net1 == net2);
|
REQUIRE(net1 == net2);
|
||||||
|
|
||||||
// Verify they are separate objects by modifying one
|
// Verify they are separate objects by modifying one
|
||||||
net2.initialize();
|
net2.initialize();
|
||||||
net2.addNode("OnlyInNet2");
|
net2.addNode("OnlyInNet2");
|
||||||
@@ -376,46 +376,47 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
|||||||
INFO("Test self assignment");
|
INFO("Test self assignment");
|
||||||
buildModel(net, raw.features, raw.className);
|
buildModel(net, raw.features, raw.className);
|
||||||
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||||
|
|
||||||
int original_edges = net.getNumEdges();
|
int original_edges = net.getNumEdges();
|
||||||
int original_nodes = net.getNodes().size();
|
int original_nodes = net.getNodes().size();
|
||||||
|
|
||||||
// Self assignment should not corrupt the network
|
// Self assignment should not corrupt the network
|
||||||
net = net;
|
net = net;
|
||||||
|
auto all_features = raw.features;
|
||||||
|
all_features.push_back(raw.className);
|
||||||
REQUIRE(net.getNumEdges() == original_edges);
|
REQUIRE(net.getNumEdges() == original_edges);
|
||||||
REQUIRE(net.getNodes().size() == original_nodes);
|
REQUIRE(net.getNodes().size() == original_nodes);
|
||||||
REQUIRE(net.getFeatures() == raw.features);
|
REQUIRE(net.getFeatures() == all_features);
|
||||||
REQUIRE(net.getClassName() == raw.className);
|
REQUIRE(net.getClassName() == raw.className);
|
||||||
}
|
}
|
||||||
SECTION("Test operator== topology comparison")
|
SECTION("Test operator== topology comparison")
|
||||||
{
|
{
|
||||||
INFO("Test operator== topology comparison");
|
INFO("Test operator== topology comparison");
|
||||||
|
|
||||||
// Test 1: Two identical networks
|
// Test 1: Two identical networks
|
||||||
auto net1 = bayesnet::Network();
|
auto net1 = bayesnet::Network();
|
||||||
auto net2 = bayesnet::Network();
|
auto net2 = bayesnet::Network();
|
||||||
|
|
||||||
net1.addNode("A");
|
net1.addNode("A");
|
||||||
net1.addNode("B");
|
net1.addNode("B");
|
||||||
net1.addNode("C");
|
net1.addNode("C");
|
||||||
net1.addEdge("A", "B");
|
net1.addEdge("A", "B");
|
||||||
net1.addEdge("B", "C");
|
net1.addEdge("B", "C");
|
||||||
|
|
||||||
net2.addNode("A");
|
net2.addNode("A");
|
||||||
net2.addNode("B");
|
net2.addNode("B");
|
||||||
net2.addNode("C");
|
net2.addNode("C");
|
||||||
net2.addEdge("A", "B");
|
net2.addEdge("A", "B");
|
||||||
net2.addEdge("B", "C");
|
net2.addEdge("B", "C");
|
||||||
|
|
||||||
REQUIRE(net1 == net2);
|
REQUIRE(net1 == net2);
|
||||||
|
|
||||||
// Test 2: Different nodes
|
// Test 2: Different nodes
|
||||||
auto net3 = bayesnet::Network();
|
auto net3 = bayesnet::Network();
|
||||||
net3.addNode("A");
|
net3.addNode("A");
|
||||||
net3.addNode("D"); // Different node
|
net3.addNode("D"); // Different node
|
||||||
REQUIRE_FALSE(net1 == net3);
|
REQUIRE_FALSE(net1 == net3);
|
||||||
|
|
||||||
// Test 3: Same nodes, different edges
|
// Test 3: Same nodes, different edges
|
||||||
auto net4 = bayesnet::Network();
|
auto net4 = bayesnet::Network();
|
||||||
net4.addNode("A");
|
net4.addNode("A");
|
||||||
@@ -424,12 +425,12 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
|||||||
net4.addEdge("A", "C"); // Different topology
|
net4.addEdge("A", "C"); // Different topology
|
||||||
net4.addEdge("B", "C");
|
net4.addEdge("B", "C");
|
||||||
REQUIRE_FALSE(net1 == net4);
|
REQUIRE_FALSE(net1 == net4);
|
||||||
|
|
||||||
// Test 4: Empty networks
|
// Test 4: Empty networks
|
||||||
auto net5 = bayesnet::Network();
|
auto net5 = bayesnet::Network();
|
||||||
auto net6 = bayesnet::Network();
|
auto net6 = bayesnet::Network();
|
||||||
REQUIRE(net5 == net6);
|
REQUIRE(net5 == net6);
|
||||||
|
|
||||||
// Test 5: Same topology, different edge order
|
// Test 5: Same topology, different edge order
|
||||||
auto net7 = bayesnet::Network();
|
auto net7 = bayesnet::Network();
|
||||||
net7.addNode("A");
|
net7.addNode("A");
|
||||||
@@ -442,35 +443,36 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
|||||||
SECTION("Test RAII compliance with smart pointers")
|
SECTION("Test RAII compliance with smart pointers")
|
||||||
{
|
{
|
||||||
INFO("Test RAII compliance with smart pointers");
|
INFO("Test RAII compliance with smart pointers");
|
||||||
|
|
||||||
std::unique_ptr<bayesnet::Network> net1 = std::make_unique<bayesnet::Network>();
|
std::unique_ptr<bayesnet::Network> net1 = std::make_unique<bayesnet::Network>();
|
||||||
buildModel(*net1, raw.features, raw.className);
|
buildModel(*net1, raw.features, raw.className);
|
||||||
net1->fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
net1->fit(raw.Xv, raw.yv, raw.weightsv, raw.features, raw.className, raw.states, raw.smoothing);
|
||||||
|
|
||||||
// Test that copy constructor works with smart pointers
|
// Test that copy constructor works with smart pointers
|
||||||
std::unique_ptr<bayesnet::Network> net2 = std::make_unique<bayesnet::Network>(*net1);
|
std::unique_ptr<bayesnet::Network> net2 = std::make_unique<bayesnet::Network>(*net1);
|
||||||
|
|
||||||
REQUIRE(*net1 == *net2);
|
REQUIRE(*net1 == *net2);
|
||||||
REQUIRE(net1->getNumEdges() == net2->getNumEdges());
|
REQUIRE(net1->getNumEdges() == net2->getNumEdges());
|
||||||
REQUIRE(net1->getNodes().size() == net2->getNodes().size());
|
REQUIRE(net1->getNodes().size() == net2->getNodes().size());
|
||||||
|
|
||||||
// Destroy original
|
// Destroy original
|
||||||
net1.reset();
|
net1.reset();
|
||||||
|
|
||||||
|
// Test predictions still work
|
||||||
|
std::vector<std::vector<int>> test = { {1}, {2}, {0}, {1} };
|
||||||
|
REQUIRE_NOTHROW(net2->predict(test));
|
||||||
|
|
||||||
// net2 should still be valid and functional
|
// net2 should still be valid and functional
|
||||||
|
net2->initialize();
|
||||||
REQUIRE_NOTHROW(net2->addNode("NewNode"));
|
REQUIRE_NOTHROW(net2->addNode("NewNode"));
|
||||||
REQUIRE(net2->getNodes().count("NewNode") == 1);
|
REQUIRE(net2->getNodes().count("NewNode") == 1);
|
||||||
|
|
||||||
// Test predictions still work
|
|
||||||
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1} };
|
|
||||||
REQUIRE_NOTHROW(net2->predict(test));
|
|
||||||
}
|
}
|
||||||
SECTION("Test complex topology copy")
|
SECTION("Test complex topology copy")
|
||||||
{
|
{
|
||||||
INFO("Test complex topology copy");
|
INFO("Test complex topology copy");
|
||||||
|
|
||||||
auto original = bayesnet::Network();
|
auto original = bayesnet::Network();
|
||||||
|
|
||||||
// Create a more complex network
|
// Create a more complex network
|
||||||
original.addNode("Root");
|
original.addNode("Root");
|
||||||
original.addNode("Child1");
|
original.addNode("Child1");
|
||||||
@@ -478,45 +480,45 @@ TEST_CASE("Test Bayesian Network", "[Network]")
|
|||||||
original.addNode("Grandchild1");
|
original.addNode("Grandchild1");
|
||||||
original.addNode("Grandchild2");
|
original.addNode("Grandchild2");
|
||||||
original.addNode("Grandchild3");
|
original.addNode("Grandchild3");
|
||||||
|
|
||||||
original.addEdge("Root", "Child1");
|
original.addEdge("Root", "Child1");
|
||||||
original.addEdge("Root", "Child2");
|
original.addEdge("Root", "Child2");
|
||||||
original.addEdge("Child1", "Grandchild1");
|
original.addEdge("Child1", "Grandchild1");
|
||||||
original.addEdge("Child1", "Grandchild2");
|
original.addEdge("Child1", "Grandchild2");
|
||||||
original.addEdge("Child2", "Grandchild3");
|
original.addEdge("Child2", "Grandchild3");
|
||||||
|
|
||||||
// Copy it
|
// Copy it
|
||||||
auto copy = original;
|
auto copy = original;
|
||||||
|
|
||||||
// Verify topology is identical
|
// Verify topology is identical
|
||||||
REQUIRE(original == copy);
|
REQUIRE(original == copy);
|
||||||
REQUIRE(original.getNodes().size() == copy.getNodes().size());
|
REQUIRE(original.getNodes().size() == copy.getNodes().size());
|
||||||
REQUIRE(original.getNumEdges() == copy.getNumEdges());
|
REQUIRE(original.getNumEdges() == copy.getNumEdges());
|
||||||
|
|
||||||
// Verify edges are properly reconstructed
|
// Verify edges are properly reconstructed
|
||||||
auto originalEdges = original.getEdges();
|
auto originalEdges = original.getEdges();
|
||||||
auto copyEdges = copy.getEdges();
|
auto copyEdges = copy.getEdges();
|
||||||
REQUIRE(originalEdges.size() == copyEdges.size());
|
REQUIRE(originalEdges.size() == copyEdges.size());
|
||||||
|
|
||||||
// Verify node relationships are properly copied
|
// Verify node relationships are properly copied
|
||||||
for (const auto& nodePair : original.getNodes()) {
|
for (const auto& nodePair : original.getNodes()) {
|
||||||
const std::string& nodeName = nodePair.first;
|
const std::string& nodeName = nodePair.first;
|
||||||
auto* originalNode = nodePair.second.get();
|
auto* originalNode = nodePair.second.get();
|
||||||
auto* copyNode = copy.getNodes().at(nodeName).get();
|
auto* copyNode = copy.getNodes().at(nodeName).get();
|
||||||
|
|
||||||
REQUIRE(originalNode->getParents().size() == copyNode->getParents().size());
|
REQUIRE(originalNode->getParents().size() == copyNode->getParents().size());
|
||||||
REQUIRE(originalNode->getChildren().size() == copyNode->getChildren().size());
|
REQUIRE(originalNode->getChildren().size() == copyNode->getChildren().size());
|
||||||
|
|
||||||
// Verify parent names match
|
// Verify parent names match
|
||||||
for (size_t i = 0; i < originalNode->getParents().size(); ++i) {
|
for (size_t i = 0; i < originalNode->getParents().size(); ++i) {
|
||||||
REQUIRE(originalNode->getParents()[i]->getName() ==
|
REQUIRE(originalNode->getParents()[i]->getName() ==
|
||||||
copyNode->getParents()[i]->getName());
|
copyNode->getParents()[i]->getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify child names match
|
// Verify child names match
|
||||||
for (size_t i = 0; i < originalNode->getChildren().size(); ++i) {
|
for (size_t i = 0; i < originalNode->getChildren().size(); ++i) {
|
||||||
REQUIRE(originalNode->getChildren()[i]->getName() ==
|
REQUIRE(originalNode->getChildren()[i]->getName() ==
|
||||||
copyNode->getChildren()[i]->getName());
|
copyNode->getChildren()[i]->getName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -158,4 +158,47 @@ TEST_CASE("TEST MinFill method", "[Node]")
|
|||||||
REQUIRE(node_2.minFill() == 6);
|
REQUIRE(node_2.minFill() == 6);
|
||||||
REQUIRE(node_3.minFill() == 3);
|
REQUIRE(node_3.minFill() == 3);
|
||||||
REQUIRE(node_4.minFill() == 1);
|
REQUIRE(node_4.minFill() == 1);
|
||||||
|
}
|
||||||
|
TEST_CASE("Test operator =", "[Node]")
|
||||||
|
{
|
||||||
|
// Generate a test to test the operator = of the Node class
|
||||||
|
// Create a node with 3 parents and 2 children
|
||||||
|
auto node = bayesnet::Node("N1");
|
||||||
|
auto parent_1 = bayesnet::Node("P1");
|
||||||
|
parent_1.setNumStates(3);
|
||||||
|
auto child_1 = bayesnet::Node("H1");
|
||||||
|
child_1.setNumStates(2);
|
||||||
|
node.addParent(&parent_1);
|
||||||
|
node.addChild(&child_1);
|
||||||
|
// Create a cpt in the node using computeCPT
|
||||||
|
auto dataset = torch::tensor({ {1, 0, 0, 1}, {0, 1, 2, 1}, {0, 1, 1, 0} });
|
||||||
|
auto states = std::vector<int>({ 2, 3, 3 });
|
||||||
|
auto features = std::vector<std::string>{ "N1", "P1", "H1" };
|
||||||
|
auto className = std::string("Class");
|
||||||
|
auto weights = torch::tensor({ 1.0, 1.0, 1.0, 1.0 }, torch::kDouble);
|
||||||
|
node.setNumStates(2);
|
||||||
|
node.computeCPT(dataset, features, 0.0, weights);
|
||||||
|
// Get the cpt of the node
|
||||||
|
auto cpt = node.getCPT();
|
||||||
|
// Check that the cpt is not empty
|
||||||
|
REQUIRE(cpt.numel() > 0);
|
||||||
|
// Check that the cpt has the correct dimensions
|
||||||
|
auto dimensions = cpt.sizes();
|
||||||
|
REQUIRE(dimensions.size() == 2);
|
||||||
|
REQUIRE(dimensions[0] == 2); // Number of states of the node
|
||||||
|
REQUIRE(dimensions[1] == 3); // Number of states of the first parent
|
||||||
|
// Create a copy of the node
|
||||||
|
auto node_copy = node;
|
||||||
|
// Check that the copy has not any parents or children
|
||||||
|
auto parents = node_copy.getParents();
|
||||||
|
auto children = node_copy.getChildren();
|
||||||
|
REQUIRE(parents.size() == 0);
|
||||||
|
REQUIRE(children.size() == 0);
|
||||||
|
// Check that the copy has the same name
|
||||||
|
REQUIRE(node_copy.getName() == "N1");
|
||||||
|
// Check that the copy has the same cpt
|
||||||
|
auto cpt_copy = node_copy.getCPT();
|
||||||
|
REQUIRE(cpt_copy.equal(cpt));
|
||||||
|
// Check that the copy has the same number of states
|
||||||
|
REQUIRE(node_copy.getNumStates() == node.getNumStates());
|
||||||
}
|
}
|
Reference in New Issue
Block a user