Tests XSpode & XBAODE
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -44,4 +44,5 @@ docs/manual
|
|||||||
docs/man3
|
docs/man3
|
||||||
docs/man
|
docs/man
|
||||||
docs/Doxyfile
|
docs/Doxyfile
|
||||||
|
.cache
|
||||||
|
|
||||||
|
@@ -7,7 +7,7 @@
|
|||||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
[](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||||
[](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
[](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
|
||||||

|

|
||||||
[](html/index.html)
|
[](html/index.html)
|
||||||
[](https://doi.org/10.5281/zenodo.14210344)
|
[](https://doi.org/10.5281/zenodo.14210344)
|
||||||
|
|
||||||
Bayesian Network Classifiers library
|
Bayesian Network Classifiers library
|
||||||
|
@@ -3,14 +3,14 @@
|
|||||||
// SPDX-FileType: SOURCE
|
// SPDX-FileType: SOURCE
|
||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
// ***************************************************************
|
// ***************************************************************
|
||||||
#include "XSPODE.h"
|
|
||||||
#include "bayesnet/utils/TensorUtils.h"
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
#include "XSPODE.h"
|
||||||
|
#include "bayesnet/utils/TensorUtils.h"
|
||||||
|
|
||||||
namespace bayesnet {
|
namespace bayesnet {
|
||||||
|
|
||||||
@@ -35,7 +35,7 @@ namespace bayesnet {
|
|||||||
Classifier::setHyperparameters(hyperparameters);
|
Classifier::setHyperparameters(hyperparameters);
|
||||||
}
|
}
|
||||||
|
|
||||||
void XSpode::fit(torch::Tensor & X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
|
void XSpode::fitx(torch::Tensor & X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
|
||||||
{
|
{
|
||||||
m = X.size(1);
|
m = X.size(1);
|
||||||
n = X.size(0);
|
n = X.size(0);
|
||||||
@@ -390,9 +390,8 @@ namespace bayesnet {
|
|||||||
}
|
}
|
||||||
int XSpode::getNumberOfEdges() const
|
int XSpode::getNumberOfEdges() const
|
||||||
{
|
{
|
||||||
return nFeatures_ * (2 * nFeatures_ - 1);
|
return 2 * nFeatures_ + 1;
|
||||||
}
|
}
|
||||||
std::vector<int>& XSpode::getStates() { return states_; }
|
|
||||||
|
|
||||||
// ------------------------------------------------------
|
// ------------------------------------------------------
|
||||||
// Predict overrides (classifier interface)
|
// Predict overrides (classifier interface)
|
||||||
|
@@ -29,7 +29,7 @@ namespace bayesnet {
|
|||||||
int getClassNumStates() const override;
|
int getClassNumStates() const override;
|
||||||
std::vector<int>& getStates();
|
std::vector<int>& getStates();
|
||||||
std::vector<std::string> graph(const std::string& title) const override { return std::vector<std::string>({ title }); }
|
std::vector<std::string> graph(const std::string& title) const override { return std::vector<std::string>({ title }); }
|
||||||
void fit(torch::Tensor& X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing);
|
void fitx(torch::Tensor& X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing);
|
||||||
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
void setHyperparameters(const nlohmann::json& hyperparameters_) override;
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@@ -85,6 +85,7 @@ namespace bayesnet {
|
|||||||
torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
|
torch::Tensor y_pred = torch::zeros({ X.size(1), n_states }, torch::kFloat32);
|
||||||
for (auto i = 0; i < n_models; ++i) {
|
for (auto i = 0; i < n_models; ++i) {
|
||||||
auto ypredict = models[i]->predict_proba(X);
|
auto ypredict = models[i]->predict_proba(X);
|
||||||
|
/*std::cout << "model " << i << " prediction: " << ypredict << " significance " << significanceModels[i] << std::endl;*/
|
||||||
y_pred += ypredict * significanceModels[i];
|
y_pred += ypredict * significanceModels[i];
|
||||||
}
|
}
|
||||||
auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
|
auto sum = std::reduce(significanceModels.begin(), significanceModels.end());
|
||||||
|
@@ -1,267 +0,0 @@
|
|||||||
// ***************************************************************
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
|
||||||
// SPDX-FileType: SOURCE
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
// ***************************************************************
|
|
||||||
#include "WA2DE.h"
|
|
||||||
namespace bayesnet {
|
|
||||||
WA2DE::WA2DE(bool predict_voting)
|
|
||||||
: num_classes_(0), num_attributes_(0), total_count_(0.0), weighted_a2de_(false), smoothing_factor_(1.0)
|
|
||||||
{
|
|
||||||
validHyperparameters = { "predict_voting" };
|
|
||||||
std::cout << "WA2DE classifier created.\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
void bayesnet::WA2DE::setHyperparameters(const nlohmann::json& hyperparameters_)
|
|
||||||
{
|
|
||||||
auto hyperparameters = hyperparameters_;
|
|
||||||
if (hyperparameters.contains("predict_voting")) {
|
|
||||||
predict_voting = hyperparameters["predict_voting"];
|
|
||||||
hyperparameters.erase("predict_voting");
|
|
||||||
}
|
|
||||||
Classifier::setHyperparameters(hyperparameters);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void WA2DE::buildModel(const torch::Tensor& weights)
|
|
||||||
{
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
class_counts_[c] += 1e-4; // Laplace smoothing
|
|
||||||
}
|
|
||||||
for (int a = 0; a < num_attributes_; ++a) {
|
|
||||||
for (int v = 0; v < attribute_cardinalities_[a]; ++v) {
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
freq_attr_class_[a][v][c] =
|
|
||||||
(freq_attr_class_[a][v][c] + 1.0) / (class_counts_[c] + attribute_cardinalities_[a]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int sp = 0; sp < num_attributes_; ++sp) {
|
|
||||||
for (int spv = 0; spv < attribute_cardinalities_[sp]; ++spv) {
|
|
||||||
for (int ch = 0; ch < num_attributes_; ++ch) {
|
|
||||||
if (sp != ch) {
|
|
||||||
for (int chv = 0; chv < attribute_cardinalities_[ch]; ++chv) {
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
freq_pair_class_[sp][spv][ch][chv][c] =
|
|
||||||
(freq_pair_class_[sp][spv][ch][chv][c] + 1.0) /
|
|
||||||
(class_counts_[c] + attribute_cardinalities_[sp] * attribute_cardinalities_[ch]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << "Model probabilities computed.\n";
|
|
||||||
}
|
|
||||||
void WA2DE::trainModel(const torch::Tensor& weights, const Smoothing_t smoothing)
|
|
||||||
{
|
|
||||||
auto data = dataset.clone();
|
|
||||||
auto labels = data[-1];
|
|
||||||
// Remove class row from data
|
|
||||||
data = data.index({ at::indexing::Slice(0, -1) });
|
|
||||||
std::cout << "Training A2DE model...\n";
|
|
||||||
std::cout << "Data: " << data.sizes() << std::endl;
|
|
||||||
std::cout << "Labels: " << labels.sizes() << std::endl;
|
|
||||||
std::cout << std::string(80, '-') << std::endl;
|
|
||||||
if (data.dim() != 2 || labels.dim() != 1) {
|
|
||||||
throw std::invalid_argument("Invalid input dimensions.");
|
|
||||||
}
|
|
||||||
num_attributes_ = data.size(0);
|
|
||||||
num_classes_ = labels.max().item<int>() + 1;
|
|
||||||
total_count_ = data.size(1);
|
|
||||||
std::cout << "Number of attributes: " << num_attributes_ << std::endl;
|
|
||||||
std::cout << "Number of classes: " << num_classes_ << std::endl;
|
|
||||||
std::cout << "Total count: " << total_count_ << std::endl;
|
|
||||||
|
|
||||||
// Compute cardinalities
|
|
||||||
attribute_cardinalities_.clear();
|
|
||||||
for (int i = 0; i < num_attributes_; ++i) {
|
|
||||||
attribute_cardinalities_.push_back(data[i].max().item<int>() + 1);
|
|
||||||
}
|
|
||||||
std::cout << "Attribute cardinalities: ";
|
|
||||||
for (int i = 0; i < num_attributes_; ++i) {
|
|
||||||
std::cout << attribute_cardinalities_[i] << " ";
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
// output the map of states
|
|
||||||
std::cout << "States: ";
|
|
||||||
for (int i = 0; i < states.size() - 1; i++) {
|
|
||||||
std::cout << features[i] << " " << states[features[i]].size() << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resize storage
|
|
||||||
class_counts_.resize(num_classes_, 0.0);
|
|
||||||
freq_attr_class_.resize(num_attributes_);
|
|
||||||
freq_pair_class_.resize(num_attributes_);
|
|
||||||
|
|
||||||
for (int i = 0; i < num_attributes_; ++i) {
|
|
||||||
freq_attr_class_[i].resize(attribute_cardinalities_[i], std::vector<double>(num_classes_, 0.0));
|
|
||||||
freq_pair_class_[i].resize(attribute_cardinalities_[i]); // Ensure first level exists
|
|
||||||
for (int j = 0; j < attribute_cardinalities_[i]; ++j) {
|
|
||||||
freq_pair_class_[i][j].resize(num_attributes_); // Ensure second level exists
|
|
||||||
for (int k = 0; k < num_attributes_; ++k) {
|
|
||||||
if (i != k) {
|
|
||||||
freq_pair_class_[i][j][k].resize(attribute_cardinalities_[k]); // Ensure third level exists
|
|
||||||
for (int l = 0; l < attribute_cardinalities_[k]; ++l) {
|
|
||||||
freq_pair_class_[i][j][k][l].resize(num_classes_, 0.0); // Finally, initialize with 0.0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Count frequencies
|
|
||||||
auto data_cpu = data.to(torch::kCPU);
|
|
||||||
auto labels_cpu = labels.to(torch::kCPU);
|
|
||||||
int32_t* data_ptr = data_cpu.data_ptr<int32_t>();
|
|
||||||
int32_t* labels_ptr = labels_cpu.data_ptr<int32_t>();
|
|
||||||
|
|
||||||
for (int i = 0; i < total_count_; ++i) {
|
|
||||||
int class_label = labels_ptr[i];
|
|
||||||
class_counts_[class_label] += 1.0;
|
|
||||||
|
|
||||||
std::vector<int> attr_values(num_attributes_);
|
|
||||||
for (int a = 0; a < num_attributes_; ++a) {
|
|
||||||
attr_values[a] = toIntValue(a, data_ptr[i * num_attributes_ + a]);
|
|
||||||
freq_attr_class_[a][attr_values[a]][class_label] += 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pairwise counts
|
|
||||||
for (int sp = 0; sp < num_attributes_; ++sp) {
|
|
||||||
for (int ch = 0; ch < num_attributes_; ++ch) {
|
|
||||||
if (sp != ch) {
|
|
||||||
freq_pair_class_[sp][attr_values[sp]][ch][attr_values[ch]][class_label] += 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << "Verifying Frequency Counts:\n";
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
std::cout << "Class " << c << " Count: " << class_counts_[c] << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int a = 0; a < num_attributes_; ++a) {
|
|
||||||
for (int v = 0; v < attribute_cardinalities_[a]; ++v) {
|
|
||||||
std::cout << "P(A[" << a << "]=" << v << "|C): ";
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
std::cout << freq_attr_class_[a][v][c] << " ";
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
torch::Tensor WA2DE::computeProbabilities(const torch::Tensor& data) const
|
|
||||||
{
|
|
||||||
int M = data.size(1);
|
|
||||||
auto output = torch::zeros({ M, num_classes_ }, torch::kF64);
|
|
||||||
|
|
||||||
auto data_cpu = data.to(torch::kCPU);
|
|
||||||
int32_t* data_ptr = data_cpu.data_ptr<int32_t>();
|
|
||||||
|
|
||||||
for (int i = 0; i < M; ++i) {
|
|
||||||
std::vector<int> attr_values(num_attributes_);
|
|
||||||
for (int a = 0; a < num_attributes_; ++a) {
|
|
||||||
attr_values[a] = toIntValue(a, data_ptr[i * num_attributes_ + a]);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<double> log_prob(num_classes_, 0.0);
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
log_prob[c] = std::log((class_counts_[c] + smoothing_factor_) / (total_count_ + num_classes_ * smoothing_factor_));
|
|
||||||
|
|
||||||
double sum_log = 0.0;
|
|
||||||
for (int sp = 0; sp < num_attributes_; ++sp) {
|
|
||||||
double sp_log = log_prob[c];
|
|
||||||
for (int ch = 0; ch < num_attributes_; ++ch) {
|
|
||||||
if (sp == ch) continue;
|
|
||||||
double num = freq_pair_class_[sp][attr_values[sp]][ch][attr_values[ch]][c] + smoothing_factor_;
|
|
||||||
double denom = class_counts_[c] + attribute_cardinalities_[sp] * attribute_cardinalities_[ch] * smoothing_factor_;
|
|
||||||
sp_log += std::log(num / denom);
|
|
||||||
}
|
|
||||||
sum_log += std::exp(sp_log);
|
|
||||||
}
|
|
||||||
log_prob[c] = std::log(sum_log / num_attributes_);
|
|
||||||
}
|
|
||||||
|
|
||||||
double max_log = *std::max_element(log_prob.begin(), log_prob.end());
|
|
||||||
double sum_exp = 0.0;
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
sum_exp += std::exp(log_prob[c] - max_log);
|
|
||||||
}
|
|
||||||
double log_sum_exp = max_log + std::log(sum_exp);
|
|
||||||
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
output[i][c] = std::exp(log_prob[c] - log_sum_exp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return output.to(torch::kF32);
|
|
||||||
}
|
|
||||||
int WA2DE::toIntValue(int attributeIndex, float value) const
|
|
||||||
{
|
|
||||||
int v = static_cast<int>(value);
|
|
||||||
return std::max(0, std::min(v, attribute_cardinalities_[attributeIndex] - 1));
|
|
||||||
}
|
|
||||||
torch::Tensor WA2DE::AODEConditionalProb(const torch::Tensor& data)
|
|
||||||
{
|
|
||||||
int M = data.size(1); // Number of test samples
|
|
||||||
torch::Tensor output = torch::zeros({ M, num_classes_ }, torch::kF32);
|
|
||||||
|
|
||||||
auto data_cpu = data.to(torch::kCPU);
|
|
||||||
int32_t* data_ptr = data_cpu.data_ptr<int32_t>();
|
|
||||||
|
|
||||||
for (int i = 0; i < M; ++i) {
|
|
||||||
std::vector<int> attr_values(num_attributes_);
|
|
||||||
for (int a = 0; a < num_attributes_; ++a) {
|
|
||||||
attr_values[a] = toIntValue(a, data_ptr[i * num_attributes_ + a]);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<double> log_prob(num_classes_, 0.0);
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
log_prob[c] = std::log(class_counts_[c] / total_count_);
|
|
||||||
|
|
||||||
double sum_log = 0.0;
|
|
||||||
for (int sp = 0; sp < num_attributes_; ++sp) {
|
|
||||||
double sp_log = log_prob[c];
|
|
||||||
for (int ch = 0; ch < num_attributes_; ++ch) {
|
|
||||||
if (sp == ch) continue;
|
|
||||||
double prob = freq_pair_class_[sp][attr_values[sp]][ch][attr_values[ch]][c];
|
|
||||||
sp_log += std::log(prob);
|
|
||||||
}
|
|
||||||
sum_log += std::exp(sp_log);
|
|
||||||
}
|
|
||||||
log_prob[c] = std::log(sum_log / num_attributes_);
|
|
||||||
}
|
|
||||||
|
|
||||||
double max_log = *std::max_element(log_prob.begin(), log_prob.end());
|
|
||||||
double sum_exp = 0.0;
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
sum_exp += std::exp(log_prob[c] - max_log);
|
|
||||||
}
|
|
||||||
double log_sum_exp = max_log + std::log(sum_exp);
|
|
||||||
|
|
||||||
for (int c = 0; c < num_classes_; ++c) {
|
|
||||||
output[i][c] = std::exp(log_prob[c] - log_sum_exp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
double WA2DE::score(const torch::Tensor& X, const torch::Tensor& y)
|
|
||||||
{
|
|
||||||
torch::Tensor preds = AODEConditionalProb(X);
|
|
||||||
torch::Tensor pred_labels = preds.argmax(1);
|
|
||||||
|
|
||||||
auto correct = pred_labels.eq(y).sum().item<int>();
|
|
||||||
auto total = y.size(0);
|
|
||||||
|
|
||||||
return static_cast<double>(correct) / total;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> WA2DE::graph(const std::string& title) const
|
|
||||||
{
|
|
||||||
return { title, "Graph visualization not implemented." };
|
|
||||||
}
|
|
||||||
}
|
|
@@ -1,52 +0,0 @@
|
|||||||
// ***************************************************************
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
|
||||||
// SPDX-FileType: SOURCE
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
// ***************************************************************
|
|
||||||
#ifndef WA2DE_H
|
|
||||||
#define WA2DE_H
|
|
||||||
#include "Ensemble.h"
|
|
||||||
#include <torch/torch.h>
|
|
||||||
#include <vector>
|
|
||||||
#include <map>
|
|
||||||
#include <nlohmann/json.hpp>
|
|
||||||
namespace bayesnet {
|
|
||||||
/**
|
|
||||||
* Geoffrey I. Webb's A2DE (Averaged 2-Dependence Estimators) classifier
|
|
||||||
* Implements the A2DE algorithm as an ensemble of SPODE models.
|
|
||||||
*/
|
|
||||||
class WA2DE : public Ensemble {
|
|
||||||
public:
|
|
||||||
explicit WA2DE(bool predict_voting = false);
|
|
||||||
virtual ~WA2DE() {};
|
|
||||||
|
|
||||||
// Override method to set hyperparameters
|
|
||||||
void setHyperparameters(const nlohmann::json& hyperparameters) override;
|
|
||||||
|
|
||||||
// Graph visualization function
|
|
||||||
std::vector<std::string> graph(const std::string& title = "A2DE") const override;
|
|
||||||
torch::Tensor computeProbabilities(const torch::Tensor& data) const;
|
|
||||||
double score(const torch::Tensor& X, const torch::Tensor& y);
|
|
||||||
protected:
|
|
||||||
// Model-building function
|
|
||||||
void buildModel(const torch::Tensor& weights) override;
|
|
||||||
void trainModel(const torch::Tensor& data, const Smoothing_t smoothing) override;
|
|
||||||
private:
|
|
||||||
int num_classes_; // Number of classes
|
|
||||||
int num_attributes_; // Number of attributes
|
|
||||||
std::vector<int> attribute_cardinalities_; // Cardinalities of attributes
|
|
||||||
|
|
||||||
// Frequency counts (similar to Java implementation)
|
|
||||||
std::vector<double> class_counts_; // Class frequency
|
|
||||||
std::vector<std::vector<std::vector<double>>> freq_attr_class_; // P(A | C)
|
|
||||||
std::vector<std::vector<std::vector<std::vector<std::vector<double>>>>> freq_pair_class_; // P(A_i, A_j | C)
|
|
||||||
|
|
||||||
double total_count_; // Total instance count
|
|
||||||
|
|
||||||
bool weighted_a2de_; // Whether to use weighted A2DE
|
|
||||||
double smoothing_factor_; // Smoothing parameter (default: Laplace)
|
|
||||||
torch::Tensor AODEConditionalProb(const torch::Tensor& data);
|
|
||||||
int toIntValue(int attributeIndex, float value) const;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
#endif
|
|
@@ -3,112 +3,117 @@
|
|||||||
// SPDX-FileType: SOURCE
|
// SPDX-FileType: SOURCE
|
||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
// ***************************************************************
|
// ***************************************************************
|
||||||
#include <random>
|
|
||||||
#include <set>
|
|
||||||
#include <functional>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <tuple>
|
|
||||||
#include "XBAODE.h"
|
#include "XBAODE.h"
|
||||||
#include "bayesnet/classifiers/XSPODE.h"
|
#include "bayesnet/classifiers/XSPODE.h"
|
||||||
#include "bayesnet/utils/TensorUtils.h"
|
#include "bayesnet/utils/TensorUtils.h"
|
||||||
|
#include <limits.h>
|
||||||
|
#include <random>
|
||||||
|
#include <tuple>
|
||||||
|
|
||||||
namespace bayesnet {
|
namespace bayesnet {
|
||||||
XBAODE::XBAODE()
|
XBAODE::XBAODE() : Boost(false) {
|
||||||
{
|
validHyperparameters = {
|
||||||
validHyperparameters = { "alpha_block", "order", "convergence", "convergence_best", "bisection", "threshold", "maxTolerance",
|
"alpha_block", "order", "convergence",
|
||||||
"predict_voting", "select_features" };
|
"convergence_best", "bisection", "threshold",
|
||||||
}
|
"maxTolerance", "predict_voting", "select_features"};
|
||||||
void XBAODE::add_model(std::unique_ptr<Classifier> model, double significance)
|
}
|
||||||
{
|
void XBAODE::add_model(std::unique_ptr<Classifier> model, double significance) {
|
||||||
models.push_back(std::move(model));
|
models.push_back(std::move(model));
|
||||||
n_models++;
|
n_models++;
|
||||||
significanceModels.push_back(significance);
|
significanceModels.push_back(significance);
|
||||||
}
|
}
|
||||||
void XBAODE::remove_last_model()
|
void XBAODE::remove_last_model() {
|
||||||
{
|
|
||||||
models.pop_back();
|
models.pop_back();
|
||||||
significanceModels.pop_back();
|
significanceModels.pop_back();
|
||||||
n_models--;
|
n_models--;
|
||||||
}
|
}
|
||||||
std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing)
|
std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing) {
|
||||||
{
|
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
|
||||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
|
||||||
std::vector<int> featuresSelected = featureSelection(weights_);
|
std::vector<int> featuresSelected = featureSelection(weights_);
|
||||||
for (const int& feature : featuresSelected) {
|
for (const int &feature : featuresSelected) {
|
||||||
std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
|
std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
|
||||||
// model->fit(dataset, features, className, states, weights_, smoothing);
|
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||||
dynamic_cast<XSpode*>(model.get())->fit(X_train, y_train, weights_, smoothing);
|
|
||||||
add_model(std::move(model), 1.0);
|
add_model(std::move(model), 1.0);
|
||||||
}
|
}
|
||||||
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
|
notes.push_back("Used features in initialization: " +
|
||||||
|
std::to_string(featuresSelected.size()) + " of " +
|
||||||
|
std::to_string(features.size()) + " with " +
|
||||||
|
select_features_algorithm);
|
||||||
return featuresSelected;
|
return featuresSelected;
|
||||||
}
|
}
|
||||||
void XBAODE::trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
|
void XBAODE::trainModel(const torch::Tensor &weights,
|
||||||
{
|
const bayesnet::Smoothing_t smoothing) {
|
||||||
X_train_ = TensorUtils::to_matrix(X_train);
|
X_train_ = TensorUtils::to_matrix(X_train);
|
||||||
y_train_ = TensorUtils::to_vector<int>(y_train);
|
y_train_ = TensorUtils::to_vector<int>(y_train);
|
||||||
X_test_ = TensorUtils::to_matrix(X_test);
|
X_test_ = TensorUtils::to_matrix(X_test);
|
||||||
y_test_ = TensorUtils::to_vector<int>(y_test);
|
y_test_ = TensorUtils::to_vector<int>(y_test);
|
||||||
significanceModels.resize(n, 0.0); // n initialized in Classifier.cc
|
|
||||||
fitted = true;
|
fitted = true;
|
||||||
double alpha_t;
|
double alpha_t;
|
||||||
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
torch::Tensor weights_ = torch::full({m}, 1.0 / m, torch::kFloat64);
|
||||||
bool finished = false;
|
bool finished = false;
|
||||||
std::vector<int> featuresUsed;
|
std::vector<int> featuresUsed;
|
||||||
n_models = 0;
|
n_models = 0;
|
||||||
if (selectFeatures) {
|
if (selectFeatures) {
|
||||||
featuresUsed = initializeModels(smoothing);
|
featuresUsed = initializeModels(smoothing);
|
||||||
std::cout << "features used: " << featuresUsed.size() << std::endl;
|
|
||||||
auto ypred = predict(X_train_);
|
auto ypred = predict(X_train_);
|
||||||
auto ypred_t = torch::tensor(ypred);
|
auto ypred_t = torch::tensor(ypred);
|
||||||
std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
|
std::tie(weights_, alpha_t, finished) =
|
||||||
|
update_weights(y_train, ypred_t, weights_);
|
||||||
// Update significance of the models
|
// Update significance of the models
|
||||||
for (const int& feature : featuresUsed) {
|
for (const int &feature : featuresUsed) {
|
||||||
significanceModels.pop_back();
|
significanceModels.pop_back();
|
||||||
}
|
}
|
||||||
for (const int& feature : featuresUsed) {
|
for (const int &feature : featuresUsed) {
|
||||||
significanceModels.push_back(alpha_t);
|
significanceModels.push_back(alpha_t);
|
||||||
}
|
}
|
||||||
// VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models);
|
// VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t,
|
||||||
|
// n_models);
|
||||||
if (finished) {
|
if (finished) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int numItemsPack = 0; // The counter of the models inserted in the current pack
|
int numItemsPack =
|
||||||
|
0; // The counter of the models inserted in the current pack
|
||||||
// Variables to control the accuracy finish condition
|
// Variables to control the accuracy finish condition
|
||||||
double priorAccuracy = 0.0;
|
double priorAccuracy = 0.0;
|
||||||
double improvement = 1.0;
|
double improvement = 1.0;
|
||||||
double convergence_threshold = 1e-4;
|
double convergence_threshold = 1e-4;
|
||||||
int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
|
int tolerance =
|
||||||
|
0; // number of times the accuracy is lower than the convergence_threshold
|
||||||
// Step 0: Set the finish condition
|
// Step 0: Set the finish condition
|
||||||
// epsilon sub t > 0.5 => inverse the weights_ policy
|
// epsilon sub t > 0.5 => inverse the weights_ policy
|
||||||
// validation error is not decreasing
|
// validation error is not decreasing
|
||||||
// run out of features
|
// run out of features
|
||||||
bool ascending = order_algorithm == bayesnet::Orders.ASC;
|
bool ascending = order_algorithm == bayesnet::Orders.ASC;
|
||||||
std::mt19937 g{ 173 };
|
std::mt19937 g{173};
|
||||||
while (!finished) {
|
while (!finished) {
|
||||||
// Step 1: Build ranking with mutual information
|
// Step 1: Build ranking with mutual information
|
||||||
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
|
auto featureSelection = metrics.SelectKBestWeighted(
|
||||||
|
weights_, ascending, n); // Get all the features sorted
|
||||||
if (order_algorithm == bayesnet::Orders.RAND) {
|
if (order_algorithm == bayesnet::Orders.RAND) {
|
||||||
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
|
std::shuffle(featureSelection.begin(), featureSelection.end(), g);
|
||||||
}
|
}
|
||||||
// Remove used features
|
// Remove used features
|
||||||
featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(), [&](auto x)
|
featureSelection.erase(
|
||||||
{ return std::find(featuresUsed.begin(), featuresUsed.end(), x) != featuresUsed.end();}),
|
remove_if(featureSelection.begin(), featureSelection.end(), [&](auto x) {
|
||||||
featureSelection.end()
|
return std::find(featuresUsed.begin(), featuresUsed.end(), x) != featuresUsed.end();
|
||||||
);
|
}),
|
||||||
|
featureSelection.end());
|
||||||
int k = bisection ? pow(2, tolerance) : 1;
|
int k = bisection ? pow(2, tolerance) : 1;
|
||||||
int counter = 0; // The model counter of the current pack
|
int counter = 0; // The model counter of the current pack
|
||||||
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
|
// VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k,
|
||||||
|
// featureSelection.size());
|
||||||
while (counter++ < k && featureSelection.size() > 0) {
|
while (counter++ < k && featureSelection.size() > 0) {
|
||||||
auto feature = featureSelection[0];
|
auto feature = featureSelection[0];
|
||||||
featureSelection.erase(featureSelection.begin());
|
featureSelection.erase(featureSelection.begin());
|
||||||
std::unique_ptr<Classifier> model;
|
std::unique_ptr<Classifier> model;
|
||||||
model = std::make_unique<XSpode>(feature);
|
model = std::make_unique<XSpode>(feature);
|
||||||
dynamic_cast<XSpode*>(model.get())->fit(X_train, y_train, weights_, smoothing); // using exclusive XSpode fit method
|
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||||
|
/*dynamic_cast<XSpode*>(model.get())->fitx(X_train, y_train, weights_,
|
||||||
|
* smoothing); // using exclusive XSpode fit method*/
|
||||||
// DEBUG
|
// DEBUG
|
||||||
std::cout << "Model fitted." << std::endl;
|
/*std::cout << dynamic_cast<XSpode*>(model.get())->to_string() <<
|
||||||
std::cout << dynamic_cast<XSpode*>(model.get())->to_string() << std::endl;
|
* std::endl;*/
|
||||||
// DEBUG
|
// DEBUG
|
||||||
std::vector<int> ypred;
|
std::vector<int> ypred;
|
||||||
if (alpha_block) {
|
if (alpha_block) {
|
||||||
@@ -120,7 +125,6 @@ namespace bayesnet {
|
|||||||
// Compute the prediction
|
// Compute the prediction
|
||||||
ypred = predict(X_train_);
|
ypred = predict(X_train_);
|
||||||
// Remove the model from the ensemble
|
// Remove the model from the ensemble
|
||||||
significanceModels.pop_back();
|
|
||||||
remove_last_model();
|
remove_last_model();
|
||||||
} else {
|
} else {
|
||||||
ypred = model->predict(X_train_);
|
ypred = model->predict(X_train_);
|
||||||
@@ -132,21 +136,28 @@ namespace bayesnet {
|
|||||||
numItemsPack++;
|
numItemsPack++;
|
||||||
featuresUsed.push_back(feature);
|
featuresUsed.push_back(feature);
|
||||||
add_model(std::move(model), alpha_t);
|
add_model(std::move(model), alpha_t);
|
||||||
// VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size());
|
// VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d
|
||||||
|
// featuresUsed: %zu", finished, numItemsPack, n_models,
|
||||||
|
// featuresUsed.size());
|
||||||
} // End of the pack
|
} // End of the pack
|
||||||
if (convergence && !finished) {
|
if (convergence && !finished) {
|
||||||
auto y_val_predict = predict(X_test);
|
auto y_val_predict = predict(X_test);
|
||||||
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
|
double accuracy = (y_val_predict == y_test).sum().item<double>() /
|
||||||
|
(double)y_test.size(0);
|
||||||
if (priorAccuracy == 0) {
|
if (priorAccuracy == 0) {
|
||||||
priorAccuracy = accuracy;
|
priorAccuracy = accuracy;
|
||||||
} else {
|
} else {
|
||||||
improvement = accuracy - priorAccuracy;
|
improvement = accuracy - priorAccuracy;
|
||||||
}
|
}
|
||||||
if (improvement < convergence_threshold) {
|
if (improvement < convergence_threshold) {
|
||||||
// VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
// VLOG_SCOPE_F(3, " (improvement<threshold) tolerance: %d
|
||||||
|
// numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
|
||||||
|
// numItemsPack, improvement, priorAccuracy, accuracy);
|
||||||
tolerance++;
|
tolerance++;
|
||||||
} else {
|
} else {
|
||||||
// VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
|
// VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d
|
||||||
|
// numItemsPack: %d improvement: %f prior: %f current: %f", tolerance,
|
||||||
|
// numItemsPack, improvement, priorAccuracy, accuracy);
|
||||||
tolerance = 0; // Reset the counter if the model performs better
|
tolerance = 0; // Reset the counter if the model performs better
|
||||||
numItemsPack = 0;
|
numItemsPack = 0;
|
||||||
}
|
}
|
||||||
@@ -158,28 +169,34 @@ namespace bayesnet {
|
|||||||
priorAccuracy = accuracy;
|
priorAccuracy = accuracy;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
|
// VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size:
|
||||||
finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
|
// %zu", tolerance, featuresUsed.size(), features.size());
|
||||||
|
finished = finished || tolerance > maxTolerance ||
|
||||||
|
featuresUsed.size() == features.size();
|
||||||
}
|
}
|
||||||
if (tolerance > maxTolerance) {
|
if (tolerance > maxTolerance) {
|
||||||
if (numItemsPack < n_models) {
|
if (numItemsPack < n_models) {
|
||||||
notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
|
notes.push_back("Convergence threshold reached & " +
|
||||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
|
std::to_string(numItemsPack) + " models eliminated");
|
||||||
for (int i = featuresUsed.size() - 1; i >= featuresUsed.size() - numItemsPack; --i) {
|
// VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated
|
||||||
|
// of %d", numItemsPack, n_models);
|
||||||
|
for (int i = featuresUsed.size() - 1;
|
||||||
|
i >= featuresUsed.size() - numItemsPack; --i) {
|
||||||
remove_last_model();
|
remove_last_model();
|
||||||
significanceModels[featuresUsed[i]] = 0.0;
|
|
||||||
}
|
}
|
||||||
// VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features used.", n_models, featuresUsed.size());
|
// VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features
|
||||||
|
// used.", n_models, featuresUsed.size());
|
||||||
} else {
|
} else {
|
||||||
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
notes.push_back("Convergence threshold reached & 0 models eliminated");
|
||||||
// VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
// VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated
|
||||||
|
// n_models=%d numItemsPack=%d", n_models, numItemsPack);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (featuresUsed.size() != features.size()) {
|
if (featuresUsed.size() != features.size()) {
|
||||||
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
|
notes.push_back( "Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
|
||||||
status = bayesnet::WARNING;
|
status = bayesnet::WARNING;
|
||||||
}
|
}
|
||||||
notes.push_back("Number of models: " + std::to_string(n_models));
|
notes.push_back("Number of models: " + std::to_string(n_models));
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
} // namespace bayesnet
|
||||||
|
@@ -10,14 +10,14 @@ if(ENABLE_TESTING)
|
|||||||
)
|
)
|
||||||
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
|
file(GLOB_RECURSE BayesNet_SOURCES "${BayesNet_SOURCE_DIR}/bayesnet/*.cc")
|
||||||
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc
|
add_executable(TestBayesNet TestBayesNetwork.cc TestBayesNode.cc TestBayesClassifier.cc
|
||||||
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestXBAODE.cc TestA2DE.cc TestWA2DE.cc
|
TestBayesModels.cc TestBayesMetrics.cc TestFeatureSelection.cc TestBoostAODE.cc TestXBAODE.cc TestA2DE.cc
|
||||||
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc ${BayesNet_SOURCES})
|
TestUtils.cc TestBayesEnsemble.cc TestModulesVersions.cc TestBoostA2DE.cc TestMST.cc TestXSPODE.cc ${BayesNet_SOURCES})
|
||||||
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp PRIVATE Catch2::Catch2WithMain)
|
target_link_libraries(TestBayesNet PUBLIC "${TORCH_LIBRARIES}" fimdlp PRIVATE Catch2::Catch2WithMain)
|
||||||
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
|
add_test(NAME BayesNetworkTest COMMAND TestBayesNet)
|
||||||
add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]")
|
add_test(NAME A2DE COMMAND TestBayesNet "[A2DE]")
|
||||||
add_test(NAME WA2DE COMMAND TestBayesNet "[WA2DE]")
|
|
||||||
add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]")
|
add_test(NAME BoostA2DE COMMAND TestBayesNet "[BoostA2DE]")
|
||||||
add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
|
add_test(NAME BoostAODE COMMAND TestBayesNet "[BoostAODE]")
|
||||||
|
add_test(NAME XSPODE COMMAND TestBayesNet "[XSPODE]")
|
||||||
add_test(NAME XBAODE COMMAND TestBayesNet "[XBAODE]")
|
add_test(NAME XBAODE COMMAND TestBayesNet "[XBAODE]")
|
||||||
add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
|
add_test(NAME Classifier COMMAND TestBayesNet "[Classifier]")
|
||||||
add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
|
add_test(NAME Ensemble COMMAND TestBayesNet "[Ensemble]")
|
||||||
|
@@ -1,31 +0,0 @@
|
|||||||
// ***************************************************************
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
|
||||||
// SPDX-FileType: SOURCE
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
// ***************************************************************
|
|
||||||
|
|
||||||
#include <type_traits>
|
|
||||||
#include <catch2/catch_test_macros.hpp>
|
|
||||||
#include <catch2/catch_approx.hpp>
|
|
||||||
#include <catch2/generators/catch_generators.hpp>
|
|
||||||
#include "bayesnet/ensembles/WA2DE.h"
|
|
||||||
#include "TestUtils.h"
|
|
||||||
|
|
||||||
|
|
||||||
TEST_CASE("Fit and Score", "[WA2DE]")
|
|
||||||
{
|
|
||||||
auto raw = RawDatasets("iris", true);
|
|
||||||
auto clf = bayesnet::WA2DE();
|
|
||||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
|
||||||
REQUIRE(clf.score(raw.Xt, raw.yt) == Catch::Approx(0.6333333333333333).epsilon(raw.epsilon));
|
|
||||||
}
|
|
||||||
TEST_CASE("Test graph", "[WA2DE]")
|
|
||||||
{
|
|
||||||
auto raw = RawDatasets("iris", true);
|
|
||||||
auto clf = bayesnet::WA2DE();
|
|
||||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
|
||||||
auto graph = clf.graph("BayesNet WA2DE");
|
|
||||||
REQUIRE(graph.size() == 2);
|
|
||||||
REQUIRE(graph[0] == "BayesNet WA2DE");
|
|
||||||
REQUIRE(graph[1] == "Graph visualization not implemented.");
|
|
||||||
}
|
|
@@ -4,88 +4,94 @@
|
|||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
// ***************************************************************
|
// ***************************************************************
|
||||||
|
|
||||||
#include <type_traits>
|
|
||||||
#include <catch2/catch_test_macros.hpp>
|
|
||||||
#include <catch2/catch_approx.hpp>
|
#include <catch2/catch_approx.hpp>
|
||||||
|
#include <catch2/catch_test_macros.hpp>
|
||||||
#include <catch2/generators/catch_generators.hpp>
|
#include <catch2/generators/catch_generators.hpp>
|
||||||
#include <catch2/matchers/catch_matchers.hpp>
|
#include <catch2/matchers/catch_matchers.hpp>
|
||||||
#include "bayesnet/ensembles/XBAODE.h"
|
|
||||||
#include "TestUtils.h"
|
#include "TestUtils.h"
|
||||||
|
#include "bayesnet/ensembles/XBAODE.h"
|
||||||
|
|
||||||
|
TEST_CASE("Normal test", "[XBAODE]") {
|
||||||
TEST_CASE("Normal test", "[XBAODE]")
|
|
||||||
{
|
|
||||||
auto raw = RawDatasets("iris", true);
|
auto raw = RawDatasets("iris", true);
|
||||||
auto clf = bayesnet::XBAODE();
|
auto clf = bayesnet::XBAODE();
|
||||||
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states,
|
||||||
|
raw.smoothing);
|
||||||
REQUIRE(clf.getNumberOfNodes() == 20);
|
REQUIRE(clf.getNumberOfNodes() == 20);
|
||||||
REQUIRE(clf.getNumberOfEdges() == 112);
|
REQUIRE(clf.getNumberOfEdges() == 36);
|
||||||
REQUIRE(clf.getNotes().size() == 1);
|
REQUIRE(clf.getNotes().size() == 1);
|
||||||
|
REQUIRE(clf.getVersion() == "0.9.7");
|
||||||
|
REQUIRE(clf.getNotes()[0] == "Number of models: 4");
|
||||||
|
REQUIRE(clf.getNumberOfStates() == 256);
|
||||||
|
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.933333));
|
||||||
}
|
}
|
||||||
//TEST_CASE("Feature_select CFS", "[XBAODE]")
|
TEST_CASE("Feature_select CFS", "[XBAODE]") {
|
||||||
//{
|
auto raw = RawDatasets("glass", true);
|
||||||
// auto raw = RawDatasets("glass", true);
|
auto clf = bayesnet::XBAODE();
|
||||||
// auto clf = bayesnet::XBAODE();
|
clf.setHyperparameters({{"select_features", "CFS"}});
|
||||||
// clf.setHyperparameters({ {"select_features", "CFS"} });
|
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states,
|
||||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
raw.smoothing);
|
||||||
// REQUIRE(clf.getNumberOfNodes() == 97);
|
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||||
// REQUIRE(clf.getNumberOfEdges() == 153);
|
REQUIRE(clf.getNumberOfEdges() == 171);
|
||||||
// REQUIRE(clf.getNotes().size() == 2);
|
REQUIRE(clf.getNotes().size() == 2);
|
||||||
// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
|
REQUIRE(clf.getNotes()[0] ==
|
||||||
// REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
"Used features in initialization: 6 of 9 with CFS");
|
||||||
//}
|
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||||
// TEST_CASE("Feature_select IWSS", "[XBAODE]")
|
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219));
|
||||||
// {
|
}
|
||||||
// auto raw = RawDatasets("glass", true);
|
TEST_CASE("Feature_select IWSS", "[XBAODE]") {
|
||||||
// auto clf = bayesnet::XBAODE();
|
auto raw = RawDatasets("glass", true);
|
||||||
// clf.setHyperparameters({ {"select_features", "IWSS"}, {"threshold", 0.5 } });
|
auto clf = bayesnet::XBAODE();
|
||||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
clf.setHyperparameters({{"select_features", "IWSS"}, {"threshold", 0.5}});
|
||||||
// REQUIRE(clf.getNumberOfNodes() == 90);
|
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states,
|
||||||
// REQUIRE(clf.getNumberOfEdges() == 153);
|
raw.smoothing);
|
||||||
// REQUIRE(clf.getNotes().size() == 2);
|
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||||
// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with IWSS");
|
REQUIRE(clf.getNumberOfEdges() == 171);
|
||||||
// REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
REQUIRE(clf.getNotes().size() == 2);
|
||||||
// }
|
REQUIRE(clf.getNotes()[0] ==
|
||||||
// TEST_CASE("Feature_select FCBF", "[XBAODE]")
|
"Used features in initialization: 4 of 9 with IWSS");
|
||||||
// {
|
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||||
// auto raw = RawDatasets("glass", true);
|
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.697674394));
|
||||||
// auto clf = bayesnet::XBAODE();
|
}
|
||||||
// clf.setHyperparameters({ {"select_features", "FCBF"}, {"threshold", 1e-7 } });
|
TEST_CASE("Feature_select FCBF", "[XBAODE]") {
|
||||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
auto raw = RawDatasets("glass", true);
|
||||||
// REQUIRE(clf.getNumberOfNodes() == 90);
|
auto clf = bayesnet::XBAODE();
|
||||||
// REQUIRE(clf.getNumberOfEdges() == 153);
|
clf.setHyperparameters({{"select_features", "FCBF"}, {"threshold", 1e-7}});
|
||||||
// REQUIRE(clf.getNotes().size() == 2);
|
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states,
|
||||||
// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 4 of 9 with FCBF");
|
raw.smoothing);
|
||||||
// REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
REQUIRE(clf.getNumberOfNodes() == 90);
|
||||||
// }
|
REQUIRE(clf.getNumberOfEdges() == 171);
|
||||||
// TEST_CASE("Test used features in train note and score", "[XBAODE]")
|
REQUIRE(clf.getNotes().size() == 2);
|
||||||
// {
|
REQUIRE(clf.getNotes()[0] ==
|
||||||
// auto raw = RawDatasets("diabetes", true);
|
"Used features in initialization: 4 of 9 with FCBF");
|
||||||
// auto clf = bayesnet::XBAODE(true);
|
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
|
||||||
// clf.setHyperparameters({
|
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.720930219));
|
||||||
// {"order", "asc"},
|
}
|
||||||
// {"convergence", true},
|
TEST_CASE("Test used features in train note and score", "[XBAODE]")
|
||||||
// {"select_features","CFS"},
|
{
|
||||||
// });
|
auto raw = RawDatasets("diabetes", true);
|
||||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
auto clf = bayesnet::XBAODE();
|
||||||
// REQUIRE(clf.getNumberOfNodes() == 72);
|
clf.setHyperparameters({
|
||||||
// REQUIRE(clf.getNumberOfEdges() == 120);
|
{"order", "asc"},
|
||||||
// REQUIRE(clf.getNotes().size() == 2);
|
{"convergence", true},
|
||||||
// REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
|
{"select_features","CFS"},
|
||||||
// REQUIRE(clf.getNotes()[1] == "Number of models: 8");
|
});
|
||||||
// auto score = clf.score(raw.Xv, raw.yv);
|
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states,
|
||||||
// auto scoret = clf.score(raw.Xt, raw.yt);
|
raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 72);
|
||||||
// REQUIRE(score == Catch::Approx(0.809895813).epsilon(raw.epsilon));
|
REQUIRE(clf.getNumberOfEdges() == 136);
|
||||||
// REQUIRE(scoret == Catch::Approx(0.809895813).epsilon(raw.epsilon));
|
REQUIRE(clf.getNotes().size() == 2);
|
||||||
// }
|
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
|
||||||
|
REQUIRE(clf.getNotes()[1] == "Number of models: 8");
|
||||||
|
auto score = clf.score(raw.Xv, raw.yv); auto scoret = clf.score(raw.Xt, raw.yt);
|
||||||
|
REQUIRE(score == Catch::Approx(0.819010437f).epsilon(raw.epsilon));
|
||||||
|
REQUIRE(scoret == Catch::Approx(0.819010437f).epsilon(raw.epsilon));
|
||||||
|
}
|
||||||
// TEST_CASE("Voting vs proba", "[XBAODE]")
|
// TEST_CASE("Voting vs proba", "[XBAODE]")
|
||||||
// {
|
// {
|
||||||
// auto raw = RawDatasets("iris", true);
|
// auto raw = RawDatasets("iris", true);
|
||||||
// auto clf = bayesnet::XBAODE(false);
|
// auto clf = bayesnet::XBAODE(false);
|
||||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states,
|
||||||
// auto score_proba = clf.score(raw.Xv, raw.yv);
|
// raw.smoothing); auto score_proba = clf.score(raw.Xv, raw.yv); auto
|
||||||
// auto pred_proba = clf.predict_proba(raw.Xv);
|
// pred_proba = clf.predict_proba(raw.Xv); clf.setHyperparameters({
|
||||||
// clf.setHyperparameters({
|
|
||||||
// {"predict_voting",true},
|
// {"predict_voting",true},
|
||||||
// });
|
// });
|
||||||
// auto score_voting = clf.score(raw.Xv, raw.yv);
|
// auto score_voting = clf.score(raw.Xv, raw.yv);
|
||||||
@@ -93,9 +99,9 @@ TEST_CASE("Normal test", "[XBAODE]")
|
|||||||
// REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
|
// REQUIRE(score_proba == Catch::Approx(0.97333).epsilon(raw.epsilon));
|
||||||
// REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
|
// REQUIRE(score_voting == Catch::Approx(0.98).epsilon(raw.epsilon));
|
||||||
// REQUIRE(pred_voting[83][2] == Catch::Approx(1.0).epsilon(raw.epsilon));
|
// REQUIRE(pred_voting[83][2] == Catch::Approx(1.0).epsilon(raw.epsilon));
|
||||||
// REQUIRE(pred_proba[83][2] == Catch::Approx(0.86121525).epsilon(raw.epsilon));
|
// REQUIRE(pred_proba[83][2] ==
|
||||||
// REQUIRE(clf.dump_cpt() == "");
|
// Catch::Approx(0.86121525).epsilon(raw.epsilon)); REQUIRE(clf.dump_cpt()
|
||||||
// REQUIRE(clf.topological_order() == std::vector<std::string>());
|
// == ""); REQUIRE(clf.topological_order() == std::vector<std::string>());
|
||||||
// }
|
// }
|
||||||
// TEST_CASE("Order asc, desc & random", "[XBAODE]")
|
// TEST_CASE("Order asc, desc & random", "[XBAODE]")
|
||||||
// {
|
// {
|
||||||
@@ -111,10 +117,9 @@ TEST_CASE("Normal test", "[XBAODE]")
|
|||||||
// {"maxTolerance", 1},
|
// {"maxTolerance", 1},
|
||||||
// {"convergence", false},
|
// {"convergence", false},
|
||||||
// });
|
// });
|
||||||
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing);
|
// clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states,
|
||||||
// auto score = clf.score(raw.Xv, raw.yv);
|
// raw.smoothing); auto score = clf.score(raw.Xv, raw.yv); auto scoret =
|
||||||
// auto scoret = clf.score(raw.Xt, raw.yt);
|
// clf.score(raw.Xt, raw.yt); INFO("XBAODE order: " << order);
|
||||||
// INFO("XBAODE order: " << order);
|
|
||||||
// REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
// REQUIRE(score == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||||
// REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
// REQUIRE(scoret == Catch::Approx(scores[order]).epsilon(raw.epsilon));
|
||||||
// }
|
// }
|
||||||
@@ -131,10 +136,11 @@ TEST_CASE("Normal test", "[XBAODE]")
|
|||||||
// };
|
// };
|
||||||
// for (const auto& hyper : bad_hyper.items()) {
|
// for (const auto& hyper : bad_hyper.items()) {
|
||||||
// INFO("XBAODE hyper: " << hyper.value().dump());
|
// INFO("XBAODE hyper: " << hyper.value().dump());
|
||||||
// REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
// REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()),
|
||||||
|
// std::invalid_argument);
|
||||||
// }
|
// }
|
||||||
// REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0 } }), std::invalid_argument);
|
// REQUIRE_THROWS_AS(clf.setHyperparameters({ {"maxTolerance", 0 } }),
|
||||||
// auto bad_hyper_fit = nlohmann::json{
|
// std::invalid_argument); auto bad_hyper_fit = nlohmann::json{
|
||||||
// { { "select_features","IWSS" }, { "threshold", -0.01 } },
|
// { { "select_features","IWSS" }, { "threshold", -0.01 } },
|
||||||
// { { "select_features","IWSS" }, { "threshold", 0.51 } },
|
// { { "select_features","IWSS" }, { "threshold", 0.51 } },
|
||||||
// { { "select_features","FCBF" }, { "threshold", 1e-8 } },
|
// { { "select_features","FCBF" }, { "threshold", 1e-8 } },
|
||||||
@@ -143,7 +149,8 @@ TEST_CASE("Normal test", "[XBAODE]")
|
|||||||
// for (const auto& hyper : bad_hyper_fit.items()) {
|
// for (const auto& hyper : bad_hyper_fit.items()) {
|
||||||
// INFO("XBAODE hyper: " << hyper.value().dump());
|
// INFO("XBAODE hyper: " << hyper.value().dump());
|
||||||
// clf.setHyperparameters(hyper.value());
|
// clf.setHyperparameters(hyper.value());
|
||||||
// REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
// REQUIRE_THROWS_AS(clf.fit(raw.Xv, raw.yv, raw.features,
|
||||||
|
// raw.className, raw.states, raw.smoothing), std::invalid_argument);
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// auto bad_hyper_fit2 = nlohmann::json{
|
// auto bad_hyper_fit2 = nlohmann::json{
|
||||||
@@ -152,7 +159,8 @@ TEST_CASE("Normal test", "[XBAODE]")
|
|||||||
// };
|
// };
|
||||||
// for (const auto& hyper : bad_hyper_fit2.items()) {
|
// for (const auto& hyper : bad_hyper_fit2.items()) {
|
||||||
// INFO("XBAODE hyper: " << hyper.value().dump());
|
// INFO("XBAODE hyper: " << hyper.value().dump());
|
||||||
// REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()), std::invalid_argument);
|
// REQUIRE_THROWS_AS(clf.setHyperparameters(hyper.value()),
|
||||||
|
// std::invalid_argument);
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
// TEST_CASE("Bisection Best", "[XBAODE]")
|
// TEST_CASE("Bisection Best", "[XBAODE]")
|
||||||
@@ -165,8 +173,8 @@ TEST_CASE("Normal test", "[XBAODE]")
|
|||||||
// {"convergence", true},
|
// {"convergence", true},
|
||||||
// {"convergence_best", false},
|
// {"convergence_best", false},
|
||||||
// });
|
// });
|
||||||
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className,
|
||||||
// REQUIRE(clf.getNumberOfNodes() == 210);
|
// raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 210);
|
||||||
// REQUIRE(clf.getNumberOfEdges() == 378);
|
// REQUIRE(clf.getNumberOfEdges() == 378);
|
||||||
// REQUIRE(clf.getNotes().size() == 1);
|
// REQUIRE(clf.getNotes().size() == 1);
|
||||||
// REQUIRE(clf.getNotes().at(0) == "Number of models: 14");
|
// REQUIRE(clf.getNotes().at(0) == "Number of models: 14");
|
||||||
@@ -186,15 +194,17 @@ TEST_CASE("Normal test", "[XBAODE]")
|
|||||||
// {"convergence_best", true},
|
// {"convergence_best", true},
|
||||||
// };
|
// };
|
||||||
// clf.setHyperparameters(hyperparameters);
|
// clf.setHyperparameters(hyperparameters);
|
||||||
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className,
|
||||||
// auto score_best = clf.score(raw.X_test, raw.y_test);
|
// raw.states, raw.smoothing); auto score_best = clf.score(raw.X_test,
|
||||||
// REQUIRE(score_best == Catch::Approx(0.980000019f).epsilon(raw.epsilon));
|
// raw.y_test); REQUIRE(score_best ==
|
||||||
|
// Catch::Approx(0.980000019f).epsilon(raw.epsilon));
|
||||||
// // Now we will set the hyperparameter to use the last accuracy
|
// // Now we will set the hyperparameter to use the last accuracy
|
||||||
// hyperparameters["convergence_best"] = false;
|
// hyperparameters["convergence_best"] = false;
|
||||||
// clf.setHyperparameters(hyperparameters);
|
// clf.setHyperparameters(hyperparameters);
|
||||||
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className,
|
||||||
// auto score_last = clf.score(raw.X_test, raw.y_test);
|
// raw.states, raw.smoothing); auto score_last = clf.score(raw.X_test,
|
||||||
// REQUIRE(score_last == Catch::Approx(0.976666689f).epsilon(raw.epsilon));
|
// raw.y_test); REQUIRE(score_last ==
|
||||||
|
// Catch::Approx(0.976666689f).epsilon(raw.epsilon));
|
||||||
// }
|
// }
|
||||||
// TEST_CASE("Block Update", "[XBAODE]")
|
// TEST_CASE("Block Update", "[XBAODE]")
|
||||||
// {
|
// {
|
||||||
@@ -206,20 +216,21 @@ TEST_CASE("Normal test", "[XBAODE]")
|
|||||||
// {"maxTolerance", 3},
|
// {"maxTolerance", 3},
|
||||||
// {"convergence", true},
|
// {"convergence", true},
|
||||||
// });
|
// });
|
||||||
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
// clf.fit(raw.X_train, raw.y_train, raw.features, raw.className,
|
||||||
// REQUIRE(clf.getNumberOfNodes() == 868);
|
// raw.states, raw.smoothing); REQUIRE(clf.getNumberOfNodes() == 868);
|
||||||
// REQUIRE(clf.getNumberOfEdges() == 1724);
|
// REQUIRE(clf.getNumberOfEdges() == 1724);
|
||||||
// REQUIRE(clf.getNotes().size() == 3);
|
// REQUIRE(clf.getNotes().size() == 3);
|
||||||
// REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models eliminated");
|
// REQUIRE(clf.getNotes()[0] == "Convergence threshold reached & 15 models
|
||||||
// REQUIRE(clf.getNotes()[1] == "Used features in train: 19 of 216");
|
// eliminated"); REQUIRE(clf.getNotes()[1] == "Used features in train: 19 of
|
||||||
// REQUIRE(clf.getNotes()[2] == "Number of models: 4");
|
// 216"); REQUIRE(clf.getNotes()[2] == "Number of models: 4"); auto score =
|
||||||
// auto score = clf.score(raw.X_test, raw.y_test);
|
// clf.score(raw.X_test, raw.y_test); auto scoret = clf.score(raw.X_test,
|
||||||
// auto scoret = clf.score(raw.X_test, raw.y_test);
|
// raw.y_test); REQUIRE(score == Catch::Approx(0.99f).epsilon(raw.epsilon));
|
||||||
// REQUIRE(score == Catch::Approx(0.99f).epsilon(raw.epsilon));
|
|
||||||
// REQUIRE(scoret == Catch::Approx(0.99f).epsilon(raw.epsilon));
|
// REQUIRE(scoret == Catch::Approx(0.99f).epsilon(raw.epsilon));
|
||||||
// //
|
// //
|
||||||
// // std::cout << "Number of nodes " << clf.getNumberOfNodes() << std::endl;
|
// // std::cout << "Number of nodes " << clf.getNumberOfNodes() <<
|
||||||
// // std::cout << "Number of edges " << clf.getNumberOfEdges() << std::endl;
|
// std::endl;
|
||||||
|
// // std::cout << "Number of edges " << clf.getNumberOfEdges() <<
|
||||||
|
// std::endl;
|
||||||
// // std::cout << "Notes size " << clf.getNotes().size() << std::endl;
|
// // std::cout << "Notes size " << clf.getNotes().size() << std::endl;
|
||||||
// // for (auto note : clf.getNotes()) {
|
// // for (auto note : clf.getNotes()) {
|
||||||
// // std::cout << note << std::endl;
|
// // std::cout << note << std::endl;
|
||||||
@@ -234,10 +245,11 @@ TEST_CASE("Normal test", "[XBAODE]")
|
|||||||
// clf_alpha.setHyperparameters({
|
// clf_alpha.setHyperparameters({
|
||||||
// {"alpha_block", true},
|
// {"alpha_block", true},
|
||||||
// });
|
// });
|
||||||
// clf_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
// clf_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className,
|
||||||
// clf_no_alpha.fit(raw.X_train, raw.y_train, raw.features, raw.className, raw.states, raw.smoothing);
|
// raw.states, raw.smoothing); clf_no_alpha.fit(raw.X_train, raw.y_train,
|
||||||
// auto score_alpha = clf_alpha.score(raw.X_test, raw.y_test);
|
// raw.features, raw.className, raw.states, raw.smoothing); auto score_alpha
|
||||||
// auto score_no_alpha = clf_no_alpha.score(raw.X_test, raw.y_test);
|
// = clf_alpha.score(raw.X_test, raw.y_test); auto score_no_alpha =
|
||||||
// REQUIRE(score_alpha == Catch::Approx(0.720779f).epsilon(raw.epsilon));
|
// clf_no_alpha.score(raw.X_test, raw.y_test); REQUIRE(score_alpha ==
|
||||||
// REQUIRE(score_no_alpha == Catch::Approx(0.733766f).epsilon(raw.epsilon));
|
// Catch::Approx(0.720779f).epsilon(raw.epsilon)); REQUIRE(score_no_alpha ==
|
||||||
|
// Catch::Approx(0.733766f).epsilon(raw.epsilon));
|
||||||
// }
|
// }
|
||||||
|
126
tests/TestXSPODE.cc
Normal file
126
tests/TestXSPODE.cc
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
// ***************************************************************
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX-FileType: SOURCE
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
// ***************************************************************
|
||||||
|
|
||||||
|
#include <catch2/catch_test_macros.hpp>
|
||||||
|
#include <catch2/catch_approx.hpp>
|
||||||
|
#include <catch2/matchers/catch_matchers.hpp>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include "bayesnet/classifiers/XSPODE.h"
|
||||||
|
#include "TestUtils.h"
|
||||||
|
|
||||||
|
TEST_CASE("fit vector test", "[XSPODE]") {
|
||||||
|
auto raw = RawDatasets("iris", true);
|
||||||
|
auto scores = std::vector<float>({0.966667, 0.9333333, 0.966667, 0.966667});
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
auto clf = bayesnet::XSpode(i);
|
||||||
|
clf.fit(raw.Xv, raw.yv, raw.features, raw.className, raw.states,
|
||||||
|
raw.smoothing);
|
||||||
|
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||||
|
REQUIRE(clf.getNumberOfEdges() == 9);
|
||||||
|
REQUIRE(clf.getNotes().size() == 0);
|
||||||
|
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TEST_CASE("fit dataset test", "[XSPODE]") {
|
||||||
|
auto raw = RawDatasets("iris", true);
|
||||||
|
auto scores = std::vector<float>({0.966667, 0.9333333, 0.966667, 0.966667});
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
auto clf = bayesnet::XSpode(i);
|
||||||
|
clf.fit(raw.dataset, raw.features, raw.className, raw.states,
|
||||||
|
raw.smoothing);
|
||||||
|
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||||
|
REQUIRE(clf.getNumberOfEdges() == 9);
|
||||||
|
REQUIRE(clf.getNotes().size() == 0);
|
||||||
|
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TEST_CASE("tensors dataset predict & predict_proba", "[XSPODE]") {
|
||||||
|
auto raw = RawDatasets("iris", true);
|
||||||
|
auto scores = std::vector<float>({0.966667, 0.9333333, 0.966667, 0.966667});
|
||||||
|
auto probs_expected = std::vector<std::vector<float>>({
|
||||||
|
{0.999017, 0.000306908, 0.000676449},
|
||||||
|
{0.99831, 0.00119304, 0.000497099},
|
||||||
|
{0.998432, 0.00078416, 0.00078416},
|
||||||
|
{0.998801, 0.000599438, 0.000599438}
|
||||||
|
});
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
auto clf = bayesnet::XSpode(i);
|
||||||
|
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states,
|
||||||
|
raw.smoothing);
|
||||||
|
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||||
|
REQUIRE(clf.getNumberOfEdges() == 9);
|
||||||
|
REQUIRE(clf.getNotes().size() == 0);
|
||||||
|
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
|
||||||
|
// Get the first 4 lines of X_test to do predict_proba
|
||||||
|
auto X_reduced = raw.X_test.slice(1, 0, 4);
|
||||||
|
auto proba = clf.predict_proba(X_reduced);
|
||||||
|
for (int p = 0; p < 3; ++p) {
|
||||||
|
REQUIRE(proba[0][p].item<double>() == Catch::Approx(probs_expected.at(i).at(p)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("mfeat-factors dataset test", "[XSPODE]") {
|
||||||
|
auto raw = RawDatasets("mfeat-factors", true);
|
||||||
|
auto scores = std::vector<float>({0.9825, 0.9775, 0.9775, 0.99});
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
auto clf = bayesnet::XSpode(i);
|
||||||
|
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, raw.smoothing);
|
||||||
|
REQUIRE(clf.getNumberOfNodes() == 217);
|
||||||
|
REQUIRE(clf.getNumberOfEdges() == 433);
|
||||||
|
REQUIRE(clf.getNotes().size() == 0);
|
||||||
|
REQUIRE(clf.getNumberOfStates() == 652320);
|
||||||
|
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TEST_CASE("Laplace predict", "[XSPODE]") {
|
||||||
|
auto raw = RawDatasets("iris", true);
|
||||||
|
auto scores = std::vector<float>({0.966666639, 1.0f, 0.933333337, 1.0f});
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
auto clf = bayesnet::XSpode(0);
|
||||||
|
clf.setHyperparameters({ {"parent", i} });
|
||||||
|
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::LAPLACE);
|
||||||
|
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||||
|
REQUIRE(clf.getNumberOfEdges() == 9);
|
||||||
|
REQUIRE(clf.getNotes().size() == 0);
|
||||||
|
REQUIRE(clf.getNumberOfStates() == 64);
|
||||||
|
REQUIRE(clf.getNFeatures() == 4);
|
||||||
|
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(scores.at(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TEST_CASE("Not fitted model predict", "[XSPODE]")
|
||||||
|
{
|
||||||
|
auto raw = RawDatasets("iris", true);
|
||||||
|
auto clf = bayesnet::XSpode(0);
|
||||||
|
REQUIRE_THROWS_AS(clf.predict(std::vector<int>({1,2,3})), std::logic_error);
|
||||||
|
}
|
||||||
|
TEST_CASE("Test instance predict", "[XSPODE]")
|
||||||
|
{
|
||||||
|
auto raw = RawDatasets("iris", true);
|
||||||
|
auto clf = bayesnet::XSpode(0);
|
||||||
|
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::ORIGINAL);
|
||||||
|
REQUIRE(clf.predict(std::vector<int>({1,2,3,4})) == 1);
|
||||||
|
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.973333359f));
|
||||||
|
// Cestnik is not defined in the classifier so it should imply alpha_ = 0
|
||||||
|
clf.fit(raw.Xt, raw.yt, raw.features, raw.className, raw.states, bayesnet::Smoothing_t::CESTNIK);
|
||||||
|
REQUIRE(clf.predict(std::vector<int>({1,2,3,4})) == 0);
|
||||||
|
REQUIRE(clf.score(raw.Xv, raw.yv) == Catch::Approx(0.973333359f));
|
||||||
|
}
|
||||||
|
TEST_CASE("Test to_string and fitx", "[XSPODE]")
|
||||||
|
{
|
||||||
|
auto raw = RawDatasets("iris", true);
|
||||||
|
auto clf = bayesnet::XSpode(0);
|
||||||
|
auto weights = torch::full({raw.Xt.size(1)}, 1.0 / raw.Xt.size(1), torch::kFloat64);
|
||||||
|
clf.fitx(raw.Xt, raw.yt, weights, bayesnet::Smoothing_t::ORIGINAL);
|
||||||
|
REQUIRE(clf.getNumberOfNodes() == 5);
|
||||||
|
REQUIRE(clf.getNumberOfEdges() == 9);
|
||||||
|
REQUIRE(clf.getNotes().size() == 0);
|
||||||
|
REQUIRE(clf.getNumberOfStates() == 64);
|
||||||
|
REQUIRE(clf.getNFeatures() == 4);
|
||||||
|
REQUIRE(clf.score(raw.X_test, raw.y_test) == Catch::Approx(0.966666639f));
|
||||||
|
REQUIRE(clf.to_string().size() == 1966);
|
||||||
|
REQUIRE(clf.graph("Not yet implemented") == std::vector<std::string>({"Not yet implemented"}));
|
||||||
|
}
|
Reference in New Issue
Block a user