Implement 3 types of smoothing

commit 27a3e5a5e0
parent 684443a788
@@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add tests to check the correct version of the mdlp, folding and json libraries.
 - Library documentation generated with Doxygen.
 - Link to documentation in the README.md.
+- Three types of smoothing the Bayesian Network OLD_LAPLACE, LAPLACE and CESTNIK.
 
 ### Internal
 
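For orientation, the three strategies named in this changelog entry correspond to the `Smoothing_t` values introduced later in this diff. The summary below restates that enum with comments; reading each value as a pseudo-count is an assumption based on the `smoothing_factor` computed in `Network::completeFit` further down, since `Node::computeCPT` itself is not shown in this commit.

```cpp
// Sketch, not part of the diff: the smoothing options added by this commit.
enum class Smoothing_t {
    NONE = -1,       // default on BaseClassifier; Network::completeFit throws if left unset
    OLD_LAPLACE = 0, // previous behaviour: factor 1 / n_samples
    LAPLACE,         // classic add-one smoothing: factor 1.0
    CESTNIK          // Cestnik / m-estimate style: factor n_samples / numStates
};
```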
@@ -7,7 +7,7 @@
 [![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
 [![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
 ![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
-[![Coverage Badge](https://img.shields.io/badge/Coverage-97,3%25-green)](html/index.html)
+[![Coverage Badge](https://img.shields.io/badge/Coverage-97,2%25-green)](html/index.html)
 
 Bayesian Network Classifiers using libtorch from scratch
 
@@ -8,10 +8,13 @@
 #include <vector>
 #include <torch/torch.h>
 #include <nlohmann/json.hpp>
+#include "bayesnet/network/Network.h"
+
 namespace bayesnet {
     enum status_t { NORMAL, WARNING, ERROR };
     class BaseClassifier {
     public:
+        void setSmoothing(Smoothing_t smoothing) { this->smoothing = smoothing; } // To call before fit
         // X is nxm std::vector, y is nx1 std::vector
         virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
         // X is nxm tensor, y is nx1 tensor
@@ -41,5 +44,6 @@ namespace bayesnet {
     protected:
         virtual void trainModel(const torch::Tensor& weights) = 0;
         std::vector<std::string> validHyperparameters;
+        Smoothing_t smoothing = Smoothing_t::NONE;
     };
 }
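Because the new `smoothing` member defaults to `Smoothing_t::NONE`, which falls into the throwing `default:` branch of `Network::completeFit` below, callers are expected to invoke `setSmoothing` before `fit`. A minimal sketch of that call order follows; `SPODE` and the `fit` signature come from hunks in this commit, while the include path and the surrounding helper function are illustrative assumptions.

```cpp
#include <map>
#include <string>
#include <vector>
#include "bayesnet/classifiers/SPODE.h"  // assumed header location for SPODE

// Hypothetical helper: train a single SPODE with explicit Laplace smoothing.
void trainWithLaplace(std::vector<std::vector<int>>& X, std::vector<int>& y,
                      const std::vector<std::string>& features, const std::string& className,
                      std::map<std::string, std::vector<int>>& states)
{
    bayesnet::SPODE model(0);                            // super-parent = feature 0
    model.setSmoothing(bayesnet::Smoothing_t::LAPLACE);  // must happen before fit()
    model.fit(X, y, features, className, states);        // forwarded to the Network in Classifier::fit
}
```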
@@ -22,6 +22,7 @@ namespace bayesnet {
         auto n_classes = states.at(className).size();
         metrics = Metrics(dataset, features, className, n_classes);
         model.initialize();
+        model.setSmoothing(smoothing);
         buildModel(weights);
         trainModel(weights);
         fitted = true;
@@ -8,7 +8,6 @@
 #define CLASSIFIER_H
 #include <torch/torch.h>
 #include "bayesnet/utils/BayesMetrics.h"
-#include "bayesnet/network/Network.h"
 #include "bayesnet/BaseClassifier.h"
 
 namespace bayesnet {
@@ -37,6 +37,7 @@ namespace bayesnet {
     void AODELd::trainModel(const torch::Tensor& weights)
     {
         for (const auto& model : models) {
+            model->setSmoothing(smoothing);
             model->fit(Xf, y, features, className, states);
         }
     }
@@ -32,6 +32,7 @@ namespace bayesnet {
             for (int j = i + 1; j < featuresSelected.size(); j++) {
                 auto parents = { featuresSelected[i], featuresSelected[j] };
                 std::unique_ptr<Classifier> model = std::make_unique<SPnDE>(parents);
+                model->setSmoothing(smoothing);
                 model->fit(dataset, features, className, states, weights_);
                 models.push_back(std::move(model));
                 significanceModels.push_back(1.0); // They will be updated later in trainModel
@@ -96,6 +97,7 @@ namespace bayesnet {
             pairSelection.erase(pairSelection.begin());
             std::unique_ptr<Classifier> model;
             model = std::make_unique<SPnDE>(std::vector<int>({ feature_pair.first, feature_pair.second }));
+            model->setSmoothing(smoothing);
             model->fit(dataset, features, className, states, weights_);
             alpha_t = 0.0;
             if (!block_update) {
@@ -22,6 +22,7 @@ namespace bayesnet {
         std::vector<int> featuresSelected = featureSelection(weights_);
         for (const int& feature : featuresSelected) {
             std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
+            model->setSmoothing(smoothing);
             model->fit(dataset, features, className, states, weights_);
             models.push_back(std::move(model));
             significanceModels.push_back(1.0); // They will be updated later in trainModel
@@ -89,6 +90,7 @@ namespace bayesnet {
             featureSelection.erase(featureSelection.begin());
             std::unique_ptr<Classifier> model;
             model = std::make_unique<SPODE>(feature);
+            model->setSmoothing(smoothing);
             model->fit(dataset, features, className, states, weights_);
             alpha_t = 0.0;
             if (!block_update) {
@@ -18,6 +18,7 @@ namespace bayesnet {
         n_models = models.size();
         for (auto i = 0; i < n_models; ++i) {
             // fit with std::vectors
+            models[i]->setSmoothing(smoothing);
             models[i]->fit(dataset, features, className, states);
         }
     }
@@ -165,14 +165,14 @@ namespace bayesnet {
         for (int i = 0; i < featureNames.size(); ++i) {
             auto row_feature = X.index({ i, "..." });
         }
-        completeFit(states, X.size(0), weights);
+        completeFit(states, weights);
     }
     void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
     {
         checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
         this->className = className;
         this->samples = samples;
-        completeFit(states, samples.size(1), weights);
+        completeFit(states, weights);
     }
     // input_data comes in nxm, where n is the number of features and m the number of samples
     void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
@@ -186,16 +186,30 @@ namespace bayesnet {
             samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
         }
         samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
-        completeFit(states, input_data[0].size(), weights);
+        completeFit(states, weights);
     }
-    void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const int n_samples, const torch::Tensor& weights)
+    void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
     {
         setStates(states);
         std::vector<std::thread> threads;
+        const double n_samples = static_cast<double>(samples.size(1));
         for (auto& node : nodes) {
             threads.emplace_back([this, &node, &weights, n_samples]() {
-                auto numStates = node.second->getNumStates();
-                double smoothing_factor = smoothing == Smoothing_t::CESTNIK ? static_cast<double>(n_samples) / numStates : 1.0 / static_cast<double>(n_samples);
+                double numStates = static_cast<double>(node.second->getNumStates());
+                double smoothing_factor = 0.0;
+                switch (smoothing) {
+                    case Smoothing_t::OLD_LAPLACE:
+                        smoothing_factor = 1.0 / n_samples;
+                        break;
+                    case Smoothing_t::LAPLACE:
+                        smoothing_factor = 1.0;
+                        break;
+                    case Smoothing_t::CESTNIK:
+                        smoothing_factor = n_samples / numStates;
+                        break;
+                    default:
+                        throw std::invalid_argument("Smoothing method not recognized " + std::to_string(static_cast<int>(smoothing)));
+                }
                 node.second->computeCPT(samples, features, smoothing_factor, weights);
             });
         }
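The switch above only selects a per-node `smoothing_factor`; how that factor enters the counts is decided inside `Node::computeCPT`, which this diff does not touch. Assuming the usual additive (pseudo-count) form of smoothing, the estimate for a node with $K$ states and $n$ samples would be

$$
\hat P(x = k \mid \mathrm{pa}) = \frac{N_{k,\mathrm{pa}} + \alpha}{N_{\mathrm{pa}} + \alpha K},
\qquad
\alpha =
\begin{cases}
  1/n & \text{OLD\_LAPLACE (previous behaviour)}\\
  1   & \text{LAPLACE (add-one)}\\
  n/K & \text{CESTNIK}
\end{cases}
$$

Under that assumption, `LAPLACE` is classic add-one smoothing, and `CESTNIK` matches an m-estimate with $m = n$ and uniform prior $p_0 = 1/K$, since $N_{k,\mathrm{pa}} + n/K = N_{k,\mathrm{pa}} + m\,p_0$ and the denominator becomes $N_{\mathrm{pa}} + n$.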
@@ -13,6 +13,8 @@
 
 namespace bayesnet {
     enum class Smoothing_t {
+        NONE = -1,
+        OLD_LAPLACE = 0,
         LAPLACE,
         CESTNIK
     };
@@ -36,6 +38,7 @@ namespace bayesnet {
         /*
         Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
         */
+        void setSmoothing(Smoothing_t smoothing) { this->smoothing = smoothing; };
         void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
         void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
         void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
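The same pattern applies when driving `Network` directly: smoothing is configured once on the network and then used by every node in `completeFit`. A sketch with the `std::vector` overload shown above follows; `addNode`/`addEdge` are assumed to be the existing structure-building API (they are not part of this diff), and the data layout (one row per feature, labels separate) follows the comment in `Network::fit`.

```cpp
#include <map>
#include <string>
#include <vector>
#include "bayesnet/network/Network.h"

// Hypothetical example: naive-Bayes structure fitted with Cestnik smoothing.
void fitNetwork(const std::vector<std::vector<int>>& X,   // n features x m samples
                const std::vector<int>& y,                // m class labels
                const std::vector<std::string>& features,
                const std::string& className,
                const std::map<std::string, std::vector<int>>& states)
{
    bayesnet::Network net;
    net.addNode(className);                   // assumed existing API
    for (const auto& feature : features) {
        net.addNode(feature);                 // assumed existing API
        net.addEdge(className, feature);      // class -> feature, naive-Bayes style
    }
    net.setSmoothing(bayesnet::Smoothing_t::CESTNIK);  // added in this commit
    std::vector<double> weights(y.size(), 1.0);        // uniform sample weights
    net.fit(X, y, weights, features, className, states);
}
```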
@@ -65,7 +68,7 @@ namespace bayesnet {
         std::vector<double> predict_sample(const torch::Tensor&);
         std::vector<double> exactInference(std::map<std::string, int>&);
         double computeFactor(std::map<std::string, int>&);
-        void completeFit(const std::map<std::string, std::vector<int>>& states, const int n_samples, const torch::Tensor& weights);
+        void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
         void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
         void setStates(const std::map<std::string, std::vector<int>>&);
     };