Implement 3 types of smoothing
parent 684443a788
commit 27a3e5a5e0
@@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add tests to check the correct version of the mdlp, folding and json libraries.
- Library documentation generated with Doxygen.
- Link to documentation in the README.md.
- Three types of smoothing for the Bayesian Network: OLD_LAPLACE, LAPLACE and CESTNIK.

### Internal
@@ -7,7 +7,7 @@
[![Security Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=security_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
[![Reliability Rating](https://sonarcloud.io/api/project_badges/measure?project=rmontanana_BayesNet&metric=reliability_rating)](https://sonarcloud.io/summary/new_code?id=rmontanana_BayesNet)
![Gitea Last Commit](https://img.shields.io/gitea/last-commit/rmontanana/bayesnet?gitea_url=https://gitea.rmontanana.es:3000&logo=gitea)
[![Coverage Badge](https://img.shields.io/badge/Coverage-97,3%25-green)](html/index.html)
[![Coverage Badge](https://img.shields.io/badge/Coverage-97,2%25-green)](html/index.html)

Bayesian Network Classifiers using libtorch from scratch
@@ -8,10 +8,13 @@
#include <vector>
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include "bayesnet/network/Network.h"

namespace bayesnet {
    enum status_t { NORMAL, WARNING, ERROR };
    class BaseClassifier {
    public:
        void setSmoothing(Smoothing_t smoothing) { this->smoothing = smoothing; } // To call before fit
        // X is nxm std::vector, y is nx1 std::vector
        virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
        // X is nxm tensor, y is nx1 tensor
@@ -41,5 +44,6 @@ namespace bayesnet {
    protected:
        virtual void trainModel(const torch::Tensor& weights) = 0;
        std::vector<std::string> validHyperparameters;
        Smoothing_t smoothing = Smoothing_t::NONE;
    };
}
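The new `setSmoothing` must be called before `fit`, as the comment above notes. A minimal usage sketch (not part of the diff; the helper name is hypothetical, and only types and signatures shown in this commit are used):

```cpp
#include <map>
#include <string>
#include <vector>
#include "bayesnet/BaseClassifier.h"

// Illustrative helper: any BaseClassifier-derived model is configured the same way,
// setSmoothing() first ("To call before fit"), then fit() with the discretized data.
bayesnet::BaseClassifier& fit_with_laplace(bayesnet::BaseClassifier& clf,
                                           std::vector<std::vector<int>>& X, std::vector<int>& y,
                                           const std::vector<std::string>& features,
                                           const std::string& className,
                                           std::map<std::string, std::vector<int>>& states)
{
    clf.setSmoothing(bayesnet::Smoothing_t::LAPLACE); // pick one of OLD_LAPLACE, LAPLACE, CESTNIK
    return clf.fit(X, y, features, className, states);
}
```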
@@ -22,6 +22,7 @@ namespace bayesnet {
        auto n_classes = states.at(className).size();
        metrics = Metrics(dataset, features, className, n_classes);
        model.initialize();
        model.setSmoothing(smoothing);
        buildModel(weights);
        trainModel(weights);
        fitted = true;
@@ -8,7 +8,6 @@
#define CLASSIFIER_H
#include <torch/torch.h>
#include "bayesnet/utils/BayesMetrics.h"
#include "bayesnet/network/Network.h"
#include "bayesnet/BaseClassifier.h"

namespace bayesnet {
@@ -37,6 +37,7 @@ namespace bayesnet {
    void AODELd::trainModel(const torch::Tensor& weights)
    {
        for (const auto& model : models) {
            model->setSmoothing(smoothing);
            model->fit(Xf, y, features, className, states);
        }
    }
@@ -32,6 +32,7 @@ namespace bayesnet {
            for (int j = i + 1; j < featuresSelected.size(); j++) {
                auto parents = { featuresSelected[i], featuresSelected[j] };
                std::unique_ptr<Classifier> model = std::make_unique<SPnDE>(parents);
                model->setSmoothing(smoothing);
                model->fit(dataset, features, className, states, weights_);
                models.push_back(std::move(model));
                significanceModels.push_back(1.0); // They will be updated later in trainModel
@@ -96,6 +97,7 @@ namespace bayesnet {
            pairSelection.erase(pairSelection.begin());
            std::unique_ptr<Classifier> model;
            model = std::make_unique<SPnDE>(std::vector<int>({ feature_pair.first, feature_pair.second }));
            model->setSmoothing(smoothing);
            model->fit(dataset, features, className, states, weights_);
            alpha_t = 0.0;
            if (!block_update) {
@@ -22,6 +22,7 @@ namespace bayesnet {
        std::vector<int> featuresSelected = featureSelection(weights_);
        for (const int& feature : featuresSelected) {
            std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
            model->setSmoothing(smoothing);
            model->fit(dataset, features, className, states, weights_);
            models.push_back(std::move(model));
            significanceModels.push_back(1.0); // They will be updated later in trainModel
@@ -89,6 +90,7 @@ namespace bayesnet {
            featureSelection.erase(featureSelection.begin());
            std::unique_ptr<Classifier> model;
            model = std::make_unique<SPODE>(feature);
            model->setSmoothing(smoothing);
            model->fit(dataset, features, className, states, weights_);
            alpha_t = 0.0;
            if (!block_update) {
@@ -18,6 +18,7 @@ namespace bayesnet {
        n_models = models.size();
        for (auto i = 0; i < n_models; ++i) {
            // fit with std::vectors
            models[i]->setSmoothing(smoothing);
            models[i]->fit(dataset, features, className, states);
        }
    }
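The same propagation pattern appears in every ensemble touched by this commit (AODELd, the Boost* classes and Ensemble): the ensemble copies its own smoothing setting into each sub-model immediately before fitting it, so all CPTs are estimated consistently. A condensed, self-contained mock of that pattern (the types below are stand-ins, not the library's classes):

```cpp
#include <memory>
#include <vector>

// Stand-in enum mirroring bayesnet::Smoothing_t from this commit.
enum class Smoothing_t { NONE = -1, OLD_LAPLACE = 0, LAPLACE, CESTNIK };

struct MockModel {                       // stand-in for a bayesnet::Classifier sub-model
    Smoothing_t smoothing = Smoothing_t::NONE;
    void setSmoothing(Smoothing_t s) { smoothing = s; }
    void fit() { /* train using the configured smoothing */ }
};

struct MockEnsemble {                    // stand-in for AODELd / BoostAODE / Ensemble
    Smoothing_t smoothing = Smoothing_t::LAPLACE;
    std::vector<std::unique_ptr<MockModel>> models;
    void trainModel()
    {
        for (auto& model : models) {
            model->setSmoothing(smoothing);  // propagate before fit, as in the hunks above
            model->fit();
        }
    }
};
```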
@@ -165,14 +165,14 @@ namespace bayesnet {
        for (int i = 0; i < featureNames.size(); ++i) {
            auto row_feature = X.index({ i, "..." });
        }
        completeFit(states, X.size(0), weights);
        completeFit(states, weights);
    }
    void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
    {
        checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
        this->className = className;
        this->samples = samples;
        completeFit(states, samples.size(1), weights);
        completeFit(states, weights);
    }
    // input_data comes in nxm, where n is the number of features and m the number of samples
    void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
@@ -186,16 +186,30 @@ namespace bayesnet {
            samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
        }
        samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
        completeFit(states, input_data[0].size(), weights);
        completeFit(states, weights);
    }
    void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const int n_samples, const torch::Tensor& weights)
    void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
    {
        setStates(states);
        std::vector<std::thread> threads;
        const double n_samples = static_cast<double>(samples.size(1));
        for (auto& node : nodes) {
            threads.emplace_back([this, &node, &weights, n_samples]() {
                auto numStates = node.second->getNumStates();
                double smoothing_factor = smoothing == Smoothing_t::CESTNIK ? static_cast<double>(n_samples) / numStates : 1.0 / static_cast<double>(n_samples);
                double numStates = static_cast<double>(node.second->getNumStates());
                double smoothing_factor = 0.0;
                switch (smoothing) {
                    case Smoothing_t::OLD_LAPLACE:
                        smoothing_factor = 1.0 / n_samples;
                        break;
                    case Smoothing_t::LAPLACE:
                        smoothing_factor = 1.0;
                        break;
                    case Smoothing_t::CESTNIK:
                        smoothing_factor = n_samples / numStates;
                        break;
                    default:
                        throw std::invalid_argument("Smoothing method not recognized " + std::to_string(static_cast<int>(smoothing)));
                }
                node.second->computeCPT(samples, features, smoothing_factor, weights);
            });
        }
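Summarizing the switch above: with N = `samples.size(1)` (number of samples) and c = `getNumStates()` for a node, the factor handed to `computeCPT` is

```latex
\alpha =
\begin{cases}
  1/N & \text{OLD\_LAPLACE} \\
  1   & \text{LAPLACE} \\
  N/c & \text{CESTNIK}
\end{cases}
```

OLD_LAPLACE matches the `1.0 / n_samples` branch of the ternary this switch replaces. How the factor enters the CPT counts is up to `Node::computeCPT`, which this diff does not show. Note also that `Smoothing_t::NONE` falls into the `default` case and throws, so a smoothing method has to be chosen explicitly before fitting.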
@@ -13,6 +13,8 @@

namespace bayesnet {
    enum class Smoothing_t {
        NONE = -1,
        OLD_LAPLACE = 0,
        LAPLACE,
        CESTNIK
    };
@@ -36,6 +38,7 @@ namespace bayesnet {
        /*
        Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
        */
        void setSmoothing(Smoothing_t smoothing) { this->smoothing = smoothing; };
        void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
        void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
        void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
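For the `Network`-level API declared above, the call order is the same as for classifiers. A small sketch (the helper is hypothetical; the node/edge construction that normally precedes `fit` is assumed to have been done by the caller, and only declarations shown in this commit are used):

```cpp
#include <map>
#include <string>
#include <vector>
#include <torch/torch.h>
#include "bayesnet/network/Network.h"

// Illustrative only: choose the smoothing method on the Network, then fit with the
// tensor overload declared above (per the checkFitData call in Network.cc, samples
// carries the feature rows plus the class row).
void fit_network_with_cestnik(bayesnet::Network& net,
                              const torch::Tensor& samples, const torch::Tensor& weights,
                              const std::vector<std::string>& featureNames, const std::string& className,
                              const std::map<std::string, std::vector<int>>& states)
{
    net.setSmoothing(bayesnet::Smoothing_t::CESTNIK); // factor N/c, see the switch in Network.cc
    net.fit(samples, weights, featureNames, className, states);
}
```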
@@ -65,7 +68,7 @@ namespace bayesnet {
        std::vector<double> predict_sample(const torch::Tensor&);
        std::vector<double> exactInference(std::map<std::string, int>&);
        double computeFactor(std::map<std::string, int>&);
        void completeFit(const std::map<std::string, std::vector<int>>& states, const int n_samples, const torch::Tensor& weights);
        void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
        void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
        void setStates(const std::map<std::string, std::vector<int>>&);
    };