Implement Cestnik & Laplace smoothing

This commit is contained in:
Ricardo Montañana Gómez 2024-06-09 17:19:38 +02:00
parent 6d9badc33b
commit 684443a788
Signed by: rmontanana
GPG Key ID: 46064262FD9A7ADE
9 changed files with 28 additions and 22 deletions

View File

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20)
project(BayesNet
VERSION 1.0.5.1
VERSION 1.0.6
DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX

View File

@ -7,17 +7,18 @@
#include <thread>
#include <mutex>
#include <sstream>
#include <numeric>
#include "Network.h"
#include "bayesnet/utils/bayesnetUtils.h"
namespace bayesnet {
Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
Network::Network() : fitted{ false }, maxThreads{ 0.95 }, classNumStates{ 0 }, smoothing{ Smoothing_t::LAPLACE }
{
}
Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, laplaceSmoothing{ 0 }
Network::Network(float maxT) : fitted{ false }, maxThreads{ maxT }, classNumStates{ 0 }, smoothing{ Smoothing_t::LAPLACE }
{
}
Network::Network(const Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
Network::Network(const Network& other) : smoothing(other.smoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
maxThreads(other.getMaxThreads()), fitted(other.fitted), samples(other.samples)
{
if (samples.defined())
@ -164,14 +165,14 @@ namespace bayesnet {
for (int i = 0; i < featureNames.size(); ++i) {
auto row_feature = X.index({ i, "..." });
}
completeFit(states, weights);
completeFit(states, X.size(0), weights);
}
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className;
this->samples = samples;
completeFit(states, weights);
completeFit(states, samples.size(1), weights);
}
// input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
@ -185,16 +186,17 @@ namespace bayesnet {
samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
}
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(states, weights);
completeFit(states, input_data[0].size(), weights);
}
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const int n_samples, const torch::Tensor& weights)
{
setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
std::vector<std::thread> threads;
for (auto& node : nodes) {
threads.emplace_back([this, &node, &weights]() {
node.second->computeCPT(samples, features, laplaceSmoothing, weights);
threads.emplace_back([this, &node, &weights, n_samples]() {
auto numStates = node.second->getNumStates();
double smoothing_factor = smoothing == Smoothing_t::CESTNIK ? static_cast<double>(n_samples) / numStates : 1.0 / static_cast<double>(n_samples);
node.second->computeCPT(samples, features, smoothing_factor, weights);
});
}
for (auto& thread : threads) {
@ -337,7 +339,7 @@ namespace bayesnet {
thread.join();
}
// Normalize result
double sum = accumulate(result.begin(), result.end(), 0.0);
double sum = std::accumulate(result.begin(), result.end(), 0.0);
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result;
}

View File

@ -12,6 +12,10 @@
#include "Node.h"
namespace bayesnet {
enum class Smoothing_t {
LAPLACE,
CESTNIK
};
class Network {
public:
Network();
@ -54,15 +58,15 @@ namespace bayesnet {
int classNumStates;
std::vector<std::string> features; // Including classname
std::string className;
double laplaceSmoothing;
Smoothing_t smoothing;
torch::Tensor samples; // n+1xm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
std::vector<double> predict_sample(const std::vector<int>&);
std::vector<double> predict_sample(const torch::Tensor&);
std::vector<double> exactInference(std::map<std::string, int>&);
double computeFactor(std::map<std::string, int>&);
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void completeFit(const std::map<std::string, std::vector<int>>& states, const int n_samples, const torch::Tensor& weights);
void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void setStates(const std::map<std::string, std::vector<int>>&);
};
}

View File

@ -90,14 +90,14 @@ namespace bayesnet {
}
return result;
}
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
{
dimensions.clear();
// Get dimensions of the CPT
dimensions.push_back(numStates);
transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
// Create a tensor of zeros with the dimensions of the CPT
cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
cpTable = torch::zeros(dimensions, torch::kFloat) + smoothing;
// Fill table with counts
auto pos = find(features.begin(), features.end(), name);
if (pos == features.end()) {

View File

@ -23,7 +23,7 @@ namespace bayesnet {
std::vector<Node*>& getParents();
std::vector<Node*>& getChildren();
torch::Tensor& getCPT();
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights);
int getNumStates() const;
void setNumStates(int);
unsigned minFill();

@ -1 +1 @@
Subproject commit 236d1b2f8be185039493fe7fce04a83e02ed72e5
Subproject commit c4e6c041fe7f769ec24c0a2bd66a5aff482fd630

View File

@ -20,7 +20,7 @@
#include "bayesnet/ensembles/BoostAODE.h"
#include "TestUtils.h"
const std::string ACTUAL_VERSION = "1.0.5.1";
const std::string ACTUAL_VERSION = "1.0.6";
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
{

View File

@ -16,7 +16,7 @@
#include "TestUtils.h"
std::map<std::string, std::string> modules = {
{ "mdlp", "1.1.2" },
{ "mdlp", "1.2.0" },
{ "Folding", "1.1.0" },
{ "json", "3.11" },
{ "ArffFiles", "1.0.0" }

@ -1 +1 @@
Subproject commit 40ac38011a2445e00df8a18048c67abaff16fa59
Subproject commit dbefa02d9c0ca0f029f77e744cd80cb0150725c8