commit inicial

This commit is contained in:
2025-04-30 11:11:49 +02:00
commit e144d65e11
121 changed files with 53649 additions and 0 deletions

506
bayesnet/network/Network.cc Normal file
View File

@@ -0,0 +1,506 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <thread>
#include <sstream>
#include <numeric>
#include <algorithm>
#include "Network.h"
#include "bayesnet/utils/bayesnetUtils.h"
#include "bayesnet/utils/CountingSemaphore.h"
#include <pthread.h>
#include <fstream>
namespace bayesnet {
Network::Network() : fitted{ false }, classNumStates{ 0 }
{
}
Network::Network(const Network& other) : features(other.features), className(other.className), classNumStates(other.getClassNumStates()),
fitted(other.fitted), samples(other.samples)
{
if (samples.defined())
samples = samples.clone();
for (const auto& node : other.nodes) {
nodes[node.first] = std::make_unique<Node>(*node.second);
}
}
void Network::initialize()
{
features.clear();
className = "";
classNumStates = 0;
fitted = false;
nodes.clear();
samples = torch::Tensor();
}
torch::Tensor& Network::getSamples()
{
return samples;
}
void Network::addNode(const std::string& name)
{
if (fitted) {
throw std::invalid_argument("Cannot add node to a fitted network. Initialize first.");
}
if (name == "") {
throw std::invalid_argument("Node name cannot be empty");
}
if (nodes.find(name) != nodes.end()) {
return;
}
if (find(features.begin(), features.end(), name) == features.end()) {
features.push_back(name);
}
nodes[name] = std::make_unique<Node>(name);
}
std::vector<std::string> Network::getFeatures() const
{
return features;
}
int Network::getClassNumStates() const
{
return classNumStates;
}
int Network::getStates() const
{
int result = 0;
for (auto& node : nodes) {
result += node.second->getNumStates();
}
return result;
}
std::string Network::getClassName() const
{
return className;
}
bool Network::isCyclic(const std::string& nodeId, std::unordered_set<std::string>& visited, std::unordered_set<std::string>& recStack)
{
if (visited.find(nodeId) == visited.end()) // if node hasn't been visited yet
{
visited.insert(nodeId);
recStack.insert(nodeId);
for (Node* child : nodes[nodeId]->getChildren()) {
if (visited.find(child->getName()) == visited.end() && isCyclic(child->getName(), visited, recStack))
return true;
if (recStack.find(child->getName()) != recStack.end())
return true;
}
}
recStack.erase(nodeId); // remove node from recursion stack before function ends
return false;
}
void Network::addEdge(const std::string& parent, const std::string& child)
{
if (fitted) {
throw std::invalid_argument("Cannot add edge to a fitted network. Initialize first.");
}
if (nodes.find(parent) == nodes.end()) {
throw std::invalid_argument("Parent node " + parent + " does not exist");
}
if (nodes.find(child) == nodes.end()) {
throw std::invalid_argument("Child node " + child + " does not exist");
}
// Check if the edge is already in the graph
for (auto& node : nodes[parent]->getChildren()) {
if (node->getName() == child) {
throw std::invalid_argument("Edge " + parent + " -> " + child + " already exists");
}
}
// Temporarily add edge to check for cycles
nodes[parent]->addChild(nodes[child].get());
nodes[child]->addParent(nodes[parent].get());
std::unordered_set<std::string> visited;
std::unordered_set<std::string> recStack;
if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle
{
// remove problematic edge
nodes[parent]->removeChild(nodes[child].get());
nodes[child]->removeParent(nodes[parent].get());
throw std::invalid_argument("Adding this edge forms a cycle in the graph.");
}
}
std::map<std::string, std::unique_ptr<Node>>& Network::getNodes()
{
return nodes;
}
void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
if (weights.size(0) != n_samples) {
throw std::invalid_argument("Weights (" + std::to_string(weights.size(0)) + ") must have the same number of elements as samples (" + std::to_string(n_samples) + ") in Network::fit");
}
if (n_samples != n_samples_y) {
throw std::invalid_argument("X and y must have the same number of samples in Network::fit (" + std::to_string(n_samples) + " != " + std::to_string(n_samples_y) + ")");
}
if (n_features != featureNames.size()) {
throw std::invalid_argument("X and features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(featureNames.size()) + ")");
}
if (features.size() == 0) {
throw std::invalid_argument("The network has not been initialized. You must call addNode() before calling fit()");
}
if (n_features != features.size() - 1) {
throw std::invalid_argument("X and local features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(features.size() - 1) + ")");
}
if (find(features.begin(), features.end(), className) == features.end()) {
throw std::invalid_argument("Class Name not found in Network::features");
}
for (auto& feature : featureNames) {
if (find(features.begin(), features.end(), feature) == features.end()) {
throw std::invalid_argument("Feature " + feature + " not found in Network::features");
}
if (states.find(feature) == states.end()) {
throw std::invalid_argument("Feature " + feature + " not found in states");
}
}
}
void Network::setStates(const std::map<std::string, std::vector<int>>& states)
{
// Set states to every Node in the network
for_each(features.begin(), features.end(), [this, &states](const std::string& feature) {
nodes.at(feature)->setNumStates(states.at(feature).size());
});
classNumStates = nodes.at(className)->getNumStates();
}
// X comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
this->className = className;
torch::Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X , ytmp }, 0);
for (int i = 0; i < featureNames.size(); ++i) {
auto row_feature = X.index({ i, "..." });
}
completeFit(states, weights, smoothing);
}
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className;
this->samples = samples;
completeFit(states, weights, smoothing);
}
// input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
{
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
this->className = className;
// Build tensor of samples (nxm) (n+1 because of the class)
samples = torch::zeros({ static_cast<int>(input_data.size() + 1), static_cast<int>(input_data[0].size()) }, torch::kInt32);
for (int i = 0; i < featureNames.size(); ++i) {
samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
}
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(states, weights, smoothing);
}
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
{
setStates(states);
std::vector<std::thread> threads;
auto& semaphore = CountingSemaphore::getInstance();
const double n_samples = static_cast<double>(samples.size(1));
auto worker = [&](std::pair<const std::string, std::unique_ptr<Node>>& node, int i) {
std::string threadName = "FitWorker-" + std::to_string(i);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
double numStates = static_cast<double>(node.second->getNumStates());
double smoothing_factor;
switch (smoothing) {
case Smoothing_t::ORIGINAL:
smoothing_factor = 1.0 / n_samples;
break;
case Smoothing_t::LAPLACE:
smoothing_factor = 1.0;
break;
case Smoothing_t::CESTNIK:
smoothing_factor = 1 / numStates;
break;
default:
smoothing_factor = 0.0; // No smoothing
}
node.second->computeCPT(samples, features, smoothing_factor, weights);
semaphore.release();
};
int i = 0;
for (auto& node : nodes) {
semaphore.acquire();
threads.emplace_back(worker, std::ref(node), i++);
}
for (auto& thread : threads) {
thread.join();
}
fitted = true;
}
torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
{
if (!fitted) {
throw std::logic_error("You must call fit() before calling predict()");
}
// Ensure the sample size is equal to the number of features
if (samples.size(0) != features.size() - 1) {
throw std::invalid_argument("(T) Sample size (" + std::to_string(samples.size(0)) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
torch::Tensor result;
std::vector<std::thread> threads;
std::mutex mtx;
auto& semaphore = CountingSemaphore::getInstance();
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
auto worker = [&](const torch::Tensor& sample, int i) {
std::string threadName = "PredictWorker-" + std::to_string(i);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
auto psample = predict_sample(sample);
auto temp = torch::tensor(psample, torch::kFloat64);
{
std::lock_guard<std::mutex> lock(mtx);
result.index_put_({ i, "..." }, temp);
}
semaphore.release();
};
for (int i = 0; i < samples.size(1); ++i) {
semaphore.acquire();
const torch::Tensor sample = samples.index({ "...", i });
threads.emplace_back(worker, sample, i);
}
for (auto& thread : threads) {
thread.join();
}
if (proba)
return result;
return result.argmax(1);
}
// Return mxn tensor of probabilities
torch::Tensor Network::predict_proba(const torch::Tensor& samples)
{
return predict_tensor(samples, true);
}
// Return mxn tensor of probabilities
torch::Tensor Network::predict(const torch::Tensor& samples)
{
return predict_tensor(samples, false);
}
// Return mx1 std::vector of predictions
// tsamples is nxm std::vector of samples
std::vector<int> Network::predict(const std::vector<std::vector<int>>& tsamples)
{
if (!fitted) {
throw std::logic_error("You must call fit() before calling predict()");
}
// Ensure the sample size is equal to the number of features
if (tsamples.size() != features.size() - 1) {
throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::vector<int> predictions(tsamples[0].size(), 0);
std::vector<int> sample;
std::vector<std::thread> threads;
auto& semaphore = CountingSemaphore::getInstance();
auto worker = [&](const std::vector<int>& sample, const int row, int& prediction) {
std::string threadName = "(V)PWorker-" + std::to_string(row);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
auto classProbabilities = predict_sample(sample);
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
int predictedClass = distance(classProbabilities.begin(), maxElem);
prediction = predictedClass;
semaphore.release();
};
for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear();
for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]);
}
semaphore.acquire();
threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
}
for (auto& thread : threads) {
thread.join();
}
return predictions;
}
// Return mxn std::vector of probabilities
// tsamples is nxm std::vector of samples
std::vector<std::vector<double>> Network::predict_proba(const std::vector<std::vector<int>>& tsamples)
{
if (!fitted) {
throw std::logic_error("You must call fit() before calling predict_proba()");
}
// Ensure the sample size is equal to the number of features
if (tsamples.size() != features.size() - 1) {
throw std::invalid_argument("(V) Sample size (" + std::to_string(tsamples.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::vector<std::vector<double>> predictions(tsamples[0].size(), std::vector<double>(classNumStates, 0.0));
std::vector<int> sample;
std::vector<std::thread> threads;
auto& semaphore = CountingSemaphore::getInstance();
auto worker = [&](const std::vector<int>& sample, int row, std::vector<double>& predictions) {
std::string threadName = "(V)PWorker-" + std::to_string(row);
#if defined(__linux__)
pthread_setname_np(pthread_self(), threadName.c_str());
#else
pthread_setname_np(threadName.c_str());
#endif
std::vector<double> classProbabilities = predict_sample(sample);
predictions = classProbabilities;
semaphore.release();
};
for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear();
for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]);
}
semaphore.acquire();
threads.emplace_back(worker, sample, row, std::ref(predictions[row]));
}
for (auto& thread : threads) {
thread.join();
}
return predictions;
}
double Network::score(const std::vector<std::vector<int>>& tsamples, const std::vector<int>& labels)
{
std::vector<int> y_pred = predict(tsamples);
int correct = 0;
for (int i = 0; i < y_pred.size(); ++i) {
if (y_pred[i] == labels[i]) {
correct++;
}
}
return (double)correct / y_pred.size();
}
// Return 1xn std::vector of probabilities
std::vector<double> Network::predict_sample(const std::vector<int>& sample)
{
std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(); ++i) {
evidence[features[i]] = sample[i];
}
return exactInference(evidence);
}
// Return 1xn std::vector of probabilities
std::vector<double> Network::predict_sample(const torch::Tensor& sample)
{
std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(0); ++i) {
evidence[features[i]] = sample[i].item<int>();
}
return exactInference(evidence);
}
std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
{
std::vector<double> result(classNumStates, 0.0);
auto completeEvidence = std::map<std::string, int>(evidence);
for (int i = 0; i < classNumStates; ++i) {
completeEvidence[getClassName()] = i;
double partial = 1.0;
for (auto& node : getNodes()) {
partial *= node.second->getFactorValue(completeEvidence);
}
result[i] = partial;
}
// Normalize result
double sum = std::accumulate(result.begin(), result.end(), 0.0);
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result;
}
std::vector<std::string> Network::show() const
{
std::vector<std::string> result;
// Draw the network
for (auto& node : nodes) {
std::string line = node.first + " -> ";
for (auto child : node.second->getChildren()) {
line += child->getName() + ", ";
}
result.push_back(line);
}
return result;
}
std::vector<std::string> Network::graph(const std::string& title) const
{
auto output = std::vector<std::string>();
auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
auto suffix = ">\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n";
std::string header = prefix + title + suffix;
output.push_back(header);
for (auto& node : nodes) {
auto result = node.second->graph(className);
output.insert(output.end(), result.begin(), result.end());
}
output.push_back("}\n");
return output;
}
std::vector<std::pair<std::string, std::string>> Network::getEdges() const
{
auto edges = std::vector<std::pair<std::string, std::string>>();
for (const auto& node : nodes) {
auto head = node.first;
for (const auto& child : node.second->getChildren()) {
auto tail = child->getName();
edges.push_back({ head, tail });
}
}
return edges;
}
int Network::getNumEdges() const
{
return getEdges().size();
}
std::vector<std::string> Network::topological_sort()
{
/* Check if al the fathers of every node are before the node */
auto result = features;
result.erase(remove(result.begin(), result.end(), className), result.end());
bool ending{ false };
while (!ending) {
ending = true;
for (auto feature : features) {
auto fathers = nodes[feature]->getParents();
for (const auto& father : fathers) {
auto fatherName = father->getName();
if (fatherName == className) {
continue;
}
// Check if father is placed before the actual feature
auto it = find(result.begin(), result.end(), fatherName);
if (it != result.end()) {
auto it2 = find(result.begin(), result.end(), feature);
if (it2 != result.end()) {
if (distance(it, it2) < 0) {
// if it is not, insert it before the feature
result.erase(remove(result.begin(), result.end(), fatherName), result.end());
result.insert(it2, fatherName);
ending = false;
}
}
}
}
}
}
return result;
}
std::string Network::dump_cpt() const
{
std::stringstream oss;
for (auto& node : nodes) {
oss << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
oss << node.second->getCPT() << std::endl;
}
return oss.str();
}
}

View File

@@ -0,0 +1,66 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef NETWORK_H
#define NETWORK_H
#include <map>
#include <vector>
#include "bayesnet/config.h"
#include "Node.h"
#include "Smoothing.h"
namespace bayesnet {
class Network {
public:
Network();
explicit Network(const Network&);
~Network() = default;
torch::Tensor& getSamples();
void addNode(const std::string&);
void addEdge(const std::string&, const std::string&);
std::map<std::string, std::unique_ptr<Node>>& getNodes();
std::vector<std::string> getFeatures() const;
int getStates() const;
std::vector<std::pair<std::string, std::string>> getEdges() const;
int getNumEdges() const;
int getClassNumStates() const;
std::string getClassName() const;
/*
Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
*/
void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing);
std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
std::vector<std::vector<double>> predict_proba(const std::vector<std::vector<int>>&); // Return mxn std::vector of probabilities
torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
double score(const std::vector<std::vector<int>>&, const std::vector<int>&);
std::vector<std::string> topological_sort();
std::vector<std::string> show() const;
std::vector<std::string> graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format
void initialize();
std::string dump_cpt() const;
inline std::string version() { return { project_version.begin(), project_version.end() }; }
private:
std::map<std::string, std::unique_ptr<Node>> nodes;
bool fitted;
int classNumStates;
std::vector<std::string> features; // Including classname
std::string className;
torch::Tensor samples; // n+1xm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
std::vector<double> predict_sample(const std::vector<int>&);
std::vector<double> predict_sample(const torch::Tensor&);
std::vector<double> exactInference(std::map<std::string, int>&);
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void setStates(const std::map<std::string, std::vector<int>>&);
};
}
#endif

149
bayesnet/network/Node.cc Normal file
View File

@@ -0,0 +1,149 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include "Node.h"
namespace bayesnet {
Node::Node(const std::string& name)
: name(name)
{
}
void Node::clear()
{
parents.clear();
children.clear();
cpTable = torch::Tensor();
dimensions.clear();
numStates = 0;
}
std::string Node::getName() const
{
return name;
}
void Node::addParent(Node* parent)
{
parents.push_back(parent);
}
void Node::removeParent(Node* parent)
{
parents.erase(std::remove(parents.begin(), parents.end(), parent), parents.end());
}
void Node::removeChild(Node* child)
{
children.erase(std::remove(children.begin(), children.end(), child), children.end());
}
void Node::addChild(Node* child)
{
children.push_back(child);
}
std::vector<Node*>& Node::getParents()
{
return parents;
}
std::vector<Node*>& Node::getChildren()
{
return children;
}
int Node::getNumStates() const
{
return numStates;
}
void Node::setNumStates(int numStates)
{
this->numStates = numStates;
}
torch::Tensor& Node::getCPT()
{
return cpTable;
}
/*
The MinFill criterion is a heuristic for variable elimination.
The variable that minimizes the number of edges that need to be added to the graph to make it triangulated.
This is done by counting the number of edges that need to be added to the graph if the variable is eliminated.
The variable with the minimum number of edges is chosen.
Here this is done computing the length of the combinations of the node neighbors taken 2 by 2.
*/
unsigned Node::minFill()
{
std::unordered_set<std::string> neighbors;
for (auto child : children) {
neighbors.emplace(child->getName());
}
for (auto parent : parents) {
neighbors.emplace(parent->getName());
}
auto source = std::vector<std::string>(neighbors.begin(), neighbors.end());
return combinations(source).size();
}
std::vector<std::pair<std::string, std::string>> Node::combinations(const std::vector<std::string>& source)
{
std::vector<std::pair<std::string, std::string>> result;
for (int i = 0; i < source.size(); ++i) {
std::string temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
}
}
return result;
}
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights)
{
dimensions.clear();
dimensions.reserve(parents.size() + 1);
// Get dimensions of the CPT
dimensions.push_back(numStates);
for (const auto& parent : parents) {
dimensions.push_back(parent->getNumStates());
}
//transform(parents.begin(), parents.end(), back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
// Create a tensor initialized with smoothing
cpTable = torch::full(dimensions, smoothing, torch::kDouble);
// Create a map for quick feature index lookup
std::unordered_map<std::string, int> featureIndexMap;
for (size_t i = 0; i < features.size(); ++i) {
featureIndexMap[features[i]] = i;
}
// Fill table with counts
// Get the index of this node's feature
int name_index = featureIndexMap[name];
// Get parent indices in dataset
std::vector<int> parent_indices;
parent_indices.reserve(parents.size());
for (const auto& parent : parents) {
parent_indices.push_back(featureIndexMap[parent->getName()]);
}
c10::List<c10::optional<at::Tensor>> coordinates;
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
coordinates.clear();
auto sample = dataset.index({ "...", n_sample });
coordinates.push_back(sample[name_index]);
for (size_t i = 0; i < parent_indices.size(); ++i) {
coordinates.push_back(sample[parent_indices[i]]);
}
// Increment the count of the corresponding coordinate
cpTable.index_put_({ coordinates }, weights.index({ n_sample }), true);
}
// Normalize the counts (dividing each row by the sum of the row)
cpTable /= cpTable.sum(0, true);
}
double Node::getFactorValue(std::map<std::string, int>& evidence)
{
c10::List<c10::optional<at::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(at::tensor(evidence[name]));
transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
return cpTable.index({ coordinates }).item<double>();
}
std::vector<std::string> Node::graph(const std::string& className)
{
auto output = std::vector<std::string>();
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
output.push_back("\"" + name + "\" [shape=circle" + suffix + "] \n");
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return "\"" + name + "\" -> \"" + child->getName() + "\""; });
return output;
}
}

42
bayesnet/network/Node.h Normal file
View File

@@ -0,0 +1,42 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef NODE_H
#define NODE_H
#include <unordered_set>
#include <vector>
#include <string>
#include <torch/torch.h>
namespace bayesnet {
class Node {
public:
explicit Node(const std::string&);
void clear();
void addParent(Node*);
void addChild(Node*);
void removeParent(Node*);
void removeChild(Node*);
std::string getName() const;
std::vector<Node*>& getParents();
std::vector<Node*>& getChildren();
torch::Tensor& getCPT();
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double smoothing, const torch::Tensor& weights);
int getNumStates() const;
void setNumStates(int);
unsigned minFill();
std::vector<std::string> graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format
double getFactorValue(std::map<std::string, int>&);
private:
std::string name;
std::vector<Node*> parents;
std::vector<Node*> children;
int numStates = 0; // number of states of the variable
torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
std::vector<int64_t> dimensions; // dimensions of the cpTable
std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
};
}
#endif

View File

@@ -0,0 +1,17 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#ifndef SMOOTHING_H
#define SMOOTHING_H
namespace bayesnet {
enum class Smoothing_t {
NONE = -1,
ORIGINAL = 0,
LAPLACE,
CESTNIK
};
}
#endif // SMOOTHING_H