Refactor library structure

2024-03-08 22:20:54 +01:00
parent 1231f4522a
commit 635ef22520
56 changed files with 64 additions and 68 deletions
--- a/bayesnet/network/Network.cc
+++ b/bayesnet/network/Network.cc
@@ -0,0 +1,414 @@
+#include <algorithm>
+#include <exception>
+#include <iostream>
+#include <mutex>
+#include <numeric>
+#include <stdexcept>
+#include <thread>
+#include "Network.h"
+#include "bayesnet/utils/bayesnetUtils.h"
+namespace bayesnet {
+    // Builds an empty, unfitted network: no features, empty class name, zero class states and zero Laplace smoothing.
+    Network::Network()
+        : fitted(false), classNumStates(0), features(), className(), laplaceSmoothing(0)
+    {
+    }
+    // Builds an empty, unfitted network and stores maxT as the maxThreads setting.
+    Network::Network(float maxT)
+        : fitted(false), maxThreads(maxT), classNumStates(0), features(), className(), laplaceSmoothing(0)
+    {
+    }
+    /*
+     Copy constructor: duplicates the scalar settings, the feature list and every node of `other`.
+     The samples tensor is not copied (it is left default-constructed).
+    */
+    Network::Network(Network& other)
+        : fitted(other.fitted), maxThreads(other.getmaxThreads()), classNumStates(other.getClassNumStates()),
+        features(other.features), className(other.className), laplaceSmoothing(other.laplaceSmoothing)
+    {
+        for (const auto& node : other.nodes) {
+            nodes[node.first] = std::make_unique<Node>(*node.second);
+        }
+        // A copied Node still holds parent/child pointers to the nodes owned by `other`.
+        // Re-point every link to the node with the same name owned by this network, so the
+        // copy does not dangle once `other` is modified or destroyed.
+        for (auto& node : nodes) {
+            for (auto& parent : node.second->getParents()) {
+                parent = nodes.at(parent->getName()).get();
+            }
+            for (auto& child : node.second->getChildren()) {
+                child = nodes.at(child->getName()).get();
+            }
+        }
+    }
+    // Reset the network to an empty, unfitted state: nodes, features, class information and samples are dropped.
+    // maxThreads and laplaceSmoothing are left untouched.
+    void Network::initialize()
+    {
+        nodes.clear();
+        features.clear();
+        className.clear();
+        classNumStates = 0;
+        fitted = false;
+        samples = torch::Tensor();
+    }
+    // Accessor for the maxThreads setting.
+    float Network::getmaxThreads()
+    {
+        return this->maxThreads;
+    }
+    // Gives mutable access to the samples tensor stored by fit().
+    torch::Tensor& Network::getSamples()
+    {
+        return this->samples;
+    }
+    // Register a node called `name`. An already registered name is ignored.
+    // The name is also appended to the feature list when it is not there yet.
+    // Throws std::invalid_argument when the name is empty.
+    void Network::addNode(const std::string& name)
+    {
+        if (name.empty()) {
+            throw std::invalid_argument("Node name cannot be empty");
+        }
+        const bool alreadyRegistered = nodes.find(name) != nodes.end();
+        if (alreadyRegistered) {
+            return;
+        }
+        const bool knownFeature = std::find(features.begin(), features.end(), name) != features.end();
+        if (!knownFeature) {
+            features.push_back(name);
+        }
+        nodes[name] = std::make_unique<Node>(name);
+    }
+    // Returns a copy of the feature names (the class name is part of this list).
+    std::vector<std::string> Network::getFeatures() const
+    {
+        return this->features;
+    }
+    // Number of states of the class node, as recorded by setStates().
+    int Network::getClassNumStates() const
+    {
+        return this->classNumStates;
+    }
+    // Sum of the number of states of every node in the network.
+    int Network::getStates() const
+    {
+        int total = 0;
+        for (const auto& entry : nodes) {
+            total += entry.second->getNumStates();
+        }
+        return total;
+    }
+    // Returns a copy of the class variable name set by fit().
+    std::string Network::getClassName() const
+    {
+        return this->className;
+    }
+    // Depth-first search from nodeId following child links.
+    // `visited` collects every node reached so far; `recStack` holds the nodes on the current DFS path.
+    // Returns true as soon as a child is found that is still on the current path.
+    bool Network::isCyclic(const std::string& nodeId, std::unordered_set<std::string>& visited, std::unordered_set<std::string>& recStack)
+    {
+        if (visited.count(nodeId) == 0) {
+            // First time this node is reached: mark it and put it on the path
+            visited.insert(nodeId);
+            recStack.insert(nodeId);
+            for (Node* child : nodes[nodeId]->getChildren()) {
+                const std::string childName = child->getName();
+                const bool unseen = visited.count(childName) == 0;
+                if (unseen && isCyclic(childName, visited, recStack)) {
+                    return true;
+                }
+                if (recStack.count(childName) != 0) {
+                    return true;
+                }
+            }
+        }
+        // The node leaves the current path before returning
+        recStack.erase(nodeId);
+        return false;
+    }
+    // Add the directed edge parent -> child.
+    // Throws std::invalid_argument when either node is unknown or when the edge would close a cycle
+    // (in that case the edge is removed again before throwing).
+    void Network::addEdge(const std::string& parent, const std::string& child)
+    {
+        const auto parentIt = nodes.find(parent);
+        if (parentIt == nodes.end()) {
+            throw std::invalid_argument("Parent node " + parent + " does not exist");
+        }
+        const auto childIt = nodes.find(child);
+        if (childIt == nodes.end()) {
+            throw std::invalid_argument("Child node " + child + " does not exist");
+        }
+        Node* parentNode = parentIt->second.get();
+        Node* childNode = childIt->second.get();
+        // Link both ends first, then verify that the graph is still acyclic
+        parentNode->addChild(childNode);
+        childNode->addParent(parentNode);
+        std::unordered_set<std::string> visited;
+        std::unordered_set<std::string> recStack;
+        const bool formsCycle = isCyclic(childNode->getName(), visited, recStack);
+        if (formsCycle) {
+            // Undo the tentative link
+            parentNode->removeChild(childNode);
+            childNode->removeParent(parentNode);
+            throw std::invalid_argument("Adding this edge forms a cycle in the graph.");
+        }
+    }
+    // Gives mutable access to the name -> node map owned by the network.
+    std::map<std::string, std::unique_ptr<Node>>& Network::getNodes()
+    {
+        return this->nodes;
+    }
+    // Validate the arguments received by fit() against each other and against the features registered in the network.
+    // Throws std::invalid_argument describing the first inconsistency found.
+    void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
+    {
+        // Membership test on the features registered in the network (class node included)
+        const auto isRegistered = [this](const std::string& candidate) {
+            return std::find(features.begin(), features.end(), candidate) != features.end();
+            };
+        const auto n_weights = weights.size(0);
+        if (n_weights != n_samples) {
+            throw std::invalid_argument("Weights (" + std::to_string(n_weights) + ") must have the same number of elements as samples (" + std::to_string(n_samples) + ") in Network::fit");
+        }
+        if (n_samples != n_samples_y) {
+            throw std::invalid_argument("X and y must have the same number of samples in Network::fit (" + std::to_string(n_samples) + " != " + std::to_string(n_samples_y) + ")");
+        }
+        const auto n_names = featureNames.size();
+        if (n_features != n_names) {
+            throw std::invalid_argument("X and features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(n_names) + ")");
+        }
+        // The local feature list also contains the class, hence the -1
+        const auto n_local = features.size() - 1;
+        if (n_features != n_local) {
+            throw std::invalid_argument("X and local features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(n_local) + ")");
+        }
+        if (!isRegistered(className)) {
+            throw std::invalid_argument("className not found in Network::features");
+        }
+        for (const auto& feature : featureNames) {
+            if (!isRegistered(feature)) {
+                throw std::invalid_argument("Feature " + feature + " not found in Network::features");
+            }
+            if (states.count(feature) == 0) {
+                throw std::invalid_argument("Feature " + feature + " not found in states");
+            }
+        }
+    }
+    // Copy the number of states of every feature (class included) from `states` into its node,
+    // then cache the number of states of the class node.
+    // std::map::at / nodes.at throw std::out_of_range when a feature is missing.
+    void Network::setStates(const std::map<std::string, std::vector<int>>& states)
+    {
+        for (const auto& feature : features) {
+            nodes.at(feature)->setNumStates(states.at(feature).size());
+        }
+        classNumStates = nodes.at(className)->getNumStates();
+    }
+    // Fit the network from a feature tensor and a label tensor.
+    // X comes in nxm, where n is the number of features and m the number of samples; y holds the m labels.
+    // The stored samples tensor is X with y appended as its last row.
+    // Throws std::invalid_argument (from checkFitData) when the arguments are inconsistent.
+    void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
+    {
+        checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
+        this->className = className;
+        // Turn y into a 1xm row so that it can be concatenated below X
+        torch::Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
+        samples = torch::cat({ X , ytmp }, 0);
+        completeFit(states, weights);
+    }
+    // Fit the network from a single tensor that already contains the labels:
+    // the number of features passed to checkFitData is samples.size(0) - 1.
+    void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
+    {
+        const auto n_samples = samples.size(1);
+        const auto n_features = samples.size(0) - 1;
+        checkFitData(n_samples, n_features, n_samples, featureNames, className, states, weights);
+        this->samples = samples;
+        this->className = className;
+        completeFit(states, weights);
+    }
+    // Fit the network from std::vector data.
+    // input_data comes in nxm, where n is the number of features and m the number of samples
+    // Throws std::invalid_argument when input_data is empty or when checkFitData rejects the arguments.
+    void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
+    {
+        // input_data[0] is read below, so an empty container must be rejected first
+        if (input_data.empty()) {
+            throw std::invalid_argument("input_data must contain at least one feature in Network::fit");
+        }
+        const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
+        checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
+        this->className = className;
+        // Build tensor of samples (nxm) (n+1 because of the class)
+        samples = torch::zeros({ static_cast<int>(input_data.size() + 1), static_cast<int>(input_data[0].size()) }, torch::kInt32);
+        const int n_features = static_cast<int>(featureNames.size());
+        for (int i = 0; i < n_features; ++i) {
+            samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
+        }
+        // Labels go in the last row
+        samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
+        completeFit(states, weights);
+    }
+    // Common tail of every fit() overload: set the node states, derive the Laplace smoothing
+    // from the number of samples and compute the CPT of every node, one thread per node.
+    // An exception thrown while computing a CPT is rethrown here once all threads have been joined;
+    // in that case `fitted` is not set.
+    void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
+    {
+        setStates(states);
+        laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
+        std::vector<std::thread> threads;
+        std::exception_ptr firstError; // first exception raised by any worker
+        std::mutex errorMutex;
+        for (auto& node : nodes) {
+            threads.emplace_back([this, &node, &weights, &firstError, &errorMutex]() {
+                try {
+                    node.second->computeCPT(samples, features, laplaceSmoothing, weights);
+                }
+                catch (...) {
+                    // An exception escaping a thread would call std::terminate: keep it for the caller instead
+                    std::lock_guard<std::mutex> lock(errorMutex);
+                    if (!firstError) {
+                        firstError = std::current_exception();
+                    }
+                }
+                });
+        }
+        for (auto& thread : threads) {
+            thread.join();
+        }
+        if (firstError) {
+            std::rethrow_exception(firstError);
+        }
+        fitted = true;
+    }
+    // Predict every column of `samples` (one sample per column).
+    // With proba == true the per-class probabilities are returned (one row per sample);
+    // otherwise the argmax over the class dimension is returned.
+    // Throws std::logic_error when the network has not been fitted.
+    torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
+    {
+        if (!fitted) {
+            throw std::logic_error("You must call fit() before calling predict()");
+        }
+        torch::Tensor result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
+        for (int i = 0; i < samples.size(1); ++i) {
+            const torch::Tensor column = samples.index({ "...", i });
+            const std::vector<double> probabilities = predict_sample(column);
+            const torch::Tensor row = torch::tensor(probabilities, torch::kFloat64);
+            result.index_put_({ i, "..." }, row);
+        }
+        if (!proba) {
+            return result.argmax(1);
+        }
+        return result;
+    }
+    // Return mxn tensor of probabilities (delegates to predict_tensor with proba = true)
+    torch::Tensor Network::predict_proba(const torch::Tensor& samples)
+    {
+        const bool wantProbabilities = true;
+        return predict_tensor(samples, wantProbabilities);
+    }
+
+    // Return mx1 tensor of predictions (delegates to predict_tensor with proba = false)
+    torch::Tensor Network::predict(const torch::Tensor& samples)
+    {
+        const bool wantProbabilities = false;
+        return predict_tensor(samples, wantProbabilities);
+    }
+
+    // Return mx1 std::vector of predictions
+    // tsamples is nxm std::vector of samples
+    // An empty tsamples yields an empty result.
+    // Throws std::logic_error when the network has not been fitted.
+    std::vector<int> Network::predict(const std::vector<std::vector<int>>& tsamples)
+    {
+        if (!fitted) {
+            throw std::logic_error("You must call fit() before calling predict()");
+        }
+        std::vector<int> predictions;
+        // tsamples[0] is read below, so there is nothing to predict without at least one feature row
+        if (tsamples.empty()) {
+            return predictions;
+        }
+        const size_t n_samples = tsamples[0].size();
+        predictions.reserve(n_samples);
+        std::vector<int> sample;
+        for (size_t row = 0; row < n_samples; ++row) {
+            // Gather the row-th value of every feature into one sample
+            sample.clear();
+            for (const auto& featureValues : tsamples) {
+                sample.push_back(featureValues[row]);
+            }
+            const std::vector<double> classProbabilities = predict_sample(sample);
+            // Find the class with the maximum posterior probability
+            const auto maxElem = std::max_element(classProbabilities.begin(), classProbabilities.end());
+            predictions.push_back(static_cast<int>(std::distance(classProbabilities.begin(), maxElem)));
+        }
+        return predictions;
+    }
+    // Return mxn std::vector of probabilities
+    // tsamples is nxm std::vector of samples
+    // An empty tsamples yields an empty result.
+    // Throws std::logic_error when the network has not been fitted.
+    std::vector<std::vector<double>> Network::predict_proba(const std::vector<std::vector<int>>& tsamples)
+    {
+        if (!fitted) {
+            throw std::logic_error("You must call fit() before calling predict_proba()");
+        }
+        std::vector<std::vector<double>> predictions;
+        // tsamples[0] is read below, so there is nothing to predict without at least one feature row
+        if (tsamples.empty()) {
+            return predictions;
+        }
+        const size_t n_samples = tsamples[0].size();
+        predictions.reserve(n_samples);
+        std::vector<int> sample;
+        for (size_t row = 0; row < n_samples; ++row) {
+            // Gather the row-th value of every feature into one sample
+            sample.clear();
+            for (const auto& featureValues : tsamples) {
+                sample.push_back(featureValues[row]);
+            }
+            predictions.push_back(predict_sample(sample));
+        }
+        return predictions;
+    }
+    // Fraction of samples in tsamples whose predicted class equals the matching entry of labels.
+    // Throws std::invalid_argument when labels has fewer elements than there are predictions.
+    double Network::score(const std::vector<std::vector<int>>& tsamples, const std::vector<int>& labels)
+    {
+        const std::vector<int> y_pred = predict(tsamples);
+        // labels[i] is read for every prediction, so a shorter labels vector would be read out of bounds
+        if (labels.size() < y_pred.size()) {
+            throw std::invalid_argument("labels (" + std::to_string(labels.size()) + ") must have at least as many elements as predictions (" + std::to_string(y_pred.size()) + ") in Network::score");
+        }
+        int correct = 0;
+        for (size_t i = 0; i < y_pred.size(); ++i) {
+            if (y_pred[i] == labels[i]) {
+                correct++;
+            }
+        }
+        return static_cast<double>(correct) / y_pred.size();
+    }
+    // Return 1xn std::vector of probabilities for one sample.
+    // The i-th value of the sample is used as evidence for the i-th entry of `features`.
+    // Throws std::invalid_argument when the sample does not hold features.size() - 1 values.
+    std::vector<double> Network::predict_sample(const std::vector<int>& sample)
+    {
+        const auto expected = features.size() - 1;
+        if (sample.size() != expected) {
+            throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) +
+                ") does not match the number of features (" + std::to_string(expected) + ")");
+        }
+        std::map<std::string, int> evidence;
+        int position = 0;
+        for (const int value : sample) {
+            evidence[features[position]] = value;
+            ++position;
+        }
+        return exactInference(evidence);
+    }
+    // Return 1xn std::vector of probabilities for one sample given as a tensor.
+    // The i-th element of the sample is used as evidence for the i-th entry of `features`.
+    // Throws std::invalid_argument when the sample does not hold features.size() - 1 elements.
+    std::vector<double> Network::predict_sample(const torch::Tensor& sample)
+    {
+        const auto n_values = sample.size(0);
+        const auto expected = features.size() - 1;
+        if (n_values != expected) {
+            throw std::invalid_argument("Sample size (" + std::to_string(n_values) +
+                ") does not match the number of features (" + std::to_string(expected) + ")");
+        }
+        std::map<std::string, int> evidence;
+        for (int i = 0; i < n_values; ++i) {
+            const int value = sample[i].item<int>();
+            evidence[features[i]] = value;
+        }
+        return exactInference(evidence);
+    }
+    // Product of the factor values of every node for the given complete evidence.
+    double Network::computeFactor(std::map<std::string, int>& completeEvidence)
+    {
+        double product = 1.0;
+        for (auto& entry : nodes) {
+            product *= entry.second->getFactorValue(completeEvidence);
+        }
+        return product;
+    }
+    // Compute, for every class state, the factor of the evidence completed with that state
+    // (one thread per state), then divide every value by the sum of all of them.
+    std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
+    {
+        std::vector<double> result(classNumStates, 0.0);
+        std::vector<std::thread> workers;
+        std::mutex mtx;
+        for (int state = 0; state < classNumStates; ++state) {
+            workers.emplace_back([this, &result, &evidence, state, &mtx]() {
+                // Each worker works on its own copy of the evidence
+                std::map<std::string, int> completeEvidence(evidence);
+                completeEvidence[getClassName()] = state;
+                const double factor = computeFactor(completeEvidence);
+                std::lock_guard<std::mutex> lock(mtx);
+                result[state] = factor;
+                });
+        }
+        for (auto& worker : workers) {
+            worker.join();
+        }
+        // Normalize result
+        const double sum = std::accumulate(result.begin(), result.end(), 0.0);
+        for (auto& value : result) {
+            value = value / sum;
+        }
+        return result;
+    }
+    // One text line per node: "<node> -> <child>, <child>, ...".
+    std::vector<std::string> Network::show() const
+    {
+        std::vector<std::string> lines;
+        for (const auto& entry : nodes) {
+            std::string line = entry.first;
+            line += " -> ";
+            for (const auto* child : entry.second->getChildren()) {
+                line += child->getName();
+                line += ", ";
+            }
+            lines.push_back(line);
+        }
+        return lines;
+    }
+    // Graphviz description of the network: a header carrying the title, the lines produced
+    // by every node's graph() and the closing brace.
+    std::vector<std::string> Network::graph(const std::string& title) const
+    {
+        std::string header = "digraph BayesNet {\nlabel=<BayesNet ";
+        header += title;
+        header += ">\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n";
+        std::vector<std::string> output;
+        output.push_back(header);
+        for (const auto& entry : nodes) {
+            const std::vector<std::string> nodeLines = entry.second->graph(className);
+            output.insert(output.end(), nodeLines.begin(), nodeLines.end());
+        }
+        output.push_back("}\n");
+        return output;
+    }
+    // List every edge of the network as a (parent name, child name) pair.
+    std::vector<std::pair<std::string, std::string>> Network::getEdges() const
+    {
+        std::vector<std::pair<std::string, std::string>> edges;
+        for (const auto& entry : nodes) {
+            const std::string& head = entry.first;
+            for (const auto* child : entry.second->getChildren()) {
+                edges.emplace_back(head, child->getName());
+            }
+        }
+        return edges;
+    }
+    // Number of edges in the network (size of the list built by getEdges()).
+    int Network::getNumEdges() const
+    {
+        const auto edges = getEdges();
+        return edges.size();
+    }
+    // Returns the feature names without the class, reordered by repeatedly moving a parent
+    // ("father") in front of any feature it currently follows, until a full pass makes no change.
+    // Throws std::logic_error when a feature that has a non-class parent, or such a parent, is missing from the working list.
+    std::vector<std::string> Network::topological_sort()
+    {
+        /* Check if all the fathers of every node are before the node */
+        auto result = features;
+        // The class is not part of the ordering
+        result.erase(remove(result.begin(), result.end(), className), result.end());
+        bool ending{ false };
+        while (!ending) {
+            // Assume this pass is the last one; any move below schedules another pass
+            ending = true;
+            for (auto feature : features) {
+                auto fathers = nodes[feature]->getParents();
+                for (const auto& father : fathers) {
+                    auto fatherName = father->getName();
+                    if (fatherName == className) {
+                        // The class was removed from result, so it imposes no ordering constraint
+                        continue;
+                    }
+                    // Check if father is placed before the actual feature
+                    auto it = find(result.begin(), result.end(), fatherName);
+                    if (it != result.end()) {
+                        auto it2 = find(result.begin(), result.end(), feature);
+                        if (it2 != result.end()) {
+                            // Negative distance: the father currently sits after the feature
+                            if (distance(it, it2) < 0) {
+                                // if it is not, insert it before the feature
+                                // (the father is erased from a position after it2, so it2 is still valid for the insert)
+                                result.erase(remove(result.begin(), result.end(), fatherName), result.end());
+                                result.insert(it2, fatherName);
+                                ending = false;
+                            }
+                        } else {
+                            throw std::logic_error("Error in topological sort because of node " + feature + " is not in result");
+                        }
+                    } else {
+                        throw std::logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
+                    }
+                }
+            }
+        }
+        return result;
+    }
+    // Print to std::cout, for every node, its name, number of states, CPT sizes and the CPT itself.
+    void Network::dump_cpt() const
+    {
+        for (const auto& entry : nodes) {
+            Node* current = entry.second.get();
+            std::cout << "* " << entry.first << ": (" << current->getNumStates() << ") : " << current->getCPT().sizes() << std::endl;
+            std::cout << current->getCPT() << std::endl;
+        }
+    }
+}
--- a/bayesnet/network/Network.h
+++ b/bayesnet/network/Network.h
@@ -0,0 +1,63 @@
+#ifndef NETWORK_H
+#define NETWORK_H
+#include <map>
+#include <vector>
+#include "bayesnet/config.h"
+#include "Node.h"
+
+namespace bayesnet {
+    // Bayesian network over discrete variables: owns the nodes, the feature list (class included)
+    // and the samples used to fit the conditional probability tables.
+    class Network {
+    public:
+        Network();
+        explicit Network(float);
+        explicit Network(Network&);
+        ~Network() = default;
+        torch::Tensor& getSamples();
+        float getmaxThreads();
+        void addNode(const std::string&);
+        // Adds the edge parent -> child; throws std::invalid_argument on unknown nodes or cycles
+        void addEdge(const std::string&, const std::string&);
+        std::map<std::string, std::unique_ptr<Node>>& getNodes();
+        std::vector<std::string> getFeatures() const;
+        int getStates() const; // Sum of the number of states of every node
+        std::vector<std::pair<std::string, std::string>> getEdges() const; // (parent, child) pairs
+        int getNumEdges() const;
+        int getClassNumStates() const;
+        std::string getClassName() const;
+        /*
+        Notice: Nodes have to be inserted in the same order as they are in the dataset, i.e., first node is first column and so on.
+        */
+        void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
+        void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
+        void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
+        std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
+        torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
+        torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
+        std::vector<std::vector<double>> predict_proba(const std::vector<std::vector<int>>&); // Return mxn std::vector of probabilities
+        torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
+        double score(const std::vector<std::vector<int>>&, const std::vector<int>&);
+        std::vector<std::string> topological_sort();
+        std::vector<std::string> show() const;
+        std::vector<std::string> graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format
+        void initialize();
+        void dump_cpt() const;
+        inline std::string version() { return  { project_version.begin(), project_version.end() }; }
+    private:
+        std::map<std::string, std::unique_ptr<Node>> nodes;
+        bool fitted;
+        float maxThreads = 0.95;
+        int classNumStates;
+        std::vector<std::string> features; // Including classname
+        std::string className;
+        double laplaceSmoothing;
+        torch::Tensor samples; // nxm tensor used to fit the model
+        bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
+        std::vector<double> predict_sample(const std::vector<int>&);
+        std::vector<double> predict_sample(const torch::Tensor&);
+        std::vector<double> exactInference(std::map<std::string, int>&);
+        double computeFactor(std::map<std::string, int>&);
+        void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
+        // Parameter names follow the definition in Network.cc: the number of samples comes first, then the number of features
+        void checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
+        void setStates(const std::map<std::string, std::vector<int>>&);
+    };
+}
+#endif
--- a/bayesnet/network/Node.cc
+++ b/bayesnet/network/Node.cc
@@ -0,0 +1,135 @@
+#include "Node.h"
+
+namespace bayesnet {
+
+    // Creates an isolated node called `name`: no parents, no children, zero states and an empty CPT.
+    Node::Node(const std::string& name)
+        : name(name), parents(), children(), numStates(0), cpTable()
+    {
+    }
+    // Drop every link, the CPT and its dimensions, and reset the number of states. The name is kept.
+    void Node::clear()
+    {
+        numStates = 0;
+        dimensions.clear();
+        cpTable = torch::Tensor();
+        children.clear();
+        parents.clear();
+    }
+    // Returns a copy of the node name.
+    std::string Node::getName() const
+    {
+        return this->name;
+    }
+    // Append `parent` to the list of parents (no duplicate check; the pointer is not owned).
+    void Node::addParent(Node* parent)
+    {
+        parents.emplace_back(parent);
+    }
+    // Remove every occurrence of `parent` from the list of parents.
+    void Node::removeParent(Node* parent)
+    {
+        const auto newEnd = std::remove(parents.begin(), parents.end(), parent);
+        parents.erase(newEnd, parents.end());
+    }
+    // Remove every occurrence of `child` from the list of children.
+    void Node::removeChild(Node* child)
+    {
+        const auto newEnd = std::remove(children.begin(), children.end(), child);
+        children.erase(newEnd, children.end());
+    }
+    // Append `child` to the list of children (no duplicate check; the pointer is not owned).
+    void Node::addChild(Node* child)
+    {
+        children.emplace_back(child);
+    }
+    // Gives mutable access to the list of parent nodes.
+    std::vector<Node*>& Node::getParents()
+    {
+        return this->parents;
+    }
+    // Gives mutable access to the list of child nodes.
+    std::vector<Node*>& Node::getChildren()
+    {
+        return this->children;
+    }
+    // Number of states of the variable represented by this node.
+    int Node::getNumStates() const
+    {
+        return this->numStates;
+    }
+    // Store the number of states of the variable represented by this node.
+    void Node::setNumStates(int numStates)
+    {
+        Node::numStates = numStates;
+    }
+    // Gives mutable access to the conditional probability table of the node.
+    torch::Tensor& Node::getCPT()
+    {
+        return this->cpTable;
+    }
+    /*
+     The MinFill criterion is a heuristic for variable elimination.
+     The variable that minimizes the number of edges that need to be added to the graph to make it triangulated.
+     This is done by counting the number of edges that need to be added to the graph if the variable is eliminated.
+     The variable with the minimum number of edges is chosen.
+     Here this is done computing the number of combinations of the node neighbors taken 2 by 2,
+     i.e. n * (n - 1) / 2 for n distinct neighbor names, without building the pairs themselves.
+    */
+    unsigned Node::minFill()
+    {
+        // A set keeps each neighbor name once, even when a node is both parent and child
+        std::unordered_set<std::string> neighbors;
+        for (auto child : children) {
+            neighbors.emplace(child->getName());
+        }
+        for (auto parent : parents) {
+            neighbors.emplace(parent->getName());
+        }
+        const auto n = neighbors.size();
+        if (n < 2) {
+            return 0;
+        }
+        return static_cast<unsigned>(n * (n - 1) / 2);
+    }
+    // Every unordered pair (source[i], source[j]) with i < j, in index order.
+    std::vector<std::pair<std::string, std::string>> Node::combinations(const std::vector<std::string>& source)
+    {
+        std::vector<std::pair<std::string, std::string>> pairs;
+        for (auto first = source.begin(); first != source.end(); ++first) {
+            for (auto second = first + 1; second != source.end(); ++second) {
+                pairs.emplace_back(*first, *second);
+            }
+        }
+        return pairs;
+    }
+    // Build the conditional probability table of the node from `dataset`.
+    // `features` gives the name of each row of the dataset; columns are samples (the loop runs over dataset.size(1)).
+    // Each sample adds its weight to the cell addressed by (node value, 1st parent value, 2nd parent value, ...);
+    // every cell starts at laplaceSmoothing and the table is finally divided by its sum over dimension 0.
+    // Throws std::logic_error when the node or one of its parents is not listed in `features`.
+    void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
+    {
+        dimensions.clear();
+        // Get dimensions of the CPT
+        dimensions.push_back(numStates);
+        std::transform(parents.begin(), parents.end(), std::back_inserter(dimensions), [](const auto& parent) { return parent->getNumStates(); });
+
+        // Create a tensor of zeros with the dimensions of the CPT
+        cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
+        // Locate the dataset row of this node
+        const auto pos = std::find(features.begin(), features.end(), name);
+        if (pos == features.end()) {
+            throw std::logic_error("Feature " + name + " not found in dataset");
+        }
+        int name_index = pos - features.begin();
+        // Locate the dataset row of every parent once, instead of searching again for each sample
+        std::vector<int> parent_indices;
+        parent_indices.reserve(parents.size());
+        for (auto parent : parents) {
+            const auto parent_pos = std::find(features.begin(), features.end(), parent->getName());
+            if (parent_pos == features.end()) {
+                throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
+            }
+            parent_indices.push_back(static_cast<int>(parent_pos - features.begin()));
+        }
+        // Fill table with counts
+        for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
+            c10::List<c10::optional<at::Tensor>> coordinates;
+            coordinates.push_back(dataset.index({ name_index, n_sample }));
+            for (int parent_index : parent_indices) {
+                coordinates.push_back(dataset.index({ parent_index, n_sample }));
+            }
+            // Increment the count of the corresponding coordinate
+            cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item<double>());
+        }
+        // Normalize the counts
+        cpTable = cpTable / cpTable.sum(0);
+    }
+    // Look up the CPT cell addressed by the evidence values of this node and of its parents.
+    // Note: evidence is read with operator[], so a missing name is inserted with value 0.
+    float Node::getFactorValue(std::map<std::string, int>& evidence)
+    {
+        c10::List<c10::optional<at::Tensor>> coordinates;
+        // following predetermined order of indices in the cpTable (see Node.h)
+        coordinates.push_back(at::tensor(evidence[name]));
+        for (const auto& parent : parents) {
+            coordinates.push_back(at::tensor(evidence[parent->getName()]));
+        }
+        return cpTable.index({ coordinates }).item<float>();
+    }
+    // Graphviz lines for this node: its declaration (highlighted when it is the class node)
+    // followed by one "<node> -> <child>" entry per child.
+    std::vector<std::string> Node::graph(const std::string& className)
+    {
+        std::vector<std::string> output;
+        const std::string suffix = (name == className) ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
+        output.push_back(name + " [shape=circle" + suffix + "] \n");
+        for (const auto& child : children) {
+            output.push_back(name + " -> " + child->getName());
+        }
+        return output;
+    }
+}
--- a/bayesnet/network/Node.h
+++ b/bayesnet/network/Node.h
@@ -0,0 +1,36 @@
+#ifndef NODE_H
+#define NODE_H
+#include <unordered_set>
+#include <vector>
+#include <string>
+#include <torch/torch.h>
+namespace bayesnet {
+    // A variable of the network: keeps its name, links to its parents and children,
+    // its number of states and its conditional probability table (CPT).
+    class Node {
+    private:
+        std::string name;
+        std::vector<Node*> parents; // non-owning links to the parent nodes
+        std::vector<Node*> children; // non-owning links to the child nodes
+        int numStates; // number of states of the variable
+        torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
+        std::vector<int64_t> dimensions; // dimensions of the cpTable
+        // All pairs of elements of the given list taken 2 by 2
+        std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
+    public:
+        explicit Node(const std::string&);
+        // Drops links, CPT, dimensions and number of states; the name is kept
+        void clear();
+        void addParent(Node*);
+        void addChild(Node*);
+        void removeParent(Node*);
+        void removeChild(Node*);
+        std::string getName() const;
+        std::vector<Node*>& getParents();
+        std::vector<Node*>& getChildren();
+        torch::Tensor& getCPT();
+        // Fills the CPT from the dataset; throws std::logic_error if the node or a parent is not in features
+        void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
+        int getNumStates() const;
+        void setNumStates(int);
+        // Number of pairs that can be formed with the distinct neighbors (parents and children) of the node
+        unsigned minFill();
+        std::vector<std::string> graph(const std::string& clasName); // Returns a std::vector of std::strings representing the graph in graphviz format
+        // CPT value addressed by the evidence of the node and of its parents
+        float getFactorValue(std::map<std::string, int>&);
+    };
+}
+#endif