Make fit build the network

2023-06-30 02:46:06 +02:00 · 2023-06-30 02:46:06 +02:00 · 0a31aa2ff1
commit 0a31aa2ff1
parent 31c22898de
13 changed files with 580 additions and 82 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -1,7 +1,4 @@
 {
-    // Use IntelliSense to learn about possible attributes.
-    // Hover to view descriptions of existing attributes.
-    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
@ -20,6 +17,19 @@
            "program": "${workspaceFolder}/a.out",
            "args": [],
            "cwd": "${workspaceFolder}"
+        },
+        {
+            "name": "Build & debug active file",
+            "type": "cppdbg",
+            "request": "launch",
+            "program": "${workspaceFolder}/build/bayesnet",
+            "args": [],
+            "stopAtEntry": false,
+            "cwd": "${workspaceFolder}",
+            "environment": [],
+            "externalConsole": false,
+            "MIMode": "lldb",
+            "preLaunchTask": "CMake: build"
        }
    ]
 }
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -81,7 +81,12 @@
        "*.toml": "toml",
        "utility": "cpp",
        "__verbose_abort": "cpp",
-        "bit": "cpp"
+        "bit": "cpp",
+        "random": "cpp",
+        "*.tcc": "cpp",
+        "functional": "cpp",
+        "iterator": "cpp",
+        "memory_resource": "cpp"
    },
    "cmake.configureOnOpen": false,
    "C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools"
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@ -1,16 +1,37 @@
 {
-	"version": "2.0.0",
-	"tasks": [
-		{
-			"type": "cmake",
-			"label": "CMake: build",
-			"command": "build",
-			"targets": [
-				"all"
-			],
-			"group": "build",
-			"problemMatcher": [],
-			"detail": "CMake template build task"
-		}
-	]
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "type": "cmake",
+            "label": "CMake: build",
+            "command": "build",
+            "targets": [
+                "all"
+            ],
+            "group": "build",
+            "problemMatcher": [],
+            "detail": "CMake template build task"
+        },
+        {
+            "type": "cppbuild",
+            "label": "C/C++: clang build active file",
+            "command": "/usr/bin/clang",
+            "args": [
+                "-fcolor-diagnostics",
+                "-fansi-escape-codes",
+                "-g",
+                "${file}",
+                "-o",
+                "${fileDirname}/${fileBasenameNoExtension}"
+            ],
+            "options": {
+                "cwd": "${fileDirname}"
+            },
+            "problemMatcher": [
+                "$gcc"
+            ],
+            "group": "build",
+            "detail": "Task generated by Debugger."
+        }
+    ]
 }
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -11,6 +11,5 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

 # add_library(BayesNet Node.cc Network.cc)
-# add_executable(BayesNet main.cc Node.cc Network.cc ArffFiles.cc)
-add_executable(BayesNet main.cc ArffFiles.cc Node.cc Network.cc)
+add_executable(BayesNet main.cc ArffFiles.cc Node.cc Network.cc CPPFImdlp.cpp Metrics.cpp)
 target_link_libraries(BayesNet "${TORCH_LIBRARIES}")
--- a/CPPFImdlp.cpp
+++ b/CPPFImdlp.cpp
@ -0,0 +1,220 @@
+#include <numeric>
+#include <algorithm>
+#include <set>
+#include <cmath>
+#include "CPPFImdlp.h"
+#include "Metrics.h"
+
+namespace mdlp {
+
+    CPPFImdlp::CPPFImdlp(size_t min_length_, int max_depth_, float proposed) : min_length(min_length_),
+        max_depth(max_depth_),
+        proposed_cuts(proposed)
+    {
+    }
+
+    CPPFImdlp::CPPFImdlp() = default;
+
+    CPPFImdlp::~CPPFImdlp() = default;
+
+    size_t CPPFImdlp::compute_max_num_cut_points() const
+    {
+        // Set the actual maximum number of cut points as a number or as a percentage of the number of samples
+        if (proposed_cuts == 0) {
+            return numeric_limits<size_t>::max();
+        }
+        if (proposed_cuts < 0 || proposed_cuts > static_cast<float>(X.size())) {
+            throw invalid_argument("wrong proposed num_cuts value");
+        }
+        if (proposed_cuts < 1)
+            return static_cast<size_t>(round(static_cast<float>(X.size()) * proposed_cuts));
+        return static_cast<size_t>(proposed_cuts);
+    }
+
+    void CPPFImdlp::fit(samples_t& X_, labels_t& y_)
+    {
+        X = X_;
+        y = y_;
+        num_cut_points = compute_max_num_cut_points();
+        depth = 0;
+        cutPoints.clear();
+        if (X.size() != y.size()) {
+            throw invalid_argument("X and y must have the same size");
+        }
+        if (X.empty() || y.empty()) {
+            throw invalid_argument("X and y must have at least one element");
+        }
+        if (min_length < 3) {
+            throw invalid_argument("min_length must be greater than 2");
+        }
+        if (max_depth < 1) {
+            throw invalid_argument("max_depth must be greater than 0");
+        }
+        indices = sortIndices(X_, y_);
+        metrics.setData(y, indices);
+        computeCutPoints(0, X.size(), 1);
+        sort(cutPoints.begin(), cutPoints.end());
+        if (num_cut_points > 0) {
+            // Select the best (with lower entropy) cut points
+            while (cutPoints.size() > num_cut_points) {
+                resizeCutPoints();
+            }
+        }
+    }
+
+    pair<precision_t, size_t> CPPFImdlp::valueCutPoint(size_t start, size_t cut, size_t end)
+    {
+        size_t n;
+        size_t m;
+        size_t idxPrev = cut - 1 >= start ? cut - 1 : cut;
+        size_t idxNext = cut + 1 < end ? cut + 1 : cut;
+        bool backWall; // true if duplicates reach beginning of the interval
+        precision_t previous;
+        precision_t actual;
+        precision_t next;
+        previous = X[indices[idxPrev]];
+        actual = X[indices[cut]];
+        next = X[indices[idxNext]];
+        // definition 2 of the paper => X[t-1] < X[t]
+        // get the first equal value of X in the interval
+        while (idxPrev > start && actual == previous) {
+            previous = X[indices[--idxPrev]];
+        }
+        backWall = idxPrev == start && actual == previous;
+        // get the last equal value of X in the interval
+        while (idxNext < end - 1 && actual == next) {
+            next = X[indices[++idxNext]];
+        }
+        // # of duplicates before cutpoint
+        n = cut - 1 - idxPrev;
+        // # of duplicates after cutpoint
+        m = idxNext - cut - 1;
+        // Decide which values to use
+        cut = cut + (backWall ? m + 1 : -n);
+        actual = X[indices[cut]];
+        return { (actual + previous) / 2, cut };
+    }
+
+    void CPPFImdlp::computeCutPoints(size_t start, size_t end, int depth_)
+    {
+        size_t cut;
+        pair<precision_t, size_t> result;
+        // Check if the interval length and the depth are Ok
+        if (end - start < min_length || depth_ > max_depth)
+            return;
+        depth = depth_ > depth ? depth_ : depth;
+        cut = getCandidate(start, end);
+        if (cut == numeric_limits<size_t>::max())
+            return;
+        if (mdlp(start, cut, end)) {
+            result = valueCutPoint(start, cut, end);
+            cut = result.second;
+            cutPoints.push_back(result.first);
+            computeCutPoints(start, cut, depth_ + 1);
+            computeCutPoints(cut, end, depth_ + 1);
+        }
+    }
+
+    size_t CPPFImdlp::getCandidate(size_t start, size_t end)
+    {
+        /* Definition 1: A binary discretization for A is determined by selecting the cut point TA for which
+        E(A, TA; S) is minimal amongst all the candidate cut points. */
+        size_t candidate = numeric_limits<size_t>::max();
+        size_t elements = end - start;
+        bool sameValues = true;
+        precision_t entropy_left;
+        precision_t entropy_right;
+        precision_t minEntropy;
+        // Check if all the values of the variable in the interval are the same
+        for (size_t idx = start + 1; idx < end; idx++) {
+            if (X[indices[idx]] != X[indices[start]]) {
+                sameValues = false;
+                break;
+            }
+        }
+        if (sameValues)
+            return candidate;
+        minEntropy = metrics.entropy(start, end);
+        for (size_t idx = start + 1; idx < end; idx++) {
+            // Cutpoints are always on boundaries (definition 2)
+            if (y[indices[idx]] == y[indices[idx - 1]])
+                continue;
+            entropy_left = precision_t(idx - start) / static_cast<precision_t>(elements) * metrics.entropy(start, idx);
+            entropy_right = precision_t(end - idx) / static_cast<precision_t>(elements) * metrics.entropy(idx, end);
+            if (entropy_left + entropy_right < minEntropy) {
+                minEntropy = entropy_left + entropy_right;
+                candidate = idx;
+            }
+        }
+        return candidate;
+    }
+
+    bool CPPFImdlp::mdlp(size_t start, size_t cut, size_t end)
+    {
+        int k;
+        int k1;
+        int k2;
+        precision_t ig;
+        precision_t delta;
+        precision_t ent;
+        precision_t ent1;
+        precision_t ent2;
+        auto N = precision_t(end - start);
+        k = metrics.computeNumClasses(start, end);
+        k1 = metrics.computeNumClasses(start, cut);
+        k2 = metrics.computeNumClasses(cut, end);
+        ent = metrics.entropy(start, end);
+        ent1 = metrics.entropy(start, cut);
+        ent2 = metrics.entropy(cut, end);
+        ig = metrics.informationGain(start, cut, end);
+        delta = static_cast<precision_t>(log2(pow(3, precision_t(k)) - 2) -
+            (precision_t(k) * ent - precision_t(k1) * ent1 - precision_t(k2) * ent2));
+        precision_t term = 1 / N * (log2(N - 1) + delta);
+        return ig > term;
+    }
+
+    // Argsort from https://stackoverflow.com/questions/1577475/c-sorting-and-keeping-track-of-indexes
+    indices_t CPPFImdlp::sortIndices(samples_t& X_, labels_t& y_)
+    {
+        indices_t idx(X_.size());
+        iota(idx.begin(), idx.end(), 0);
+        stable_sort(idx.begin(), idx.end(), [&X_, &y_](size_t i1, size_t i2) {
+            if (X_[i1] == X_[i2])
+                return y_[i1] < y_[i2];
+            else
+                return X_[i1] < X_[i2];
+            });
+        return idx;
+    }
+
+    void CPPFImdlp::resizeCutPoints()
+    {
+        //Compute entropy of each of the whole cutpoint set and discards the biggest value
+        precision_t maxEntropy = 0;
+        precision_t entropy;
+        size_t maxEntropyIdx = 0;
+        size_t begin = 0;
+        size_t end;
+        for (size_t idx = 0; idx < cutPoints.size(); idx++) {
+            end = begin;
+            while (X[indices[end]] < cutPoints[idx] && end < X.size())
+                end++;
+            entropy = metrics.entropy(begin, end);
+            if (entropy > maxEntropy) {
+                maxEntropy = entropy;
+                maxEntropyIdx = idx;
+            }
+            begin = end;
+        }
+        cutPoints.erase(cutPoints.begin() + static_cast<long>(maxEntropyIdx));
+    }
+    labels_t& CPPFImdlp::transform(const samples_t& data)
+    {
+        discretizedData.reserve(data.size());
+        for (const precision_t& item : data) {
+            auto upper = upper_bound(cutPoints.begin(), cutPoints.end(), item);
+            discretizedData.push_back(upper - cutPoints.begin());
+        }
+        return discretizedData;
+    }
+}
--- a/CPPFImdlp.h
+++ b/CPPFImdlp.h
@ -0,0 +1,45 @@
+#ifndef CPPFIMDLP_H
+#define CPPFIMDLP_H
+
+#include "typesFImdlp.h"
+#include "Metrics.h"
+#include <limits>
+#include <utility>
+#include <string>
+
+namespace mdlp {
+    class CPPFImdlp {
+    protected:
+        size_t min_length = 3;
+        int depth = 0;
+        int max_depth = numeric_limits<int>::max();
+        float proposed_cuts = 0;
+        indices_t indices = indices_t();
+        samples_t X = samples_t();
+        labels_t y = labels_t();
+        Metrics metrics = Metrics(y, indices);
+        cutPoints_t cutPoints;
+        size_t num_cut_points = numeric_limits<size_t>::max();
+        labels_t discretizedData = labels_t();
+
+        static indices_t sortIndices(samples_t&, labels_t&);
+
+        void computeCutPoints(size_t, size_t, int);
+        void resizeCutPoints();
+        bool mdlp(size_t, size_t, size_t);
+        size_t getCandidate(size_t, size_t);
+        size_t compute_max_num_cut_points() const;
+        pair<precision_t, size_t> valueCutPoint(size_t, size_t, size_t);
+
+    public:
+        CPPFImdlp();
+        CPPFImdlp(size_t, int, float);
+        ~CPPFImdlp();
+        void fit(samples_t&, labels_t&);
+        inline cutPoints_t getCutPoints() const { return cutPoints; };
+        labels_t& transform(const samples_t&);
+        inline int get_depth() const { return depth; };
+        static inline string version() { return "1.1.2"; };
+    };
+}
+#endif
--- a/Metrics.cpp
+++ b/Metrics.cpp
@ -0,0 +1,78 @@
+#include "Metrics.h"
+#include <set>
+#include <cmath>
+
+using namespace std;
+namespace mdlp {
+    Metrics::Metrics(labels_t& y_, indices_t& indices_): y(y_), indices(indices_),
+        numClasses(computeNumClasses(0, indices.size()))
+    {
+    }
+
+    int Metrics::computeNumClasses(size_t start, size_t end)
+    {
+        set<int> nClasses;
+        for (auto i = start; i < end; ++i) {
+            nClasses.insert(y[indices[i]]);
+        }
+        return static_cast<int>(nClasses.size());
+    }
+
+    void Metrics::setData(const labels_t& y_, const indices_t& indices_)
+    {
+        indices = indices_;
+        y = y_;
+        numClasses = computeNumClasses(0, indices.size());
+        entropyCache.clear();
+        igCache.clear();
+    }
+
+    precision_t Metrics::entropy(size_t start, size_t end)
+    {
+        precision_t p;
+        precision_t ventropy = 0;
+        int nElements = 0;
+        labels_t counts(numClasses + 1, 0);
+        if (end - start < 2)
+            return 0;
+        if (entropyCache.find({ start, end }) != entropyCache.end()) {
+            return entropyCache[{start, end}];
+        }
+        for (auto i = &indices[start]; i != &indices[end]; ++i) {
+            counts[y[*i]]++;
+            nElements++;
+        }
+        for (auto count : counts) {
+            if (count > 0) {
+                p = static_cast<precision_t>(count) / static_cast<precision_t>(nElements);
+                ventropy -= p * log2(p);
+            }
+        }
+        entropyCache[{start, end}] = ventropy;
+        return ventropy;
+    }
+
+    precision_t Metrics::informationGain(size_t start, size_t cut, size_t end)
+    {
+        precision_t iGain;
+        precision_t entropyInterval;
+        precision_t entropyLeft;
+        precision_t entropyRight;
+        size_t nElementsLeft = cut - start;
+        size_t nElementsRight = end - cut;
+        size_t nElements = end - start;
+        if (igCache.find(make_tuple(start, cut, end)) != igCache.end()) {
+            return igCache[make_tuple(start, cut, end)];
+        }
+        entropyInterval = entropy(start, end);
+        entropyLeft = entropy(start, cut);
+        entropyRight = entropy(cut, end);
+        iGain = entropyInterval -
+            (static_cast<precision_t>(nElementsLeft) * entropyLeft +
+                static_cast<precision_t>(nElementsRight) * entropyRight) /
+            static_cast<precision_t>(nElements);
+        igCache[make_tuple(start, cut, end)] = iGain;
+        return iGain;
+    }
+
+}
--- a/Metrics.h
+++ b/Metrics.h
@ -0,0 +1,22 @@
+#ifndef CCMETRICS_H
+#define CCMETRICS_H
+
+#include "typesFImdlp.h"
+
+namespace mdlp {
+    class Metrics {
+    protected:
+        labels_t& y;
+        indices_t& indices;
+        int numClasses;
+        cacheEnt_t entropyCache = cacheEnt_t();
+        cacheIg_t igCache = cacheIg_t();
+    public:
+        Metrics(labels_t&, indices_t&);
+        void setData(const labels_t&, const indices_t&);
+        int computeNumClasses(size_t, size_t);
+        precision_t entropy(size_t, size_t);
+        precision_t informationGain(size_t, size_t, size_t);
+    };
+}
+#endif
--- a/Network.cc
+++ b/Network.cc
@ -1,5 +1,7 @@
 #include "Network.h"
 namespace bayesnet {
+    Network::Network() : laplaceSmoothing(1), root(nullptr) {}
+    Network::Network(int smoothing) : laplaceSmoothing(smoothing), root(nullptr) {}
    Network::~Network()
    {
        for (auto& pair : nodes) {
@ -70,46 +72,62 @@ namespace bayesnet {
    {
        return nodes;
    }
-    void Network::fit(const vector<vector<int>>& dataset, const int smoothing)
+    void Network::buildNetwork(const vector<vector<int>>& dataset, const vector<int>& labels, const vector<string>& featureNames, const string& className)
    {
-        auto jointCounts = [](const vector<vector<int>>& data, const vector<int>& indices, int numStates) {
-            int size = indices.size();
-            vector<int64_t> sizes(size, numStates);
-            torch::Tensor counts = torch::zeros(sizes, torch::kLong);
-
-            for (const auto& row : data) {
-                int idx = 0;
-                for (int i = 0; i < size; ++i) {
-                    idx = idx * numStates + row[indices[i]];
-                }
-                counts.view({ -1 }).add_(idx, 1);
-            }
-
-            return counts;
-            };
-
-        auto marginalCounts = [](const torch::Tensor& jointCounts) {
-            return jointCounts.sum(-1);
-            };
-
-        for (auto& pair : nodes) {
-            Node* node = pair.second;
-
-            vector<int> indices;
-            for (const auto& parent : node->getParents()) {
-                indices.push_back(nodes[parent->getName()]->getId());
-            }
-            indices.push_back(node->getId());
-
-            for (auto& child : node->getChildren()) {
-                torch::Tensor counts = jointCounts(dataset, indices, node->getNumStates()) + smoothing;
-                torch::Tensor parentCounts = marginalCounts(counts);
-                parentCounts = parentCounts.unsqueeze(-1);
-
-                torch::Tensor cpt = counts.to(torch::kDouble) / parentCounts.to(torch::kDouble);
-                setCPD(node->getCPDKey(child), cpt);
-            }
+        // Add features as nodes to the network
+        for (int i = 0; i < featureNames.size(); ++i) {
+            addNode(featureNames[i], *max_element(dataset[i].begin(), dataset[i].end()) + 1);
        }
+        // Add class as node to the network
+        addNode(className, *max_element(labels.begin(), labels.end()) + 1);
+        // Add edges from class to features => naive Bayes
+        for (auto feature : featureNames) {
+            addEdge(className, feature);
+        }
+    }
+    void Network::fit(const vector<vector<int>>& dataset, const vector<int>& labels, const vector<string>& featureNames, const string& className)
+    {
+        buildNetwork(dataset, labels, featureNames, className);
+        //estimateParameters(dataset);
+
+        // auto jointCounts = [](const vector<vector<int>>& data, const vector<int>& indices, int numStates) {
+        //     int size = indices.size();
+        //     vector<int64_t> sizes(size, numStates);
+        //     torch::Tensor counts = torch::zeros(sizes, torch::kLong);
+
+        //     for (const auto& row : data) {
+        //         int idx = 0;
+        //         for (int i = 0; i < size; ++i) {
+        //             idx = idx * numStates + row[indices[i]];
+        //         }
+        //         counts.view({ -1 }).add_(idx, 1);
+        //     }
+
+        //     return counts;
+        //     };
+
+        // auto marginalCounts = [](const torch::Tensor& jointCounts) {
+        //     return jointCounts.sum(-1);
+        //     };
+
+        // for (auto& pair : nodes) {
+        //     Node* node = pair.second;
+
+        //     vector<int> indices;
+        //     for (const auto& parent : node->getParents()) {
+        //         indices.push_back(nodes[parent->getName()]->getId());
+        //     }
+        //     indices.push_back(node->getId());
+
+        //     for (auto& child : node->getChildren()) {
+        //         torch::Tensor counts = jointCounts(dataset, indices, node->getNumStates()) + laplaceSmoothing;
+        //         torch::Tensor parentCounts = marginalCounts(counts);
+        //         parentCounts = parentCounts.unsqueeze(-1);
+
+        //         torch::Tensor cpt = counts.to(torch::kDouble) / parentCounts.to(torch::kDouble);
+        //         setCPD(node->getCPDKey(child), cpt);
+        //     }
+        // }
    }

    torch::Tensor& Network::getCPD(const string& key)
--- a/Network.h
+++ b/Network.h
@ -8,14 +8,18 @@ namespace bayesnet {
    private:
        map<string, Node*> nodes;
        map<string, torch::Tensor> cpds;  // Map from CPD key to CPD tensor
-        Node* root = nullptr;
+        Node* root;
+        int laplaceSmoothing;
        bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
    public:
+        Network();
+        Network(int);
        ~Network();
        void addNode(string, int);
        void addEdge(const string, const string);
        map<string, Node*>& getNodes();
-        void fit(const vector<vector<int>>&, const int);
+        void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
+        void buildNetwork(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
        torch::Tensor& getCPD(const string&);
        void setCPD(const string&, const torch::Tensor&);
        void setRoot(string);
--- a/copy.cc
+++ b/copy.cc
@ -0,0 +1,54 @@
+#include <iostream>
+#include <string>
+#include <torch/torch.h>
+#include "ArffFiles.h"
+#include "Network.h"
+
+using namespace std;
+
+int main()
+{
+    auto handler = ArffFiles();
+    handler.load("iris.arff");
+    auto X = handler.getX();
+    auto y = handler.getY();
+    auto className = handler.getClassName();
+    vector<pair<string, string>> edges = { {className, "sepallength"}, {className, "sepalwidth"}, {className, "petallength"}, {className, "petalwidth"} };
+    auto network = bayesnet::Network();
+    // Add nodes to the network
+    for (auto feature : handler.getAttributes()) {
+        cout << "Adding feature: " << feature.first << endl;
+        network.addNode(feature.first, 7);
+    }
+    network.addNode(className, 3);
+    for (auto item : edges) {
+        network.addEdge(item.first, item.second);
+    }
+    cout << "Hello, Bayesian Networks!" << endl;
+    torch::Tensor tensor = torch::eye(3);
+    cout << "Now I'll add a cycle" << endl;
+    try {
+        network.addEdge("petallength", className);
+    }
+    catch (invalid_argument& e) {
+        cout << e.what() << endl;
+    }
+    cout << tensor << std::endl;
+    cout << "Nodes:" << endl;
+    for (auto [name, item] : network.getNodes()) {
+        cout << "*" << item->getName() << endl;
+        cout << "-Parents:" << endl;
+        for (auto parent : item->getParents()) {
+            cout << " " << parent->getName() << endl;
+        }
+        cout << "-Children:" << endl;
+        for (auto child : item->getChildren()) {
+            cout << " " << child->getName() << endl;
+        }
+    }
+    cout << "Root: " << network.getRoot()->getName() << endl;
+    network.setRoot(className);
+    cout << "Now Root should be class: " << network.getRoot()->getName() << endl;
+    cout << "PyTorch version: " << TORCH_VERSION << endl;
+    return 0;
+}
--- a/main.cc
+++ b/main.cc
@ -3,40 +3,44 @@
 #include <torch/torch.h>
 #include "ArffFiles.h"
 #include "Network.h"
+#include "CPPFImdlp.h"
+

 using namespace std;

+vector<mdlp::labels_t> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
+{
+    vector<mdlp::labels_t>Xd;
+    auto fimdlp = mdlp::CPPFImdlp();
+    for (int i = 0; i < X.size(); i++) {
+        fimdlp.fit(X[i], y);
+        Xd.push_back(fimdlp.transform(X[i]));
+    }
+    return Xd;
+}
+
 int main()
 {
    auto handler = ArffFiles();
    handler.load("iris.arff");
-    auto X = handler.getX();
-    auto y = handler.getY();
+    // Get Dataset X, y
+    vector<mdlp::samples_t>& X = handler.getX();
+    mdlp::labels_t& y = handler.getY();
+    // Get className & Features
    auto className = handler.getClassName();
-    vector<pair<string, string>> edges = { {className, "sepallength"}, {className, "sepalwidth"}, {className, "petallength"}, {className, "petalwidth"} };
-    auto network = bayesnet::Network();
-    // Add nodes to the network
+    vector<string> features;
    for (auto feature : handler.getAttributes()) {
-        cout << "Adding feature: " << feature.first << endl;
-        network.addNode(feature.first, 7);
-    }
-    network.addNode(className, 3);
-    for (auto item : edges) {
-        network.addEdge(item.first, item.second);
+        features.push_back(feature.first);
    }
+    // Discretize Dataset
+    vector<mdlp::labels_t> Xd = discretize(X, y);;
+    // Build Network    
+    auto network = bayesnet::Network();
+    network.fit(Xd, y, features, className);
    cout << "Hello, Bayesian Networks!" << endl;
-    torch::Tensor tensor = torch::eye(3);
-    cout << "Now I'll add a cycle" << endl;
-    try {
-        network.addEdge("petallength", className);
-    }
-    catch (invalid_argument& e) {
-        cout << e.what() << endl;
-    }
-    cout << tensor << std::endl;
    cout << "Nodes:" << endl;
    for (auto [name, item] : network.getNodes()) {
-        cout << "*" << item->getName() << endl;
+        cout << "*" << item->getName() << " -> " << item->getNumStates() << endl;
        cout << "-Parents:" << endl;
        for (auto parent : item->getParents()) {
            cout << " " << parent->getName() << endl;
@ -49,6 +53,6 @@ int main()
    cout << "Root: " << network.getRoot()->getName() << endl;
    network.setRoot(className);
    cout << "Now Root should be class: " << network.getRoot()->getName() << endl;
-
+    cout << "PyTorch version: " << TORCH_VERSION << endl;
    return 0;
 }
--- a/typesFImdlp.h
+++ b/typesFImdlp.h
@ -0,0 +1,18 @@
+#ifndef TYPES_H
+#define TYPES_H
+
+#include <vector>
+#include <map>
+#include <stdexcept>
+
+using namespace std;
+namespace mdlp {
+    typedef float precision_t;
+    typedef vector<precision_t> samples_t;
+    typedef vector<int> labels_t;
+    typedef vector<size_t> indices_t;
+    typedef vector<precision_t> cutPoints_t;
+    typedef map<pair<int, int>, precision_t> cacheEnt_t;
+    typedef map<tuple<int, int, int>, precision_t> cacheIg_t;
+}
+#endif