Begin with parameter estimation

2023-06-30 21:24:12 +02:00 · 2023-06-30 21:24:12 +02:00 · 71d730d228
commit 71d730d228
parent 0a31aa2ff1
8 changed files with 236 additions and 87 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -12,4 +12,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 # add_library(BayesNet Node.cc Network.cc)
 add_executable(BayesNet main.cc ArffFiles.cc Node.cc Network.cc CPPFImdlp.cpp Metrics.cpp)
-target_link_libraries(BayesNet "${TORCH_LIBRARIES}")
+add_executable(test test.cc)
 target_link_libraries(BayesNet "${TORCH_LIBRARIES}")
 target_link_libraries(test "${TORCH_LIBRARIES}")
--- a/CPPFImdlp.cpp
+++ b/CPPFImdlp.cpp
@ -37,6 +37,7 @@ namespace mdlp {
        y = y_;
        num_cut_points = compute_max_num_cut_points();
        depth = 0;
        discretizedData.clear();
        cutPoints.clear();
        if (X.size() != y.size()) {
            throw invalid_argument("X and y must have the same size");
--- a/Network.cc
+++ b/Network.cc
@ -1,7 +1,7 @@
 #include "Network.h"
 namespace bayesnet {
-    Network::Network() : laplaceSmoothing(1), root(nullptr) {}
+    Network::Network() : laplaceSmoothing(1), root(nullptr), features(vector<string>()), className("") {}
-    Network::Network(int smoothing) : laplaceSmoothing(smoothing), root(nullptr) {}
+    Network::Network(int smoothing) : laplaceSmoothing(smoothing), root(nullptr), features(vector<string>()), className("") {}
    Network::~Network()
    {
        for (auto& pair : nodes) {
@ -10,6 +10,9 @@ namespace bayesnet {
    }
    void Network::addNode(string name, int numStates)
    {
        if (nodes.find(name) != nodes.end()) {
            throw invalid_argument("Node " + name + " already exists");
        }
        nodes[name] = new Node(name, numStates);
        if (root == nullptr) {
            root = nodes[name];
@ -32,7 +35,6 @@ namespace bayesnet {
        {
            visited.insert(nodeId);
            recStack.insert(nodeId);
            for (Node* child : nodes[nodeId]->getChildren()) {
                if (visited.find(child->getName()) == visited.end() && isCyclic(child->getName(), visited, recStack))
                    return true;
@ -55,13 +57,11 @@ namespace bayesnet {
        nodes[parent]->addChild(nodes[child]);
        nodes[child]->addParent(nodes[parent]);
        // temporarily add edge
        unordered_set<string> visited;
        unordered_set<string> recStack;
        if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle
        {
-            // remove edge
+            // remove problematic edge
            nodes[parent]->removeChild(nodes[child]);
            nodes[child]->removeParent(nodes[parent]);
            throw invalid_argument("Adding this edge forms a cycle in the graph.");
@ -72,71 +72,162 @@ namespace bayesnet {
    {
        return nodes;
    }
-    void Network::buildNetwork(const vector<vector<int>>& dataset, const vector<int>& labels, const vector<string>& featureNames, const string& className)
+    void Network::buildNetwork()
    {
        // Builds the graph from the members `features`, `className` and
        // `dataset`: one node per feature, one node for the class, and an
        // edge from the class to every feature.
        // Each node's number of states is taken as (largest value seen in its
        // column) + 1, i.e. values are used as 0-based state indices.
        // NOTE(review): max_element is dereferenced unconditionally, so every
        // column is assumed to be non-empty - confirm against callers.
        // Add features as nodes to the network
-        for (int i = 0; i < featureNames.size(); ++i) {
+        for (int i = 0; i < features.size(); ++i) {
-            addNode(featureNames[i], *max_element(dataset[i].begin(), dataset[i].end()) + 1);
+            addNode(features[i], *max_element(dataset[features[i]].begin(), dataset[features[i]].end()) + 1);
        }
        // Add class as node to the network
-        addNode(className, *max_element(labels.begin(), labels.end()) + 1);
+        addNode(className, *max_element(dataset[className].begin(), dataset[className].end()) + 1);
        // Add edges from class to features => naive Bayes
-        for (auto feature : featureNames) {
+        for (auto feature : features) {
            addEdge(className, feature);
        }
        // NOTE(review): this edge uses literal node names that do not come
        // from `features`; it looks like a dataset-specific experiment and
        // presumably misbehaves when those nodes do not exist - confirm
        // before using this with any other dataset.
        addEdge("petalwidth", "petallength");
    }
    // Stores the training data in the network, then builds the graph and
    // estimates its parameters.
    //   dataset      - indexed as dataset[i], the column of values for
    //                  featureNames[i]
    //   labels       - stored as the column for `className`
    //   featureNames - copied into the member `features`
    //   className    - copied into the member `className`
    // NOTE(review): assumes dataset has at least featureNames.size() entries;
    // nothing here checks it.
    // NOTE(review): buildNetwork() calls addNode() for every feature, and the
    // visible part of addNode() throws when a node name already exists, so a
    // second fit() on the same object presumably throws - confirm.
    void Network::fit(const vector<vector<int>>& dataset, const vector<int>& labels, const vector<string>& featureNames, const string& className)
    {
-        buildNetwork(dataset, labels, featureNames, className);
+        features = featureNames;
-        //estimateParameters(dataset);
+        this->className = className;
-
+        // Build dataset
-        // auto jointCounts = [](const vector<vector<int>>& data, const vector<int>& indices, int numStates) {
+        for (int i = 0; i < featureNames.size(); ++i) {
-        //     int size = indices.size();
+            this->dataset[featureNames[i]] = dataset[i];
-        //     vector<int64_t> sizes(size, numStates);
+        }
-        //     torch::Tensor counts = torch::zeros(sizes, torch::kLong);
+        this->dataset[className] = labels;
-
+        buildNetwork();
-        //     for (const auto& row : data) {
+        estimateParameters();
        // NOTE(review): every remaining line of this function is commented-out
        // code from an earlier counting approach; none of it executes.
        //         int idx = 0;
        //         for (int i = 0; i < size; ++i) {
        //             idx = idx * numStates + row[indices[i]];
        //         }
        //         counts.view({ -1 }).add_(idx, 1);
        //     }
        //     return counts;
        //     };
        // auto marginalCounts = [](const torch::Tensor& jointCounts) {
        //     return jointCounts.sum(-1);
        //     };
        // for (auto& pair : nodes) {
        //     Node* node = pair.second;
        //     vector<int> indices;
        //     for (const auto& parent : node->getParents()) {
        //         indices.push_back(nodes[parent->getName()]->getId());
        //     }
        //     indices.push_back(node->getId());
        //     for (auto& child : node->getChildren()) {
        //         torch::Tensor counts = jointCounts(dataset, indices, node->getNumStates()) + laplaceSmoothing;
        //         torch::Tensor parentCounts = marginalCounts(counts);
        //         parentCounts = parentCounts.unsqueeze(-1);
        //         torch::Tensor cpt = counts.to(torch::kDouble) / parentCounts.to(torch::kDouble);
        //         setCPD(node->getCPDKey(child), cpt);
        //     }
        // }
    }
-    torch::Tensor& Network::getCPD(const string& key)
+    // void Network::estimateParameters()
    // {
    //     auto dimensions = vector<int64_t>();
    //     for (auto [name, node] : nodes) {
    //         // Get dimensions of the CPT
    //         dimensions.clear();
    //         dimensions.push_back(node->getNumStates());
    //         for (auto father : node->getParents()) {
    //             dimensions.push_back(father->getNumStates());
    //         }
    //         auto length = dimensions.size();
    //         // Create a tensor of zeros with the dimensions of the CPT
    //         torch::Tensor cpt = torch::zeros(dimensions, torch::kFloat);
    //         // Fill table with counts
    //         for (int n_sample = 0; n_sample < dataset[name].size(); ++n_sample) {
    //             torch::List<c10::optional<torch::Tensor>> coordinates;
    //             coordinates.push_back(torch::tensor(dataset[name][n_sample]));
    //             for (auto father : node->getParents()) {
    //                 coordinates.push_back(torch::tensor(dataset[father->getName()][n_sample]));
    //             }
    //             // Increment the count of the corresponding coordinate
    //             cpt.index_put_({ coordinates }, cpt.index({ coordinates }) + 1);
    //         }
    //         // store the resulting cpt in the node
    //         node->setCPT(cpt);
    //     }
    // }
    // void Network::estimateParameters()
    // {
    //     // Lambda function to compute joint counts of states
    //     auto jointCounts = [this](const vector<string>& nodeNames) {
    //         int size = nodeNames.size();
    //         std::vector<int64_t> sizes(size);
    //         for (int i = 0; i < size; ++i) {
    //             sizes[i] = this->nodes[nodeNames[i]]->getNumStates();
    //         }
    //         torch::Tensor counts = torch::zeros(sizes, torch::kLong);
    //         int dataSize = this->dataset[nodeNames[0]].size();
    //         for (int dataIdx = 0; dataIdx < dataSize; ++dataIdx) {
    //             std::vector<torch::Tensor> idx(size);
    //             for (int i = 0; i < size; ++i) {
    //                 idx[i] = torch::tensor(this->dataset[nodeNames[i]][dataIdx], torch::kLong);
    //             }
    //             torch::Tensor indices = torch::stack(idx);
    //             counts.index_put_({ indices }, counts.index({ indices }) + 1);
    //         }
    //         return counts;
    //         };
    //     // Lambda function to compute marginal counts of states
    //     auto marginalCounts = [](const torch::Tensor& jointCounts) {
    //         return jointCounts.sum(-1);
    //         };
    //     for (auto& pair : nodes) {
    //         Node* node = pair.second;
    //         // Create a list of names of the node and its parents
    //         std::vector<string> nodeNames;
    //         nodeNames.push_back(node->getName());
    //         for (Node* parent : node->getParents()) {
    //             nodeNames.push_back(parent->getName());
    //         }
    //         // Compute counts and normalize to get probabilities
    //         torch::Tensor counts = jointCounts(nodeNames) + laplaceSmoothing;
    //         torch::Tensor parentCounts = marginalCounts(counts);
    //         parentCounts = parentCounts.unsqueeze(-1);
    //         // The CPT is represented as a tensor and stored in the Node
    //         node->setCPT((counts.to(torch::kDouble) / parentCounts.to(torch::kDouble)));
    //     }
    // }
    void Network::estimateParameters()
    {
-        return cpds[key];
+        // Lambda function to compute joint counts of states
        auto jointCounts = [this](const vector<string>& nodeNames) {
            int size = nodeNames.size();
            std::vector<int64_t> sizes(size);
            for (int i = 0; i < size; ++i) {
                sizes[i] = this->nodes[nodeNames[i]]->getNumStates();
            }
            torch::Tensor counts = torch::zeros(sizes, torch::kLong);
            int dataSize = this->dataset[nodeNames[0]].size();
            torch::List<c10::optional<torch::Tensor>> indices;
            for (int dataIdx = 0; dataIdx < dataSize; ++dataIdx) {
                indices.clear();
                for (int i = 0; i < size; ++i) {
                    indices.push_back(torch::tensor(this->dataset[nodeNames[i]][dataIdx], torch::kLong));
                }
                //torch::Tensor indices = torch::stack(idx);
                counts.index_put_({ indices }, counts.index({ indices }) + 1);
            }
            return counts;
            };
        // Lambda function to compute marginal counts of states
        auto marginalCounts = [](const torch::Tensor& jointCounts) {
            return jointCounts.sum(-1);
            };
        for (auto& pair : nodes) {
            Node* node = pair.second;
            // Create a list of names of the node and its parents
            std::vector<string> nodeNames;
            nodeNames.push_back(node->getName());
            for (Node* parent : node->getParents()) {
                nodeNames.push_back(parent->getName());
            }
            // Compute counts and normalize to get probabilities
            torch::Tensor counts = jointCounts(nodeNames) + laplaceSmoothing;
            torch::Tensor parentCounts = marginalCounts(counts);
            parentCounts = parentCounts.unsqueeze(-1);
            // The CPT is represented as a tensor and stored in the Node
            node->setCPT((counts.to(torch::kDouble) / parentCounts.to(torch::kDouble)));
        }
    }
    // Stores `cpt` in `cpds` under `key`.
    void Network::setCPD(const string& key, const torch::Tensor& cpt)
    {
        torch::Tensor& slot = cpds[key];
        slot = cpt;
    }
 }
--- a/Network.h
+++ b/Network.h
@ -3,12 +3,16 @@
 #include "Node.h"
 #include <map>
 #include <vector>
 namespace bayesnet {
    class Network {
    private:
        map<string, Node*> nodes;
-        map<string, torch::Tensor> cpds;  // Map from CPD key to CPD tensor
+        map<string, vector<int>> dataset;
        Node* root;
        vector<string> features;
        string className;
        int laplaceSmoothing;
        bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
    public:
@ -19,9 +23,8 @@ namespace bayesnet {
        void addEdge(const string, const string);
        map<string, Node*>& getNodes();
        void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
-        void buildNetwork(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
+        void estimateParameters();
-        torch::Tensor& getCPD(const string&);
+        void buildNetwork();
        void setCPD(const string&, const torch::Tensor&);
        void setRoot(string);
        Node* getRoot();
    };
--- a/Node.cc
+++ b/Node.cc
@ -41,8 +41,12 @@ namespace bayesnet {
    {
        return numStates;
    }
-    string Node::getCPDKey(const Node* child) const
+    torch::Tensor& Node::getCPT()
    {
        // Returns a non-const reference to the node's `cpt` member, so the
        // caller can read or modify the stored table in place.
-        return name + "-" + child->getName();
+        return cpt;
    }
    // Replaces the table stored in this node's `cpt` member with `cpt`.
    void Node::setCPT(const torch::Tensor& cpt)
    {
        Node& self = *this;
        self.cpt = cpt;
    }
 }
--- a/Node.h
+++ b/Node.h
@ -12,11 +12,12 @@ namespace bayesnet {
        string name;
        vector<Node*> parents;
        vector<Node*> children;
        torch::Tensor cpTable;
        int numStates;
        torch::Tensor cpt;
    public:
        Node(const std::string&, int);
-        void addParent(Node*);        
+        void addParent(Node*);
        void addChild(Node*);
        void removeParent(Node*);
        void removeChild(Node*);
@ -27,7 +28,6 @@ namespace bayesnet {
        void setCPT(const torch::Tensor&);
        int getNumStates() const;
        int getId() const { return id; }
        string getCPDKey(const Node*) const;
    };
 }
 #endif
--- a/main.cc
+++ b/main.cc
@ -11,10 +11,15 @@ using namespace std;
 // Discretizes every entry of X against the labels y with one shared
 // mdlp::CPPFImdlp instance and returns the discretized columns in the same
 // order. Also prints the smallest and largest discretized value of each
 // column to stdout.
 vector<mdlp::labels_t> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
 {
    vector<mdlp::labels_t>Xd;
    auto fimdlp = mdlp::CPPFImdlp();
    for (int i = 0; i < X.size(); i++) {
        // The same discretizer is re-fitted for each column.
        fimdlp.fit(X[i], y);
-        Xd.push_back(fimdlp.transform(X[i]));
+        mdlp::labels_t& xd = fimdlp.transform(X[i]);
        // NOTE(review): xd is only a reference to whatever transform()
        // returns - presumably a buffer owned by fimdlp that the next fit()
        // overwrites; the push_back below copies it before that happens.
        cout << "X[" << i << "]: ";
        // NOTE(review): mm is dereferenced unconditionally, so xd is assumed
        // to be non-empty - confirm.
        auto mm = minmax_element(xd.begin(), xd.end());
        cout << *mm.first << " " << *mm.second << endl;
        Xd.push_back(xd);
    }
    return Xd;
 }
@ -33,7 +38,7 @@ int main()
        features.push_back(feature.first);
    }
    // Discretize Dataset
-    vector<mdlp::labels_t> Xd = discretize(X, y);;
+    vector<mdlp::labels_t> Xd = discretize(X, y);
    // Build Network    
    auto network = bayesnet::Network();
    network.fit(Xd, y, features, className);
@ -53,6 +58,19 @@ int main()
    cout << "Root: " << network.getRoot()->getName() << endl;
    network.setRoot(className);
    cout << "Now Root should be class: " << network.getRoot()->getName() << endl;
    cout << "CPDs:" << endl;
    auto nodes = network.getNodes();
    auto classNode = nodes[className];
    for (auto it = nodes.begin(); it != nodes.end(); it++) {
        cout << "* Name: " << it->first << " " << it->second->getName() << " -> " << it->second->getNumStates() << endl;
        cout << "Parents: ";
        for (auto parent : it->second->getParents()) {
            cout << parent->getName() << " -> " << parent->getNumStates() << ", ";
        }
        cout << endl;
        auto cpd = it->second->getCPT();
        cout << cpd << endl;
    }
    cout << "PyTorch version: " << TORCH_VERSION << endl;
    return 0;
 }
--- a/test.cc
+++ b/test.cc
@ -1,23 +1,53 @@
-#include <map>
+// #include <torch/torch.h>
 #include <string>
 #include <iostream>
-using namespace std;
+// int main()
 // {
 //     torch::Tensor t = torch::rand({ 5, 5 });
-int main(int argc, char const* argv[])
+//     // Print original tensor
 //     std::cout << t << std::endl;
 //     // New value
 //     torch::Tensor new_val = torch::tensor(10.0f);
 //     // Indices for the cell you want to update
 //     auto index_i = torch::tensor({ 2 });
 //     auto index_j = torch::tensor({ 3 });
 //     // Update cell
 //     t.index_put_({ index_i, index_j }, new_val);
 //     // Print updated tensor
 //     std::cout << t << std::endl;
 // }
 #include <torch/torch.h>
 int main()
 {
-    map<string, int> m;
+    torch::Tensor t = torch::rand({ 5, 4, 3 }); // 3D tensor for this example
-    m["a"] = 1;
+    int i = 3, j = 1, k = 2; // Indices for the cell you want to update
-    m["b"] = 2;
+    // Print original tensor
-    m["c"] = 3;
+    std::cout << t << std::endl;
    if (m.find("b") != m.end()) {
        cout << "Found b" << endl;
    } else {
        cout << "Not found b" << endl;
    }
    // for (auto [key, value] : m) {
    //     cout << key << " " << value << endl;
    // }
-    return 0;
+    // New value
    torch::Tensor new_val = torch::tensor(10.0f);
    // Indices for the cell you want to update
    std::vector<torch::Tensor> indices;
    indices.push_back(torch::tensor(i)); // Replace i with your index for the 1st dimension
    indices.push_back(torch::tensor(j)); // Replace j with your index for the 2nd dimension
    indices.push_back(torch::tensor(k)); // Replace k with your index for the 3rd dimension
    //torch::ArrayRef<at::indexing::TensorIndex> indices_ref(indices);
    // Update cell
    //torch::Tensor result = torch::stack(indices);
    //torch::List<c10::optional<torch::Tensor>> indices_list = { torch::tensor(i), torch::tensor(j), torch::tensor(k) };
    torch::List<c10::optional<torch::Tensor>> indices_list;
    indices_list.push_back(torch::tensor(i));
    indices_list.push_back(torch::tensor(j));
    indices_list.push_back(torch::tensor(k));
    //t.index_put_({ torch::tensor(i), torch::tensor(j), torch::tensor(k) }, new_val);
    t.index_put_(indices_list, new_val);
    // Print updated tensor
    std::cout << t << std::endl;
 }