From c1531dba2abcc6e7ec36403564489128dd58ea67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?=
 <rmontanana@gmail.com>
Date: Wed, 19 Feb 2025 11:40:33 +0100
Subject: [PATCH] Complete XA1DE integration

---
 src/experimental_clfs/XA1DE.cpp | 67 +++++++++++++++++++++++++++++++--
 src/experimental_clfs/XA1DE.h   | 24 ++++++------
 src/experimental_clfs/Xaode.hpp | 33 ++++++++++------
 3 files changed, 96 insertions(+), 28 deletions(-)
diff --git a/src/experimental_clfs/XA1DE.cpp b/src/experimental_clfs/XA1DE.cpp
index 8f1f9b0..f8c2849 100644
--- a/src/experimental_clfs/XA1DE.cpp
+++ b/src/experimental_clfs/XA1DE.cpp
@@ -27,6 +27,7 @@ namespace platform {
         Timer timer, timert;
         timer.start();
         timert.start();
+        // debug = true;
         std::vector<std::vector<int>> instances = X;
         instances.push_back(y);
         int num_instances = instances[0].size();
@@ -36,6 +37,16 @@ namespace platform {
         for (int i = 0; i < num_attributes; i++) {
             statesv.push_back(*max_element(instances[i].begin(), instances[i].end()) + 1);
         }
+        // std::cout << "* States: " << statesv << std::endl;
+        // std::cout << "* Weights: " << weights_ << std::endl;
+        // std::cout << "* Instances: " << num_instances << std::endl;
+        // std::cout << "* Attributes: " << num_attributes << std::endl;
+        // std::cout << "* y: " << y << std::endl;
+        // std::cout << "* x shape: " << X.size() << "x" << X[0].size() << std::endl;
+        // for (int i = 0; i < num_attributes - 1; i++) {
+        //     std::cout << "* " << features[i] << ": " << instances[i] << std::endl;
+        // }
+        // std::cout << "Starting to build the model" << std::endl;
         aode_.init(statesv);
         aode_.duration_first += timer.getDuration(); timer.start();
         std::vector<int> instance;
@@ -54,7 +65,7 @@ namespace platform {
             // std::cout << "* Checking coherence... ";
             // aode_.checkCoherenceApprox(1e-6);
             // std::cout << "Ok!" << std::endl;
-            // aode_.show();
+            aode_.show();
             // std::cout << "* Accumulated first time: " << aode_.duration_first << std::endl;
             // std::cout << "* Accumulated second time: " << aode_.duration_second << std::endl;
             // std::cout << "* Accumulated third time: " << aode_.duration_third << std::endl;
@@ -196,6 +207,26 @@ namespace platform {
         return data;
     }
 
+    //
+    // Statistics: each getter forwards to the same-purpose accessor of the Xaode member aode_
+    //
+    int XA1DE::getNumberOfNodes() const
+    {
+        return aode_.getNumberOfNodes();
+    }
+    int XA1DE::getNumberOfEdges() const
+    {
+        return aode_.getNumberOfEdges();
+    }
+    int XA1DE::getNumberOfStates() const
+    {
+        return aode_.getNumberOfStates();
+    }
+    int XA1DE::getClassNumStates() const
+    {
+        return aode_.statesClass();
+    }
+
     //
     // Fit
     //
@@ -203,8 +234,7 @@ namespace platform {
     XA1DE& XA1DE::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing)
     {
         auto X_ = to_matrix(X);
-        int a = 1;
-        std::vector<int> y_ = to_vector<int>(y);
+        auto y_ = to_vector<int>(y);
         return fit(X_, y_, features, className, states, smoothing);
     }
     XA1DE& XA1DE::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing)
@@ -215,8 +245,37 @@ namespace platform {
     }
     XA1DE& XA1DE::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
     {
-        double b = 1;
         weights_ = to_vector<double>(weights);
         return fit(dataset, features, className, states, smoothing);
     }
+    //
+    // Predict
+    //
+    torch::Tensor XA1DE::predict(torch::Tensor& X)
+    {
+        // Tensor front-end: convert with to_matrix, run the std::vector overload, wrap the labels.
+        auto samples = to_matrix(X);
+        return torch::tensor(predict(samples));
+    }
+    torch::Tensor XA1DE::predict_proba(torch::Tensor& X)
+    {
+        // Returns a (rows x classes) tensor copied from the std::vector overload's result.
+        auto X_ = to_matrix(X);
+        auto probabilities = predict_proba(X_);
+        // Size from the result itself so an empty batch never touches probabilities[0].
+        const int64_t n_samples = probabilities.size();
+        const int64_t n_classes = probabilities.empty() ? 0 : probabilities[0].size();
+        auto y = torch::zeros({ n_samples, n_classes });
+        for (int64_t i = 0; i < n_samples; i++) {
+            for (int64_t j = 0; j < n_classes; j++) y[i][j] = probabilities[i][j];
+        }
+        return y;
+    }
+    float XA1DE::score(torch::Tensor& X, torch::Tensor& y)
+    {
+        auto labels = to_vector<int>(y);
+        auto samples = to_matrix(X);
+        return score(samples, labels);
+    }
+
 }
\ No newline at end of file
diff --git a/src/experimental_clfs/XA1DE.h b/src/experimental_clfs/XA1DE.h
index 1cfc371..f50bb45 100644
--- a/src/experimental_clfs/XA1DE.h
+++ b/src/experimental_clfs/XA1DE.h
@@ -24,23 +24,23 @@ namespace platform {
         virtual ~XA1DE() = default;
         const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
 
-        std::vector<std::vector<double>> predict_proba_threads(const std::vector<std::vector<int>>& test_data);
-        std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
-        float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
-        std::vector<int> predict(std::vector<std::vector<int>>& X) override;
         XA1DE& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing) override;
         XA1DE& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing) override;
         XA1DE& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const bayesnet::Smoothing_t smoothing) override;
         XA1DE& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
-        torch::Tensor predict(torch::Tensor& X) override { return torch::zeros(0); };
-        torch::Tensor predict_proba(torch::Tensor& X) override { return torch::zeros(0); };
-        int getNumberOfNodes() const override { return 0; };
-        int getNumberOfEdges() const override { return 0; };
-        int getNumberOfStates() const override { return 0; };
-        int getClassNumStates() const override { return 0; };
+        std::vector<int> predict(std::vector<std::vector<int>>& X) override;
+        torch::Tensor predict(torch::Tensor& X) override;
+        torch::Tensor predict_proba(torch::Tensor& X) override;
+        std::vector<std::vector<double>> predict_proba_threads(const std::vector<std::vector<int>>& test_data);
+        std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
+        float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
+        float score(torch::Tensor& X, torch::Tensor& y) override;
+        int getNumberOfNodes() const override;
+        int getNumberOfEdges() const override;
+        int getNumberOfStates() const override;
+        int getClassNumStates() const override;
         bayesnet::status_t getStatus() const override { return status; }
         std::string getVersion() override { return version; };
-        float score(torch::Tensor& X, torch::Tensor& y) override { return 0; };
         std::vector<std::string> show() const override { return {}; }
         std::vector<std::string> topological_order()  override { return {}; }
         std::vector<std::string> getNotes() const override { return notes; }
@@ -57,7 +57,7 @@ namespace platform {
         {
             double sum = std::accumulate(weights_.begin(), weights_.end(), 0.0);
             if (sum == 0) {
-                weights_ = std::vector<double>(weights_.size(), 1.0);
+                weights_ = std::vector<double>(num_instances, 1.0);
             } else {
                 for (double& w : weights_) {
                     w = w * num_instances / sum;
diff --git a/src/experimental_clfs/Xaode.hpp b/src/experimental_clfs/Xaode.hpp
index 5093853..b11d20a 100644
--- a/src/experimental_clfs/Xaode.hpp
+++ b/src/experimental_clfs/Xaode.hpp
@@ -31,7 +31,7 @@ namespace platform {
         double duration_first = 0.0;
         double duration_second = 0.0;
         double duration_third = 0.0;
-        Xaode() : nFeatures_{ 0 }, statesClass_{ 0 }, totalSize_{ 0 }, matrixState_{ MatrixState::EMPTY } {}
+        Xaode() : nFeatures_{ 0 }, statesClass_{ 0 }, matrixState_{ MatrixState::EMPTY } {}
         // -------------------------------------------------------
         // init
         // -------------------------------------------------------
@@ -84,9 +84,9 @@ namespace platform {
                 }
                 runningOffset += states_[i];
             }
-            totalSize_ = index * statesClass_;
-            data_.resize(totalSize_);
-            dataOpp_.resize(totalSize_);
+            int totalSize = index * statesClass_;
+            data_.resize(totalSize);
+            dataOpp_.resize(totalSize);
 
             classFeatureCounts_.resize(feature_offset * statesClass_);
             classFeatureProbs_.resize(feature_offset * statesClass_);
@@ -98,12 +98,6 @@ namespace platform {
             matrixState_ = MatrixState::COUNTS;
         }
 
-        // Returns the dimension of data_ (just for info).
-        int size() const
-        {
-            return totalSize_;
-        }
-
         // Returns current mode: INIT, COUNTS or PROBS
         MatrixState state() const
         {
@@ -116,7 +110,6 @@ namespace platform {
             std::cout << "-------- Xaode.show() --------" << std::endl
                 << "- nFeatures = " << nFeatures_ << std::endl
                 << "- statesClass = " << statesClass_ << std::endl
-                << "- totalSize_ = " << totalSize_ << std::endl
                 << "- matrixState = " << (matrixState_ == MatrixState::COUNTS ? "COUNTS" : "PROBS") << std::endl;
             std::cout << "- states: size: " << states_.size() << std::endl;
             for (int s : states_) std::cout << s << " "; std::cout << std::endl;
@@ -543,6 +536,23 @@ namespace platform {
         {
             return statesClass_;
         }
+        // Value of nFeatures_.
+        int nFeatures() const {
+            return nFeatures_;
+        }
+        // Sum of every entry in states_, multiplied by nFeatures_.
+        int getNumberOfStates() const {
+            return nFeatures_ * std::accumulate(states_.begin(), states_.end(), 0);
+        }
+        // Reported edge count: nFeatures_ * (2 * nFeatures_ - 1).
+        int getNumberOfEdges() const {
+            return (2 * nFeatures_ - 1) * nFeatures_;
+        }
+        // Reported node count: nFeatures_ * (nFeatures_ + 1).
+        int getNumberOfNodes() const {
+            return nFeatures_ * (nFeatures_ + 1);
+        }
+
 
     private:
         // -----------
@@ -555,7 +565,6 @@ namespace platform {
         // data_ means p(child=sj | c, superparent= si) after normalization.
         // But in COUNTS mode, it accumulates raw counts.
         std::vector<int> pairOffset_;
-        int totalSize_;
         // data_ stores p(child=sj | c, superparent=si) for each pair (i<j).
         std::vector<double> data_;
         // dataOpp_ stores p(superparent=si | c, child=sj) for each pair (i<j).