First working version

2025-03-08 14:20:27 +01:00
parent 7a23782b05
commit 9c1852c6c3
6 changed files with 148 additions and 584 deletions
--- a/src/experimental_clfs/ExpClf.h
+++ b/src/experimental_clfs/ExpClf.h
@@ -11,11 +11,11 @@
 #include <cmath>
 #include <algorithm>
 #include <limits>
-#include "bayesnet/ensembles/Boost.h"
+#include <bayesnet/ensembles/Boost.h>
+#include <bayesnet/network/Smoothing.h>
 #include "common/Timer.hpp"
 #include "CountingSemaphore.hpp"
 #include "Xaode.hpp"
-#include "Xaode2.hpp"

 namespace platform {
    class ExpClf : public bayesnet::Boost {
@@ -45,8 +45,7 @@ namespace platform {
        void remove_last_parent();
    protected:
        bool debug = false;
-        // Xaode aode;
-        Xaode2 aode_;
+        Xaode aode_;
        torch::Tensor weights_;
        const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
        inline void normalize_weights(int num_instances)
--- a/src/experimental_clfs/XA1DE.h
+++ b/src/experimental_clfs/XA1DE.h
@@ -8,6 +8,7 @@
 #define XA1DE_H
 #include "Xaode.hpp"
 #include "ExpClf.h"
+#include <bayesnet/network/Smoothing.h>

 namespace platform {
    class XA1DE : public ExpClf {
--- a/src/experimental_clfs/XBAODE.cpp
+++ b/src/experimental_clfs/XBAODE.cpp
@@ -37,7 +37,7 @@ namespace platform {
        // Algorithm based on the adaboost algorithm for classification
        // as explained in Ensemble methods (Zhi-Hua Zhou, 2012)
        double alpha_t = 0;
-        weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
+        weights_ = torch::full({ m }, 1.0 / static_cast<double>(m), torch::kFloat64);
        bool finished = false;
        std::vector<int> featuresUsed;
        aode_.fit(X_train_, y_train_, features, className, states, weights_, false);
@@ -88,8 +88,7 @@ namespace platform {
                auto feature = featureSelection[0];
                featureSelection.erase(featureSelection.begin());
                auto model = XSpode(feature);
-                model.fit(X_train_, y_train_, weights_);
-                alpha_t = 0.0;
+                model.fit(X_train_, y_train_, weights_, smoothing);
                std::vector<int> ypred;
                if (alpha_block) {
                    //
--- a/src/experimental_clfs/XSpode.hpp
+++ b/src/experimental_clfs/XSpode.hpp
@@ -11,22 +11,13 @@
 #include <limits>
 #include <sstream>
 #include <iostream>
+#include "CountingSemaphore.hpp"
+

 namespace platform {

    class XSpode {
    public:
-        // --------------------------------------
-        // The SPODE can be EMPTY (just created),
-        // in COUNTS mode (accumulating raw counts),
-        // or in PROBS mode (storing conditional probabilities).
-        // --------------------------------------
-        enum class MatrixState {
-            EMPTY,
-            COUNTS,
-            PROBS
-        };
-
        // --------------------------------------
        // Constructor
        //
@@ -36,8 +27,8 @@ namespace platform {
            : superParent_{ spIndex },
            nFeatures_{ 0 },
            statesClass_{ 0 },
-            matrixState_{ MatrixState::EMPTY },
-            alpha_{ 1.0 }
+            alpha_{ 1.0 },
+            semaphore_{ CountingSemaphore::getInstance() }
        {
        }

@@ -61,7 +52,7 @@ namespace platform {
        // --------------------------------------
        void fit(const std::vector<std::vector<int>>& X,
            const std::vector<int>& y,
-            const torch::Tensor& weights)
+            const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
        {
            int numInstances = static_cast<int>(y.size());
            nFeatures_ = static_cast<int>(X.size());
@@ -99,9 +90,6 @@ namespace platform {
            }
            childCounts_.resize(totalSize, 0.0);

-            // Switch to COUNTS mode
-            matrixState_ = MatrixState::COUNTS;
-
            // Accumulate raw counts
            for (int n = 0; n < numInstances; n++) {
                std::vector<int> instance(nFeatures_ + 1);
@@ -112,11 +100,20 @@ namespace platform {
                addSample(instance, weights[n].item<double>());
            }

-            // Laplace smoothing scaled to #instances
-            alpha_ = 1.0 / static_cast<double>(numInstances);
+            switch (smoothing) {
+                case bayesnet::Smoothing_t::ORIGINAL:
+                    alpha_ = 1.0 / numInstances;
+                    break;
+                case bayesnet::Smoothing_t::LAPLACE:
+                    alpha_ = 1.0;
+                    break;
+                default:
+                    alpha_ = 0.0; // No smoothing 
+            }
            initializer_ = initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_);
            // Convert raw counts to probabilities
            computeProbabilities();
+            fitted_ = true;
        }

        // --------------------------------------
@@ -128,9 +125,6 @@ namespace platform {
        //
        void addSample(const std::vector<int>& instance, double weight)
        {
-            if (matrixState_ != MatrixState::COUNTS) {
-                throw std::logic_error("addSample: Not in COUNTS mode!");
-            }
            if (weight <= 0.0) return;

            int c = instance.back();
@@ -167,10 +161,6 @@ namespace platform {
        // --------------------------------------
        void computeProbabilities()
        {
-            if (matrixState_ != MatrixState::COUNTS) {
-                throw std::logic_error("computeProbabilities: must be in COUNTS mode.");
-            }
-
            double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);

            // p(c) => classPriors_
@@ -225,7 +215,6 @@ namespace platform {
                }
            }

-            matrixState_ = MatrixState::PROBS;
        }

        // --------------------------------------
@@ -239,10 +228,6 @@ namespace platform {
        // --------------------------------------
        std::vector<double> predict_proba(const std::vector<int>& instance) const
        {
-            if (matrixState_ != MatrixState::PROBS) {
-                throw std::logic_error("predict_proba: the model is not in PROBS mode.");
-            }
-
            std::vector<double> probs(statesClass_, 0.0);

            // Multiply p(c) × p(x_sp | c)
@@ -270,6 +255,41 @@ namespace platform {
            normalize(probs);
            return probs;
        }
+        // Batch posteriors. test_data is indexed [feature][sample]; the result is indexed [sample][class].
+        // Samples are processed in chunks, one thread each; semaphore_ is acquired per launch and released per worker.
+        std::vector<std::vector<double>> predict_proba(const std::vector<std::vector<int>>& test_data)
+        {
+            if (test_data.empty()) return {}; // no feature rows: test_data[0] below would be out of bounds
+            int test_size = test_data[0].size();
+            int sample_size = test_data.size();
+            auto probabilities = std::vector<std::vector<double>>(test_size, std::vector<double>(statesClass_));
+            int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
+            std::vector<std::thread> threads;
+            auto worker = [&](const std::vector<std::vector<int>>& samples, int begin, int chunk, int sample_size, std::vector<std::vector<double>>& predictions) {
+                std::string threadName = "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
+#if defined(__linux__)
+                pthread_setname_np(pthread_self(), threadName.c_str());
+#else
+                pthread_setname_np(threadName.c_str());
+#endif
+                std::vector<int> instance(sample_size); // one sample gathered across all feature rows
+                for (int sample = begin; sample < begin + chunk; ++sample) {
+                    for (int feature = 0; feature < sample_size; ++feature) {
+                        instance[feature] = samples[feature][sample];
+                    }
+                    predictions[sample] = predict_proba(instance);
+                }
+                semaphore_.release(); // pairs with the acquire() done right before this thread was launched
+                };
+            for (int begin = 0; begin < test_size; begin += chunk_size) {
+                int chunk = std::min(chunk_size, test_size - begin);
+                semaphore_.acquire();
+                // std::cref: a plain argument is decay-copied by std::thread, i.e. the whole dataset once per thread
+                threads.emplace_back(worker, std::cref(test_data), begin, chunk, sample_size, std::ref(probabilities));
+            }
+            for (auto& thread : threads) thread.join();
+            return probabilities;
+        }

        // --------------------------------------
        // predict
@@ -283,13 +303,19 @@ namespace platform {
            return static_cast<int>(std::distance(p.begin(),
                std::max_element(p.begin(), p.end())));
        }
-        std::vector<int> predict(const std::vector<std::vector<int>>& X) const
+        // Predicted class index for every sample of test_data; throws std::logic_error until fit() has completed
+        std::vector<int> predict(const std::vector<std::vector<int>>& test_data)
         {
-            std::vector<int> preds;
-            for (const auto& instance : X) {
-                preds.push_back(predict(instance));
+            if (!fitted_) {
+                throw std::logic_error(CLASSIFIER_NOT_FITTED);
             }
-            return preds;
+            auto probabilities = predict_proba(test_data);
+            std::vector<int> predictions(probabilities.size(), 0);
+            // argmax over each sample's class posteriors (max_element returns the first maximum on ties)
+            for (size_t i = 0; i < probabilities.size(); i++) {
+                predictions[i] = std::distance(probabilities[i].begin(), std::max_element(probabilities[i].begin(), probabilities[i].end()));
+            }
+            return predictions;
         }

        // --------------------------------------
@@ -317,9 +343,6 @@ namespace platform {
                << "nFeatures_  = " << nFeatures_ << "\n"
                << "superParent_ = " << superParent_ << "\n"
                << "statesClass_ = " << statesClass_ << "\n"
-                << "matrixState_ = "
-                << (matrixState_ == MatrixState::EMPTY ? "EMPTY"
-                    : (matrixState_ == MatrixState::COUNTS ? "COUNTS" : "PROBS"))
                << "\n";

            oss << "States: [";
@@ -366,8 +389,11 @@ namespace platform {
        int superParent_;                  // which feature is the single super-parent
        int nFeatures_;
        int statesClass_;
+        bool fitted_ = false;
        std::vector<int> states_;          // [states_feat0, ..., states_feat(N-1)] (class not included in this array)

+        const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
+
        // Class counts
        std::vector<double> classCounts_;  // [c], accumulative
        std::vector<double> classPriors_;  // [c], after normalization
@@ -384,9 +410,9 @@ namespace platform {
        std::vector<double> childProbs_;
        std::vector<int>    childOffsets_;

-        MatrixState matrixState_;
        double alpha_ = 1.0;
        double initializer_; // for numerical stability
+        CountingSemaphore& semaphore_;
    };

 } // namespace platform
--- a/src/experimental_clfs/Xaode.hpp
+++ b/src/experimental_clfs/Xaode.hpp
@@ -9,15 +9,17 @@
 #ifndef XAODE_H
 #define XAODE_H
 #include <vector>
+#include <map>
 #include <stdexcept>
 #include <algorithm>
 #include <numeric>
-#include <iostream>
 #include <string>
 #include <cmath>
 #include <limits>
+#include <sstream>
 #include <torch/torch.h>

+
 namespace platform {
    class Xaode {
    public:
@@ -108,30 +110,39 @@ namespace platform {
                instance[nFeatures_] = y[n_instance];
                addSample(instance, weights[n_instance].item<double>());
            }
+            // alpha_ Laplace smoothing adapted to the number of instances
+            alpha_ = 1.0 / static_cast<double>(num_instances);
+            initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_);
            computeProbabilities();
        }
-        // Optional: print a quick summary
-        void show() const
+        // Builds a multi-line, human-readable dump of the model: header fields, then size and contents of each table
+        std::string to_string() const
         {
-            std::cout << "-------- Xaode.show() --------" << std::endl
+            std::ostringstream report;
+            report << "-------- Xaode.status --------" << std::endl
                 << "- nFeatures = " << nFeatures_ << std::endl
                 << "- statesClass = " << statesClass_ << std::endl
                 << "- matrixState = " << (matrixState_ == MatrixState::COUNTS ? "COUNTS" : "PROBS") << std::endl;
-            std::cout << "- states: size: " << states_.size() << std::endl;
-            for (int s : states_) std::cout << s << " "; std::cout << std::endl;
-            std::cout << "- classCounts: size: " << classCounts_.size() << std::endl;
-            for (double cc : classCounts_) std::cout << cc << " "; std::cout << std::endl;
-            std::cout << "- classFeatureCounts: size: " << classFeatureCounts_.size() << std::endl;
-            for (double cfc : classFeatureCounts_) std::cout << cfc << " "; std::cout << std::endl;
-            std::cout << "- classFeatureProbs: size: " << classFeatureProbs_.size() << std::endl;
-            for (double cfp : classFeatureProbs_) std::cout << cfp << " "; std::cout << std::endl;
-            std::cout << "- featureClassOffset: size: " << featureClassOffset_.size() << std::endl;
-            for (int f : featureClassOffset_) std::cout << f << " "; std::cout << std::endl;
-            std::cout << "- pairOffset_: size: " << pairOffset_.size() << std::endl;
-            for (int p : pairOffset_) std::cout << p << " "; std::cout << std::endl;
-            std::cout << "- data: size: " << data_.size() << std::endl;
-            for (double d : data_) std::cout << d << " "; std::cout << std::endl;
-            std::cout << "--------------------------------" << std::endl;
+            // Writes "- <label>: size: <n>" on one line, then the values, each followed by a blank, on the next
+            auto dump = [&report](const char* label, const auto& values) {
+                report << "- " << label << ": size: " << values.size() << std::endl;
+                for (const auto& value : values) {
+                    report << value << " ";
+                }
+                report << std::endl;
+            };
+            dump("states", states_);
+            dump("classCounts", classCounts_);
+            dump("classPriors", classPriors_);
+            dump("classFeatureCounts", classFeatureCounts_);
+            dump("classFeatureProbs", classFeatureProbs_);
+            dump("featureClassOffset", featureClassOffset_);
+            dump("pairOffset_", pairOffset_);
+            dump("data", data_);
+            dump("dataOpp", dataOpp_);
+            report << "--------------------------------" << std::endl;
+            // Hand the accumulated text back to the caller
+            return report.str();
         }
        // -------------------------------------------------------
        // addSample (only in COUNTS mode)
@@ -146,18 +157,7 @@ namespace platform {
            // (B) increment feature–class counts => for p(x_i|c)
            // (C) increment pair (superparent= i, child= j) counts => data_ 
            //
-
-            // if (matrixState_ != MatrixState::COUNTS) {
-            //     throw std::logic_error("addSample: not in COUNTS mode.");
-            // }
-            // if (static_cast<int>(instance.size()) != nFeatures_ + 1) {
-            //     throw std::invalid_argument("addSample: instance.size() must be nFeatures_ + 1.");
-            // }
-
            int c = instance.back();
-            // if (c < 0 || c >= statesClass_) {
-            //     throw std::out_of_range("addSample: class index out of range.");
-            // }
            if (weight <= 0.0) {
                return;
            }
@@ -166,17 +166,17 @@ namespace platform {

            // (B,C)
            // We'll store raw counts now and turn them into p(child| c, superparent) later.
-            int idx, fcIndex, si, sj, i_offset;
-            for (int i = 0; i < nFeatures_; ++i) {
-                si = instance[i];
+            int idx, fcIndex, sp, sc, i_offset;
+            for (int parent = 0; parent < nFeatures_; ++parent) {
+                sp = instance[parent];
                // (B) increment feature–class counts => for p(x_i|c)
-                fcIndex = (featureClassOffset_[i] + si) * statesClass_ + c;
+                fcIndex = (featureClassOffset_[parent] + sp) * statesClass_ + c;
                classFeatureCounts_[fcIndex] += weight;
                // (C) increment pair (superparent= i, child= j) counts => data_
-                i_offset = pairOffset_[featureClassOffset_[i] + si];
-                for (int j = 0; j < i; ++j) {
-                    sj = instance[j];
-                    idx = (i_offset + featureClassOffset_[j] + sj) * statesClass_ + c;
+                i_offset = pairOffset_[featureClassOffset_[parent] + sp];
+                for (int child = 0; child < parent; ++child) {
+                    sc = instance[child];
+                    idx = (i_offset + featureClassOffset_[child] + sc) * statesClass_ + c;
                    data_[idx] += weight;
                }
            }
@@ -205,36 +205,26 @@ namespace platform {
                }
            } else {
                for (int c = 0; c < statesClass_; ++c) {
-                    classPriors_[c] = classCounts_[c] / totalCount;
+                    classPriors_[c] = (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
                }
            }
            // (2) p(x_i=si | c) => classFeatureProbs_
            int idx, sf;
-            double denom, countVal, p;
+            double denom;
            for (int feature = 0; feature < nFeatures_; ++feature) {
                sf = states_[feature];
                for (int c = 0; c < statesClass_; ++c) {
-                    denom = classCounts_[c] * sf;
-                    if (denom <= 0.0) {
-                        // fallback => uniform
-                        for (int sf_value = 0; sf_value < sf; ++sf_value) {
-                            idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
-                            classFeatureProbs_[idx] = 1.0 / sf;
-                        }
-                    } else {
-                        for (int sf_value = 0; sf_value < sf; ++sf_value) {
-                            idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
-                            countVal = classFeatureCounts_[idx];
-                            p = ((countVal + alpha_ / (statesClass_ * states_[feature])) / (totalCount + alpha_));
-                            classFeatureProbs_[idx] = p;
-                        }
+                    denom = classCounts_[c] + alpha_ * sf;
+                    for (int sf_value = 0; sf_value < sf; ++sf_value) {
+                        idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
+                        classFeatureProbs_[idx] = (classFeatureCounts_[idx] + alpha_) / denom;
                    }
                }
            }
            // getCountFromTable(int classVal, int pIndex, int childIndex)
            // (3) p(x_c=sc | c, x_p=sp) => data_(parent,sp,child,sc,c)
            // (3) p(x_p=sp | c, x_c=sc) => dataOpp_(child,sc,parent,sp,c)
-            //                    C(x_c, x_p, c) + alpha_/Card(xp)
+            //                    C(x_c, x_p, c) + alpha_
            // P(x_p | x_c, c) = -----------------------------------
            //                           C(x_c, c) + alpha_
            double pcc_count, pc_count, cc_count;
@@ -258,10 +248,10 @@ namespace platform {
                                // Child, Class count
                                cc_count = classFeatureCounts_[part2_class + c];
                                // p(x_c=sc | c, x_p=sp)
-                                conditionalProb = (pcc_count + alpha_ / states_[parent]) / (cc_count + alpha_);
+                                conditionalProb = (pcc_count + alpha_) / (pc_count + alpha_ * states_[child]);
                                data_[idx] = conditionalProb;
                                // p(x_p=sp | c, x_c=sc)
-                                oppositeCondProb = (pcc_count + alpha_ / states_[child]) / (pc_count + alpha_);
+                                oppositeCondProb = (pcc_count + alpha_) / (cc_count + alpha_ * states_[parent]);
                                dataOpp_[idx] = oppositeCondProb;
                            }
                        }
@@ -286,30 +276,39 @@ namespace platform {
        {
            // accumulates posterior probabilities for each class
            auto probs = std::vector<double>(statesClass_);
-            auto spodeProbs = std::vector<double>(statesClass_);
+            auto spodeProbs = std::vector<double>(statesClass_, 0.0);
+            if (std::find(active_parents.begin(), active_parents.end(), parent) == active_parents.end()) {
+                return spodeProbs;
+            }
            // Initialize the probabilities with the feature|class probabilities x class priors
            int localOffset;
            int sp = instance[parent];
            localOffset = (featureClassOffset_[parent] + sp) * statesClass_;
            for (int c = 0; c < statesClass_; ++c) {
-                spodeProbs[c] = classFeatureProbs_[localOffset + c] * classPriors_[c];
+                spodeProbs[c] = classFeatureProbs_[localOffset + c] * classPriors_[c] * initializer_;
            }
            int idx, base, sc, parent_offset;
-            sp = instance[parent];
-            parent_offset = pairOffset_[featureClassOffset_[parent] + sp];
            for (int child = 0; child < nFeatures_; ++child) {
                if (child == parent) {
                    continue;
                }
                sc = instance[child];
-                base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_;
+                if (child > parent) {
+                    parent_offset = pairOffset_[featureClassOffset_[child] + sc];
+                    base = (parent_offset + featureClassOffset_[parent] + sp) * statesClass_;
+                } else {
+                    parent_offset = pairOffset_[featureClassOffset_[parent] + sp];
+                    base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_;
+                }
                for (int c = 0; c < statesClass_; ++c) {
                    /*
                    * The probability P(xc|xp,c) is stored in dataOpp_, and
                    * the probability P(xp|xc,c) is stored in data_
                    */
                    idx = base + c;
-                    spodeProbs[c] *= child < parent ? dataOpp_[idx] : data_[idx];
+                    double factor = child > parent ? dataOpp_[idx] : data_[idx];
+                    // double factor = data_[idx];
+                    spodeProbs[c] *= factor;
                }
            }
            // Normalize the probabilities
@@ -345,7 +344,7 @@ namespace platform {
                }
                localOffset = (featureClassOffset_[feature] + instance[feature]) * statesClass_;
                for (int c = 0; c < statesClass_; ++c) {
-                    spodeProbs[feature][c] = classFeatureProbs_[localOffset + c] * classPriors_[c];
+                    spodeProbs[feature][c] = classFeatureProbs_[localOffset + c] * classPriors_[c] * initializer_;
                }
            }
            int idx, base, sp, sc, parent_offset;
@@ -358,15 +357,23 @@ namespace platform {
                parent_offset = pairOffset_[featureClassOffset_[parent] + sp];
                for (int child = 0; child < parent; ++child) {
                    sc = instance[child];
-                    base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_;
+                    if (child > parent) {
+                        parent_offset = pairOffset_[featureClassOffset_[child] + sc];
+                        base = (parent_offset + featureClassOffset_[parent] + sp) * statesClass_;
+                    } else {
+                        parent_offset = pairOffset_[featureClassOffset_[parent] + sp];
+                        base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_;
+                    }
                    for (int c = 0; c < statesClass_; ++c) {
                        /*
                         * The probability P(xc|xp,c) is stored in dataOpp_, and
                         * the probability P(xp|xc,c) is stored in data_
                         */
                        idx = base + c;
-                        spodeProbs[child][c] *= data_[idx];
-                        spodeProbs[parent][c] *= dataOpp_[idx];
+                        double factor_child = child > parent ? data_[idx] : dataOpp_[idx];
+                        double factor_parent = child > parent ? dataOpp_[idx] : data_[idx];
+                        spodeProbs[child][c] *= factor_child;
+                        spodeProbs[parent][c] *= factor_parent;
                    }
                }
            }
@@ -454,7 +461,8 @@ namespace platform {

        MatrixState matrixState_;

-        double alpha_ = 1.0;
+        double alpha_ = 1.0; // Laplace smoothing
+        double initializer_ = 1.0;
        std::vector<int> active_parents;
    };
 }
--- a/src/experimental_clfs/Xaode2.hpp
+++ b/src/experimental_clfs/Xaode2.hpp
@@ -1,469 +0,0 @@
-// ***************************************************************
-// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
-// SPDX-FileType: SOURCE
-// SPDX-License-Identifier: MIT
-// ***************************************************************
-// Based on the Geoff. I. Webb A1DE java algorithm
-// https://weka.sourceforge.io/packageMetaData/AnDE/Latest.html
-
-#ifndef XAODE2_H
-#define XAODE2_H
-#include <vector>
-#include <map>
-#include <stdexcept>
-#include <algorithm>
-#include <numeric>
-#include <string>
-#include <cmath>
-#include <limits>
-#include <sstream>
-
-#include <iostream>
-
-namespace platform {
-    class Xaode2 {
-    public:
-        // -------------------------------------------------------
-        // The Xaode can be EMPTY (just created), in COUNTS mode (accumulating raw counts)
-        // or PROBS mode (storing conditional probabilities).
-        enum class MatrixState {
-            EMPTY,
-            COUNTS,
-            PROBS
-        };
-        std::vector<double> significance_models_;
-        Xaode2() : nFeatures_{ 0 }, statesClass_{ 0 }, matrixState_{ MatrixState::EMPTY } {}
-        // -------------------------------------------------------
-        // fit
-        // -------------------------------------------------------
-        //
-        // Classifiers interface
-        // all parameter decide if the model is initialized with all the parents active or none of them
-        //
-        // states.size() = nFeatures + 1,
-        //   where states.back() = number of class states.
-        //
-        // We'll store:
-        //  1) p(x_i=si | c) in classFeatureProbs_
-        //  2) p(x_j=sj | c, x_i=si) in data_, with i<j => i is "superparent," j is "child."
-        //
-        // Internally, in COUNTS mode, data_ accumulates raw counts, then
-        // computeProbabilities(...) normalizes them into conditionals.
-        void fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const bool all_parents)
-        {
-            int num_instances = X[0].size();
-            nFeatures_ = X.size();
-
-            significance_models_.resize(nFeatures_, (all_parents ? 1.0 : 0.0));
-            for (int i = 0; i < nFeatures_; i++) {
-                if (all_parents) active_parents.push_back(i);
-                states_.push_back(*max_element(X[i].begin(), X[i].end()) + 1);
-            }
-            states_.push_back(*max_element(y.begin(), y.end()) + 1);
-            //
-            statesClass_ = states_.back();
-            classCounts_.resize(statesClass_, 0.0);
-            classPriors_.resize(statesClass_, 0.0);
-            //
-            // Initialize data structures
-            //
-            active_parents.resize(nFeatures_);
-            int totalStates = std::accumulate(states_.begin(), states_.end(), 0) - statesClass_;
-
-            // For p(x_i=si | c), we store them in a 1D array classFeatureProbs_ after we compute.
-            // We'll need the offsets for each feature i in featureClassOffset_.
-            featureClassOffset_.resize(nFeatures_);
-            // We'll store p(x_child=sj | c, x_sp=si) for each pair (i<j).
-            // So data_(i, si, j, sj, c) indexes into a big 1D array with an offset.
-            // For p(x_i=si | c), we store them in a 1D array classFeatureProbs_ after we compute.
-            // We'll need the offsets for each feature i in featureClassOffset_.
-            featureClassOffset_.resize(nFeatures_);
-            pairOffset_.resize(totalStates);
-            int feature_offset = 0;
-            int runningOffset = 0;
-            int feature = 0, index = 0;
-            for (int i = 0; i < nFeatures_; ++i) {
-                featureClassOffset_[i] = feature_offset;
-                feature_offset += states_[i];
-                for (int j = 0; j < states_[i]; ++j) {
-                    pairOffset_[feature++] = index;
-                    index += runningOffset;
-                }
-                runningOffset += states_[i];
-            }
-            int totalSize = index * statesClass_;
-            data_.resize(totalSize);
-            dataOpp_.resize(totalSize);
-
-            classFeatureCounts_.resize(feature_offset * statesClass_);
-            classFeatureProbs_.resize(feature_offset * statesClass_);
-
-            matrixState_ = MatrixState::COUNTS;
-            //
-            // Add samples
-            //
-            std::vector<int> instance(nFeatures_ + 1);
-            for (int n_instance = 0; n_instance < num_instances; n_instance++) {
-                for (int feature = 0; feature < nFeatures_; feature++) {
-                    instance[feature] = X[feature][n_instance];
-                }
-                instance[nFeatures_] = y[n_instance];
-                addSample(instance, weights[n_instance].item<double>());
-            }
-            // alpha_ Laplace smoothing adapted to the number of instances
-            alpha_ = 1.0 / static_cast<double>(num_instances);
-            initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_);
-            computeProbabilities();
-        }
-        std::string to_string() const
-        {
-            std::ostringstream ostream;
-            ostream << "-------- Xaode.status --------" << std::endl
-                << "- nFeatures = " << nFeatures_ << std::endl
-                << "- statesClass = " << statesClass_ << std::endl
-                << "- matrixState = " << (matrixState_ == MatrixState::COUNTS ? "COUNTS" : "PROBS") << std::endl;
-            ostream << "- states: size: " << states_.size() << std::endl;
-            for (int s : states_) ostream << s << " "; ostream << std::endl;
-            ostream << "- classCounts: size: " << classCounts_.size() << std::endl;
-            for (double cc : classCounts_) ostream << cc << " "; ostream << std::endl;
-            ostream << "- classPriors: size: " << classPriors_.size() << std::endl;
-            for (double cp : classPriors_) ostream << cp << " "; ostream << std::endl;
-            ostream << "- classFeatureCounts: size: " << classFeatureCounts_.size() << std::endl;
-            for (double cfc : classFeatureCounts_) ostream << cfc << " "; ostream << std::endl;
-            ostream << "- classFeatureProbs: size: " << classFeatureProbs_.size() << std::endl;
-            for (double cfp : classFeatureProbs_) ostream << cfp << " "; ostream << std::endl;
-            ostream << "- featureClassOffset: size: " << featureClassOffset_.size() << std::endl;
-            for (int f : featureClassOffset_) ostream << f << " "; ostream << std::endl;
-            ostream << "- pairOffset_: size: " << pairOffset_.size() << std::endl;
-            for (int p : pairOffset_) ostream << p << " "; ostream << std::endl;
-            ostream << "- data: size: " << data_.size() << std::endl;
-            for (double d : data_) ostream << d << " "; ostream << std::endl;
-            ostream << "- dataOpp: size: " << dataOpp_.size() << std::endl;
-            for (double d : dataOpp_) ostream << d << " "; ostream << std::endl;
-            ostream << "--------------------------------" << std::endl;
-            std::string output = ostream.str();
-            return output;
-        }
-        // -------------------------------------------------------
-        // addSample (only in COUNTS mode)
-        // -------------------------------------------------------
-        // 
-        // instance should have the class at the end.
-        // 
-        void addSample(const std::vector<int>& instance, double weight)
-        {
-            //
-            // (A) increment classCounts_
-            // (B) increment feature–class counts => for p(x_i|c)
-            // (C) increment pair (superparent= i, child= j) counts => data_ 
-            //
-            int c = instance.back();
-            if (weight <= 0.0) {
-                return;
-            }
-            // (A) increment classCounts_
-            classCounts_[c] += weight;
-
-            // (B,C)
-            // We'll store raw counts now and turn them into p(child| c, superparent) later.
-            int idx, fcIndex, sp, sc, i_offset;
-            for (int parent = 0; parent < nFeatures_; ++parent) {
-                sp = instance[parent];
-                // (B) increment feature–class counts => for p(x_i|c)
-                fcIndex = (featureClassOffset_[parent] + sp) * statesClass_ + c;
-                classFeatureCounts_[fcIndex] += weight;
-                // (C) increment pair (superparent= i, child= j) counts => data_
-                i_offset = pairOffset_[featureClassOffset_[parent] + sp];
-                for (int child = 0; child < parent; ++child) {
-                    sc = instance[child];
-                    idx = (i_offset + featureClassOffset_[child] + sc) * statesClass_ + c;
-                    data_[idx] += weight;
-                }
-            }
-        }
-        // -------------------------------------------------------
-        // computeProbabilities
-        // -------------------------------------------------------
-        //
-        // Once all samples are added in COUNTS mode, call this to:
-        //  1) compute p(c) => classPriors_
-        //  2) compute p(x_i=si | c) => classFeatureProbs_
-        //  3) compute p(x_j=sj | c, x_i=si) => data_ (for i<j) dataOpp_ (for i>j)
-        //
-        void computeProbabilities()
-        {
-            if (matrixState_ != MatrixState::COUNTS) {
-                throw std::logic_error("computeProbabilities: must be in COUNTS mode.");
-            }
-            double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
-            // (1) p(c)
-            if (totalCount <= 0.0) {
-                // fallback => uniform
-                double unif = 1.0 / statesClass_;
-                for (int c = 0; c < statesClass_; ++c) {
-                    classPriors_[c] = unif;
-                }
-            } else {
-                for (int c = 0; c < statesClass_; ++c) {
-                    classPriors_[c] = (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
-                }
-            }
-            // (2) p(x_i=si | c) => classFeatureProbs_
-            int idx, sf;
-            double denom;
-            for (int feature = 0; feature < nFeatures_; ++feature) {
-                sf = states_[feature];
-                for (int c = 0; c < statesClass_; ++c) {
-                    denom = classCounts_[c] + alpha_ * sf;
-                    for (int sf_value = 0; sf_value < sf; ++sf_value) {
-                        idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
-                        classFeatureProbs_[idx] = (classFeatureCounts_[idx] + alpha_) / denom;
-                    }
-                }
-            }
-            // getCountFromTable(int classVal, int pIndex, int childIndex)
-            // (3) p(x_c=sc | c, x_p=sp) => data_(parent,sp,child,sc,c)
-            // (3) p(x_p=sp | c, x_c=sc) => dataOpp_(child,sc,parent,sp,c)
-            //                    C(x_c, x_p, c) + alpha_
-            // P(x_p | x_c, c) = -----------------------------------
-            //                           C(x_c, c) + alpha_
-            double pcc_count, pc_count, cc_count;
-            double conditionalProb, oppositeCondProb;
-            int part1, part2, p1, part2_class, p1_class;
-            for (int parent = 1; parent < nFeatures_; ++parent) {
-                for (int sp = 0; sp < states_[parent]; ++sp) {
-                    p1 = featureClassOffset_[parent] + sp;
-                    part1 = pairOffset_[p1];
-                    p1_class = p1 * statesClass_;
-                    for (int child = 0; child < parent; ++child) {
-                        for (int sc = 0; sc < states_[child]; ++sc) {
-                            part2 = featureClassOffset_[child] + sc;
-                            part2_class = part2 * statesClass_;
-                            for (int c = 0; c < statesClass_; c++) {
-                                idx = (part1 + part2) * statesClass_ + c;
-                                // Parent, Child, Class Count
-                                pcc_count = data_[idx];
-                                // Parent, Class count
-                                pc_count = classFeatureCounts_[p1_class + c];
-                                // Child, Class count
-                                cc_count = classFeatureCounts_[part2_class + c];
-                                // p(x_c=sc | c, x_p=sp)
-                                conditionalProb = (pcc_count + alpha_) / (pc_count + alpha_ * states_[child]);
-                                data_[idx] = conditionalProb;
-                                // p(x_p=sp | c, x_c=sc)
-                                oppositeCondProb = (pcc_count + alpha_) / (cc_count + alpha_ * states_[parent]);
-                                dataOpp_[idx] = oppositeCondProb;
-                            }
-                        }
-                    }
-                }
-            }
-            matrixState_ = MatrixState::PROBS;
-        }
-        // -------------------------------------------------------
-        // predict_proba_spode
-        // -------------------------------------------------------
-        //
-        // Single-superparent approach:
-        // P(c | x) ∝ p(c) * p(x_sp| c) * ∏_{i≠sp} p(x_i | c, x_sp)
-        //
-        // 'instance' should have size == nFeatures_ (no class).
-        // sp in [0..nFeatures_).
-        // We multiply p(c) * p(x_sp| c) * p(x_i| c, x_sp).
-        // Then normalize the distribution.
-        //
-        std::vector<double> predict_proba_spode(const std::vector<int>& instance, int parent)
-        {
-            // accumulates posterior probabilities for each class
-            auto probs = std::vector<double>(statesClass_);
-            auto spodeProbs = std::vector<double>(statesClass_, 0.0);
-            if (std::find(active_parents.begin(), active_parents.end(), parent) == active_parents.end()) {
-                return spodeProbs;
-            }
-            // Initialize the probabilities with the feature|class probabilities x class priors
-            int localOffset;
-            int sp = instance[parent];
-            localOffset = (featureClassOffset_[parent] + sp) * statesClass_;
-            for (int c = 0; c < statesClass_; ++c) {
-                spodeProbs[c] = classFeatureProbs_[localOffset + c] * classPriors_[c] * initializer_;
-            }
-            int idx, base, sc, parent_offset;
-            for (int child = 0; child < nFeatures_; ++child) {
-                if (child == parent) {
-                    continue;
-                }
-                sc = instance[child];
-                if (child > parent) {
-                    parent_offset = pairOffset_[featureClassOffset_[child] + sc];
-                    base = (parent_offset + featureClassOffset_[parent] + sp) * statesClass_;
-                } else {
-                    parent_offset = pairOffset_[featureClassOffset_[parent] + sp];
-                    base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_;
-                }
-                for (int c = 0; c < statesClass_; ++c) {
-                    /*
-                    * The probability P(xc|xp,c) is stored in dataOpp_, and
-                    * the probability P(xp|xc,c) is stored in data_
-                    */
-                    idx = base + c;
-                    double factor = child > parent ? dataOpp_[idx] : data_[idx];
-                    // double factor = data_[idx];
-                    spodeProbs[c] *= factor;
-                }
-            }
-            // Normalize the probabilities
-            normalize(spodeProbs);
-            return spodeProbs;
-        }
-        int predict_spode(const std::vector<int>& instance, int parent)
-        {
-            auto probs = predict_proba_spode(instance, parent);
-            return (int)std::distance(probs.begin(), std::max_element(probs.begin(), probs.end()));
-        }
-        // -------------------------------------------------------
-        // predict_proba
-        // -------------------------------------------------------
-        //
-        // P(c | x) ∝ p(c) * ∏_{i} p(x_i | c) * ∏_{i<j} p(x_j | c, x_i) * p(x_i | c, x_j)
-        //
-        // 'instance' should have size == nFeatures_ (no class).
-        // We multiply p(c) * p(x_i| c) * p(x_j| c, x_i) for all i, j.
-        // Then normalize the distribution.
-        //
-        std::vector<double> predict_proba(const std::vector<int>& instance)
-        {
-            // accumulates posterior probabilities for each class
-            auto probs = std::vector<double>(statesClass_);
-            auto spodeProbs = std::vector<std::vector<double>>(nFeatures_, std::vector<double>(statesClass_));
-            // Initialize the probabilities with the feature|class probabilities
-            int localOffset;
-            for (int feature = 0; feature < nFeatures_; ++feature) {
-                // if feature is not in the active_parents, skip it
-                if (std::find(active_parents.begin(), active_parents.end(), feature) == active_parents.end()) {
-                    continue;
-                }
-                localOffset = (featureClassOffset_[feature] + instance[feature]) * statesClass_;
-                for (int c = 0; c < statesClass_; ++c) {
-                    spodeProbs[feature][c] = classFeatureProbs_[localOffset + c] * classPriors_[c] * initializer_;
-                }
-            }
-            int idx, base, sp, sc, parent_offset;
-            for (int parent = 1; parent < nFeatures_; ++parent) {
-                // if parent is not in the active_parents, skip it
-                if (std::find(active_parents.begin(), active_parents.end(), parent) == active_parents.end()) {
-                    continue;
-                }
-                sp = instance[parent];
-                parent_offset = pairOffset_[featureClassOffset_[parent] + sp];
-                for (int child = 0; child < parent; ++child) {
-                    sc = instance[child];
-                    if (child > parent) {
-                        parent_offset = pairOffset_[featureClassOffset_[child] + sc];
-                        base = (parent_offset + featureClassOffset_[parent] + sp) * statesClass_;
-                    } else {
-                        parent_offset = pairOffset_[featureClassOffset_[parent] + sp];
-                        base = (parent_offset + featureClassOffset_[child] + sc) * statesClass_;
-                    }
-                    for (int c = 0; c < statesClass_; ++c) {
-                        /*
-                         * The probability P(xc|xp,c) is stored in dataOpp_, and
-                         * the probability P(xp|xc,c) is stored in data_
-                         */
-                        idx = base + c;
-                        double factor_child = child > parent ? data_[idx] : dataOpp_[idx];
-                        double factor_parent = child > parent ? dataOpp_[idx] : data_[idx];
-                        spodeProbs[child][c] *= factor_child;
-                        spodeProbs[parent][c] *= factor_parent;
-                    }
-                }
-            }
-            /* add all the probabilities for each class */
-            for (int c = 0; c < statesClass_; ++c) {
-                for (int i = 0; i < nFeatures_; ++i) {
-                    probs[c] += spodeProbs[i][c] * significance_models_[i];
-                }
-            }
-            // Normalize the probabilities
-            normalize(probs);
-            return probs;
-        }
-        void normalize(std::vector<double>& probs) const
-        {
-            double sum = std::accumulate(probs.begin(), probs.end(), 0.0);
-            if (std::isnan(sum)) {
-                throw std::runtime_error("Can't normalize array. Sum is NaN.");
-            }
-            if (sum == 0) {
-                return;
-            }
-            for (int i = 0; i < (int)probs.size(); i++) {
-                probs[i] /= sum;
-            }
-        }
-        // Returns current mode: INIT, COUNTS or PROBS
-        MatrixState state() const
-        {
-            return matrixState_;
-        }
-        int statesClass() const
-        {
-            return statesClass_;
-        }
-        int nFeatures() const
-        {
-            return nFeatures_;
-        }
-        int getNumberOfStates() const
-        {
-            return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
-        }
-        int getNumberOfEdges() const
-        {
-            return nFeatures_ * (2 * nFeatures_ - 1);
-        }
-        int getNumberOfNodes() const
-        {
-            return (nFeatures_ + 1) * nFeatures_;
-        }
-        void add_active_parent(int active_parent)
-        {
-            active_parents.push_back(active_parent);
-        }
-        void remove_last_parent()
-        {
-            active_parents.pop_back();
-        }
-
-    private:
-        // -----------
-        // MEMBER DATA
-        // -----------
-        std::vector<int> states_;            // [states_feat0, ..., states_feat(n-1), statesClass_]
-        int nFeatures_;
-        int statesClass_;
-
-        // data_ means p(child=sj | c, superparent= si) after normalization.
-        // But in COUNTS mode, it accumulates raw counts.
-        std::vector<int> pairOffset_;
-        // data_ stores p(child=sj | c, superparent=si) for each pair (i<j).
-        std::vector<double> data_;
-        // dataOpp_ stores p(superparent=si | c, child=sj) for each pair (i<j).
-        std::vector<double> dataOpp_;
-
-        // classCounts_[c]
-        std::vector<double> classCounts_;
-        std::vector<double> classPriors_;       // => p(c)
-
-        // For p(x_i=si| c), we store counts in classFeatureCounts_ => offset by featureClassOffset_[i]
-        std::vector<int> featureClassOffset_;
-        std::vector<double> classFeatureCounts_;
-        std::vector<double> classFeatureProbs_;  // => p(x_i=si | c) after normalization
-
-        MatrixState matrixState_;
-
-        double alpha_ = 1.0; // Laplace smoothing
-        double initializer_ = 1.0;
-        std::vector<int> active_parents;
-    };
-}
-#endif // XAODE2_H