From b2002d341c94c94b85d1776280c217200c1b6cfc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?=
 <rmontanana@gmail.com>
Date: Mon, 3 Mar 2025 12:38:05 +0100
Subject: [PATCH] Create Xaode2 and add initializer factor in predict

---
 Makefile                         |   2 +-
 src/experimental_clfs/ExpClf.h   |   4 +-
 src/experimental_clfs/XBAODE.h   |   1 -
 src/experimental_clfs/Xaode2.hpp | 464 +++++++++++++++++++++++++++++++
 4 files changed, 468 insertions(+), 3 deletions(-)
 create mode 100644 src/experimental_clfs/Xaode2.hpp
diff --git a/Makefile b/Makefile
index 57d53a7..59c603b 100644
--- a/Makefile
+++ b/Makefile
@@ -38,7 +38,7 @@ setup: ## Install dependencies for tests and coverage
 	fi
 
 dest ?= ${HOME}/bin
-main: ## Build the main target
+main: ## Build only the b_main target
 	@cmake --build $(f_release) -t b_main --parallel
 	@cp $(f_release)/src/b_main $(dest)
 
diff --git a/src/experimental_clfs/ExpClf.h b/src/experimental_clfs/ExpClf.h
index 82098ba..abc7d04 100644
--- a/src/experimental_clfs/ExpClf.h
+++ b/src/experimental_clfs/ExpClf.h
@@ -15,6 +15,7 @@
 #include "common/Timer.hpp"
 #include "CountingSemaphore.hpp"
 #include "Xaode.hpp"
+#include "Xaode2.hpp"
 
 namespace platform {
     class ExpClf : public bayesnet::Boost {
@@ -44,7 +45,8 @@ namespace platform {
         void remove_last_parent();
     protected:
         bool debug = false;
-        Xaode aode_;
+        // Xaode aode;
+        Xaode2 aode_;
         torch::Tensor weights_;
         const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
         inline void normalize_weights(int num_instances)
diff --git a/src/experimental_clfs/XBAODE.h b/src/experimental_clfs/XBAODE.h
index 13951ac..77bc427 100644
--- a/src/experimental_clfs/XBAODE.h
+++ b/src/experimental_clfs/XBAODE.h
@@ -18,7 +18,6 @@ namespace platform {
     class XBAODE : public ExpClf {
     public:
         XBAODE();
-        virtual ~XBAODE() override = default;
         std::string getVersion() override { return version; };
     protected:
         void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
diff --git a/src/experimental_clfs/Xaode2.hpp b/src/experimental_clfs/Xaode2.hpp
new file mode 100644
index 0000000..dd5f15d
--- /dev/null
+++ b/src/experimental_clfs/Xaode2.hpp
@@ -0,0 +1,464 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+// Based on the Geoff. I. Webb A1DE java algorithm
+// https://weka.sourceforge.io/packageMetaData/AnDE/Latest.html
+
+#ifndef XAODE2_H
+#define XAODE2_H
+#include <vector>
+#include <stdexcept>
+#include <algorithm>
+#include <numeric>
+#include <iostream>
+#include <string>
+#include <cmath>
+#include <limits>
+#include <torch/torch.h>
+
+namespace platform {
+    class Xaode2 {
+    public:
+        // -------------------------------------------------------
+        // Lifecycle of the tables: EMPTY (just constructed), COUNTS (fit/addSample accumulate
+        // weighted counts) and PROBS (computeProbabilities has turned the counts into probabilities).
+        enum class MatrixState {
+            EMPTY,   // set by the constructor
+            COUNTS,  // set by fit() before the samples are added
+            PROBS    // set at the end of computeProbabilities()
+        };
+        std::vector<double> significance_models_; // per-feature weight applied to each SPODE in predict_proba
+        Xaode2() : nFeatures_{ 0 }, statesClass_{ 0 }, matrixState_{ MatrixState::EMPTY } {} // starts with no features and no tables
+        // -------------------------------------------------------
+        // fit
+        // -------------------------------------------------------
+        //
+        // Classifiers interface
+        // the all_parents parameter decides whether the model starts with every parent active or with none
+        //
+        // states.size() = nFeatures + 1,
+        //   where states.back() = number of class states.
+        //
+        // We'll store:
+        //  1) p(x_i=si | c) in classFeatureProbs_
+        //  2) p(x_j=sj | c, x_i=si) in data_, with i<j => i is "superparent," j is "child."
+        //
+        // Internally, in COUNTS mode, data_ accumulates raw counts, then
+        // computeProbabilities(...) normalizes them into conditionals.
+        void fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const bool all_parents)
+        {
+            // X is feature-major (X[feature][sample]); features, className and states are not read here.
+            // X[0] and max_element() below are undefined on empty input, so reject it explicitly.
+            if (X.empty() || X[0].empty() || y.empty()) {
+                throw std::invalid_argument("Xaode2::fit: X and y must not be empty.");
+            }
+            int num_instances = X[0].size();
+            nFeatures_ = X.size();
+            significance_models_.resize(nFeatures_, (all_parents ? 1.0 : 0.0));
+            // reserve, not resize: resize padded active_parents with phantom entries for feature 0.
+            active_parents.reserve(nFeatures_);
+            for (int i = 0; i < nFeatures_; i++) {
+                if (all_parents) active_parents.push_back(i);
+                states_.push_back(*std::max_element(X[i].begin(), X[i].end()) + 1);
+            }
+            states_.push_back(*std::max_element(y.begin(), y.end()) + 1);
+            statesClass_ = states_.back();
+            classCounts_.resize(statesClass_, 0.0);
+            classPriors_.resize(statesClass_, 0.0);
+            //
+            // Initialize data structures
+            //
+            int totalStates = std::accumulate(states_.begin(), states_.end(), 0) - statesClass_;
+            // featureClassOffset_[i]: index of the first state of feature i when the states of all
+            // features are laid out consecutively (used by the per-state, per-class tables).
+            featureClassOffset_.resize(nFeatures_);
+            // pairOffset_[state]: first cell of that state's row in data_/dataOpp_; the row has one
+            // cell per state of every lower-indexed feature and each cell holds statesClass_ values.
+            pairOffset_.resize(totalStates);
+            int feature_offset = 0;
+            int runningOffset = 0;
+            int stateIndex = 0, index = 0;
+            for (int i = 0; i < nFeatures_; ++i) {
+                featureClassOffset_[i] = feature_offset;
+                feature_offset += states_[i];
+                for (int j = 0; j < states_[i]; ++j) {
+                    pairOffset_[stateIndex++] = index;
+                    index += runningOffset;
+                }
+                runningOffset += states_[i];
+            }
+            int totalSize = index * statesClass_;
+            data_.resize(totalSize);
+            dataOpp_.resize(totalSize);
+
+            classFeatureCounts_.resize(feature_offset * statesClass_);
+            classFeatureProbs_.resize(feature_offset * statesClass_);
+
+            matrixState_ = MatrixState::COUNTS;
+            //
+            // Add samples
+            //
+            std::vector<int> instance(nFeatures_ + 1);
+            for (int n_instance = 0; n_instance < num_instances; n_instance++) {
+                for (int feature = 0; feature < nFeatures_; feature++) {
+                    instance[feature] = X[feature][n_instance];
+                }
+                instance[nFeatures_] = y[n_instance];
+                addSample(instance, weights[n_instance].item<double>());
+            }
+            // Scale factor used by predict_proba_spode; computed in double so the square cannot overflow int.
+            initializer_ = std::numeric_limits<double>::max() / (static_cast<double>(nFeatures_) * nFeatures_);
+            computeProbabilities();
+        }
+        // Debug dump of the dimensions, the matrix state (EMPTY is no longer reported as PROBS) and every table to std::cout.
+        void show() const
+        {
+            std::cout << "-------- Xaode.show() --------" << std::endl
+                << "- nFeatures = " << nFeatures_ << std::endl
+                << "- statesClass = " << statesClass_ << std::endl
+                << "- matrixState = " << (matrixState_ == MatrixState::EMPTY ? "EMPTY" : matrixState_ == MatrixState::COUNTS ? "COUNTS" : "PROBS") << std::endl;
+            std::cout << "- states: size: " << states_.size() << std::endl;
+            for (int s : states_) { std::cout << s << " "; } std::cout << std::endl;
+            std::cout << "- classCounts: size: " << classCounts_.size() << std::endl;
+            for (double cc : classCounts_) { std::cout << cc << " "; } std::cout << std::endl;
+            std::cout << "- classFeatureCounts: size: " << classFeatureCounts_.size() << std::endl;
+            for (double cfc : classFeatureCounts_) { std::cout << cfc << " "; } std::cout << std::endl;
+            std::cout << "- classFeatureProbs: size: " << classFeatureProbs_.size() << std::endl;
+            for (double cfp : classFeatureProbs_) { std::cout << cfp << " "; } std::cout << std::endl;
+            std::cout << "- featureClassOffset: size: " << featureClassOffset_.size() << std::endl;
+            for (int f : featureClassOffset_) { std::cout << f << " "; } std::cout << std::endl;
+            std::cout << "- pairOffset_: size: " << pairOffset_.size() << std::endl;
+            for (int p : pairOffset_) { std::cout << p << " "; } std::cout << std::endl;
+            std::cout << "- data: size: " << data_.size() << std::endl;
+            for (double d : data_) { std::cout << d << " "; } std::cout << std::endl;
+            std::cout << "--------------------------------" << std::endl;
+        }
+        // -------------------------------------------------------
+        // addSample (only in COUNTS mode)
+        // -------------------------------------------------------
+        // 
+        // instance should have the class at the end.
+        // 
+        void addSample(const std::vector<int>& instance, double weight)
+        {
+            // Adds one weighted observation to the raw count tables:
+            //   (A) classCounts_        : weight of the sample's class
+            //   (B) classFeatureCounts_ : weight of each (feature value, class) combination
+            //   (C) data_               : weight of each (feature value, value of a lower-indexed
+            //                             feature, class) combination
+            //
+            // Nothing is validated here; the caller is trusted to make sure that
+            //   - the object is in COUNTS mode,
+            //   - instance.size() == nFeatures_ + 1, with the class value in the last position,
+            //   - every value is a valid state index for its feature (or for the class).
+
+            const int classValue = instance.back();
+
+            // A weight that is zero or negative leaves every table untouched.
+            if (weight <= 0.0) {
+                return;
+            }
+
+            // (A)
+            classCounts_[classValue] += weight;
+
+            // (B) and (C), one feature at a time
+            for (int parent = 0; parent < nFeatures_; ++parent) {
+                // Position of this feature value when the states of all features are laid out
+                // one after another.
+                const int parentState = featureClassOffset_[parent] + instance[parent];
+
+                // (B)
+                const int featureCell = parentState * statesClass_ + classValue;
+                classFeatureCounts_[featureCell] += weight;
+
+                // (C) this state's row has one cell per state of every lower-indexed feature
+                const int rowStart = pairOffset_[parentState];
+                for (int child = 0; child < parent; ++child) {
+                    const int childState = featureClassOffset_[child] + instance[child];
+                    const int pairCell = (rowStart + childState) * statesClass_ + classValue;
+                    data_[pairCell] += weight;
+                }
+            }
+        }
+        // -------------------------------------------------------
+        // computeProbabilities
+        // -------------------------------------------------------
+        //
+        // Once all samples are added in COUNTS mode, call this to:
+        //  1) compute p(c) => classPriors_
+        //  2) compute p(x_i=si | c) => classFeatureProbs_
+        //  3) compute p(x_j=sj | c, x_i=si) => data_ (for i<j) dataOpp_ (for i>j)
+        //
+        void computeProbabilities() // turns the accumulated counts into probabilities; throws std::logic_error outside COUNTS mode
+        {
+            if (matrixState_ != MatrixState::COUNTS) {
+                throw std::logic_error("computeProbabilities: must be in COUNTS mode.");
+            }
+            double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
+            // (1) p(c) => classPriors_
+            if (totalCount <= 0.0) {
+                // no weight was accumulated at all: fall back to a uniform prior
+                double unif = 1.0 / statesClass_;
+                for (int c = 0; c < statesClass_; ++c) {
+                    classPriors_[c] = unif;
+                }
+            } else {
+                for (int c = 0; c < statesClass_; ++c) {
+                    classPriors_[c] = classCounts_[c] / totalCount;
+                }
+            }
+            // (2) classFeatureProbs_. NOTE(review): the smoothed estimate divides by totalCount, not classCounts_[c] - confirm intended
+            int idx, sf;
+            double denom, countVal, p;
+            for (int feature = 0; feature < nFeatures_; ++feature) {
+                sf = states_[feature];
+                for (int c = 0; c < statesClass_; ++c) {
+                    denom = classCounts_[c] * sf;
+                    if (denom <= 0.0) {
+                        // class c received no weight: every state of the feature gets 1 / sf
+                        for (int sf_value = 0; sf_value < sf; ++sf_value) {
+                            idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
+                            classFeatureProbs_[idx] = 1.0 / sf;
+                        }
+                    } else {
+                        for (int sf_value = 0; sf_value < sf; ++sf_value) {
+                            idx = (featureClassOffset_[feature] + sf_value) * statesClass_ + c;
+                            countVal = classFeatureCounts_[idx];
+                            p = ((countVal + alpha_ / (statesClass_ * states_[feature])) / (totalCount + alpha_));
+                            classFeatureProbs_[idx] = p;
+                        }
+                    }
+                }
+            }
+            // (3) conditional tables for every pair of features with child < parent:
+            //     p(x_p=sp | c, x_c=sc) => data_(parent,sp,child,sc,c)
+            //     p(x_c=sc | c, x_p=sp) => dataOpp_(parent,sp,child,sc,c)
+            //                    C(x_c, x_p, c) + alpha_/Card(xp)
+            // P(x_p | x_c, c) = -----------------------------------
+            //                           C(x_c, c) + alpha_
+            double pcc_count, pc_count, cc_count;
+            double conditionalProb, oppositeCondProb;
+            int part1, part2, p1, part2_class, p1_class;
+            for (int parent = 1; parent < nFeatures_; ++parent) {
+                for (int sp = 0; sp < states_[parent]; ++sp) {
+                    p1 = featureClassOffset_[parent] + sp;
+                    part1 = pairOffset_[p1];
+                    p1_class = p1 * statesClass_;
+                    for (int child = 0; child < parent; ++child) {
+                        for (int sc = 0; sc < states_[child]; ++sc) {
+                            part2 = featureClassOffset_[child] + sc;
+                            part2_class = part2 * statesClass_;
+                            for (int c = 0; c < statesClass_; c++) {
+                                idx = (part1 + part2) * statesClass_ + c;
+                                // Parent, Child, Class Count (read before data_[idx] is overwritten below)
+                                pcc_count = data_[idx];
+                                // Parent, Class count
+                                pc_count = classFeatureCounts_[p1_class + c];
+                                // Child, Class count
+                                cc_count = classFeatureCounts_[part2_class + c];
+                                // p(x_p=sp | c, x_c=sc): smoothed over the states of the parent
+                                conditionalProb = (pcc_count + alpha_ / states_[parent]) / (cc_count + alpha_);
+                                data_[idx] = conditionalProb;
+                                // p(x_c=sc | c, x_p=sp): smoothed over the states of the child
+                                oppositeCondProb = (pcc_count + alpha_ / states_[child]) / (pc_count + alpha_);
+                                dataOpp_[idx] = oppositeCondProb;
+                            }
+                        }
+                    }
+                }
+            }
+            matrixState_ = MatrixState::PROBS;
+        }
+        // -------------------------------------------------------
+        // predict_proba_spode
+        // -------------------------------------------------------
+        //
+        // Single-superparent approach:
+        // P(c | x) ∝ p(c) * p(x_sp| c) * ∏_{i≠sp} p(x_i | c, x_sp)
+        //
+        // 'instance' should have size == nFeatures_ (no class).
+        // sp in [0..nFeatures_).
+        // We multiply p(c) * p(x_sp| c) * p(x_i| c, x_sp).
+        // Then normalize the distribution.
+        //
+        std::vector<double> predict_proba_spode(const std::vector<int>& instance, int parent)
+        {
+            // Posterior of the single SPODE whose superparent is `parent`; `instance` holds one
+            // value per feature (no class). Returns one normalized score per class.
+            //
+            // Seed each class with p(x_sp | c) * p(c). initializer_ scales the seed up before the
+            // long product of probabilities below; normalize() cancels the scale again.
+            auto spodeProbs = std::vector<double>(statesClass_);
+            const int parentState = featureClassOffset_[parent] + instance[parent];
+            for (int c = 0; c < statesClass_; ++c) {
+                spodeProbs[c] = classFeatureProbs_[parentState * statesClass_ + c] * classPriors_[c] * initializer_;
+            }
+            for (int child = 0; child < nFeatures_; ++child) {
+                if (child == parent) {
+                    continue;
+                }
+                const int childState = featureClassOffset_[child] + instance[child];
+                // The pair tables are triangular: the row belongs to the higher-indexed feature and
+                // the column to the lower-indexed one. Always using the parent's row, as before,
+                // addressed unrelated cells (or could run past the table) whenever child > parent.
+                const int row = child < parent ? parentState : childState;
+                const int column = child < parent ? childState : parentState;
+                const int base = (pairOffset_[row] + column) * statesClass_;
+                // data_ holds p(row feature | c, column feature) and dataOpp_ the opposite, so
+                // p(x_child | c, x_parent) is in dataOpp_ when child < parent and in data_ otherwise.
+                const std::vector<double>& table = child < parent ? dataOpp_ : data_;
+                for (int c = 0; c < statesClass_; ++c) {
+                    spodeProbs[c] *= table[base + c];
+                }
+            }
+            // Normalize the probabilities
+            normalize(spodeProbs);
+            return spodeProbs;
+        }
+        int predict_spode(const std::vector<int>& instance, int parent)
+        {
+            const auto scores = predict_proba_spode(instance, parent); // per-class scores of the SPODE rooted at `parent`
+            return static_cast<int>(std::max_element(scores.begin(), scores.end()) - scores.begin()); // first maximum wins
+        }
+        // -------------------------------------------------------
+        // predict_proba
+        // -------------------------------------------------------
+        //
+        // P(c | x) ∝ p(c) * ∏_{i} p(x_i | c) * ∏_{i<j} p(x_j | c, x_i) * p(x_i | c, x_j)
+        //
+        // 'instance' should have size == nFeatures_ (no class).
+        // We multiply p(c) * p(x_i| c) * p(x_j| c, x_i) for all i, j.
+        // Then normalize the distribution.
+        //
+        std::vector<double> predict_proba(const std::vector<int>& instance)
+        {
+            // accumulates posterior probabilities for each class
+            auto probs = std::vector<double>(statesClass_);
+            auto spodeProbs = std::vector<std::vector<double>>(nFeatures_, std::vector<double>(statesClass_));
+            // Flag the active superparents once instead of scanning active_parents for every feature.
+            std::vector<char> isActive(nFeatures_, 0);
+            for (int feature : active_parents) {
+                if (feature >= 0 && feature < nFeatures_) {
+                    isActive[feature] = 1;
+                }
+            }
+            // Seed every active SPODE with p(x_sp | c) * p(c); inactive ones stay at zero.
+            for (int feature = 0; feature < nFeatures_; ++feature) {
+                if (!isActive[feature]) {
+                    continue;
+                }
+                const int localOffset = (featureClassOffset_[feature] + instance[feature]) * statesClass_;
+                for (int c = 0; c < statesClass_; ++c) {
+                    spodeProbs[feature][c] = classFeatureProbs_[localOffset + c] * classPriors_[c];
+                }
+            }
+            for (int parent = 1; parent < nFeatures_; ++parent) {
+                const int parent_offset = pairOffset_[featureClassOffset_[parent] + instance[parent]];
+                for (int child = 0; child < parent; ++child) {
+                    // Each pair is visited once and feeds both SPODEs. Skipping the whole row of an
+                    // inactive parent, as before, dropped the factor p(x_parent | c, x_child) from
+                    // the active SPODEs of lower-indexed features, so test each side separately.
+                    if (!isActive[parent] && !isActive[child]) {
+                        continue;
+                    }
+                    const int base = (parent_offset + featureClassOffset_[child] + instance[child]) * statesClass_;
+                    for (int c = 0; c < statesClass_; ++c) {
+                        // data_ holds P(xp|xc,c) and dataOpp_ holds P(xc|xp,c) for this pair
+                        if (isActive[child]) spodeProbs[child][c] *= data_[base + c];
+                        if (isActive[parent]) spodeProbs[parent][c] *= dataOpp_[base + c];
+                    }
+                }
+            }
+            /* add all the probabilities for each class */
+            for (int c = 0; c < statesClass_; ++c) {
+                for (int i = 0; i < nFeatures_; ++i) {
+                    probs[c] += spodeProbs[i][c] * significance_models_[i];
+                }
+            }
+            // Normalize the probabilities
+            normalize(probs);
+            return probs;
+        }
+        void normalize(std::vector<double>& probs) const
+        {
+            // Rescales probs in place to add up to 1; a zero total is a no-op, a NaN total throws std::runtime_error.
+            const double total = std::accumulate(probs.begin(), probs.end(), 0.0);
+            if (std::isnan(total)) {
+                throw std::runtime_error("Can't normalize array. Sum is NaN.");
+            }
+            if (total != 0) {
+                for (double& value : probs) {
+                    value /= total;
+                }
+            }
+        }
+        // Returns current mode: EMPTY, COUNTS or PROBS
+        MatrixState state() const
+        {
+            return matrixState_;
+        }
+        int statesClass() const // number of class states, as computed by fit()
+        {
+            return statesClass_;
+        }
+        int nFeatures() const // number of features seen by fit()
+        {
+            return nFeatures_;
+        }
+        int getNumberOfStates() const // sum of every entry of states_ (class included) times nFeatures_
+        {
+            return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
+        }
+        int getNumberOfEdges() const // nFeatures_ * (2 * nFeatures_ - 1)
+        {
+            return nFeatures_ * (2 * nFeatures_ - 1);
+        }
+        int getNumberOfNodes() const // (nFeatures_ + 1) * nFeatures_
+        {
+            return (nFeatures_ + 1) * nFeatures_;
+        }
+        void add_active_parent(int active_parent) // appends a feature index to the superparents used by predict_proba
+        {
+            active_parents.push_back(active_parent);
+        }
+        void remove_last_parent() // drops the most recently added active parent
+        {
+            if (!active_parents.empty()) active_parents.pop_back(); // no-op when empty: pop_back() on an empty vector is undefined
+        }
+
+    private:
+        // -----------
+        // MEMBER DATA
+        // -----------
+        std::vector<int> states_;            // [states_feat0, ..., states_feat(n-1), statesClass_]
+        int nFeatures_;                      // number of features, set by fit()
+        int statesClass_;                    // number of class states, set by fit()
+
+        // pairOffset_[state] is the first cell of that state's row in data_ / dataOpp_; a row has one
+        // cell per state of every lower-indexed feature and each cell holds statesClass_ values.
+        std::vector<int> pairOffset_;
+        // data_: raw pair counts in COUNTS mode; in PROBS mode p(x_i | c, x_j) for row feature i and column feature j < i.
+        std::vector<double> data_;
+        // dataOpp_: filled by computeProbabilities with the opposite conditional, p(x_j | c, x_i).
+        std::vector<double> dataOpp_;
+
+        // classCounts_[c]: accumulated weight of the samples of class c
+        std::vector<double> classCounts_;
+        std::vector<double> classPriors_;       // => p(c)
+
+        // featureClassOffset_[i]: index of the first state of feature i in the per-state tables below
+        std::vector<int> featureClassOffset_;
+        std::vector<double> classFeatureCounts_; // accumulated weight per (feature state, class)
+        std::vector<double> classFeatureProbs_;  // filled by computeProbabilities from classFeatureCounts_
+
+        MatrixState matrixState_;
+
+        double alpha_ = 1.0;                     // smoothing mass used by computeProbabilities
+        double initializer_ = std::numeric_limits<double>::max(); // scale factor applied in predict_proba_spode; reset by fit()
+        std::vector<int> active_parents;         // feature indices treated as active superparents by predict_proba
+    };
+}
+#endif // XAODE2_H
\ No newline at end of file