commit inicial

2025-04-30 11:11:49 +02:00
commit e144d65e11
121 changed files with 53649 additions and 0 deletions
--- a/bayesnet/classifiers/Classifier.cc
+++ b/bayesnet/classifiers/Classifier.cc
@@ -0,0 +1,193 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include <sstream>
+#include "bayesnet/utils/bayesnetUtils.h"
+#include "Classifier.h"
+
+namespace bayesnet {
+    // Creates an unfitted classifier holding a copy of the given network; the sample
+    // and feature counters start at zero and the metrics helper is default-constructed.
+    Classifier::Classifier(Network model)
+        : model(model), m(0), n(0), metrics(Metrics()), fitted(false)
+    {
+    }
+    // Common back end of the fit() overloads; expects this->dataset to be set already.
+    // Stores the feature names, class name and states, derives m and n, validates the
+    // input, rebuilds the metrics helper and then builds and trains the network.
+    // Returns *this so that calls can be chained.
+    Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
+    {
+        // Parameters shadow the members, hence the explicit this->
+        this->features = features;
+        this->className = className;
+        this->states = states;
+        m = dataset.size(1); // samples run along dimension 1
+        n = features.size();
+        // Throws std::invalid_argument when dataset, features and states disagree
+        checkFitParameters();
+        const auto n_classes = states.at(className).size();
+        metrics = Metrics(dataset, features, className, n_classes);
+        model.initialize();
+        buildModel(weights);
+        trainModel(weights, smoothing);
+        fitted = true;
+        return *this;
+    }
+    // Appends the label tensor as one extra last row of dataset.
+    // Any exception raised while reshaping or concatenating is replaced by a
+    // std::runtime_error that reports the sizes of both tensors.
+    void Classifier::buildDataset(torch::Tensor& ytmp)
+    {
+        try {
+            const auto yColumn = ytmp.view({ ytmp.size(0), 1 });
+            const auto yRow = torch::transpose(yColumn, 0, 1);
+            dataset = torch::cat({ dataset, yRow }, 0);
+        }
+        catch (const std::exception&) {
+            std::stringstream message;
+            message << "* Error in X and y dimensions *\n"
+                << "X dimensions: " << dataset.sizes() << "\n"
+                << "y dimensions: " << ytmp.sizes();
+            throw std::runtime_error(message.str());
+        }
+    }
+    // Forwards the stored dataset, feature names, class name and states, together with
+    // the given sample weights and smoothing option, to Network::fit.
+    void Classifier::trainModel(const torch::Tensor& weights, Smoothing_t smoothing)
+    {
+        model.fit(
+            dataset,
+            weights,
+            features,
+            className,
+            states,
+            smoothing);
+    }
+    // Fit from separate tensors. X is nxm where n is the number of features and m the
+    // number of samples; y is appended to X as the last row of dataset and every
+    // sample receives the same weight 1/m.
+    Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
+    {
+        dataset = X;
+        buildDataset(y);
+        const auto numSamples = dataset.size(1);
+        const torch::Tensor weights = torch::full({ numSamples }, 1.0 / numSamples, torch::kDouble);
+        return build(features, className, states, weights, smoothing);
+    }
+    // Fit from nested vectors. X is nxm where n is the number of features and m the
+    // number of samples (one inner vector per feature). The data is copied into an
+    // int32 tensor, y is appended as the last row and every sample gets weight 1/m.
+    // Throws std::invalid_argument when X has no rows; other validation errors come
+    // from build().
+    Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
+    {
+        if (X.empty()) {
+            // Reading X[0] below would be undefined behaviour on an empty container
+            throw std::invalid_argument("X must contain at least one feature");
+        }
+        const auto numFeatures = static_cast<int>(X.size());
+        const auto numSamples = static_cast<int>(X[0].size());
+        dataset = torch::zeros({ numFeatures, numSamples }, torch::kInt32);
+        for (int i = 0; i < numFeatures; ++i) {
+            dataset.index_put_({ i, "..." }, torch::tensor(X[i], torch::kInt32));
+        }
+        auto ytmp = torch::tensor(y, torch::kInt32);
+        buildDataset(ytmp);
+        const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
+        return build(features, className, states, weights, smoothing);
+    }
+    // Fit from a dataset tensor that already holds the class as its last row
+    // ((n+1)xm); every sample receives the same weight 1/m.
+    Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
+    {
+        // The parameter shadows the member, hence the explicit this->
+        this->dataset = dataset;
+        const auto numSamples = dataset.size(1);
+        const torch::Tensor weights = torch::full({ numSamples }, 1.0 / numSamples, torch::kDouble);
+        return build(features, className, states, weights, smoothing);
+    }
+    // Fit from a dataset tensor that already holds the class as its last row, using
+    // the sample weights supplied by the caller instead of uniform ones.
+    Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
+    {
+        // The parameter shadows the member, hence the explicit this->
+        this->dataset = dataset;
+        Classifier& self = build(features, className, states, weights, smoothing);
+        return self;
+    }
+    // Validates the members filled in by build() before any training happens.
+    // Throws std::invalid_argument when the dataset is floating point, when its row
+    // count minus the class row differs from the number of feature names, or when the
+    // class or any feature has no entry in states.
+    void Classifier::checkFitParameters()
+    {
+        if (torch::is_floating_point(dataset)) {
+            throw std::invalid_argument("dataset (X, y) must be of type Integer");
+        }
+        const auto rowsWithoutClass = dataset.size(0) - 1;
+        if (rowsWithoutClass != features.size()) {
+            throw std::invalid_argument("Classifier: X " + std::to_string(rowsWithoutClass) + " and features " + std::to_string(features.size()) + " must have the same number of features");
+        }
+        if (states.count(className) == 0) {
+            throw std::invalid_argument("class name not found in states");
+        }
+        for (const auto& feature : features) {
+            if (states.count(feature) == 0) {
+                throw std::invalid_argument("feature [" + feature + "] not found in states");
+            }
+        }
+    }
+    // Returns the network's predictions for the samples in X.
+    // Throws std::logic_error when the classifier has not been fitted.
+    torch::Tensor Classifier::predict(torch::Tensor& X)
+    {
+        if (fitted) {
+            return model.predict(X);
+        }
+        throw std::logic_error(CLASSIFIER_NOT_FITTED);
+    }
+    // Returns the network's predictions for the samples in X (nested vectors, one
+    // inner vector per feature).
+    // Throws std::logic_error when the classifier has not been fitted.
+    std::vector<int> Classifier::predict(std::vector<std::vector<int>>& X)
+    {
+        if (!fitted) {
+            throw std::logic_error(CLASSIFIER_NOT_FITTED);
+        }
+        // Hand the network a private copy of the input. Copy-constructing avoids the
+        // former X[0] access (undefined behaviour on an empty X) and the zero-filled
+        // matrix that was overwritten row by row.
+        std::vector<std::vector<int>> Xd(X);
+        return model.predict(Xd);
+    }
+    // Returns the network's class probabilities for the samples in X.
+    // Throws std::logic_error when the classifier has not been fitted.
+    torch::Tensor Classifier::predict_proba(torch::Tensor& X)
+    {
+        if (fitted) {
+            return model.predict_proba(X);
+        }
+        throw std::logic_error(CLASSIFIER_NOT_FITTED);
+    }
+    // Returns the network's class probabilities for the samples in X (nested vectors,
+    // one inner vector per feature).
+    // Throws std::logic_error when the classifier has not been fitted.
+    std::vector<std::vector<double>> Classifier::predict_proba(std::vector<std::vector<int>>& X)
+    {
+        if (!fitted) {
+            throw std::logic_error(CLASSIFIER_NOT_FITTED);
+        }
+        // Hand the network a private nxm copy of the input. Copy-constructing avoids the
+        // former X[0] access (undefined behaviour on an empty X) and the zero-filled
+        // matrix that was overwritten row by row.
+        std::vector<std::vector<int>> Xd(X);
+        return model.predict_proba(Xd);
+    }
+    // Accuracy on (X, y): number of predictions equal to y divided by y.size(0).
+    // predict() throws std::logic_error when the classifier has not been fitted.
+    float Classifier::score(torch::Tensor& X, torch::Tensor& y)
+    {
+        torch::Tensor y_pred = predict(X);
+        const auto hits = (y_pred == y).sum().item<float>();
+        return hits / y.size(0);
+    }
+    // Score on nested-vector data, computed by Network::score.
+    // Throws std::logic_error when the classifier has not been fitted.
+    float Classifier::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
+    {
+        if (fitted) {
+            return model.score(X, y);
+        }
+        throw std::logic_error(CLASSIFIER_NOT_FITTED);
+    }
+    // Returns the lines produced by Network::show() for the underlying model.
+    std::vector<std::string> Classifier::show() const
+    {
+        auto lines = model.show();
+        return lines;
+    }
+    // Registers one node per feature in the network, followed by the class node.
+    void Classifier::addNodes()
+    {
+        for (auto it = features.begin(); it != features.end(); ++it) {
+            model.addNode(*it);
+        }
+        // The class node goes in last
+        model.addNode(className);
+    }
+    // Size of the model's feature list once fitted, 0 before fitting.
+    // NOTE(review): the original comment states that this list does not include the
+    // class; confirm against Network::getFeatures.
+    int Classifier::getNumberOfNodes() const
+    {
+        if (!fitted) {
+            return 0;
+        }
+        return model.getFeatures().size();
+    }
+    // Edge count reported by the network once fitted, 0 before fitting.
+    int Classifier::getNumberOfEdges() const
+    {
+        if (!fitted) {
+            return 0;
+        }
+        return model.getNumEdges();
+    }
+    // Value of Network::getStates() once fitted, 0 before fitting.
+    int Classifier::getNumberOfStates() const
+    {
+        if (!fitted) {
+            return 0;
+        }
+        return model.getStates();
+    }
+    // Value of Network::getClassNumStates() once fitted, 0 before fitting.
+    int Classifier::getClassNumStates() const
+    {
+        if (!fitted) {
+            return 0;
+        }
+        return model.getClassNumStates();
+    }
+    // Returns the node names in the order given by Network::topological_sort().
+    std::vector<std::string> Classifier::topological_order()
+    {
+        auto ordered = model.topological_sort();
+        return ordered;
+    }
+    // Returns the text produced by Network::dump_cpt() for the underlying model.
+    std::string Classifier::dump_cpt() const
+    {
+        auto text = model.dump_cpt();
+        return text;
+    }
+    // Default handler for classifiers without hyperparameters, also used by subclasses
+    // to reject whatever keys they did not consume.
+    // Throws std::invalid_argument, quoting the JSON dump, unless the object is empty.
+    void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
+    {
+        if (hyperparameters.empty()) {
+            return;
+        }
+        throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
+    }
+}
--- a/bayesnet/classifiers/Classifier.h
+++ b/bayesnet/classifiers/Classifier.h
@@ -0,0 +1,63 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef CLASSIFIER_H
+#define CLASSIFIER_H
+#include <torch/torch.h>
+#include "bayesnet/utils/BayesMetrics.h"
+#include "bayesnet/BaseClassifier.h"
+
+namespace bayesnet {
+    // Abstract base class for classifiers backed by a Network. It stores the training
+    // dataset, feature names, class name and states, and implements the fit / predict /
+    // score entry points; subclasses supply the network structure through buildModel().
+    class Classifier : public BaseClassifier {
+    public:
+        // Stores a copy of the given network; the classifier starts unfitted.
+        Classifier(Network model);
+        virtual ~Classifier() = default;
+        // fit overloads: each one fills `dataset` and then validates, builds and trains the model.
+        // X and y as std::vectors (one inner vector per feature); uniform sample weights.
+        Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+        // X (n x m) and y as tensors; y is appended to X as the last row; uniform sample weights.
+        Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+        // dataset already contains the class as its last row; uniform sample weights.
+        Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+        // dataset already contains the class as its last row; sample weights supplied by the caller.
+        Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing) override;
+        // Adds one node per feature plus the class node to the network.
+        void addNodes();
+        // The four counters below return 0 until the classifier has been fitted.
+        int getNumberOfNodes() const override;
+        int getNumberOfEdges() const override;
+        int getNumberOfStates() const override;
+        int getClassNumStates() const override;
+        // predict / predict_proba throw std::logic_error when called before fit.
+        torch::Tensor predict(torch::Tensor& X) override;
+        std::vector<int> predict(std::vector<std::vector<int>>& X) override;
+        torch::Tensor predict_proba(torch::Tensor& X) override;
+        std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
+        // NOTE(review): status, project_version and notes are not declared in this
+        // class; presumably they come from BaseClassifier or an included header.
+        status_t getStatus() const override { return status; }
+        std::string getVersion() override { return { project_version.begin(), project_version.end() }; };
+        float score(torch::Tensor& X, torch::Tensor& y) override;
+        float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
+        std::vector<std::string> show() const override;
+        std::vector<std::string> topological_order()  override;
+        std::vector<std::string> getNotes() const override { return notes; }
+        std::string dump_cpt() const override;
+        void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
+    protected:
+        bool fitted; // set to true at the end of a successful fit
+        unsigned int m, n; // m: number of samples, n: number of features
+        Network model;
+        Metrics metrics;
+        std::vector<std::string> features;
+        std::string className;
+        std::map<std::string, std::vector<int>> states;
+        torch::Tensor dataset; // (n+1)xm tensor
+        // Throws std::invalid_argument when dataset, features and states are inconsistent.
+        void checkFitParameters();
+        // Structure-learning hook implemented by each concrete classifier.
+        virtual void buildModel(const torch::Tensor& weights) = 0;
+        void trainModel(const torch::Tensor& weights, const Smoothing_t smoothing) override;
+        // Appends y as the last row of dataset.
+        void buildDataset(torch::Tensor& y);
+        // Message of the std::logic_error thrown when predicting or scoring before fit.
+        const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
+    private:
+        // Shared implementation behind every fit() overload.
+        Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing);
+    };
+}
+#endif
+
+
+
+
+
--- a/bayesnet/classifiers/KDB.cc
+++ b/bayesnet/classifiers/KDB.cc
@@ -0,0 +1,111 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+#include "bayesnet/utils/bayesnetUtils.h"
+#include "KDB.h"
+
+namespace bayesnet {
+    // Creates a KDB classifier over an empty network with the given k and theta, and
+    // sets validHyperparameters to { "k", "theta" }.
+    KDB::KDB(int k, float theta)
+        : Classifier(Network()), k(k), theta(theta)
+    {
+        validHyperparameters = { "k", "theta" };
+    }
+    // Reads "k" and "theta" from the JSON object when present, then passes whatever is
+    // left to Classifier::setHyperparameters, which throws std::invalid_argument if any
+    // key remains.
+    void KDB::setHyperparameters(const nlohmann::json& hyperparameters_)
+    {
+        // Work on a copy so that consumed keys can be erased
+        auto remaining = hyperparameters_;
+        if (remaining.contains("k")) {
+            k = remaining["k"];
+            remaining.erase("k");
+        }
+        if (remaining.contains("theta")) {
+            theta = remaining["theta"];
+            remaining.erase("theta");
+        }
+        Classifier::setHyperparameters(remaining);
+    }
+    // Builds the structure of the k-dependence Bayesian network (KDB algorithm):
+    //   1. For each feature Xi compute the mutual information I(Xi;C) with the class C.
+    //   2. Compute the class-conditional mutual information I(Xi;Xj|C) for each pair of
+    //      features Xi and Xj, i != j.
+    //   3. Let the used variable list, S, be empty.
+    //   4. Let the network being constructed begin with the class node C.
+    //   5. Repeat until S includes all features:
+    //      5.1. Select the next feature Xmax not in S with the largest I(Xmax;C).
+    //      5.2. Add a node representing Xmax.
+    //      5.3. Add an arc from C to Xmax.
+    //      5.4. Add m = min(|S|, k) arcs from the m distinct features Xj in S with the
+    //           highest I(Xmax;Xj|C).
+    //      5.5. Add Xmax to S.
+    // The conditional probabilities implied by the structure are computed afterwards
+    // from the data counts. In this implementation all nodes are added up front.
+    void KDB::buildModel(const torch::Tensor& weights)
+    {
+        addNodes();
+        // Step 1: mutual information between every feature row and the class (last row)
+        const torch::Tensor& y = dataset.index({ -1, "..." });
+        const int numFeatures = static_cast<int>(features.size());
+        std::vector<double> mi;
+        for (int i = 0; i < numFeatures; ++i) {
+            torch::Tensor featureRow = dataset.index({ i, "..." });
+            mi.push_back(metrics.mutualInformation(featureRow, y, weights));
+        }
+        // Step 2: class-conditional mutual information between features
+        auto conditionalEdgeWeights = metrics.conditionalEdge(weights);
+        // Steps 3-5: visit the features in the order given by argsort(mi)
+        // (presumably decreasing mutual information, as step 5.1 requires; confirm in
+        // bayesnetUtils)
+        std::vector<int> S;
+        auto visitOrder = argsort(mi);
+        for (auto idx : visitOrder) {
+            // 5.3: arc from the class to the selected feature
+            model.addEdge(className, features[idx]);
+            // 5.4: arcs from features already in S
+            add_m_edges(idx, S, conditionalEdgeWeights);
+            // 5.5: the feature is now used
+            S.push_back(idx);
+        }
+    }
+    // Adds up to min(k, |S|) arcs into features[idx] from features already in S.
+    // Candidates are taken from row idx of a private copy of weights in decreasing
+    // order; one is linked only if it belongs to S and its weight exceeds theta.
+    // Each visited entry is overwritten with -1 so that it is not chosen again, and the
+    // search stops when enough arcs were added or no entry above theta is left.
+    void KDB::add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights)
+    {
+        const auto n_edges = std::min(k, static_cast<int>(S.size()));
+        auto cond_w = clone(weights);
+        if (k == 0) {
+            return;
+        }
+        int added = 0;
+        bool done = false;
+        do {
+            const auto best = argmax(cond_w.index({ idx, "..." })).item<int>();
+            const bool alreadyUsed = std::find(S.begin(), S.end(), best) != S.end();
+            if (alreadyUsed && cond_w.index({ idx, best }).item<float>() > theta) {
+                try {
+                    model.addEdge(features[best], features[idx]);
+                    ++added;
+                }
+                catch (const std::invalid_argument&) {
+                    // Loops are not allowed
+                }
+            }
+            cond_w.index_put_({ idx, best }, -1);
+            const auto remaining = cond_w.index({ idx, "..." }).gt(theta).nonzero().size(0);
+            done = (added == n_edges) || (remaining == 0);
+        } while (!done);
+    }
+    // Returns the network's graph description. When the title is exactly "KDB", the
+    // values of k and theta are appended to it.
+    std::vector<std::string> KDB::graph(const std::string& title) const
+    {
+        std::string header{ title };
+        const bool isDefaultTitle = (title == "KDB");
+        if (isDefaultTitle) {
+            header.append(" (k=").append(std::to_string(k));
+            header.append(", theta=").append(std::to_string(theta));
+            header.append(")");
+        }
+        return model.graph(header);
+    }
+}
--- a/bayesnet/classifiers/KDB.h
+++ b/bayesnet/classifiers/KDB.h
@@ -0,0 +1,26 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef KDB_H
+#define KDB_H
+#include <torch/torch.h>
+#include "Classifier.h"
+namespace bayesnet {
+    // K-dependence Bayesian classifier: every feature gets the class as parent plus up
+    // to k other features chosen by class-conditional mutual information.
+    class KDB : public Classifier {
+    private:
+        int k; // maximum number of feature parents added per feature
+        float theta; // a candidate parent is linked only if its weight exceeds theta
+    protected:
+        // Adds arcs into features[idx] from features already in S; weights is indexed as (idx, candidate).
+        void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
+        void buildModel(const torch::Tensor& weights) override;
+    public:
+        explicit KDB(int k, float theta = 0.03);
+        virtual ~KDB() = default;
+        // Accepts the keys "k" and "theta"; remaining keys are passed to Classifier::setHyperparameters.
+        void setHyperparameters(const nlohmann::json& hyperparameters_) override;
+        // With the default name "KDB" the values of k and theta are appended to the title.
+        std::vector<std::string> graph(const std::string& name = "KDB") const override;
+    };
+}
+#endif
--- a/bayesnet/classifiers/KDBLd.cc
+++ b/bayesnet/classifiers/KDBLd.cc
@@ -0,0 +1,35 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "KDBLd.h"
+
+namespace bayesnet {
+    // Builds the KDB part with the given k and hands Proposal references to the
+    // inherited dataset, features and className members.
+    KDBLd::KDBLd(int k)
+        : KDB(k), Proposal(dataset, features, className)
+    {
+    }
+    // Fits KDB on continuous data using local discretization.
+    // X_ must be a floating point tensor and y_ an integer one (checkInput throws
+    // std::invalid_argument otherwise). The states_ argument is not read: the states
+    // are derived from the discretization.
+    KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
+    {
+        checkInput(X_, y_);
+        Xf = X_;
+        y = y_;
+        features = features_;
+        className = className_;
+        // Discretize every feature against the class; this fills dataset and returns the states
+        states = fit_local_discretization(y);
+        // Fit on the discretized data to obtain the regular KDB structure
+        KDB::fit(dataset, features, className, states, smoothing);
+        // Re-discretize features using their parents in the learned structure
+        states = localDiscretizationProposal(states, model);
+        return *this;
+    }
+    // Discretizes X with the fitted discretizers (prepareX) and predicts with KDB.
+    torch::Tensor KDBLd::predict(torch::Tensor& X)
+    {
+        torch::Tensor discretized = prepareX(X);
+        return KDB::predict(discretized);
+    }
+    // Same graph description as KDB::graph for the given name.
+    std::vector<std::string> KDBLd::graph(const std::string& name) const
+    {
+        auto lines = KDB::graph(name);
+        return lines;
+    }
+}
--- a/bayesnet/classifiers/KDBLd.h
+++ b/bayesnet/classifiers/KDBLd.h
@@ -0,0 +1,24 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef KDBLD_H
+#define KDBLD_H
+#include "Proposal.h"
+#include "KDB.h"
+
+namespace bayesnet {
+    // KDB for continuous features: Proposal discretizes the input before and after the
+    // KDB structure is learned.
+    class KDBLd : public KDB, public Proposal {
+    private:
+    public:
+        explicit KDBLd(int k);
+        virtual ~KDBLd() = default;
+        // X must be floating point and y integer; the states argument is not read by the implementation.
+        KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override;
+        std::vector<std::string> graph(const std::string& name = "KDB") const override;
+        // Discretizes X with the fitted discretizers before delegating to KDB::predict.
+        torch::Tensor predict(torch::Tensor& X) override;
+        static inline std::string version() { return "0.0.1"; };
+    };
+}
+#endif // !KDBLD_H
--- a/bayesnet/classifiers/Proposal.cc
+++ b/bayesnet/classifiers/Proposal.cc
@@ -0,0 +1,129 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "Proposal.h"
+
+namespace bayesnet {
+    // Binds the proposal to the dataset, feature names and class name owned by the
+    // classifier it is mixed into; only references are kept, nothing is copied.
+    Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_)
+        : pDataset(dataset_), pFeatures(features_), pClassName(className_)
+    {
+    }
+    // Frees the discretizers held by raw pointer in the discretizers map.
+    Proposal::~Proposal()
+    {
+        for (auto& entry : discretizers) {
+            delete entry.second;
+        }
+    }
+    // Checks the tensor types expected by local discretization.
+    // Throws std::invalid_argument when X is not floating point or when y is.
+    void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y)
+    {
+        const bool xIsContinuous = torch::is_floating_point(X);
+        if (!xIsContinuous) {
+            throw std::invalid_argument("X must be a floating point tensor");
+        }
+        const bool yIsContinuous = torch::is_floating_point(y);
+        if (yIsContinuous) {
+            throw std::invalid_argument("y must be an integer tensor");
+        }
+    }
+    // Re-discretizes every feature that has at least one parent besides the class,
+    // conditioning the discretizer on the joint value of the class and those parents.
+    // If any feature was re-discretized, the affected rows of the dataset are replaced,
+    // their states are updated and the model is refitted with uniform weights.
+    // Returns the (possibly updated) copy of oldStates.
+    // NOTE(review): the refit always uses Smoothing_t::ORIGINAL, whatever smoothing the
+    // caller passed to fit(); confirm this is intended.
+    map<std::string, std::vector<int>> Proposal::localDiscretizationProposal(const map<std::string, std::vector<int>>& oldStates, Network& model)
+    {
+        // order of local discretization is important. no good 0, 1, 2...
+        // although we rediscretize features after the local discretization of every feature
+        auto order = model.topological_sort();
+        auto& nodes = model.getNodes();
+        map<std::string, std::vector<int>> states = oldStates;
+        std::vector<int> indicesToReDiscretize;
+        bool upgrade = false; // Flag to check if we need to upgrade the model
+        const auto numSamples = Xf.size(1);
+        for (const auto& feature : order) {
+            auto nodeParents = nodes[feature]->getParents();
+            if (nodeParents.size() < 2) continue; // Only has class as parent
+            upgrade = true;
+            int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin();
+            indicesToReDiscretize.push_back(index); // We need to re-discretize this feature
+            std::vector<std::string> parents;
+            transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); });
+            // Remove class as parent as it will be added later
+            parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end());
+            // Get the indices of the parents
+            std::vector<int> indices;
+            indices.push_back(-1); // Add class index
+            transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); });
+            // Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y)
+            std::vector<std::string> yJoinParents(numSamples);
+            for (auto idx : indices) {
+                for (int i = 0; i < numSamples; ++i) {
+                    yJoinParents[i] += std::to_string(pDataset.index({ idx, i }).item<int>());
+                    // Separator keeps joint labels unambiguous: without it "1"+"12" and
+                    // "11"+"2" would collapse into the same label
+                    yJoinParents[i] += ',';
+                }
+            }
+            auto yxv = factorize(yJoinParents);
+            // contiguous() guarantees that the raw pointer walks exactly this row
+            auto featureRow = Xf.index({ index }).contiguous();
+            auto xvf_ptr = featureRow.data_ptr<float>();
+            auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + numSamples);
+            discretizers[feature]->fit(xvf, yxv);
+        }
+        if (upgrade) {
+            // Discretize again X (only the affected indices) with the new fitted discretizers
+            for (auto index : indicesToReDiscretize) {
+                auto featureRow = Xf.index({ index }).contiguous();
+                auto Xt_ptr = featureRow.data_ptr<float>();
+                auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + numSamples);
+                pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt)));
+                auto xStates = std::vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
+                iota(xStates.begin(), xStates.end(), 0);
+                //Update new states of the feature/node
+                states[pFeatures[index]] = xStates;
+            }
+            const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
+            model.fit(pDataset, weights, pFeatures, pClassName, states, Smoothing_t::ORIGINAL);
+        }
+        return states;
+    }
+    // Discretizes the continuous data in Xf feature by feature against the class labels
+    // and builds pDataset (Classifier::dataset) as an (n+1)xm int32 tensor whose last
+    // row is y. One discretizer per feature is kept in the discretizers map.
+    // Returns the states of every feature (0..number of cut points) and of the class
+    // (0..max(y)).
+    // NOTE(review): the raw pointer reads assume Xf holds float32 and y holds int32
+    // values; confirm the callers guarantee it.
+    map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
+    {
+        int m = Xf.size(1);
+        int n = Xf.size(0);
+        map<std::string, std::vector<int>> states;
+        pDataset = torch::zeros({ n + 1, m }, torch::kInt32);
+        // contiguous() guarantees that the raw pointer walks the labels in order
+        auto yContiguous = y.contiguous();
+        auto yv = std::vector<int>(yContiguous.data_ptr<int>(), yContiguous.data_ptr<int>() + yContiguous.size(0));
+        // discretize input data by feature(row)
+        for (auto i = 0; i < pFeatures.size(); ++i) {
+            // The map owns the discretizers. Release the one left by a previous fit
+            // (delete on the null pointer of a fresh entry is a no-op) and store the new
+            // one before using it, so the destructor frees it even if fit() throws.
+            auto& discretizer = discretizers[pFeatures[i]];
+            delete discretizer;
+            discretizer = nullptr;
+            discretizer = new mdlp::CPPFImdlp();
+            auto featureRow = Xf.index({ i }).contiguous();
+            auto Xt_ptr = featureRow.data_ptr<float>();
+            auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
+            discretizer->fit(Xt, yv);
+            pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
+            auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1);
+            iota(xStates.begin(), xStates.end(), 0);
+            states[pFeatures[i]] = xStates;
+        }
+        int n_classes = torch::max(y).item<int>() + 1;
+        auto yStates = std::vector<int>(n_classes);
+        iota(yStates.begin(), yStates.end(), 0);
+        states[pClassName] = yStates;
+        pDataset.index_put_({ n, "..." }, y);
+        return states;
+    }
+    // Discretizes a continuous tensor X (one row per feature) with the discretizers
+    // fitted for each feature and returns an int32 tensor of the same shape.
+    // Throws std::out_of_range when X has more rows than known features or when a
+    // feature has no fitted discretizer (e.g. when called before fit).
+    // NOTE(review): the raw pointer read assumes X holds float32 values.
+    torch::Tensor Proposal::prepareX(torch::Tensor& X)
+    {
+        auto Xtd = torch::zeros_like(X, torch::kInt32);
+        for (int i = 0; i < X.size(0); ++i) {
+            // contiguous() guarantees that the raw pointer walks exactly this row,
+            // even when X is a strided view (e.g. a transposed tensor)
+            auto row = X[i].contiguous();
+            auto Xt = std::vector<float>(row.data_ptr<float>(), row.data_ptr<float>() + X.size(1));
+            // at() instead of operator[]: a missing entry must not insert a null
+            // discretizer that is then dereferenced
+            auto Xd = discretizers.at(pFeatures.at(i))->transform(Xt);
+            Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
+        }
+        return Xtd;
+    }
+    // Maps every label to a consecutive integer id, assigned in order of first
+    // appearance (the first distinct label gets 0, the next one 1, ...), and returns
+    // the ids in the same order as the input labels.
+    std::vector<int> Proposal::factorize(const std::vector<std::string>& labels_t)
+    {
+        std::vector<int> yy;
+        yy.reserve(labels_t.size());
+        std::map<std::string, int> labelMap;
+        for (const std::string& label : labels_t) {
+            // emplace inserts only when the label is new, in which case the current map
+            // size is the next free id; either way it yields the label's entry
+            const auto entry = labelMap.emplace(label, static_cast<int>(labelMap.size()));
+            yy.push_back(entry.first->second);
+        }
+        return yy;
+    }
+}
--- a/bayesnet/classifiers/Proposal.h
+++ b/bayesnet/classifiers/Proposal.h
@@ -0,0 +1,37 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef PROPOSAL_H
+#define PROPOSAL_H
+#include <string>
+#include <map>
+#include <torch/torch.h>
+#include <fimdlp/CPPFImdlp.h>
+#include "bayesnet/network/Network.h"
+#include "Classifier.h"
+
+namespace bayesnet {
+    // Mix-in that adds local discretization of continuous features to a classifier.
+    // It works directly on the classifier's dataset, feature names and class name
+    // through the references received in the constructor.
+    // NOTE(review): discretizers holds owning raw pointers released in the destructor,
+    // but copying is not disabled; a copied Proposal would delete the same pointers twice.
+    class Proposal {
+    public:
+        Proposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_);
+        virtual ~Proposal();
+    protected:
+        // Throws std::invalid_argument unless X is floating point and y is not.
+        void checkInput(const torch::Tensor& X, const torch::Tensor& y);
+        // Discretizes X row by row with the fitted discretizers; returns an int32 tensor.
+        torch::Tensor prepareX(torch::Tensor& X);
+        // Re-discretizes features that have parents other than the class and refits the model.
+        map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
+        // Discretizes Xf against y, fills the dataset and returns the resulting states.
+        map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
+        torch::Tensor Xf; // X continuous nxm tensor
+        torch::Tensor y; // y discrete labels, one per sample (m values)
+        map<std::string, mdlp::CPPFImdlp*> discretizers; // one discretizer per feature name, owned by this object
+    private:
+        // Maps labels to consecutive integer ids in order of first appearance.
+        std::vector<int> factorize(const std::vector<std::string>& labels_t);
+        torch::Tensor& pDataset; // (n+1)xm tensor
+        std::vector<std::string>& pFeatures;
+        std::string& pClassName;
+    };
+}
+
+#endif  
--- a/bayesnet/classifiers/SPODE.cc
+++ b/bayesnet/classifiers/SPODE.cc
@@ -0,0 +1,46 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "SPODE.h"
+
+namespace bayesnet {
+
+    // Creates a SPODE classifier over an empty network whose super-parent is the
+    // feature at index root, and sets validHyperparameters to { "parent" }.
+    SPODE::SPODE(int root)
+        : Classifier(Network()), root(root)
+    {
+        validHyperparameters = { "parent" };
+    }
+
+    // Reads "parent" (index of the super-parent feature) from the JSON object when
+    // present, then passes whatever is left to Classifier::setHyperparameters, which
+    // throws std::invalid_argument if any key remains.
+    void SPODE::setHyperparameters(const nlohmann::json& hyperparameters_)
+    {
+        // Work on a copy so that the consumed key can be erased
+        auto remaining = hyperparameters_;
+        if (remaining.contains("parent")) {
+            root = remaining["parent"];
+            remaining.erase("parent");
+        }
+        Classifier::setHyperparameters(remaining);
+    }
+    // Builds the SPODE structure: the class is a parent of every feature and the root
+    // feature (super-parent) is a parent of every other feature.
+    // Throws std::invalid_argument when root is not a valid feature index.
+    void SPODE::buildModel(const torch::Tensor& weights)
+    {
+        // 0. Add all nodes to the model
+        addNodes();
+        const int numFeatures = static_cast<int>(features.size());
+        // A negative root would index features out of bounds, so reject both ends
+        if (root < 0 || root >= numFeatures) {
+            throw std::invalid_argument("The parent node is not in the dataset");
+        }
+        for (int i = 0; i < numFeatures; ++i) {
+            // 1. Add an edge from the class node to the feature
+            model.addEdge(className, features[i]);
+            // 2. Add an edge from the root node to every other feature
+            if (i != root) {
+                model.addEdge(features[root], features[i]);
+            }
+        }
+    }
+    // Returns the network's graph description under the given title.
+    std::vector<std::string> SPODE::graph(const std::string& name) const
+    {
+        auto lines = model.graph(name);
+        return lines;
+    }
+
+}
--- a/bayesnet/classifiers/SPODE.h
+++ b/bayesnet/classifiers/SPODE.h
@@ -0,0 +1,24 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef SPODE_H
+#define SPODE_H
+#include "Classifier.h"
+
+namespace bayesnet {
+    // Super-parent one-dependence estimator: every feature depends on the class and on
+    // a single super-parent feature (root).
+    class SPODE : public Classifier {
+    public:
+        explicit SPODE(int root);
+        virtual ~SPODE() = default;
+        // Accepts the key "parent"; remaining keys are passed to Classifier::setHyperparameters.
+        void setHyperparameters(const nlohmann::json& hyperparameters_) override;
+        std::vector<std::string> graph(const std::string& name = "SPODE") const override;
+    protected:
+        // Throws std::invalid_argument when root is not smaller than the number of features.
+        void buildModel(const torch::Tensor& weights) override;
+    private:
+        int root; // index in features of the super-parent
+    };
+}
+#endif
--- a/bayesnet/classifiers/SPODELd.cc
+++ b/bayesnet/classifiers/SPODELd.cc
@@ -0,0 +1,50 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "SPODELd.h"
+
+namespace bayesnet {
+    // Constructs the SPODE base with `root` and passes the inherited dataset, features
+    // and className members to the Proposal base.
+    SPODELd::SPODELd(int root)
+        : SPODE(root)
+        , Proposal(dataset, features, className)
+    {
+    }
+    // Fits from separate feature and label tensors: the pair is checked with checkInput,
+    // stored in Xf / y, and the rest of the work is done by commonFit.
+    SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
+    {
+        checkInput(X_, y_);
+        y = y_;
+        Xf = X_;
+        SPODELd& fitted_model = commonFit(features_, className_, states_, smoothing);
+        return fitted_model;
+    }
+
+    // Fits from a single floating point tensor: every row except the last one is copied
+    // into Xf and the last row is copied into y as int32, then commonFit takes over.
+    // Throws std::runtime_error if `dataset` is not a floating point tensor.
+    SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
+    {
+        const bool is_float = torch::is_floating_point(dataset);
+        if (!is_float) {
+            throw std::runtime_error("Dataset must be a floating point tensor");
+        }
+        // Note: the parameter `dataset` hides the member of the same name inside this function.
+        const auto last_row = dataset.size(0) - 1;
+        const auto feature_rows = torch::indexing::Slice(0, last_row);
+        Xf = dataset.index({ feature_rows, "..." }).clone();
+        y = dataset.index({ -1, "..." }).clone().to(torch::kInt32);
+        return commonFit(features_, className_, states_, smoothing);
+    }
+
+    // Shared tail of both fit() overloads. Expects Xf and y to be set already.
+    // `states_` is not read here: `states` is taken from fit_local_discretization(y).
+    SPODELd& SPODELd::commonFit(const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
+    {
+        this->features = features_;
+        this->className = className_;
+        // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
+        this->states = fit_local_discretization(y);
+        // The input is discretized now, so the plain SPODE structure can be fitted first;
+        // SPODE::fit initializes the base Bayesian network.
+        SPODE::fit(dataset, features, className, states, smoothing);
+        // Update the states from the fitted network via localDiscretizationProposal.
+        this->states = localDiscretizationProposal(states, model);
+        return *this;
+    }
+    // Passes X through prepareX and predicts with the SPODE implementation.
+    torch::Tensor SPODELd::predict(torch::Tensor& X)
+    {
+        auto prepared = prepareX(X);
+        torch::Tensor predictions = SPODE::predict(prepared);
+        return predictions;
+    }
+    // Same output as SPODE::graph for the given `name`.
+    std::vector<std::string> SPODELd::graph(const std::string& name) const
+    {
+        std::vector<std::string> description = SPODE::graph(name);
+        return description;
+    }
+}
--- a/bayesnet/classifiers/SPODELd.h
+++ b/bayesnet/classifiers/SPODELd.h
@@ -0,0 +1,25 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef SPODELD_H
+#define SPODELD_H
+#include "SPODE.h"
+#include "Proposal.h"
+
+namespace bayesnet {
+    class SPODELd : public SPODE, public Proposal { // SPODE variant fitted on floating point data (see SPODELd.cc)
+    public:
+        explicit SPODELd(int root); // root is forwarded to the SPODE constructor
+        virtual ~SPODELd() = default;
+        SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override; // stores X / y, then calls commonFit
+        SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override; // last row of dataset is used as y; throws std::runtime_error for non floating point input
+        SPODELd& commonFit(const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing); // shared tail of both fit overloads
+        std::vector<std::string> graph(const std::string& name = "SPODELd") const override; // returns SPODE::graph(name)
+        torch::Tensor predict(torch::Tensor& X) override; // runs prepareX on X before SPODE::predict
+        static inline std::string version() { return "0.0.1"; }; // fixed version string
+    };
+}
+#endif // !SPODELD_H
--- a/bayesnet/classifiers/SPnDE.cc
+++ b/bayesnet/classifiers/SPnDE.cc
@@ -0,0 +1,38 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "SPnDE.h"
+
+namespace bayesnet {
+
+    // Starts from an empty Network and keeps a copy of the parent feature indices.
+    SPnDE::SPnDE(std::vector<int> parents)
+        : Classifier(Network())
+        , parents(parents)
+    {
+    }
+
+    // Builds the SPnDE structure: every feature that is not listed in `parents` gets the
+    // class node and each parent feature as its parents.
+    // `weights` is not read by this implementation.
+    // Throws std::invalid_argument if any entry of `parents` is not a valid index into `features`.
+    // NOTE(review): the parent features themselves get no edge from the class node, unlike
+    // SPODE::buildModel which links the class to every feature - confirm this is intended.
+    void SPnDE::buildModel(const torch::Tensor& weights)
+    {
+        // 0. Add all nodes to the model
+        addNodes();
+        // Validate the parents up front: `features[parent]` below would otherwise be read
+        // out of bounds for a negative or too-large index.
+        const int n_features = static_cast<int>(features.size());
+        for (const int parent : parents) {
+            if (parent < 0 || parent >= n_features) {
+                throw std::invalid_argument("The parent node is not in the dataset");
+            }
+        }
+        for (int i = 0; i < n_features; ++i) {
+            // Parent features are not treated as children.
+            if (std::find(parents.begin(), parents.end(), i) != parents.end()) {
+                continue;
+            }
+            // 1. Edge from the class node to the child feature
+            model.addEdge(className, features[i]);
+            // 2. Edges from every parent node to the child feature
+            for (const int parent : parents) {
+                model.addEdge(features[parent], features[i]);
+            }
+        }
+    }
+    // Delegates to the underlying network's graph(), passing `name` through.
+    std::vector<std::string> SPnDE::graph(const std::string& name) const
+    {
+        std::vector<std::string> description = model.graph(name);
+        return description;
+    }
+
+}
--- a/bayesnet/classifiers/SPnDE.h
+++ b/bayesnet/classifiers/SPnDE.h
@@ -0,0 +1,26 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef SPnDE_H
+#define SPnDE_H
+#include <vector>
+#include "Classifier.h"
+
+namespace bayesnet {
+    class SPnDE : public Classifier { // classifier whose buildModel uses several parent features (`parents`)
+    public:
+        explicit SPnDE(std::vector<int> parents); // parents: feature indices copied into the `parents` member
+        virtual ~SPnDE() = default;
+        std::vector<std::string> graph(const std::string& name = "SPnDE") const override; // returns model.graph(name)
+    protected:
+        void buildModel(const torch::Tensor& weights) override; // links the class node and every parent to each non-parent feature
+    private:
+        std::vector<int> parents; // indices into `features` of the parent nodes
+
+
+    };
+}
+#endif
--- a/bayesnet/classifiers/TAN.cc
+++ b/bayesnet/classifiers/TAN.cc
@@ -0,0 +1,60 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "TAN.h"
+
+namespace bayesnet {
+    // Starts from an empty Network and registers "parent" in validHyperparameters.
+    TAN::TAN()
+        : Classifier(Network())
+    {
+        this->validHyperparameters = { "parent" };
+    }
+
+    // Takes the "parent" entry (if present) out of a copy of the hyperparameters, stores
+    // it in `parent`, and forwards whatever is left to Classifier::setHyperparameters.
+    void TAN::setHyperparameters(const nlohmann::json& hyperparameters_)
+    {
+        nlohmann::json remaining = hyperparameters_;
+        const bool has_parent = remaining.contains("parent");
+        if (has_parent) {
+            parent = remaining.at("parent").get<int>();
+            remaining.erase("parent");
+        }
+        Classifier::setHyperparameters(remaining);
+    }
+    // Builds the TAN structure: the edges of a maximum spanning tree over the features
+    // (rooted at `parent`, or at the feature with the highest mutual information with the
+    // class when `parent` is -1) plus an edge from the class node to every feature.
+    // Throws std::invalid_argument if the selected root is not a valid index into `features`.
+    void TAN::buildModel(const torch::Tensor& weights)
+    {
+        // 0. Add all nodes to the model
+        addNodes();
+        // 1. Compute mutual information between each feature and the class and set the root node
+        // as the highest mutual information with the class
+        const int n_features = static_cast<int>(features.size());
+        auto mi = std::vector<std::pair<int, float>>();
+        mi.reserve(features.size());
+        torch::Tensor class_dataset = dataset.index({ -1, "..." });
+        for (int i = 0; i < n_features; ++i) {
+            torch::Tensor feature_dataset = dataset.index({ i, "..." });
+            auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights);
+            mi.push_back({ i, mi_value });
+        }
+        std::sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;});
+        // With no features there is nothing to pick from: root stays -1 and is rejected below
+        // instead of reading past the end of an empty vector.
+        int root = parent;
+        if (parent == -1 && !mi.empty()) {
+            root = mi.back().first;
+        }
+        // Negative values other than the -1 sentinel are invalid too.
+        if (root < 0 || root >= n_features) {
+            throw std::invalid_argument("The parent node is not in the dataset");
+        }
+        // 2. Compute the pairwise feature weights (metrics.conditionalEdge)
+        auto weights_matrix = metrics.conditionalEdge(weights);
+        // 3. Compute the maximum spanning tree
+        auto mst = metrics.maximumSpanningTree(features, weights_matrix, root);
+        // 4. Add edges from the maximum spanning tree to the model
+        for (const auto& [from, to] : mst) {
+            model.addEdge(features[from], features[to]);
+        }
+        // 5. Add edges from the class to all features
+        for (const auto& feature : features) {
+            model.addEdge(className, feature);
+        }
+    }
+    // Delegates to the underlying network's graph(), passing `title` through.
+    std::vector<std::string> TAN::graph(const std::string& title) const
+    {
+        std::vector<std::string> description = model.graph(title);
+        return description;
+    }
+}
--- a/bayesnet/classifiers/TAN.h
+++ b/bayesnet/classifiers/TAN.h
@@ -0,0 +1,23 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef TAN_H
+#define TAN_H
+#include "Classifier.h"
+namespace bayesnet {
+    class TAN : public Classifier { // classifier whose buildModel adds a maximum spanning tree over the features
+    public:
+        TAN(); // registers "parent" in validHyperparameters
+        virtual ~TAN() = default;
+        void setHyperparameters(const nlohmann::json& hyperparameters_) override; // consumes the "parent" key and forwards the rest to Classifier
+        std::vector<std::string> graph(const std::string& name = "TAN") const override; // returns model.graph(name)
+    protected:
+        void buildModel(const torch::Tensor& weights) override; // adds the spanning tree edges plus class->feature edges
+    private:
+        int parent = -1; // root feature index for the spanning tree; -1 lets buildModel pick it by mutual information
+    };
+}
+#endif
--- a/bayesnet/classifiers/TANLd.cc
+++ b/bayesnet/classifiers/TANLd.cc
@@ -0,0 +1,36 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "TANLd.h"
+
+namespace bayesnet {
+    // Default-constructs the TAN base and passes the inherited dataset, features and
+    // className members to the Proposal base.
+    TANLd::TANLd()
+        : TAN()
+        , Proposal(dataset, features, className)
+    {
+    }
+    // Fits from separate feature and label tensors: the pair is checked with checkInput,
+    // the inputs are stored, the data is discretized and the TAN structure is fitted on it.
+    // `states_` is not read here: `states` is taken from fit_local_discretization(y).
+    TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_, const Smoothing_t smoothing)
+    {
+        checkInput(X_, y_);
+        this->features = features_;
+        this->className = className_;
+        this->Xf = X_;
+        this->y = y_;
+        // Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
+        this->states = fit_local_discretization(y);
+        // The input is discretized now, so the plain TAN structure can be fitted first;
+        // TAN::fit initializes the base Bayesian network.
+        TAN::fit(dataset, features, className, states, smoothing);
+        // Update the states from the fitted network via localDiscretizationProposal.
+        this->states = localDiscretizationProposal(states, model);
+        return *this;
+    }
+    // Passes X through prepareX and predicts with the TAN implementation.
+    torch::Tensor TANLd::predict(torch::Tensor& X)
+    {
+        auto prepared = prepareX(X);
+        torch::Tensor predictions = TAN::predict(prepared);
+        return predictions;
+    }
+    // Same output as TAN::graph for the given `name`.
+    std::vector<std::string> TANLd::graph(const std::string& name) const
+    {
+        std::vector<std::string> description = TAN::graph(name);
+        return description;
+    }
+}
--- a/bayesnet/classifiers/TANLd.h
+++ b/bayesnet/classifiers/TANLd.h
@@ -0,0 +1,23 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef TANLD_H
+#define TANLD_H
+#include "TAN.h"
+#include "Proposal.h"
+
+namespace bayesnet {
+    class TANLd : public TAN, public Proposal { // TAN variant fitted on data that is discretized inside fit (see TANLd.cc)
+    private:
+    public:
+        TANLd(); // default-constructs TAN and hands dataset, features and className to Proposal
+        virtual ~TANLd() = default;
+        TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states, const Smoothing_t smoothing) override; // discretizes, then calls TAN::fit
+        std::vector<std::string> graph(const std::string& name = "TANLd") const override; // returns TAN::graph(name)
+        torch::Tensor predict(torch::Tensor& X) override; // runs prepareX on X before TAN::predict
+    };
+}
+#endif // !TANLD_H
--- a/bayesnet/classifiers/XSP2DE.cc
+++ b/bayesnet/classifiers/XSP2DE.cc
@@ -0,0 +1,575 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "XSP2DE.h"
+#include <pthread.h>   // for pthread_setname_np on linux
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+#include <iostream>
+#include "bayesnet/utils/TensorUtils.h"
+
+namespace bayesnet {
+
+// --------------------------------------
+// Constructor
+// --------------------------------------
+// Creates an unfitted model that uses features spIndex1 and spIndex2 as super-parents
+// and registers "parent1" / "parent2" in validHyperparameters.
+// The Classifier base is listed first because base classes are always initialised
+// before members, whatever order is written in the initializer list.
+XSp2de::XSp2de(int spIndex1, int spIndex2)
+  : Classifier(Network())
+  , superParent1_{ spIndex1 }
+  , superParent2_{ spIndex2 }
+  , nFeatures_{0}
+  , statesClass_{0}
+  , alpha_{1.0}
+  , initializer_{1.0}
+  , semaphore_{ CountingSemaphore::getInstance() }
+{
+  validHyperparameters = { "parent1", "parent2" };
+}
+
+// --------------------------------------
+// setHyperparameters
+// --------------------------------------
+// Takes "parent1" and "parent2" (when present) out of a copy of the hyperparameters,
+// stores them as the two super-parent indices, and forwards the remaining entries
+// to Classifier::setHyperparameters.
+void XSp2de::setHyperparameters(const nlohmann::json &hyperparameters_)
+{
+  nlohmann::json remaining = hyperparameters_;
+  const bool hasParent1 = remaining.contains("parent1");
+  if (hasParent1) {
+    superParent1_ = remaining.at("parent1");
+    remaining.erase("parent1");
+  }
+  const bool hasParent2 = remaining.contains("parent2");
+  if (hasParent2) {
+    superParent2_ = remaining.at("parent2");
+    remaining.erase("parent2");
+  }
+  Classifier::setHyperparameters(remaining);
+}
+
+// --------------------------------------
+// fitx
+// --------------------------------------
+// Fits the model from tensors: X is read as (features, samples), y is appended to the
+// dataset by buildDataset, then the count tables are sized (buildModel) and filled and
+// converted to probabilities (trainModel).
+void XSp2de::fitx(torch::Tensor & X, torch::Tensor & y, 
+                  torch::Tensor & weights_, const Smoothing_t smoothing)
+{
+  n = X.size(0);  // number of features
+  m = X.size(1);  // number of samples
+  dataset = X;
+
+  buildDataset(y);
+  buildModel(weights_);
+  trainModel(weights_, smoothing);
+
+  fitted = true;
+}
+
+// --------------------------------------
+// buildModel
+// --------------------------------------
+// Sizes and zeroes every count table from the current dataset.
+// `weights` is not read here.
+// Throws std::invalid_argument if either super-parent index is not a valid feature index.
+void XSp2de::buildModel(const torch::Tensor &weights)
+{
+  nFeatures_ = n;
+
+  // states_[superParentX_] is indexed below, so reject bad indices first.
+  if (superParent1_ < 0 || superParent1_ >= nFeatures_ ||
+      superParent2_ < 0 || superParent2_ >= nFeatures_) {
+    throw std::invalid_argument("The parent node is not in the dataset");
+  }
+
+  // Derive the number of states for each feature from the dataset:
+  // states_[f] = max value in dataset[f] + 1 (naive; assumes values start at 0).
+  states_.resize(nFeatures_);
+  for (int f = 0; f < nFeatures_; f++) {
+    states_[f] = dataset[f].max().item<int>() + 1;
+  }
+  // Class states, taken from the last row of the dataset.
+  statesClass_ = dataset[-1].max().item<int>() + 1;
+
+  // assign() instead of resize(): resize keeps existing elements, so fitting the same
+  // object twice would keep accumulating on top of the previous counts.
+  classCounts_.assign(statesClass_, 0.0);
+
+  // For sp1 -> p(sp1Val| c)
+  sp1FeatureCounts_.assign(states_[superParent1_] * statesClass_, 0.0);
+
+  // For sp2 -> p(sp2Val| c)
+  sp2FeatureCounts_.assign(states_[superParent2_] * statesClass_, 0.0);
+
+  // For child features we store counts for p(childVal | c, sp1Val, sp2Val) in one big
+  // vector; childOffsets_[f] is where the block of feature f starts (-1 for super-parents).
+  childOffsets_.assign(nFeatures_, -1);
+
+  int totalSize = 0;
+  for (int f = 0; f < nFeatures_; f++) {
+    if (f == superParent1_ || f == superParent2_) {
+      // super-parents have no child block
+      continue;
+    }
+    childOffsets_[f] = totalSize;
+    // block size for a single child f:
+    //   states_[f] * statesClass_ * states_[superParent1_] * states_[superParent2_]
+    totalSize += (states_[f] * statesClass_ 
+                  * states_[superParent1_] 
+                  * states_[superParent2_]);
+  }
+  childCounts_.assign(totalSize, 0.0);
+}
+
+// --------------------------------------
+// trainModel
+// --------------------------------------
+// Accumulates the weighted counts of every sample, picks the smoothing constant and
+// converts the counts to probabilities.
+// `weights` is indexed once per sample (weights[i] for i in [0, m)).
+void XSp2de::trainModel(const torch::Tensor &weights, 
+                        const bayesnet::Smoothing_t smoothing)
+{
+  // Accumulate raw counts. The buffer is reused for every sample: all nFeatures_ + 1
+  // slots are overwritten on each iteration.
+  std::vector<int> instance(nFeatures_ + 1);
+  for (int i = 0; i < m; i++) {
+    for (int f = 0; f < nFeatures_; f++) {
+      instance[f] = dataset[f][i].item<int>();
+    }
+    instance[nFeatures_] = dataset[-1][i].item<int>();  // class
+    const double w = weights[i].item<double>();
+    addSample(instance, w);
+  }
+
+  // Choose alpha based on smoothing:
+  switch (smoothing) {
+    case bayesnet::Smoothing_t::ORIGINAL:
+      alpha_ = 1.0 / m;
+      break;
+    case bayesnet::Smoothing_t::LAPLACE:
+      alpha_ = 1.0;
+      break;
+    default:
+      alpha_ = 0.0; // no smoothing
+  }
+
+  // Large initializer factor for numerical stability. The square is computed in
+  // double so that a large feature count cannot overflow an int.
+  const double featuresSquared = static_cast<double>(nFeatures_) * nFeatures_;
+  initializer_ = std::numeric_limits<double>::max() / featuresSquared;
+
+  // Convert raw counts to probabilities
+  computeProbabilities();
+}
+
+// --------------------------------------
+// addSample
+// --------------------------------------
+// Adds one weighted sample to the count tables.
+// `instance` holds one value per feature followed by the class label in its last slot.
+// Samples whose weight is <= 0 are ignored.
+void XSp2de::addSample(const std::vector<int> &instance, double weight)
+{
+  if (weight <= 0.0) {
+    return;
+  }
+
+  const int label = instance.back();
+  const int parent1Value = instance[superParent1_];
+  const int parent2Value = instance[superParent2_];
+
+  // class count, then the (super-parent value, class) tables
+  classCounts_[label] += weight;
+  sp1FeatureCounts_[parent1Value * statesClass_ + label] += weight;
+  sp2FeatureCounts_[parent2Value * statesClass_ + label] += weight;
+
+  // (child value, class, sp1 value, sp2 value) table. Layout inside a child block:
+  //   sp1Val * (states_[sp2] * states_[f] * statesClass_)
+  //   + sp2Val * (states_[f] * statesClass_)
+  //   + childVal * statesClass_
+  //   + c
+  for (int feature = 0; feature < nFeatures_; ++feature) {
+    const bool isSuperParent = (feature == superParent1_) || (feature == superParent2_);
+    if (!isSuperParent) {
+      const int strideParent2 = states_[feature] * statesClass_;
+      const int strideParent1 = states_[superParent2_] * states_[feature] * statesClass_;
+      const int position = childOffsets_[feature]
+                         + parent1Value * strideParent1
+                         + parent2Value * strideParent2
+                         + instance[feature] * statesClass_
+                         + label;
+      childCounts_[position] += weight;
+    }
+  }
+}
+
+// --------------------------------------
+// computeProbabilities
+// --------------------------------------
+// Converts the raw count tables into smoothed probability tables:
+//   classPriors_       p(c)
+//   sp1FeatureProbs_   p(sp1Val | c)
+//   sp2FeatureProbs_   p(sp2Val | c)
+//   childProbs_        p(childVal | c, sp1Val, sp2Val)
+// alpha_ is added to every numerator and alpha_ * cardinality to every denominator;
+// a non-positive denominator yields probability 0.
+void XSp2de::computeProbabilities()
+{
+  const double totalCount = std::accumulate(classCounts_.begin(), 
+                                            classCounts_.end(), 0.0);
+
+  // classPriors_
+  classPriors_.resize(statesClass_, 0.0);
+  if (totalCount <= 0.0) {
+    // fallback => uniform
+    const double unif = 1.0 / static_cast<double>(statesClass_);
+    for (int c = 0; c < statesClass_; c++) {
+      classPriors_[c] = unif;
+    }
+  } else {
+    for (int c = 0; c < statesClass_; c++) {
+      classPriors_[c] = 
+        (classCounts_[c] + alpha_) 
+        / (totalCount + alpha_ * statesClass_);
+    }
+  }
+
+  // Shared by both super-parents: p(spVal | c) from the (spVal, c) counts.
+  const auto fillParentProbs = [this](const auto &counts, auto &probs, int card) {
+    probs.resize(counts.size());
+    for (int spVal = 0; spVal < card; spVal++) {
+      for (int c = 0; c < statesClass_; c++) {
+        const double denom = classCounts_[c] + alpha_ * card;
+        const double num = counts[spVal * statesClass_ + c] + alpha_;
+        probs[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
+      }
+    }
+  };
+
+  const int sp1Card = states_[superParent1_];
+  const int sp2Card = states_[superParent2_];
+
+  // p(sp1Val| c)
+  fillParentProbs(sp1FeatureCounts_, sp1FeatureProbs_, sp1Card);
+  // p(sp2Val| c)
+  fillParentProbs(sp2FeatureCounts_, sp2FeatureProbs_, sp2Card);
+
+  // p(childVal| c, sp1Val, sp2Val)
+  childProbs_.resize(childCounts_.size());
+  int offset = 0;  // start of the current child's block, same layout as in addSample
+  for (int f = 0; f < nFeatures_; f++) {
+    if (f == superParent1_ || f == superParent2_) 
+      continue;
+
+    const int fCard = states_[f];
+    const int strideSp2 = fCard * statesClass_;
+    const int strideSp1 = sp2Card * fCard * statesClass_;
+
+    for (int sp1Val = 0; sp1Val < sp1Card; sp1Val++) {
+      for (int sp2Val = 0; sp2Val < sp2Card; sp2Val++) {
+        const int base = offset + sp1Val * strideSp1 + sp2Val * strideSp2;
+        for (int c = 0; c < statesClass_; c++) {
+          // The denominator is the count of (sp1Val, sp2Val, c), obtained by summing
+          // over the child values. It does not depend on the child value, so it is
+          // computed once per (sp1Val, sp2Val, c) instead of once per child value.
+          double sumSp1Sp2C = 0.0;
+          for (int cv = 0; cv < fCard; cv++) {
+            sumSp1Sp2C += childCounts_[base + cv * statesClass_ + c];
+          }
+          const double denom = sumSp1Sp2C + alpha_ * fCard;
+          for (int childVal = 0; childVal < fCard; childVal++) {
+            const int idx = base + childVal * statesClass_ + c;
+            const double num = childCounts_[idx] + alpha_;
+            childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
+          }
+        }
+      }
+    }
+    offset += fCard * sp1Card * sp2Card * statesClass_;
+  }
+}
+
+// --------------------------------------
+// predict_proba (single instance)
+// --------------------------------------
+// Class probabilities for one sample (`instance` holds one value per feature).
+// Starts from p(c) * p(sp1Val | c) * p(sp2Val | c) * initializer_, multiplies in
+// p(childVal | c, sp1Val, sp2Val) for every non-super-parent feature and normalizes.
+// Throws std::logic_error if the model has not been fitted.
+std::vector<double> XSp2de::predict_proba(const std::vector<int> &instance) const
+{
+  if (!fitted) {
+    throw std::logic_error(CLASSIFIER_NOT_FITTED);
+  }
+
+  const int parent1Value = instance[superParent1_];
+  const int parent2Value = instance[superParent2_];
+
+  std::vector<double> posterior(statesClass_, 0.0);
+  for (int c = 0; c < statesClass_; ++c) {
+    const double prior = classPriors_[c];
+    const double likelihood1 = sp1FeatureProbs_[parent1Value * statesClass_ + c];
+    const double likelihood2 = sp2FeatureProbs_[parent2Value * statesClass_ + c];
+    posterior[c] = prior * likelihood1 * likelihood2 * initializer_;
+  }
+
+  // Walk the child blocks in feature order; blockStart is the running start of the
+  // current child's block inside childProbs_.
+  int blockStart = 0;
+  for (int feature = 0; feature < nFeatures_; ++feature) {
+    const bool isSuperParent = (feature == superParent1_) || (feature == superParent2_);
+    if (isSuperParent) {
+      continue;
+    }
+    const int card = states_[feature];
+    const int parent1Card = states_[superParent1_];
+    const int parent2Card = states_[superParent2_];
+    const int strideParent1 = parent2Card * card * statesClass_;
+    const int strideParent2 = card * statesClass_;
+
+    const int row = blockStart
+                  + parent1Value * strideParent1
+                  + parent2Value * strideParent2
+                  + instance[feature] * statesClass_;
+    for (int c = 0; c < statesClass_; ++c) {
+      posterior[c] *= childProbs_[row + c];
+    }
+    blockStart += (card * parent1Card * parent2Card * statesClass_);
+  }
+
+  normalize(posterior);
+  return posterior;
+}
+
+// --------------------------------------
+// predict_proba (batch)
+// --------------------------------------
+// Class probabilities for a batch laid out as test_data[feature][sample].
+// Returns one probability vector per sample; an empty `test_data` yields an empty result.
+// The samples are split into chunks that are processed by worker threads, with
+// semaphore_ acquired before each thread is started and released when it finishes.
+// Throws std::logic_error if the model has not been fitted.
+std::vector<std::vector<double>> XSp2de::predict_proba(std::vector<std::vector<int>> &test_data)
+{
+  // Check here rather than only inside the workers: an exception escaping a
+  // std::thread would terminate the process instead of reaching the caller.
+  if (!fitted) {
+    throw std::logic_error(CLASSIFIER_NOT_FITTED);
+  }
+  if (test_data.empty()) {
+    return {};
+  }
+
+  const int test_size = static_cast<int>(test_data[0].size());  // each feature is test_data[f], size = #samples
+  const int sample_size = static_cast<int>(test_data.size());   // = nFeatures_
+  std::vector<std::vector<double>> probabilities(
+      test_size, std::vector<double>(statesClass_, 0.0));
+
+  const int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
+  std::vector<std::thread> threads;
+
+  auto worker = [&](const std::vector<std::vector<int>> &samples, 
+                    int begin, 
+                    int chunk, 
+                    int sample_size, 
+                    std::vector<std::vector<double>> &predictions) {
+    std::string threadName =
+      "XSp2de-" + std::to_string(begin) + "-" + std::to_string(chunk);
+#if defined(__linux__)
+    pthread_setname_np(pthread_self(), threadName.c_str());
+#else
+    pthread_setname_np(threadName.c_str());
+#endif
+
+    // Gather the values of one sample across all features, then predict it.
+    std::vector<int> instance(sample_size);
+    for (int sample = begin; sample < begin + chunk; ++sample) {
+      for (int feature = 0; feature < sample_size; ++feature) {
+        instance[feature] = samples[feature][sample];
+      }
+      predictions[sample] = predict_proba(instance);
+    }
+    semaphore_.release();
+  };
+
+  for (int begin = 0; begin < test_size; begin += chunk_size) {
+    const int chunk = std::min(chunk_size, test_size - begin);
+    semaphore_.acquire();
+    // std::thread copies its arguments; std::cref keeps every worker reading the
+    // caller's matrix instead of a private copy of the whole test set.
+    threads.emplace_back(worker, std::cref(test_data), begin, chunk, sample_size, 
+                         std::ref(probabilities));
+  }
+  for (auto &th : threads) {
+    th.join();
+  }
+  return probabilities;
+}
+
+// --------------------------------------
+// predict (single instance)
+// --------------------------------------
+// Predicted class for one sample: the index of the largest entry of predict_proba
+// (the first one when several entries tie).
+int XSp2de::predict(const std::vector<int> &instance) const
+{
+  const std::vector<double> posterior = predict_proba(instance);
+  const auto best = std::max_element(posterior.begin(), posterior.end());
+  return static_cast<int>(std::distance(posterior.begin(), best));
+}
+
+// --------------------------------------
+// predict (batch of data)
+// --------------------------------------
+// Predicted class for every sample of a batch: for each row returned by predict_proba,
+// the index of its largest entry (the first one when several entries tie).
+std::vector<int> XSp2de::predict(std::vector<std::vector<int>> &test_data)
+{
+  const auto probabilities = predict_proba(test_data);
+
+  std::vector<int> predictions;
+  predictions.reserve(probabilities.size());
+  for (const auto &row : probabilities) {
+    const auto best = std::max_element(row.begin(), row.end());
+    predictions.push_back(static_cast<int>(std::distance(row.begin(), best)));
+  }
+  return predictions;
+}
+
+// --------------------------------------
+// predict (torch::Tensor version)
+// --------------------------------------
+// Tensor front end of predict: converts X with TensorUtils::to_matrix, predicts on the
+// resulting matrix and returns the labels as an int32 tensor.
+torch::Tensor XSp2de::predict(torch::Tensor &X)
+{
+  auto matrix = TensorUtils::to_matrix(X);
+  auto labels = predict(matrix);
+  torch::Tensor result = torch::tensor(labels, torch::kInt32);
+  return result;
+}
+
+// --------------------------------------
+// predict_proba (torch::Tensor version)
+// --------------------------------------
+// Tensor front end of predict_proba: converts X with TensorUtils::to_matrix, predicts on
+// the resulting matrix and copies each probability row into a double tensor with
+// X.size(1) rows and statesClass_ columns.
+torch::Tensor XSp2de::predict_proba(torch::Tensor &X)
+{
+  auto matrix = TensorUtils::to_matrix(X);
+  auto rows = predict_proba(matrix);
+  int n_samples = X.size(1);
+  torch::Tensor result =
+    torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
+  const int n_rows = static_cast<int>(rows.size());
+  for (int row = 0; row < n_rows; ++row) {
+    result.index_put_({ row, "..." }, torch::tensor(rows[row]));
+  }
+  return result;
+}
+
+// --------------------------------------
+// score (torch::Tensor version)
+// --------------------------------------
+// Accuracy on tensors: number of predictions equal to y, divided by y.size(0).
+float XSp2de::score(torch::Tensor &X, torch::Tensor &y)
+{
+  torch::Tensor predicted = predict(X);
+  const float hits = (predicted == y).sum().item<float>();
+  return hits / y.size(0);
+}
+
+// --------------------------------------
+// score (vector version)
+// --------------------------------------
+// Accuracy on vectors: fraction of predictions for X that equal the matching entry of y.
+// With no predictions the result is 0/0 (NaN), as before.
+// Throws std::invalid_argument if y has fewer entries than there are predictions.
+float XSp2de::score(std::vector<std::vector<int>> &X, std::vector<int> &y)
+{
+  const auto y_pred = predict(X);
+  // y[i] is read for every prediction, so a shorter y would be read out of bounds.
+  if (y.size() < y_pred.size()) {
+    throw std::invalid_argument("y has fewer labels than there are samples in X");
+  }
+  int correct = 0;
+  for (size_t i = 0; i < y_pred.size(); ++i) {
+    if (y_pred[i] == y[i]) {
+      correct++;
+    }
+  }
+  return static_cast<float>(correct) / static_cast<float>(y_pred.size());
+}
+
+// --------------------------------------
+// Utility: normalize
+// --------------------------------------
+// Scales v in place so that its entries sum to 1.
+// v is left untouched when its sum is not greater than 0.
+void XSp2de::normalize(std::vector<double> &v) const
+{
+  const double total = std::accumulate(v.begin(), v.end(), 0.0);
+  if (!(total > 0.0)) {
+    return;
+  }
+  for (double &entry : v) {
+    entry /= total;
+  }
+}
+
+// --------------------------------------
+// to_string
+// --------------------------------------
+// Text dump of the model: scalar settings, per-feature state counts, the raw count
+// tables, the class priors and the child offsets.
+std::string XSp2de::to_string() const
+{
+  std::ostringstream out;
+  // Writes every element of a container followed by a single space.
+  const auto dump = [&out](const auto &values) {
+    for (const auto &value : values) {
+      out << value << " ";
+    }
+  };
+
+  out << "----- XSp2de Model -----\n";
+  out << "nFeatures_    = " << nFeatures_ << "\n";
+  out << "superParent1_ = " << superParent1_ << "\n";
+  out << "superParent2_ = " << superParent2_ << "\n";
+  out << "statesClass_  = " << statesClass_ << "\n\n";
+
+  out << "States: [";
+  dump(states_);
+  out << "]\n";
+
+  out << "classCounts_:\n";
+  dump(classCounts_);
+  out << "\nclassPriors_:\n";
+  dump(classPriors_);
+  out << "\nsp1FeatureCounts_ (size=" << sp1FeatureCounts_.size() << ")\n";
+  dump(sp1FeatureCounts_);
+  out << "\nsp2FeatureCounts_ (size=" << sp2FeatureCounts_.size() << ")\n";
+  dump(sp2FeatureCounts_);
+  out << "\nchildCounts_ (size=" << childCounts_.size() << ")\n";
+  dump(childCounts_);
+
+  out << "\nchildOffsets_:\n";
+  dump(childOffsets_);
+
+  out << "\n----------------------------------------\n";
+  return out.str();
+}
+
+// --------------------------------------
+// Some introspection about the graph
+// --------------------------------------
+// One node per feature plus one node for the class.
+int XSp2de::getNumberOfNodes() const 
+{
+  const int classNodes = 1;
+  return nFeatures_ + classNodes;
+}
+
+// Number of class states (statesClass_, set in buildModel).
+int XSp2de::getClassNumStates() const 
+{
+  const int classStates = statesClass_;
+  return classStates;
+}
+
+// Number of features (nFeatures_, set in buildModel).
+int XSp2de::getNFeatures() const 
+{
+  const int featureCount = nFeatures_;
+  return featureCount;
+}
+
+// Sum of the per-feature state counts multiplied by the number of features.
+// The original author marks this figure as an example, not a definitive measure.
+int XSp2de::getNumberOfStates() const
+{
+  int stateSum = 0;
+  for (const auto featureStates : states_) {
+    stateSum += featureStates;
+  }
+  return stateSum * nFeatures_;
+}
+
+// Edge count of the two-super-parent structure:
+//   class -> sp1, class -> sp2           : 2 edges
+//   class -> child                       : (nFeatures_ - 2) edges
+//   sp1 -> child and sp2 -> child        : 2 * (nFeatures_ - 2) edges
+// i.e. 2 + 3 * (nFeatures_ - 2) = 3 * nFeatures_ - 4.
+// The original author describes this as an approximation.
+int XSp2de::getNumberOfEdges() const
+{
+  const int children = nFeatures_ - 2;
+  const int classToParents = 2;
+  return classToParents + 3 * children;
+}
+
+} // namespace bayesnet
+
--- a/bayesnet/classifiers/XSP2DE.h
+++ b/bayesnet/classifiers/XSP2DE.h
@@ -0,0 +1,75 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef XSP2DE_H
+#define XSP2DE_H
+
+#include "Classifier.h"
+#include "bayesnet/utils/CountingSemaphore.h"
+#include <torch/torch.h>
+#include <vector>
+
+namespace bayesnet {
+
+// Classifier with two super-parent features (spIndex1, spIndex2) that keeps its
+// own count/probability tables in flat std::vector<double> buffers.
+// NOTE(review): the implementation lives in XSP2DE.cc; the comments below only
+// describe what the declarations themselves show.
+class XSp2de : public Classifier {
+  public:
+    // spIndex1 / spIndex2: presumably the feature indices of the two
+    // super-parents -- confirm against the constructor in XSP2DE.cc.
+    XSp2de(int spIndex1, int spIndex2);
+    void setHyperparameters(const nlohmann::json &hyperparameters_) override;
+    // Fits the model from tensors; not part of the Classifier virtual interface
+    // (no override).
+    void fitx(torch::Tensor &X, torch::Tensor &y, torch::Tensor &weights_, const Smoothing_t smoothing);
+    // Single-instance overloads (const, non-virtual) next to the batch overrides.
+    std::vector<double> predict_proba(const std::vector<int> &instance) const;
+    std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>> &test_data) override;
+    int predict(const std::vector<int> &instance) const;
+    std::vector<int> predict(std::vector<std::vector<int>> &test_data) override;
+    torch::Tensor predict(torch::Tensor &X) override;
+    torch::Tensor predict_proba(torch::Tensor &X) override;
+
+    float score(torch::Tensor &X, torch::Tensor &y) override;
+    float score(std::vector<std::vector<int>> &X, std::vector<int> &y) override;
+    // Textual dump of the internal tables.
+    std::string to_string() const;
+    // No graph description is produced: the result only contains the title.
+    std::vector<std::string> graph(const std::string &title) const override {
+        return std::vector<std::string>({title});
+    }
+
+    // Introspection helpers.
+    int getNumberOfNodes() const override;
+    int getNumberOfEdges() const override;
+    int getNFeatures() const;
+    int getClassNumStates() const override;
+    int getNumberOfStates() const override;
+
+  protected:
+    void buildModel(const torch::Tensor &weights) override;
+    void trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing) override;
+
+  private:
+    void addSample(const std::vector<int> &instance, double weight);
+    void normalize(std::vector<double> &v) const;
+    void computeProbabilities();
+
+    // NOTE: data members are initialized in declaration order; keep this order
+    // in sync with the constructor's initializer list.
+    int superParent1_;
+    int superParent2_;
+    int nFeatures_;
+    int statesClass_;
+    double alpha_;       // presumably the smoothing constant -- confirm in XSP2DE.cc
+    double initializer_; // presumably a scaling factor for numerical stability -- confirm
+
+    std::vector<int> states_;
+    std::vector<double> classCounts_;
+    std::vector<double> classPriors_;
+    std::vector<double> sp1FeatureCounts_, sp1FeatureProbs_;
+    std::vector<double> sp2FeatureCounts_, sp2FeatureProbs_;
+    // childOffsets_[f] will be the offset into childCounts_ for feature f.
+    // If f is either superParent1 or superParent2, childOffsets_[f] = -1
+    std::vector<int> childOffsets_;
+    // For each child f, we store p(x_f | c, sp1Val, sp2Val).  We'll store the raw
+    // counts in childCounts_, and the probabilities in childProbs_, with a
+    // dimension block of size: states_[f]* statesClass_* states_[sp1]* states_[sp2].
+    std::vector<double> childCounts_;
+    std::vector<double> childProbs_;
+    // Reference member: the class is therefore not copy-assignable by default.
+    CountingSemaphore &semaphore_;
+};
+
+} // namespace bayesnet
+#endif // XSP2DE_H
--- a/bayesnet/classifiers/XSPODE.cc
+++ b/bayesnet/classifiers/XSPODE.cc
@@ -0,0 +1,450 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <sstream>
+#include <stdexcept>
+#include "XSPODE.h"
+#include "bayesnet/utils/TensorUtils.h"
+
+namespace bayesnet {
+
+  // --------------------------------------
+  // Constructor
+  // --------------------------------------
+  // Builds an unfitted model whose super-parent is the feature with index
+  // spIndex. The index can later be replaced through the "parent"
+  // hyperparameter.
+  //
+  // The initializer list is written in the order in which initialization really
+  // happens (base class first, then members in declaration order) so that it
+  // does not suggest a different order and does not trigger -Wreorder.
+  XSpode::XSpode(int spIndex)
+    : Classifier(Network()),
+    superParent_{ spIndex }, nFeatures_{ 0 }, statesClass_{ 0 },
+    alpha_{ 1.0 }, initializer_{ 1.0 },
+    semaphore_{ CountingSemaphore::getInstance() }
+  {
+    validHyperparameters = { "parent" };
+  }
+
+  // Consumes the optional "parent" entry (new super-parent index) and forwards
+  // whatever is left to Classifier::setHyperparameters.
+  void XSpode::setHyperparameters(const nlohmann::json& hyperparameters_)
+  {
+    nlohmann::json remaining = hyperparameters_;
+    if (remaining.contains("parent")) {
+      superParent_ = remaining.at("parent").get<int>();
+      remaining.erase("parent");
+    }
+    Classifier::setHyperparameters(remaining);
+  }
+
+  // Fits the model directly from tensors.
+  //   X        : feature tensor; dimension 0 is read as the number of features
+  //              and dimension 1 as the number of samples.
+  //   y        : class labels, appended to the dataset as an extra row.
+  //   weights_ : per-sample weights handed to buildModel/trainModel.
+  //   smoothing: smoothing strategy handed to trainModel.
+  void XSpode::fitx(torch::Tensor & X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
+  {
+    dataset = X;
+    n = X.size(0);
+    m = X.size(1);
+    // Append the labels as the last row of dataset.
+    buildDataset(y);
+    // Allocate the count tables, then fill them and derive the probabilities.
+    buildModel(weights_);
+    trainModel(weights_, smoothing);
+    fitted = true;
+  }
+
+  // --------------------------------------
+  // buildModel
+  // --------------------------------------
+  // Initialize the storage needed for the super-parent and child feature counts
+  // and probabilities.
+  // --------------------------------------
+  // Derives the cardinalities from the dataset and (re)allocates the count
+  // tables, all zero-initialized.
+  //
+  // weights is unused here; it is part of the Classifier interface.
+  // Throws std::invalid_argument when superParent_ is not a valid feature index.
+  void XSpode::buildModel(const torch::Tensor& weights)
+  {
+    nFeatures_ = n;
+    // superParent_ may come from the constructor or from the "parent"
+    // hyperparameter; reject an invalid index before it is used to subscript
+    // states_.
+    if (superParent_ < 0 || superParent_ >= nFeatures_) {
+      throw std::invalid_argument("XSpode: super-parent index " + std::to_string(superParent_) +
+        " is out of range [0, " + std::to_string(nFeatures_) + ")");
+    }
+
+    // Number of states of each feature = max value found in its row + 1.
+    states_.resize(nFeatures_);
+    for (int f = 0; f < nFeatures_; f++) {
+      states_[f] = dataset[f].max().item<int>() + 1;
+    }
+    // The class is the last row of dataset; it is kept apart in statesClass_
+    // (states_ only holds the features).
+    statesClass_ = dataset[-1].max().item<int>() + 1;
+
+    // assign() (not resize()) so that a second fit starts again from zero
+    // instead of accumulating on top of the counts of the previous fit.
+    classCounts_.assign(statesClass_, 0.0);
+    // p(x_sp = spVal | c): counts stored at [spVal * statesClass_ + c].
+    spFeatureCounts_.assign(states_[superParent_] * statesClass_, 0.0);
+
+    // For each child != sp, p(childVal | c, spVal) lives in its own block of
+    // childCounts_ of size states_[child] * statesClass_ * states_[sp].
+    // childOffsets_[child] is the start of that block; the super-parent keeps -1.
+    childOffsets_.assign(nFeatures_, -1);
+    int totalSize = 0;
+    for (int f = 0; f < nFeatures_; f++) {
+      if (f == superParent_)
+        continue; // skip sp
+      childOffsets_[f] = totalSize;
+      totalSize += (states_[f] * statesClass_ * states_[superParent_]);
+    }
+    childCounts_.assign(totalSize, 0.0);
+  }
+  // --------------------------------------
+  // trainModel
+  // --------------------------------------
+  //
+  // We only store conditional probabilities for:
+  //   p(x_sp| c)   (the super-parent feature)
+  //   p(x_child| c, x_sp)  for all child ≠ sp
+  //
+  // --------------------------------------
+  // Accumulates the weighted counts of every sample of dataset, selects the
+  // smoothing constant alpha_ and converts the counts into probabilities.
+  //   weights  : one weight per sample (weights[i] is read for i in [0, m)).
+  //   smoothing: ORIGINAL -> alpha = 1/m, LAPLACE -> alpha = 1, otherwise 0.
+  void XSpode::trainModel(const torch::Tensor& weights,
+    const bayesnet::Smoothing_t smoothing)
+  {
+    // Accumulate raw counts. The buffer is fully overwritten for every sample,
+    // so it is allocated only once.
+    std::vector<int> instance(nFeatures_ + 1);
+    for (int i = 0; i < m; i++) {
+      for (int f = 0; f < nFeatures_; f++) {
+        instance[f] = dataset[f][i].item<int>();
+      }
+      // The class label is the last row of dataset and the last slot of instance.
+      instance[nFeatures_] = dataset[-1][i].item<int>();
+      addSample(instance, weights[i].item<double>());
+    }
+    switch (smoothing) {
+      case bayesnet::Smoothing_t::ORIGINAL:
+        alpha_ = 1.0 / m;
+        break;
+      case bayesnet::Smoothing_t::LAPLACE:
+        alpha_ = 1.0;
+        break;
+      default:
+        alpha_ = 0.0; // No smoothing
+    }
+    // Scaling factor applied to the product of probabilities in predict_proba,
+    // for numerical stability. The square is computed in double so that a large
+    // number of features cannot overflow int.
+    initializer_ = std::numeric_limits<double>::max() /
+      (static_cast<double>(nFeatures_) * nFeatures_);
+    // Convert raw counts to probabilities
+    computeProbabilities();
+  }
+
+  // --------------------------------------
+  // addSample
+  // --------------------------------------
+  //
+  // instance has size nFeatures_ + 1, with the class at the end.
+  // We add 1 to the appropriate counters for each (c, superParentVal, childVal).
+  //
+  // Adds one weighted observation to the count tables.
+  //   instance: nFeatures_ feature values followed by the class value.
+  //   weight  : amount added to each counter; samples with weight <= 0 are
+  //             ignored.
+  void XSpode::addSample(const std::vector<int>& instance, double weight)
+  {
+    if (weight <= 0.0) {
+      return;
+    }
+    const int classVal = instance.back();
+    const int parentVal = instance[superParent_];
+
+    // Class counter and super-parent counter [parentVal * statesClass_ + classVal].
+    classCounts_[classVal] += weight;
+    spFeatureCounts_[parentVal * statesClass_ + classVal] += weight;
+
+    // Child counters. Inside the block of a child the layout is
+    // [parentVal][childVal][classVal], i.e.
+    // offset + (parentVal * states_[child] + childVal) * statesClass_ + classVal.
+    for (int child = 0; child < nFeatures_; ++child) {
+      if (child != superParent_) {
+        const int parentStride = states_[child] * statesClass_;
+        const int slot = childOffsets_[child] + parentVal * parentStride +
+          instance[child] * statesClass_ + classVal;
+        childCounts_[slot] += weight;
+      }
+    }
+  }
+
+  // --------------------------------------
+  // computeProbabilities
+  // --------------------------------------
+  //
+  // Once all samples are added in COUNTS mode, call this to:
+  //    p(c)
+  //    p(x_sp = spVal | c)
+  //    p(x_child = v | c, x_sp = s_sp)
+  //
+  // --------------------------------------
+  // Turns the accumulated counts into the three probability tables:
+  //    classPriors_    : p(c)
+  //    spFeatureProbs_ : p(x_sp = spVal | c)
+  //    childProbs_     : p(x_child = v | c, x_sp = spVal)
+  // Every estimate is smoothed with alpha_.
+  void XSpode::computeProbabilities()
+  {
+    // (count + alpha) / (total + alpha * cardinality), or 0 when the
+    // denominator is not positive.
+    const auto smoothed = [this](double count, double total, int cardinality) {
+      const double denom = total + alpha_ * cardinality;
+      const double num = count + alpha_;
+      return denom <= 0.0 ? 0.0 : num / denom;
+    };
+
+    // --- p(c) ---
+    const double totalCount =
+      std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
+    classPriors_.resize(statesClass_, 0.0);
+    for (int c = 0; c < statesClass_; ++c) {
+      if (totalCount <= 0.0) {
+        // No (positively weighted) samples at all: fall back to uniform priors.
+        classPriors_[c] = 1.0 / static_cast<double>(statesClass_);
+      } else {
+        classPriors_[c] =
+          (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
+      }
+    }
+
+    // --- p(x_sp | c): normalized by the class count ---
+    const int parentCard = states_[superParent_];
+    spFeatureProbs_.resize(spFeatureCounts_.size());
+    for (int parentVal = 0; parentVal < parentCard; ++parentVal) {
+      for (int c = 0; c < statesClass_; ++c) {
+        const int slot = parentVal * statesClass_ + c;
+        spFeatureProbs_[slot] = smoothed(spFeatureCounts_[slot], classCounts_[c], parentCard);
+      }
+    }
+
+    // --- p(x_child | c, x_sp): normalized by the (spVal, c) count ---
+    childProbs_.resize(childCounts_.size());
+    for (int child = 0; child < nFeatures_; ++child) {
+      if (child == superParent_) {
+        continue;
+      }
+      const int card = states_[child];
+      const int blockStart = childOffsets_[child];
+      for (int parentVal = 0; parentVal < parentCard; ++parentVal) {
+        const int parentBase = blockStart + parentVal * (card * statesClass_);
+        for (int value = 0; value < card; ++value) {
+          for (int c = 0; c < statesClass_; ++c) {
+            const int slot = parentBase + value * statesClass_ + c;
+            childProbs_[slot] = smoothed(childCounts_[slot],
+              spFeatureCounts_[parentVal * statesClass_ + c], card);
+          }
+        }
+      }
+    }
+  }
+
+  // --------------------------------------
+  // predict_proba
+  // --------------------------------------
+  //
+  // For a single instance x of dimension nFeatures_:
+  //  P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp).
+  //
+  // --------------------------------------
+  // Class posterior of one instance (nFeatures_ feature values):
+  //   P(c | x) ∝ p(c) * p(x_sp | c) * prod_{child != sp} p(x_child | c, x_sp)
+  // The product is pre-multiplied by initializer_ and normalized at the end.
+  // Throws std::logic_error if the model has not been fitted.
+  std::vector<double> XSpode::predict_proba(const std::vector<int>& instance) const
+  {
+    if (!fitted) {
+      throw std::logic_error(CLASSIFIER_NOT_FITTED);
+    }
+    const int parentVal = instance[superParent_];
+
+    // Start from p(c) * p(x_sp | c), scaled by initializer_.
+    std::vector<double> posterior(statesClass_, 0.0);
+    for (int c = 0; c < statesClass_; ++c) {
+      posterior[c] = classPriors_[c] * spFeatureProbs_[parentVal * statesClass_ + c] * initializer_;
+    }
+
+    // Fold in every child: its statesClass_ probabilities for this
+    // (parentVal, childVal) pair are contiguous in childProbs_.
+    for (int child = 0; child < nFeatures_; ++child) {
+      if (child != superParent_) {
+        const int parentStride = states_[child] * statesClass_;
+        const int rowStart = childOffsets_[child] + parentVal * parentStride +
+          instance[child] * statesClass_;
+        for (int c = 0; c < statesClass_; ++c) {
+          posterior[c] *= childProbs_[rowStart + c];
+        }
+      }
+    }
+
+    normalize(posterior);
+    return posterior;
+  }
+  // Class posteriors of a batch of samples.
+  //   test_data: feature-major matrix, test_data[feature][sample].
+  //   returns  : one vector of statesClass_ probabilities per sample (empty
+  //              when test_data is empty).
+  // Throws std::logic_error if the model has not been fitted.
+  //
+  // The samples are split in chunks that are processed by worker threads; the
+  // shared counting semaphore bounds how many workers run at the same time.
+  std::vector<std::vector<double>> XSpode::predict_proba(std::vector<std::vector<int>>& test_data)
+  {
+    // Check here, in the calling thread: an exception thrown inside a worker
+    // would end in std::terminate instead of reaching the caller.
+    if (!fitted) {
+      throw std::logic_error(CLASSIFIER_NOT_FITTED);
+    }
+    if (test_data.empty()) {
+      return {};
+    }
+    const int sample_size = static_cast<int>(test_data.size());
+    const int test_size = static_cast<int>(test_data[0].size());
+    std::vector<std::vector<double>> probabilities(test_size, std::vector<double>(statesClass_));
+
+    const int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
+    // test_data and probabilities are captured by reference (all the threads
+    // are joined before returning), so the input matrix is no longer copied
+    // once per thread. Each worker writes a disjoint range of probabilities.
+    auto worker = [&](int begin, int chunk) {
+        std::string threadName =
+          "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
+#if defined(__linux__)
+        pthread_setname_np(pthread_self(), threadName.c_str());
+#else
+        pthread_setname_np(threadName.c_str());
+#endif
+
+        std::vector<int> instance(sample_size);
+        for (int sample = begin; sample < begin + chunk; ++sample) {
+          for (int feature = 0; feature < sample_size; ++feature) {
+            instance[feature] = test_data[feature][sample];
+          }
+          probabilities[sample] = predict_proba(instance);
+        }
+        semaphore_.release();
+      };
+    std::vector<std::thread> threads;
+    for (int begin = 0; begin < test_size; begin += chunk_size) {
+      const int chunk = std::min(chunk_size, test_size - begin);
+      // Blocks until a slot is free; the worker releases it when it finishes.
+      semaphore_.acquire();
+      threads.emplace_back(worker, begin, chunk);
+    }
+    for (auto& thread : threads) {
+      thread.join();
+    }
+    return probabilities;
+  }
+
+  // --------------------------------------
+  // Utility: normalize
+  // --------------------------------------
+  // Scales v in place so that its elements add up to 1. A vector whose sum is
+  // not positive is left untouched.
+  void XSpode::normalize(std::vector<double>& v) const
+  {
+    const double total = std::accumulate(v.begin(), v.end(), 0.0);
+    if (total <= 0.0) {
+      return;
+    }
+    std::for_each(v.begin(), v.end(), [total](double& element) { element /= total; });
+  }
+
+  // --------------------------------------
+  // representation of the model
+  // --------------------------------------
+  // Human-readable dump of the model: scalar settings followed by every
+  // internal table, each printed as a bracketed, space-separated list.
+  std::string XSpode::to_string() const
+  {
+    std::ostringstream oss;
+    // Writes the elements of a table followed by the closing bracket.
+    const auto dump = [&oss](const auto& values) {
+      for (const auto& value : values)
+        oss << value << " ";
+      oss << "]" << '\n';
+    };
+
+    oss << "----- XSpode Model -----" << '\n'
+      << "nFeatures_  = " << nFeatures_ << '\n'
+      << "superParent_ = " << superParent_ << '\n'
+      << "statesClass_ = " << statesClass_ << '\n'
+      << '\n';
+
+    oss << "States: [";
+    dump(states_);
+    oss << "classCounts_: [";
+    dump(classCounts_);
+    oss << "classPriors_: [";
+    dump(classPriors_);
+    oss << "spFeatureCounts_: size = " << spFeatureCounts_.size() << '\n' << "[";
+    dump(spFeatureCounts_);
+    oss << "spFeatureProbs_: size = " << spFeatureProbs_.size() << '\n' << "[";
+    dump(spFeatureProbs_);
+    oss << "childCounts_: size = " << childCounts_.size() << '\n' << "[";
+    dump(childCounts_);
+    // This label and opening bracket were missing: the values were printed
+    // unlabeled and followed by an unmatched "]".
+    oss << "childProbs_: size = " << childProbs_.size() << '\n' << "[";
+    dump(childProbs_);
+    oss << "childOffsets_: [";
+    dump(childOffsets_);
+    oss << std::string(40,'-') << '\n';
+    return oss.str();
+  }
+  // Number of nodes in the model: every feature plus the class node.
+  int XSpode::getNumberOfNodes() const
+  {
+    const int classNode = 1;
+    return nFeatures_ + classNode;
+  }
+  // Number of class states found when the model was built.
+  int XSpode::getClassNumStates() const
+  {
+    const int classStates = statesClass_;
+    return classStates;
+  }
+  // Number of features (class excluded) found when the model was built.
+  int XSpode::getNFeatures() const
+  {
+    const int featureCount = nFeatures_;
+    return featureCount;
+  }
+  // Returns (sum of the per-feature cardinalities in states_) * nFeatures_.
+  int XSpode::getNumberOfStates() const
+  {
+    int cardinalitySum = 0;
+    for (const int cardinality : states_) {
+      cardinalitySum += cardinality;
+    }
+    return cardinalitySum * nFeatures_;
+  }
+  // Reports 2 * nFeatures_ + 1 edges.
+  // NOTE(review): a SPODE graph with class -> every feature and
+  // super-parent -> every other feature would have 2 * nFeatures_ - 1 edges;
+  // confirm which figure callers and tests expect before changing it.
+  int XSpode::getNumberOfEdges() const
+  {
+    const int edgesPerFeature = 2;
+    return edgesPerFeature * nFeatures_ + 1;
+  }
+
+  // ------------------------------------------------------
+  // Predict overrides (classifier interface)
+  // ------------------------------------------------------
+  // Predicted class of one instance: index of the largest posterior (the first
+  // one in case of ties).
+  int XSpode::predict(const std::vector<int>& instance) const
+  {
+    const std::vector<double> posterior = predict_proba(instance);
+    const auto best = std::max_element(posterior.begin(), posterior.end());
+    return static_cast<int>(std::distance(posterior.begin(), best));
+  }
+  // Predicted class of every sample of test_data (test_data[feature][sample]):
+  // index of the largest posterior of each sample.
+  std::vector<int> XSpode::predict(std::vector<std::vector<int>>& test_data)
+  {
+    const auto posteriors = predict_proba(test_data);
+    std::vector<int> labels(posteriors.size(), 0);
+    std::transform(posteriors.begin(), posteriors.end(), labels.begin(),
+      [](const std::vector<double>& row) {
+        return static_cast<int>(std::distance(row.begin(), std::max_element(row.begin(), row.end())));
+      });
+    return labels;
+  }
+  // Tensor front end of predict: converts X with TensorUtils::to_matrix, runs
+  // the vector overload and returns the labels as an Int32 tensor.
+  torch::Tensor XSpode::predict(torch::Tensor& X)
+  {
+    auto samples = TensorUtils::to_matrix(X);
+    const auto labels = predict(samples);
+    return torch::tensor(labels, torch::kInt32);
+  }
+  // Tensor front end of predict_proba: converts X with TensorUtils::to_matrix,
+  // runs the vector overload and copies each posterior into one row of a
+  // (X.size(1) x statesClass_) double tensor.
+  torch::Tensor XSpode::predict_proba(torch::Tensor& X)
+  {
+    auto samples = TensorUtils::to_matrix(X);
+    const auto posteriors = predict_proba(samples);
+    int n_samples = X.size(1);
+    torch::Tensor output = torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
+    const int rows = static_cast<int>(posteriors.size());
+    for (int row = 0; row < rows; ++row) {
+      output.index_put_({ row, "..." }, torch::tensor(posteriors[row]));
+    }
+    return output;
+  }
+  // Accuracy on tensors: number of predictions equal to y divided by y.size(0).
+  float XSpode::score(torch::Tensor& X, torch::Tensor& y)
+  {
+    const torch::Tensor predicted = predict(X);
+    const float hits = (predicted == y).sum().item<float>();
+    return hits / y.size(0);
+  }
+  // Accuracy on vectors: fraction of predictions that match y. y is indexed
+  // once for every prediction.
+  float XSpode::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
+  {
+    const auto predicted = this->predict(X);
+    int hits = 0;
+    for (std::size_t i = 0; i < predicted.size(); ++i) {
+      hits += (predicted[i] == y[i]) ? 1 : 0;
+    }
+    return static_cast<double>(hits) / predicted.size();
+  }
+} // namespace bayesnet
--- a/bayesnet/classifiers/XSPODE.h
+++ b/bayesnet/classifiers/XSPODE.h
@@ -0,0 +1,76 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef XSPODE_H
+#define XSPODE_H
+
+#include <vector>
+#include <torch/torch.h>
+#include "Classifier.h"
+#include "bayesnet/utils/CountingSemaphore.h"
+
+namespace bayesnet {
+
+    // Super-Parent One-Dependence Estimator that keeps its own flat count and
+    // probability tables instead of relying on the Network of the base class.
+    //
+    //   P(c | x) ∝ p(c) * p(x_sp | c) * prod_{child != sp} p(x_child | c, x_sp)
+    class XSpode : public Classifier {
+    public:
+        // spIndex: feature index of the super-parent. It can be replaced later
+        // through the "parent" hyperparameter.
+        explicit XSpode(int spIndex);
+        // Posterior of a single instance (nFeatures_ values). Throws
+        // std::logic_error when the model is not fitted.
+        std::vector<double> predict_proba(const std::vector<int>& instance) const;
+        // Posteriors of a batch laid out as X[feature][sample].
+        std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>>& X) override;
+        // Index of the most probable class of a single instance.
+        int predict(const std::vector<int>& instance) const;
+        // Scales v in place to sum 1 (no-op when the sum is not positive).
+        void normalize(std::vector<double>& v) const;
+        // Textual dump of the settings and of every internal table.
+        std::string to_string() const;
+        int getNFeatures() const;
+        int getNumberOfNodes() const override;
+        int getNumberOfEdges() const override;
+        int getNumberOfStates() const override;
+        int getClassNumStates() const override;
+        // Per-feature cardinalities (class not included). Defined inline because
+        // the declaration had no definition in XSPODE.cc, so any caller failed
+        // to link.
+        std::vector<int>& getStates() { return states_; }
+        // No graph description is produced: the result only contains the title.
+        std::vector<std::string> graph(const std::string& title) const override { return std::vector<std::string>({ title }); }
+        // Fits the model from tensors: X holds one row per feature and one
+        // column per sample, y the class labels and weights_ one weight per
+        // sample.
+        void fitx(torch::Tensor& X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing);
+        // Accepts the optional "parent" key (super-parent index).
+        void setHyperparameters(const nlohmann::json& hyperparameters_) override;
+
+        //
+        // Classifier interface
+        //
+        torch::Tensor predict(torch::Tensor& X) override;
+        std::vector<int> predict(std::vector<std::vector<int>>& X) override;
+        torch::Tensor predict_proba(torch::Tensor& X) override;
+        float score(torch::Tensor& X, torch::Tensor& y) override;
+        float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
+    protected:
+        // Sizes and zeroes the count tables from the dataset.
+        void buildModel(const torch::Tensor& weights) override;
+        // Accumulates the weighted counts and derives the probabilities.
+        void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
+    private:
+        void addSample(const std::vector<int>& instance, double weight);
+        void computeProbabilities();
+        // NOTE: data members are initialized in declaration order; keep this
+        // order in sync with the constructor's initializer list.
+        int superParent_;
+        int nFeatures_;
+        int statesClass_;
+        std::vector<int> states_;          // [states_feat0, ..., states_feat(N-1)] (class not included in this array)
+
+        // Class counts
+        std::vector<double> classCounts_;  // [c], accumulative
+        std::vector<double> classPriors_;  // [c], after normalization
+
+        // For p(x_sp = spVal | c)
+        std::vector<double> spFeatureCounts_; // [spVal * statesClass_ + c]
+        std::vector<double> spFeatureProbs_;  // same shape, after normalization
+
+        // For p(x_child = childVal | x_sp = spVal, c)
+        // childCounts_ is big enough to hold all child features except sp:
+        //   For each child f, childOffsets_[f] is the start index of its block
+        //   (-1 for the super-parent); inside the block the element for
+        //   (spVal, childVal, c) is at
+        //   (spVal * states_[f] + childVal) * statesClass_ + c.
+        std::vector<double> childCounts_;
+        std::vector<double> childProbs_;
+        std::vector<int>    childOffsets_;
+
+        double alpha_ = 1.0; // smoothing constant, set by trainModel
+        double initializer_; // for numerical stability
+        CountingSemaphore& semaphore_;
+    };
+}
+
+#endif // XSPODE_H