Add SPnDE n=2

2025-03-13 10:58:43 +01:00
parent 4ded6f51eb
commit c234308701
7 changed files with 775 additions and 16 deletions
--- a/bayesnet/classifiers/SPnDE.cc
+++ b/bayesnet/classifiers/SPnDE.cc
@@ -35,4 +35,4 @@ namespace bayesnet {
        return model.graph(name);
    }

-}
+}
--- a/bayesnet/classifiers/XSPnDE.cc
+++ b/bayesnet/classifiers/XSPnDE.cc
@@ -0,0 +1,575 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "XSPnDE.h"
+#include <pthread.h>   // for pthread_setname_np on linux
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+#include <iostream>
+#include "bayesnet/utils/TensorUtils.h"
+
+namespace bayesnet {
+
+// --------------------------------------
+// Constructor
+// --------------------------------------
+XSpnde::XSpnde(int spIndex1, int spIndex2)
+  : superParent1_{ spIndex1 }
+  , superParent2_{ spIndex2 }
+  , nFeatures_{0}
+  , statesClass_{0}
+  , alpha_{1.0}
+  , initializer_{1.0}
+  , semaphore_{ CountingSemaphore::getInstance() }
+  , Classifier(Network())
+{
+  validHyperparameters = { "parent1", "parent2" };
+}
+
+// --------------------------------------
+// setHyperparameters
+// --------------------------------------
+void XSpnde::setHyperparameters(const nlohmann::json &hyperparameters_)
+{
+  auto hyperparameters = hyperparameters_;
+  if (hyperparameters.contains("parent1")) {
+    superParent1_ = hyperparameters["parent1"];
+    hyperparameters.erase("parent1");
+  }
+  if (hyperparameters.contains("parent2")) {
+    superParent2_ = hyperparameters["parent2"];
+    hyperparameters.erase("parent2");
+  }
+  // Hand off anything else to base Classifier
+  Classifier::setHyperparameters(hyperparameters);
+}
+
+// --------------------------------------
+// fitx
+// --------------------------------------
+void XSpnde::fitx(torch::Tensor & X, torch::Tensor & y, 
+                  torch::Tensor & weights_, const Smoothing_t smoothing)
+{
+  m = X.size(1);  // number of samples
+  n = X.size(0);  // number of features
+  dataset = X;
+
+  // Build the dataset in your environment if needed:
+  buildDataset(y);
+
+  // Construct the data structures needed for counting
+  buildModel(weights_);
+
+  // Accumulate counts & convert to probabilities
+  trainModel(weights_, smoothing);
+  fitted = true;
+}
+
+// --------------------------------------
+// buildModel
+// --------------------------------------
+void XSpnde::buildModel(const torch::Tensor &weights)
+{
+  nFeatures_ = n;
+
+  // Derive the number of states for each feature from the dataset
+  // states_[f] = max value in dataset[f] + 1.
+  states_.resize(nFeatures_);
+  for (int f = 0; f < nFeatures_; f++) {
+    // This is naive: we take max in feature f. You might adapt for real data.
+    states_[f] = dataset[f].max().item<int>() + 1;
+  }
+  // Class states:
+  statesClass_ = dataset[-1].max().item<int>() + 1;
+
+  // Initialize the class counts
+  classCounts_.resize(statesClass_, 0.0);
+
+  // For sp1 -> p(sp1Val| c)
+  sp1FeatureCounts_.resize(states_[superParent1_] * statesClass_, 0.0);
+
+  // For sp2 -> p(sp2Val| c)
+  sp2FeatureCounts_.resize(states_[superParent2_] * statesClass_, 0.0);
+
+  // For child features, we store p(childVal | c, sp1Val, sp2Val).
+  // childCounts_ will hold raw counts. We’ll gather them in one big vector.
+  // We need an offset for each feature.
+  childOffsets_.resize(nFeatures_, -1);
+
+  int totalSize = 0;
+  for (int f = 0; f < nFeatures_; f++) {
+    if (f == superParent1_ || f == superParent2_) {
+      // skip the superparents
+      childOffsets_[f] = -1;
+      continue;
+    }
+    childOffsets_[f] = totalSize;
+    // block size for a single child f: states_[f] * statesClass_ 
+    //                               * states_[superParent1_] 
+    //                               * states_[superParent2_].
+    totalSize += (states_[f] * statesClass_ 
+                  * states_[superParent1_] 
+                  * states_[superParent2_]);
+  }
+  childCounts_.resize(totalSize, 0.0);
+}
+
+// --------------------------------------
+// trainModel
+// --------------------------------------
+void XSpnde::trainModel(const torch::Tensor &weights, 
+                        const bayesnet::Smoothing_t smoothing)
+{
+  // Accumulate raw counts
+  for (int i = 0; i < m; i++) {
+    std::vector<int> instance(nFeatures_ + 1);
+    for (int f = 0; f < nFeatures_; f++) {
+      instance[f] = dataset[f][i].item<int>();
+    }
+    instance[nFeatures_] = dataset[-1][i].item<int>();  // class
+    double w = weights[i].item<double>();
+    addSample(instance, w);
+  }
+
+  // Choose alpha based on smoothing:
+  switch (smoothing) {
+    case bayesnet::Smoothing_t::ORIGINAL:
+      alpha_ = 1.0 / m;
+      break;
+    case bayesnet::Smoothing_t::LAPLACE:
+      alpha_ = 1.0;
+      break;
+    default:
+      alpha_ = 0.0; // no smoothing
+  }
+
+  // Large initializer factor for numerical stability
+  initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_);
+
+  // Convert raw counts to probabilities
+  computeProbabilities();
+}
+
+// --------------------------------------
+// addSample
+// --------------------------------------
+void XSpnde::addSample(const std::vector<int> &instance, double weight)
+{
+  if (weight <= 0.0)
+    return;
+
+  int c = instance.back();
+  // increment classCounts
+  classCounts_[c] += weight;
+
+  int sp1Val = instance[superParent1_];
+  int sp2Val = instance[superParent2_];
+
+  // p(sp1|c)
+  sp1FeatureCounts_[sp1Val * statesClass_ + c] += weight;
+
+  // p(sp2|c)
+  sp2FeatureCounts_[sp2Val * statesClass_ + c] += weight;
+
+  // p(childVal| c, sp1Val, sp2Val)
+  for (int f = 0; f < nFeatures_; f++) {
+    if (f == superParent1_ || f == superParent2_)
+      continue;
+
+    int childVal = instance[f];
+    int offset = childOffsets_[f];
+    // block layout: 
+    //    offset + (sp1Val*(states_[sp2_]* states_[f]* statesClass_)) 
+    //            + (sp2Val*(states_[f]* statesClass_)) 
+    //            + childVal*(statesClass_) 
+    //            + c
+    int blockSizeSp2 = states_[superParent2_] 
+                       * states_[f] 
+                       * statesClass_;
+    int blockSizeChild = states_[f] * statesClass_;
+
+    int idx = offset 
+            + sp1Val*blockSizeSp2 
+            + sp2Val*blockSizeChild 
+            + childVal*statesClass_ 
+            + c;
+    childCounts_[idx] += weight;
+  }
+}
+
+// --------------------------------------
+// computeProbabilities
+// --------------------------------------
+void XSpnde::computeProbabilities()
+{
+  double totalCount = std::accumulate(classCounts_.begin(), 
+                                      classCounts_.end(), 0.0);
+
+  // classPriors_
+  classPriors_.resize(statesClass_, 0.0);
+  if (totalCount <= 0.0) {
+    // fallback => uniform
+    double unif = 1.0 / static_cast<double>(statesClass_);
+    for (int c = 0; c < statesClass_; c++) {
+      classPriors_[c] = unif;
+    }
+  } else {
+    for (int c = 0; c < statesClass_; c++) {
+      classPriors_[c] = 
+        (classCounts_[c] + alpha_) 
+        / (totalCount + alpha_ * statesClass_);
+    }
+  }
+
+  // p(sp1Val| c)
+  sp1FeatureProbs_.resize(sp1FeatureCounts_.size());
+  int sp1Card = states_[superParent1_];
+  for (int spVal = 0; spVal < sp1Card; spVal++) {
+    for (int c = 0; c < statesClass_; c++) {
+      double denom = classCounts_[c] + alpha_ * sp1Card;
+      double num = sp1FeatureCounts_[spVal * statesClass_ + c] + alpha_;
+      sp1FeatureProbs_[spVal * statesClass_ + c] = 
+         (denom <= 0.0 ? 0.0 : num / denom);
+    }
+  }
+
+  // p(sp2Val| c)
+  sp2FeatureProbs_.resize(sp2FeatureCounts_.size());
+  int sp2Card = states_[superParent2_];
+  for (int spVal = 0; spVal < sp2Card; spVal++) {
+    for (int c = 0; c < statesClass_; c++) {
+      double denom = classCounts_[c] + alpha_ * sp2Card;
+      double num = sp2FeatureCounts_[spVal * statesClass_ + c] + alpha_;
+      sp2FeatureProbs_[spVal * statesClass_ + c] = 
+         (denom <= 0.0 ? 0.0 : num / denom);
+    }
+  }
+
+  // p(childVal| c, sp1Val, sp2Val)
+  childProbs_.resize(childCounts_.size());
+  int offset = 0;
+  for (int f = 0; f < nFeatures_; f++) {
+    if (f == superParent1_ || f == superParent2_) 
+      continue;
+
+    int fCard = states_[f];
+    int sp1Card_ = states_[superParent1_];
+    int sp2Card_ = states_[superParent2_];
+    int childBlockSizeSp2 = sp2Card_ * fCard * statesClass_;
+    int childBlockSizeF   = fCard * statesClass_;
+
+    int blockSize = fCard * sp1Card_ * sp2Card_ * statesClass_;
+    for (int sp1Val = 0; sp1Val < sp1Card_; sp1Val++) {
+      for (int sp2Val = 0; sp2Val < sp2Card_; sp2Val++) {
+        for (int childVal = 0; childVal < fCard; childVal++) {
+          for (int c = 0; c < statesClass_; c++) {
+            // index in childCounts_ 
+            int idx = offset 
+                    + sp1Val*childBlockSizeSp2 
+                    + sp2Val*childBlockSizeF 
+                    + childVal*statesClass_ 
+                    + c;
+            double num = childCounts_[idx] + alpha_;
+            // denominator is the count of (sp1Val,sp2Val,c) plus alpha * fCard
+            // We can find that by summing childVal dimension, but we already
+            // have it in childCounts_[...] or we can re-check the superparent 
+            // counts if your approach is purely hierarchical. 
+            // Here we'll do it like the XSpode approach: sp1&sp2 are 
+            // conditionally independent given c, so denominators come from 
+            // summing the relevant block or we treat sp1,sp2 as "parents."
+            // A simpler approach: 
+            double sumSp1Sp2C = 0.0;
+            // sum over all childVal:
+            for (int cv = 0; cv < fCard; cv++) {
+              int idx2 = offset
+                       + sp1Val*childBlockSizeSp2
+                       + sp2Val*childBlockSizeF
+                       + cv*statesClass_ + c;
+              sumSp1Sp2C += childCounts_[idx2];
+            }
+            double denom = sumSp1Sp2C + alpha_ * fCard;
+            childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
+          }
+        }
+      }
+    }
+    offset += blockSize;
+  }
+}
+
+// --------------------------------------
+// predict_proba (single instance)
+// --------------------------------------
+std::vector<double> XSpnde::predict_proba(const std::vector<int> &instance) const
+{
+  if (!fitted) {
+    throw std::logic_error(CLASSIFIER_NOT_FITTED);
+  }
+  std::vector<double> probs(statesClass_, 0.0);
+
+  int sp1Val = instance[superParent1_];
+  int sp2Val = instance[superParent2_];
+
+  // Start with p(c) * p(sp1Val| c) * p(sp2Val| c)
+  for (int c = 0; c < statesClass_; c++) {
+    double pC = classPriors_[c];
+    double pSp1C = sp1FeatureProbs_[sp1Val * statesClass_ + c];
+    double pSp2C = sp2FeatureProbs_[sp2Val * statesClass_ + c];
+    probs[c] = pC * pSp1C * pSp2C * initializer_;
+  }
+
+  // Multiply by each child feature f
+  int offset = 0;
+  for (int f = 0; f < nFeatures_; f++) {
+    if (f == superParent1_ || f == superParent2_) 
+      continue;
+
+    int valF = instance[f];
+    int fCard = states_[f];
+    int sp1Card = states_[superParent1_];
+    int sp2Card = states_[superParent2_];
+    int blockSizeSp2 = sp2Card * fCard * statesClass_;
+    int blockSizeF   = fCard * statesClass_;
+
+    // base index for childProbs_ for this child and sp1Val, sp2Val
+    int base = offset 
+             + sp1Val*blockSizeSp2 
+             + sp2Val*blockSizeF 
+             + valF*statesClass_;
+    for (int c = 0; c < statesClass_; c++) {
+      probs[c] *= childProbs_[base + c];
+    }
+    offset += (fCard * sp1Card * sp2Card * statesClass_);
+  }
+
+  // Normalize
+  normalize(probs);
+  return probs;
+}
+
+// --------------------------------------
+// predict_proba (batch)
+// --------------------------------------
+std::vector<std::vector<double>> XSpnde::predict_proba(std::vector<std::vector<int>> &test_data)
+{
+  int test_size = test_data[0].size();  // each feature is test_data[f], size = #samples
+  int sample_size = test_data.size();   // = nFeatures_
+  std::vector<std::vector<double>> probabilities(
+      test_size, std::vector<double>(statesClass_, 0.0));
+
+  // same concurrency approach
+  int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
+  std::vector<std::thread> threads;
+
+  auto worker = [&](const std::vector<std::vector<int>> &samples, 
+                    int begin, 
+                    int chunk, 
+                    int sample_size, 
+                    std::vector<std::vector<double>> &predictions) {
+    std::string threadName =
+      "XSpnde-" + std::to_string(begin) + "-" + std::to_string(chunk);
+#if defined(__linux__)
+    pthread_setname_np(pthread_self(), threadName.c_str());
+#else
+    pthread_setname_np(threadName.c_str());
+#endif
+
+    std::vector<int> instance(sample_size);
+    for (int sample = begin; sample < begin + chunk; ++sample) {
+      for (int feature = 0; feature < sample_size; ++feature) {
+        instance[feature] = samples[feature][sample];
+      }
+      predictions[sample] = predict_proba(instance);
+    }
+    semaphore_.release();
+  };
+
+  for (int begin = 0; begin < test_size; begin += chunk_size) {
+    int chunk = std::min(chunk_size, test_size - begin);
+    semaphore_.acquire();
+    threads.emplace_back(worker, test_data, begin, chunk, sample_size, 
+                         std::ref(probabilities));
+  }
+  for (auto &th : threads) {
+    th.join();
+  }
+  return probabilities;
+}
+
+// --------------------------------------
+// predict (single instance)
+// --------------------------------------
+int XSpnde::predict(const std::vector<int> &instance) const
+{
+  auto p = predict_proba(instance);
+  return static_cast<int>(
+    std::distance(p.begin(), std::max_element(p.begin(), p.end()))
+  );
+}
+
+// --------------------------------------
+// predict (batch of data)
+// --------------------------------------
+std::vector<int> XSpnde::predict(std::vector<std::vector<int>> &test_data)
+{
+  auto probabilities = predict_proba(test_data);
+  std::vector<int> predictions(probabilities.size(), 0);
+
+  for (size_t i = 0; i < probabilities.size(); i++) {
+    predictions[i] = static_cast<int>(
+      std::distance(probabilities[i].begin(), 
+                    std::max_element(probabilities[i].begin(), 
+                                     probabilities[i].end()))
+    );
+  }
+  return predictions;
+}
+
+// --------------------------------------
+// predict (torch::Tensor version)
+// --------------------------------------
+torch::Tensor XSpnde::predict(torch::Tensor &X)
+{
+  auto X_ = TensorUtils::to_matrix(X);
+  auto result_v = predict(X_);
+  return torch::tensor(result_v, torch::kInt32);
+}
+
+// --------------------------------------
+// predict_proba (torch::Tensor version)
+// --------------------------------------
+torch::Tensor XSpnde::predict_proba(torch::Tensor &X)
+{
+  auto X_ = TensorUtils::to_matrix(X);
+  auto result_v = predict_proba(X_);
+  int n_samples = X.size(1);
+  torch::Tensor result =
+    torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
+  for (int i = 0; i < (int)result_v.size(); ++i) {
+    result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
+  }
+  return result;
+}
+
+// --------------------------------------
+// score (torch::Tensor version)
+// --------------------------------------
+float XSpnde::score(torch::Tensor &X, torch::Tensor &y)
+{
+  torch::Tensor y_pred = predict(X);
+  return (y_pred == y).sum().item<float>() / y.size(0);
+}
+
+// --------------------------------------
+// score (vector version)
+// --------------------------------------
+float XSpnde::score(std::vector<std::vector<int>> &X, std::vector<int> &y)
+{
+  auto y_pred = predict(X);
+  int correct = 0;
+  for (size_t i = 0; i < y_pred.size(); ++i) {
+    if (y_pred[i] == y[i]) {
+      correct++;
+    }
+  }
+  return static_cast<float>(correct) / static_cast<float>(y_pred.size());
+}
+
+// --------------------------------------
+// Utility: normalize
+// --------------------------------------
+void XSpnde::normalize(std::vector<double> &v) const
+{
+  double sum = 0.0;
+  for (auto &val : v) {
+    sum += val;
+  }
+  if (sum > 0.0) {
+    for (auto &val : v) {
+      val /= sum;
+    }
+  }
+}
+
+// --------------------------------------
+// to_string
+// --------------------------------------
+std::string XSpnde::to_string() const
+{
+  std::ostringstream oss;
+  oss << "----- XSpnde Model -----\n"
+      << "nFeatures_    = " << nFeatures_    << "\n"
+      << "superParent1_ = " << superParent1_ << "\n"
+      << "superParent2_ = " << superParent2_ << "\n"
+      << "statesClass_  = " << statesClass_  << "\n\n";
+
+  oss << "States: [";
+  for (auto s : states_) oss << s << " ";
+  oss << "]\n";
+
+  oss << "classCounts_:\n";
+  for (auto v : classCounts_) oss << v << " ";
+  oss << "\nclassPriors_:\n";
+  for (auto v : classPriors_) oss << v << " ";
+  oss << "\nsp1FeatureCounts_ (size=" << sp1FeatureCounts_.size() << ")\n";
+  for (auto v : sp1FeatureCounts_) oss << v << " ";
+  oss << "\nsp2FeatureCounts_ (size=" << sp2FeatureCounts_.size() << ")\n";
+  for (auto v : sp2FeatureCounts_) oss << v << " ";
+  oss << "\nchildCounts_ (size=" << childCounts_.size() << ")\n";
+  for (auto v : childCounts_) oss << v << " ";
+
+  oss << "\nchildOffsets_:\n";
+  for (auto c : childOffsets_) oss << c << " ";
+
+  oss << "\n----------------------------------------\n";
+  return oss.str();
+}
+
+// --------------------------------------
+// Some introspection about the graph
+// --------------------------------------
+int XSpnde::getNumberOfNodes() const 
+{
+  // nFeatures + 1 class node
+  return nFeatures_ + 1;
+}
+
+int XSpnde::getClassNumStates() const 
+{ 
+  return statesClass_; 
+}
+
+int XSpnde::getNFeatures() const 
+{ 
+  return nFeatures_; 
+}
+
+int XSpnde::getNumberOfStates() const
+{
+  // purely an example. Possibly you want to sum up actual 
+  // cardinalities or something else. 
+  return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
+}
+
+int XSpnde::getNumberOfEdges() const
+{
+  // In an SPNDE with n=2, for each feature we have edges from class, sp1, sp2. 
+  // So that’s 3*(nFeatures_) edges, minus the ones for the superparents themselves, 
+  // plus the edges from class->superparent1, class->superparent2. 
+  // For a quick approximation:
+  //   - class->sp1, class->sp2 => 2 edges
+  //   - class->child => (nFeatures -2) edges
+  //   - sp1->child, sp2->child => 2*(nFeatures -2) edges
+  // total = 2 + (nFeatures-2) + 2*(nFeatures-2) = 2 + 3*(nFeatures-2) 
+  //         = 3nFeatures - 4 (just an example).
+  // You can adapt to your liking:
+  return 3 * nFeatures_ - 4; 
+}
+
+} // namespace bayesnet
+
--- a/bayesnet/classifiers/XSPnDE.h
+++ b/bayesnet/classifiers/XSPnDE.h
@@ -0,0 +1,75 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef XSPNDE_H
+#define XSPNDE_H
+
+#include "Classifier.h"
+#include "bayesnet/utils/CountingSemaphore.h"
+#include <torch/torch.h>
+#include <vector>
+
+namespace bayesnet {
+
+class XSpnde : public Classifier {
+  public:
+    XSpnde(int spIndex1, int spIndex2);
+    void setHyperparameters(const nlohmann::json &hyperparameters_) override;
+    void fitx(torch::Tensor &X, torch::Tensor &y, torch::Tensor &weights_, const Smoothing_t smoothing);
+    std::vector<double> predict_proba(const std::vector<int> &instance) const;
+    std::vector<std::vector<double>> predict_proba(std::vector<std::vector<int>> &test_data) override;
+    int predict(const std::vector<int> &instance) const;
+    std::vector<int> predict(std::vector<std::vector<int>> &test_data) override;
+    torch::Tensor predict(torch::Tensor &X) override;
+    torch::Tensor predict_proba(torch::Tensor &X) override;
+
+    float score(torch::Tensor &X, torch::Tensor &y) override;
+    float score(std::vector<std::vector<int>> &X, std::vector<int> &y) override;
+    std::string to_string() const;
+    std::vector<std::string> graph(const std::string &title) const override {
+        return std::vector<std::string>({title});
+    }
+
+    int getNumberOfNodes() const override;
+    int getNumberOfEdges() const override;
+    int getNFeatures() const;
+    int getClassNumStates() const override;
+    int getNumberOfStates() const override;
+
+  protected:
+    void buildModel(const torch::Tensor &weights) override;
+    void trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing) override;
+
+  private:
+    void addSample(const std::vector<int> &instance, double weight);
+    void normalize(std::vector<double> &v) const;
+    void computeProbabilities();
+
+    int superParent1_;
+    int superParent2_;
+    int nFeatures_;
+    int statesClass_;
+    double alpha_;
+    double initializer_;
+
+    std::vector<int> states_;
+    std::vector<double> classCounts_;
+    std::vector<double> classPriors_;
+    std::vector<double> sp1FeatureCounts_, sp1FeatureProbs_;
+    std::vector<double> sp2FeatureCounts_, sp2FeatureProbs_;
+    // childOffsets_[f] will be the offset into childCounts_ for feature f.
+    // If f is either superParent1 or superParent2, childOffsets_[f] = -1
+    std::vector<int> childOffsets_;
+    // For each child f, we store p(x_f | c, sp1Val, sp2Val).  We'll store the raw
+    // counts in childCounts_, and the probabilities in childProbs_, with a
+    // dimension block of size: states_[f]* statesClass_* states_[sp1]* states_[sp2].
+    std::vector<double> childCounts_;
+    std::vector<double> childProbs_;
+    CountingSemaphore &semaphore_;
+};
+
+} // namespace bayesnet
+#endif // XSPNDE_H