// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include <algorithm>
#include <cmath>
#include <limits>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include "XSPODE.h"
#include "bayesnet/utils/TensorUtils.h"

namespace bayesnet {

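// --------------------------------------
// XSpode: Super-Parent One-Dependence Estimator (SPODE)
// --------------------------------------
// Each feature depends on the class and on one shared super-parent
// feature, so prediction reduces to
//   P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp)
// Usage sketch (illustrative; assumes a discretized dataset X laid out as
// features × samples, integer labels y and per-sample weights w):
//   auto model = XSpode(0);                    // feature 0 as super-parent
//   model.fitx(X, y, w, Smoothing_t::LAPLACE);
//   auto proba = model.predict_proba(X);       // samples × class states
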
// --------------------------------------
// Constructor
// --------------------------------------
XSpode::XSpode(int spIndex)
    : superParent_{ spIndex }, nFeatures_{ 0 }, statesClass_{ 0 }, alpha_{ 1.0 },
    initializer_{ 1.0 }, semaphore_{ CountingSemaphore::getInstance() },
    Classifier(Network())
{
    validHyperparameters = { "parent" };
}

void XSpode::setHyperparameters(const nlohmann::json& hyperparameters_)
{
    auto hyperparameters = hyperparameters_;
    if (hyperparameters.contains("parent")) {
        superParent_ = hyperparameters["parent"];
        hyperparameters.erase("parent");
    }
    Classifier::setHyperparameters(hyperparameters);
}

void XSpode::fitx(torch::Tensor& X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
{
    m = X.size(1); // number of samples
    n = X.size(0); // number of features
    dataset = X;
    buildDataset(y); // appends the class row to the dataset
    buildModel(weights_);
    trainModel(weights_, smoothing);
    fitted = true;
}

// --------------------------------------
// buildModel
// --------------------------------------
// Initialize the storage needed for the super-parent and child feature
// counts and probabilities.
// --------------------------------------
void XSpode::buildModel(const torch::Tensor& weights)
{
    nFeatures_ = n;

    // Derive the number of states for each feature and for the class
    // directly from the data: states_[f] = max value in feature f + 1,
    // and statesClass_ = max class label + 1. (This is one approach;
    // adapt it if your environment already provides the state counts,
    // e.g. via a states map.)
    states_.resize(nFeatures_);
    for (int f = 0; f < nFeatures_; f++) {
        states_[f] = dataset[f].max().item<int>() + 1;
    }
    statesClass_ = dataset[-1].max().item<int>() + 1;

    // Initialize counts
    classCounts_.resize(statesClass_, 0.0);
    // Counts behind p(x_sp = spVal | c), stored at
    // spFeatureCounts_[spVal * statesClass_ + c].
    spFeatureCounts_.resize(states_[superParent_] * statesClass_, 0.0);

    // For each child f ≠ sp we store the counts behind p(x_f | c, x_sp) in
    // a contiguous block of childCounts_ of size
    // states_[f] * statesClass_ * states_[superParent_];
    // childOffsets_[f] records where that block starts.
    childOffsets_.resize(nFeatures_, -1);
    int totalSize = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent_)
            continue; // skip sp
        childOffsets_[f] = totalSize;
        totalSize += (states_[f] * statesClass_ * states_[superParent_]);
    }
    childCounts_.resize(totalSize, 0.0);
}
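// Example (illustrative): with statesClass_ = 2, a super-parent with 3
// states and a child f with states_[f] = 4, the child's block holds
// 4 * 2 * 3 = 24 doubles, and the count for (spVal = 2, childVal = 1,
// c = 0) lives at childOffsets_[f] + (2 * 4 + 1) * 2 + 0 = offset + 18.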
// --------------------------------------
// trainModel
// --------------------------------------
// We only store conditional probabilities for:
//   p(x_sp | c)           (the super-parent feature)
//   p(x_child | c, x_sp)  for all child ≠ sp
// --------------------------------------
void XSpode::trainModel(const torch::Tensor& weights,
    const bayesnet::Smoothing_t smoothing)
{
    // Accumulate raw weighted counts
    for (int i = 0; i < m; i++) {
        std::vector<int> instance(nFeatures_ + 1);
        for (int f = 0; f < nFeatures_; f++) {
            instance[f] = dataset[f][i].item<int>();
        }
        instance[nFeatures_] = dataset[-1][i].item<int>();
        addSample(instance, weights[i].item<double>());
    }
    switch (smoothing) {
        case bayesnet::Smoothing_t::ORIGINAL:
            alpha_ = 1.0 / m;
            break;
        case bayesnet::Smoothing_t::LAPLACE:
            alpha_ = 1.0;
            break;
        default:
            alpha_ = 0.0; // no smoothing
    }
    // Large constant used to scale the probability products in
    // predict_proba and delay floating-point underflow; it cancels out
    // when the scores are normalized.
    initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_);
    // Convert raw counts to probabilities
    computeProbabilities();
}

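// Example (illustrative): with m = 100 samples, ORIGINAL smoothing sets
// alpha_ = 0.01 while LAPLACE sets alpha_ = 1.0; alpha_ then enters every
// (count + alpha_) / (total + alpha_ * #states) estimate in
// computeProbabilities().
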
// --------------------------------------
// addSample
// --------------------------------------
// instance has size nFeatures_ + 1, with the class at the end.
// We add `weight` to the appropriate counters for each
// (c, superParentVal, childVal).
//
void XSpode::addSample(const std::vector<int>& instance, double weight)
{
    if (weight <= 0.0)
        return;

    int c = instance.back();
    // (A) increment class counts
    classCounts_[c] += weight;

    // (B) increment super-parent counts => p(x_sp | c)
    int spVal = instance[superParent_];
    spFeatureCounts_[spVal * statesClass_ + c] += weight;

    // (C) increment child counts => p(childVal | c, x_sp)
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent_)
            continue;
        int childVal = instance[f];
        int offset = childOffsets_[f];
        // Index in childCounts_.
        // Layout: [ offset + (spVal * states_[f] + childVal) * statesClass_ + c ]
        int blockSize = states_[f] * statesClass_;
        int idx = offset + spVal * blockSize + childVal * statesClass_ + c;
        childCounts_[idx] += weight;
    }
}
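// Trace (illustrative): for instance = {x0, x1, x2, class} with
// superParent_ = 1, addSample adds `weight` to classCounts_[class], to
// spFeatureCounts_[x1 * statesClass_ + class], and to one slot in each of
// the child blocks for features 0 and 2, indexed by (x1, childVal, class).
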
// --------------------------------------
// computeProbabilities
// --------------------------------------
// Once all samples have been added, call this to compute:
//   p(c)
//   p(x_sp = spVal | c)
//   p(x_child = v | c, x_sp = s_sp)
// --------------------------------------
void XSpode::computeProbabilities()
{
    double totalCount =
        std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);

    // p(c) => classPriors_
    classPriors_.resize(statesClass_, 0.0);
    if (totalCount <= 0.0) {
        // fallback => uniform
        double unif = 1.0 / static_cast<double>(statesClass_);
        for (int c = 0; c < statesClass_; c++) {
            classPriors_[c] = unif;
        }
    } else {
        for (int c = 0; c < statesClass_; c++) {
            classPriors_[c] =
                (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
        }
    }

    // p(x_sp | c): the denominator for entry (spVal, c) is
    // classCounts_[c] + alpha_ * (#states of sp)
    spFeatureProbs_.resize(spFeatureCounts_.size());
    int spCard = states_[superParent_];
    for (int spVal = 0; spVal < spCard; spVal++) {
        for (int c = 0; c < statesClass_; c++) {
            double denom = classCounts_[c] + alpha_ * spCard;
            double num = spFeatureCounts_[spVal * statesClass_ + c] + alpha_;
            spFeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
        }
    }

    // p(x_child | c, x_sp)
    childProbs_.resize(childCounts_.size());
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent_)
            continue;
        int offset = childOffsets_[f];
        int childCard = states_[f];

        // For each (spVal, childVal, c) in this child's block:
        for (int spVal = 0; spVal < spCard; spVal++) {
            for (int childVal = 0; childVal < childCard; childVal++) {
                for (int c = 0; c < statesClass_; c++) {
                    int idx = offset + spVal * (childCard * statesClass_) +
                        childVal * statesClass_ + c;
                    double num = childCounts_[idx] + alpha_;
                    // denominator = spFeatureCounts_[spVal * statesClass_ + c]
                    //             + alpha_ * (#states of child)
                    double denom =
                        spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * childCard;
                    childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
                }
            }
        }
    }
}
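// Worked example (illustrative numbers): with alpha_ = 1, a child with 3
// states, childCounts_ = 5 for (childVal, c, spVal) and
// spFeatureCounts_ = 12 for (spVal, c), the smoothed estimate is
// p(x_child = childVal | c, x_sp = spVal) = (5 + 1) / (12 + 1 * 3) = 0.4.
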
// --------------------------------------
// predict_proba
// --------------------------------------
// For a single instance x of dimension nFeatures_:
//   P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp)
// --------------------------------------
std::vector<double> XSpode::predict_proba(const std::vector<int>& instance) const
{
    if (!fitted) {
        throw std::logic_error(CLASSIFIER_NOT_FITTED);
    }
    std::vector<double> probs(statesClass_, 0.0);
    // Start with p(c) × p(x_sp | c), scaled by initializer_ to delay
    // underflow; the common factor cancels out in normalize().
    int spVal = instance[superParent_];
    for (int c = 0; c < statesClass_; c++) {
        double pc = classPriors_[c];
        double pSpC = spFeatureProbs_[spVal * statesClass_ + c];
        probs[c] = pc * pSpC * initializer_;
    }

    // Multiply by each child's probability p(x_child | c, x_sp)
    for (int feature = 0; feature < nFeatures_; feature++) {
        if (feature == superParent_)
            continue; // skip sp
        int sf = instance[feature];
        int offset = childOffsets_[feature];
        int childCard = states_[feature];
        // Index into childProbs_:
        //   offset + spVal * (childCard * statesClass_) + childVal * statesClass_ + c
        int base = offset + spVal * (childCard * statesClass_) + sf * statesClass_;
        for (int c = 0; c < statesClass_; c++) {
            probs[c] *= childProbs_[base + c];
        }
    }

    // Normalize
    normalize(probs);
    return probs;
}
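// Worked example (illustrative numbers): with statesClass_ = 2, priors
// {0.6, 0.4}, p(x_sp | c) = {0.5, 0.25} and a single child with
// p(x_child | c, x_sp) = {0.2, 0.8}, the unnormalized scores are
// proportional to {0.6*0.5*0.2, 0.4*0.25*0.8} = {0.06, 0.08}, which
// normalize() turns into {0.43, 0.57}; the initializer_ factor cancels.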
std::vector<std::vector<double>> XSpode::predict_proba(std::vector<std::vector<int>>& test_data)
{
    int test_size = test_data[0].size();   // number of samples
    int sample_size = test_data.size();    // number of features per sample
    auto probabilities = std::vector<std::vector<double>>(
        test_size, std::vector<double>(statesClass_));

    int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
    std::vector<std::thread> threads;
    auto worker = [&](const std::vector<std::vector<int>>& samples, int begin,
        int chunk, int sample_size,
        std::vector<std::vector<double>>& predictions) {
            std::string threadName =
                "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
            pthread_setname_np(pthread_self(), threadName.c_str());
#else
            pthread_setname_np(threadName.c_str());
#endif

            std::vector<int> instance(sample_size);
            for (int sample = begin; sample < begin + chunk; ++sample) {
                for (int feature = 0; feature < sample_size; ++feature) {
                    instance[feature] = samples[feature][sample];
                }
                predictions[sample] = predict_proba(instance);
            }
            semaphore_.release();
        };
    for (int begin = 0; begin < test_size; begin += chunk_size) {
        int chunk = std::min(chunk_size, test_size - begin);
        semaphore_.acquire();
        threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
    }
    for (auto& thread : threads) {
        thread.join();
    }
    return probabilities;
}

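// Example (illustrative): with test_size = 1000 samples and a semaphore
// allowing 8 concurrent workers, chunk_size = min(150, 1000 / 8 + 1) = 126,
// so 8 threads are spawned, each handling 126 samples (the last one 118).
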
// --------------------------------------
// Utility: normalize
// --------------------------------------
void XSpode::normalize(std::vector<double>& v) const
{
    double sum = 0.0;
    for (auto val : v) {
        sum += val;
    }
    if (sum <= 0.0) {
        return;
    }
    for (auto& val : v) {
        val /= sum;
    }
}

// --------------------------------------
// representation of the model
// --------------------------------------
std::string XSpode::to_string() const
{
    std::ostringstream oss;
    oss << "----- XSpode Model -----" << std::endl
        << "nFeatures_ = " << nFeatures_ << std::endl
        << "superParent_ = " << superParent_ << std::endl
        << "statesClass_ = " << statesClass_ << std::endl
        << std::endl;

    oss << "States: [";
    for (int s : states_)
        oss << s << " ";
    oss << "]" << std::endl;
    oss << "classCounts_: [";
    for (double c : classCounts_)
        oss << c << " ";
    oss << "]" << std::endl;
    oss << "classPriors_: [";
    for (double c : classPriors_)
        oss << c << " ";
    oss << "]" << std::endl;
    oss << "spFeatureCounts_: size = " << spFeatureCounts_.size() << std::endl
        << "[";
    for (double c : spFeatureCounts_)
        oss << c << " ";
    oss << "]" << std::endl;
    oss << "spFeatureProbs_: size = " << spFeatureProbs_.size() << std::endl
        << "[";
    for (double c : spFeatureProbs_)
        oss << c << " ";
    oss << "]" << std::endl;
    oss << "childCounts_: size = " << childCounts_.size() << std::endl << "[";
    for (double cc : childCounts_)
        oss << cc << " ";
    oss << "]" << std::endl;
    oss << "childProbs_: size = " << childProbs_.size() << std::endl << "[";
    for (double cp : childProbs_)
        oss << cp << " ";
    oss << "]" << std::endl;
    oss << "childOffsets_: [";
    for (int co : childOffsets_)
        oss << co << " ";
    oss << "]" << std::endl;
    oss << std::string(40, '-') << std::endl;
    return oss.str();
}
int XSpode::getNumberOfNodes() const { return nFeatures_ + 1; } // features + class
int XSpode::getClassNumStates() const { return statesClass_; }
int XSpode::getNFeatures() const { return nFeatures_; }
int XSpode::getNumberOfStates() const
{
    return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
}
int XSpode::getNumberOfEdges() const
{
    return 2 * nFeatures_ + 1;
}

// ------------------------------------------------------
// Predict overrides (classifier interface)
// ------------------------------------------------------
int XSpode::predict(const std::vector<int>& instance) const
{
    auto p = predict_proba(instance);
    return static_cast<int>(std::distance(p.begin(), std::max_element(p.begin(), p.end())));
}
std::vector<int> XSpode::predict(std::vector<std::vector<int>>& test_data)
{
    auto probabilities = predict_proba(test_data);
    std::vector<int> predictions(probabilities.size(), 0);

    for (size_t i = 0; i < probabilities.size(); i++) {
        predictions[i] = static_cast<int>(std::distance(
            probabilities[i].begin(),
            std::max_element(probabilities[i].begin(), probabilities[i].end())));
    }
    return predictions;
}
torch::Tensor XSpode::predict(torch::Tensor& X)
{
    auto X_ = TensorUtils::to_matrix(X);
    auto result_v = predict(X_);
    return torch::tensor(result_v, torch::kInt32);
}
torch::Tensor XSpode::predict_proba(torch::Tensor& X)
{
    auto X_ = TensorUtils::to_matrix(X);
    auto result_v = predict_proba(X_);
    int n_samples = X.size(1);
    torch::Tensor result =
        torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
    for (int i = 0; i < static_cast<int>(result_v.size()); ++i) {
        result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
    }
    return result;
}
float XSpode::score(torch::Tensor& X, torch::Tensor& y)
{
    torch::Tensor y_pred = predict(X);
    return (y_pred == y).sum().item<float>() / y.size(0);
}
float XSpode::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{
    auto y_pred = this->predict(X);
    int correct = 0;
    for (size_t i = 0; i < y_pred.size(); ++i) {
        if (y_pred[i] == y[i]) {
            correct++;
        }
    }
    return static_cast<float>(correct) / y_pred.size();
}
} // namespace bayesnet