// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
#include "XSP2DE.h"
#include <pthread.h> // for pthread_setname_np on linux
#include <algorithm>
#include <limits>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <thread>
#include "bayesnet/utils/TensorUtils.h"

namespace bayesnet {

// --------------------------------------
// Constructor
// --------------------------------------
XSp2de::XSp2de(int spIndex1, int spIndex2)
    : Classifier(Network())
    , superParent1_{ spIndex1 }
    , superParent2_{ spIndex2 }
    , nFeatures_{ 0 }
    , statesClass_{ 0 }
    , alpha_{ 1.0 }
    , initializer_{ 1.0 }
    , semaphore_{ CountingSemaphore::getInstance() }
{
    validHyperparameters = { "parent1", "parent2" };
}

// --------------------------------------
// setHyperparameters
// --------------------------------------
void XSp2de::setHyperparameters(const nlohmann::json &hyperparameters_)
{
    auto hyperparameters = hyperparameters_;
    if (hyperparameters.contains("parent1")) {
        superParent1_ = hyperparameters["parent1"];
        hyperparameters.erase("parent1");
    }
    if (hyperparameters.contains("parent2")) {
        superParent2_ = hyperparameters["parent2"];
        hyperparameters.erase("parent2");
    }
    // Hand off anything else to the base Classifier
    Classifier::setHyperparameters(hyperparameters);
}

// --------------------------------------
// fitx
// --------------------------------------
void XSp2de::fitx(torch::Tensor & X, torch::Tensor & y, torch::Tensor & weights_, const Smoothing_t smoothing)
{
    m = X.size(1); // number of samples
    n = X.size(0); // number of features
    dataset = X;

    // Build the full dataset (appends the class labels y as the last row)
    buildDataset(y);
    // Construct the data structures needed for counting
    buildModel(weights_);
    // Accumulate counts & convert to probabilities
    trainModel(weights_, smoothing);
    fitted = true;
}

// --------------------------------------
// buildModel
// --------------------------------------
void XSp2de::buildModel(const torch::Tensor &weights)
{
    nFeatures_ = n;

    // Derive the number of states of each feature from the dataset:
    // states_[f] = max value in feature f + 1. This is naive; adapt it
    // if your data is not encoded as contiguous integers starting at 0.
    states_.resize(nFeatures_);
    for (int f = 0; f < nFeatures_; f++) {
        states_[f] = dataset[f].max().item<int>() + 1;
    }
    // Class states:
    statesClass_ = dataset[-1].max().item<int>() + 1;

    // Initialize the class counts
    classCounts_.resize(statesClass_, 0.0);

    // For sp1 -> p(sp1Val | c)
    sp1FeatureCounts_.resize(states_[superParent1_] * statesClass_, 0.0);
    // For sp2 -> p(sp2Val | c)
    sp2FeatureCounts_.resize(states_[superParent2_] * statesClass_, 0.0);

    // For the child features we store raw counts for p(childVal | c, sp1Val, sp2Val)
    // in one big flat vector, childCounts_, with a per-feature offset.
    childOffsets_.resize(nFeatures_, -1);
    int totalSize = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_) {
            // skip the superparents
            childOffsets_[f] = -1;
            continue;
        }
        childOffsets_[f] = totalSize;
        // Block size for a single child f:
        // states_[f] * statesClass_ * states_[superParent1_] * states_[superParent2_]
        totalSize += (states_[f] * statesClass_ * states_[superParent1_] * states_[superParent2_]);
    }
    childCounts_.resize(totalSize, 0.0);
}
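// Worked example of the flat childCounts_ layout above (hypothetical
// cardinalities, for illustration only): with nFeatures_ = 4,
// superParent1_ = 0, superParent2_ = 1, states_ = {2, 3, 2, 4} and
// statesClass_ = 2, the child features are 2 and 3:
//   childOffsets_[2] = 0    block size = 2 * 2 * 2 * 3 = 24
//   childOffsets_[3] = 24   block size = 4 * 2 * 2 * 3 = 48
// so childCounts_ ends up holding 24 + 48 = 72 doubles.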
// --------------------------------------
// trainModel
// --------------------------------------
void XSp2de::trainModel(const torch::Tensor &weights, const bayesnet::Smoothing_t smoothing)
{
    // Accumulate raw counts
    for (int i = 0; i < m; i++) {
        std::vector<int> instance(nFeatures_ + 1);
        for (int f = 0; f < nFeatures_; f++) {
            instance[f] = dataset[f][i].item<int>();
        }
        instance[nFeatures_] = dataset[-1][i].item<int>(); // class
        double w = weights[i].item<double>();
        addSample(instance, w);
    }

    // Choose alpha based on smoothing:
    switch (smoothing) {
        case bayesnet::Smoothing_t::ORIGINAL:
            alpha_ = 1.0 / m;
            break;
        case bayesnet::Smoothing_t::LAPLACE:
            alpha_ = 1.0;
            break;
        default:
            alpha_ = 0.0; // no smoothing
    }
    // Large initializer factor for numerical stability
    initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_);

    // Convert raw counts to probabilities
    computeProbabilities();
}

// --------------------------------------
// addSample
// --------------------------------------
void XSp2de::addSample(const std::vector<int> &instance, double weight)
{
    if (weight <= 0.0)
        return;

    int c = instance.back();
    // increment the class count
    classCounts_[c] += weight;

    int sp1Val = instance[superParent1_];
    int sp2Val = instance[superParent2_];

    // p(sp1|c)
    sp1FeatureCounts_[sp1Val * statesClass_ + c] += weight;
    // p(sp2|c)
    sp2FeatureCounts_[sp2Val * statesClass_ + c] += weight;

    // p(childVal | c, sp1Val, sp2Val)
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_)
            continue;
        int childVal = instance[f];
        int offset = childOffsets_[f];
        // Block layout:
        //   offset + sp1Val * (states_[superParent2_] * states_[f] * statesClass_)
        //          + sp2Val * (states_[f] * statesClass_)
        //          + childVal * statesClass_
        //          + c
        int blockSizeSp2 = states_[superParent2_] * states_[f] * statesClass_;
        int blockSizeChild = states_[f] * statesClass_;
        int idx = offset + sp1Val * blockSizeSp2 + sp2Val * blockSizeChild + childVal * statesClass_ + c;
        childCounts_[idx] += weight;
    }
}
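// Continuing the hypothetical example from buildModel: a sample with
// sp1Val = 1, sp2Val = 2, child feature f = 3 taking childVal = 3, and
// class c = 1 lands at
//   blockSizeSp2   = 3 * 4 * 2 = 24
//   blockSizeChild = 4 * 2 = 8
//   idx = 24 + 1 * 24 + 2 * 8 + 3 * 2 + 1 = 71   (the last slot of childCounts_)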
// --------------------------------------
// computeProbabilities
// --------------------------------------
void XSp2de::computeProbabilities()
{
    double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);

    // classPriors_
    classPriors_.resize(statesClass_, 0.0);
    if (totalCount <= 0.0) {
        // fallback => uniform
        double unif = 1.0 / static_cast<double>(statesClass_);
        for (int c = 0; c < statesClass_; c++) {
            classPriors_[c] = unif;
        }
    } else {
        for (int c = 0; c < statesClass_; c++) {
            classPriors_[c] = (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
        }
    }

    // p(sp1Val | c)
    sp1FeatureProbs_.resize(sp1FeatureCounts_.size());
    int sp1Card = states_[superParent1_];
    for (int spVal = 0; spVal < sp1Card; spVal++) {
        for (int c = 0; c < statesClass_; c++) {
            double denom = classCounts_[c] + alpha_ * sp1Card;
            double num = sp1FeatureCounts_[spVal * statesClass_ + c] + alpha_;
            sp1FeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
        }
    }

    // p(sp2Val | c)
    sp2FeatureProbs_.resize(sp2FeatureCounts_.size());
    int sp2Card = states_[superParent2_];
    for (int spVal = 0; spVal < sp2Card; spVal++) {
        for (int c = 0; c < statesClass_; c++) {
            double denom = classCounts_[c] + alpha_ * sp2Card;
            double num = sp2FeatureCounts_[spVal * statesClass_ + c] + alpha_;
            sp2FeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
        }
    }

    // p(childVal | c, sp1Val, sp2Val)
    childProbs_.resize(childCounts_.size());
    int offset = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_)
            continue;
        int fCard = states_[f];
        int sp1Card_ = states_[superParent1_];
        int sp2Card_ = states_[superParent2_];
        int childBlockSizeSp2 = sp2Card_ * fCard * statesClass_;
        int childBlockSizeF = fCard * statesClass_;
        int blockSize = fCard * sp1Card_ * sp2Card_ * statesClass_;

        for (int sp1Val = 0; sp1Val < sp1Card_; sp1Val++) {
            for (int sp2Val = 0; sp2Val < sp2Card_; sp2Val++) {
                for (int childVal = 0; childVal < fCard; childVal++) {
                    for (int c = 0; c < statesClass_; c++) {
                        // index in childCounts_
                        int idx = offset + sp1Val * childBlockSizeSp2 + sp2Val * childBlockSizeF
                                + childVal * statesClass_ + c;
                        double num = childCounts_[idx] + alpha_;
                        // The denominator is the count of (sp1Val, sp2Val, c) plus
                        // alpha * fCard. As in the XSpode approach, sp1 and sp2 act
                        // as parents of the child, so we obtain it by summing the
                        // block over the childVal dimension.
                        double sumSp1Sp2C = 0.0;
                        for (int cv = 0; cv < fCard; cv++) {
                            int idx2 = offset + sp1Val * childBlockSizeSp2 + sp2Val * childBlockSizeF
                                     + cv * statesClass_ + c;
                            sumSp1Sp2C += childCounts_[idx2];
                        }
                        double denom = sumSp1Sp2C + alpha_ * fCard;
                        childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
                    }
                }
            }
        }
        offset += blockSize;
    }
}
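// The estimates computed above follow standard additive (Laplace-style) smoothing:
//   P(c)                 = (N(c) + alpha) / (N + alpha * |C|)
//   P(sp | c)            = (N(sp, c) + alpha) / (N(c) + alpha * |SP|)
//   P(x_f | c, sp1, sp2) = (N(x_f, sp1, sp2, c) + alpha) / (N(sp1, sp2, c) + alpha * |X_f|)
// where N(.) are the weighted counts and |.| denotes the number of states.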
// --------------------------------------
// predict_proba (single instance)
// --------------------------------------
std::vector<double> XSp2de::predict_proba(const std::vector<int> &instance) const
{
    if (!fitted) {
        throw std::logic_error(CLASSIFIER_NOT_FITTED);
    }
    std::vector<double> probs(statesClass_, 0.0);

    int sp1Val = instance[superParent1_];
    int sp2Val = instance[superParent2_];

    // Start with p(c) * p(sp1Val|c) * p(sp2Val|c)
    for (int c = 0; c < statesClass_; c++) {
        double pC = classPriors_[c];
        double pSp1C = sp1FeatureProbs_[sp1Val * statesClass_ + c];
        double pSp2C = sp2FeatureProbs_[sp2Val * statesClass_ + c];
        probs[c] = pC * pSp1C * pSp2C * initializer_;
    }

    // Multiply by p(childVal | c, sp1Val, sp2Val) for each child feature f
    int offset = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_)
            continue;
        int valF = instance[f];
        int fCard = states_[f];
        int sp1Card = states_[superParent1_];
        int sp2Card = states_[superParent2_];
        int blockSizeSp2 = sp2Card * fCard * statesClass_;
        int blockSizeF = fCard * statesClass_;
        // base index into childProbs_ for this child at (sp1Val, sp2Val, valF)
        int base = offset + sp1Val * blockSizeSp2 + sp2Val * blockSizeF + valF * statesClass_;
        for (int c = 0; c < statesClass_; c++) {
            probs[c] *= childProbs_[base + c];
        }
        offset += (fCard * sp1Card * sp2Card * statesClass_);
    }

    // Normalize
    normalize(probs);
    return probs;
}

// --------------------------------------
// predict_proba (batch)
// --------------------------------------
std::vector<std::vector<double>> XSp2de::predict_proba(std::vector<std::vector<int>> &test_data)
{
    int test_size = test_data[0].size();  // each feature row test_data[f] holds one value per sample
    int sample_size = test_data.size();   // = nFeatures_
    std::vector<std::vector<double>> probabilities(test_size, std::vector<double>(statesClass_, 0.0));

    // same concurrency approach as the other classifiers
    int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
    std::vector<std::thread> threads;

    auto worker = [&](const std::vector<std::vector<int>> &samples, int begin, int chunk, int sample_size,
                      std::vector<std::vector<double>> &predictions) {
        std::string threadName = "XSp2de-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
        pthread_setname_np(pthread_self(), threadName.c_str());
#else
        pthread_setname_np(threadName.c_str());
#endif
        std::vector<int> instance(sample_size);
        for (int sample = begin; sample < begin + chunk; ++sample) {
            for (int feature = 0; feature < sample_size; ++feature) {
                instance[feature] = samples[feature][sample];
            }
            predictions[sample] = predict_proba(instance);
        }
        semaphore_.release();
    };

    for (int begin = 0; begin < test_size; begin += chunk_size) {
        int chunk = std::min(chunk_size, test_size - begin);
        semaphore_.acquire();
        threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
    }
    for (auto &th : threads) {
        th.join();
    }
    return probabilities;
}
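// Chunking example for the batch predict_proba above (illustrative numbers):
// with test_size = 1000 and semaphore_.getMaxCount() = 8,
//   chunk_size = min(150, 1000 / 8 + 1) = 126,
// so the work is split into 8 chunks (the last one holding 118 samples),
// each handled by one thread gated by the counting semaphore.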
// --------------------------------------
// predict (single instance)
// --------------------------------------
int XSp2de::predict(const std::vector<int> &instance) const
{
    auto p = predict_proba(instance);
    return static_cast<int>(std::distance(p.begin(), std::max_element(p.begin(), p.end())));
}

// --------------------------------------
// predict (batch of data)
// --------------------------------------
std::vector<int> XSp2de::predict(std::vector<std::vector<int>> &test_data)
{
    auto probabilities = predict_proba(test_data);
    std::vector<int> predictions(probabilities.size(), 0);
    for (size_t i = 0; i < probabilities.size(); i++) {
        predictions[i] = static_cast<int>(std::distance(
            probabilities[i].begin(),
            std::max_element(probabilities[i].begin(), probabilities[i].end())));
    }
    return predictions;
}

// --------------------------------------
// predict (torch::Tensor version)
// --------------------------------------
torch::Tensor XSp2de::predict(torch::Tensor &X)
{
    auto X_ = TensorUtils::to_matrix(X);
    auto result_v = predict(X_);
    return torch::tensor(result_v, torch::kInt32);
}

// --------------------------------------
// predict_proba (torch::Tensor version)
// --------------------------------------
torch::Tensor XSp2de::predict_proba(torch::Tensor &X)
{
    auto X_ = TensorUtils::to_matrix(X);
    auto result_v = predict_proba(X_);
    int n_samples = X.size(1);
    torch::Tensor result = torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
    for (int i = 0; i < (int)result_v.size(); ++i) {
        result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
    }
    return result;
}

// --------------------------------------
// score (torch::Tensor version)
// --------------------------------------
float XSp2de::score(torch::Tensor &X, torch::Tensor &y)
{
    torch::Tensor y_pred = predict(X);
    return (y_pred == y).sum().item<float>() / y.size(0);
}

// --------------------------------------
// score (vector version)
// --------------------------------------
float XSp2de::score(std::vector<std::vector<int>> &X, std::vector<int> &y)
{
    auto y_pred = predict(X);
    int correct = 0;
    for (size_t i = 0; i < y_pred.size(); ++i) {
        if (y_pred[i] == y[i]) {
            correct++;
        }
    }
    return static_cast<float>(correct) / static_cast<float>(y_pred.size());
}

// --------------------------------------
// Utility: normalize
// --------------------------------------
void XSp2de::normalize(std::vector<double> &v) const
{
    double sum = 0.0;
    for (auto &val : v) {
        sum += val;
    }
    if (sum > 0.0) {
        for (auto &val : v) {
            val /= sum;
        }
    }
}

// --------------------------------------
// to_string
// --------------------------------------
std::string XSp2de::to_string() const
{
    std::ostringstream oss;
    oss << "----- XSp2de Model -----\n"
        << "nFeatures_ = " << nFeatures_ << "\n"
        << "superParent1_ = " << superParent1_ << "\n"
        << "superParent2_ = " << superParent2_ << "\n"
        << "statesClass_ = " << statesClass_ << "\n\n";
    oss << "States: [";
    for (auto s : states_) oss << s << " ";
    oss << "]\n";
    oss << "classCounts_:\n";
    for (auto v : classCounts_) oss << v << " ";
    oss << "\nclassPriors_:\n";
    for (auto v : classPriors_) oss << v << " ";
    oss << "\nsp1FeatureCounts_ (size=" << sp1FeatureCounts_.size() << ")\n";
    for (auto v : sp1FeatureCounts_) oss << v << " ";
    oss << "\nsp2FeatureCounts_ (size=" << sp2FeatureCounts_.size() << ")\n";
    for (auto v : sp2FeatureCounts_) oss << v << " ";
    oss << "\nchildCounts_ (size=" << childCounts_.size() << ")\n";
    for (auto v : childCounts_) oss << v << " ";
    oss << "\nchildOffsets_:\n";
    for (auto c : childOffsets_) oss << c << " ";
    oss << "\n----------------------------------------\n";
    return oss.str();
}

// --------------------------------------
// Some introspection about the graph
// --------------------------------------
int XSp2de::getNumberOfNodes() const
{
    // nFeatures_ feature nodes + 1 class node
    return nFeatures_ + 1;
}

int XSp2de::getClassNumStates() const
{
    return statesClass_;
}

int XSp2de::getNFeatures() const
{
    return nFeatures_;
}

int XSp2de::getNumberOfStates() const
{
    // Purely illustrative; you may prefer to sum the actual cardinalities
    // or report something else entirely.
    return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
}

int XSp2de::getNumberOfEdges() const
{
    // In an SPnDE with n = 2 superparents, each child has edges from the
    // class and from both superparents:
    //   class->sp1, class->sp2 => 2 edges
    //   class->child           => (nFeatures_ - 2) edges
    //   sp1->child, sp2->child => 2 * (nFeatures_ - 2) edges
    // total = 2 + 3 * (nFeatures_ - 2) = 3 * nFeatures_ - 4
    return 3 * nFeatures_ - 4;
}

} // namespace bayesnet
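// Usage sketch (hypothetical driver code, not part of the library): assumes X
// is a discretized [nFeatures x nSamples] integer tensor, y the labels and w
// the per-sample weights.
//   auto clf = bayesnet::XSp2de(0, 1); // superparents: features 0 and 1
//   clf.fitx(X, y, w, bayesnet::Smoothing_t::LAPLACE);
//   auto proba = clf.predict_proba(X_test); // [nSamples x statesClass_]
//   float acc = clf.score(X_test, y_test);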