Add XBAODE & XSpode classifiers
This commit is contained in:
379
bayesnet/classifiers/XSPODE.cc
Normal file
379
bayesnet/classifiers/XSPODE.cc
Normal file
@@ -0,0 +1,379 @@
|
|||||||
|
// ***************************************************************
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX-FileType: SOURCE
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
// ***************************************************************
|
||||||
|
|
||||||
|
#include "XSPODE.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace bayesnet {
|
||||||
|
|
||||||
|
// --------------------------------------
|
||||||
|
// Constructor
|
||||||
|
// --------------------------------------
|
||||||
|
XSpode::XSpode(int spIndex)
|
||||||
|
: superParent_{ spIndex },
|
||||||
|
nFeatures_{ 0 },
|
||||||
|
statesClass_{ 0 },
|
||||||
|
alpha_{ 1.0 },
|
||||||
|
initializer_{ 1.0 },
|
||||||
|
semaphore_{ CountingSemaphore::getInstance() }, Classifier(Network())
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void XSpode::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, torch::Tensor& weights_, const Smoothing_t smoothing)
|
||||||
|
{
|
||||||
|
m = X[0].size();
|
||||||
|
n = X.size();
|
||||||
|
buildModel(weights_);
|
||||||
|
trainModel(weights_, smoothing);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------
|
||||||
|
// trainModel
|
||||||
|
// --------------------------------------
|
||||||
|
// Initialize storage needed for the super-parent and child features counts and probs.
|
||||||
|
// --------------------------------------
|
||||||
|
void XSpode::buildModel(const torch::Tensor& weights)
|
||||||
|
{
|
||||||
|
int numInstances = m;
|
||||||
|
nFeatures_ = n;
|
||||||
|
|
||||||
|
// Derive the number of states for each feature and for the class.
|
||||||
|
// (This is just one approach; adapt to match your environment.)
|
||||||
|
// Here, we assume the user also gave us the total #states per feature in e.g. statesMap.
|
||||||
|
// We'll simply reconstruct the integer states_ array. The last entry is statesClass_.
|
||||||
|
states_.resize(nFeatures_);
|
||||||
|
for (int f = 0; f < nFeatures_; f++) {
|
||||||
|
// Suppose you look up in “statesMap” by the feature name, or read directly from X.
|
||||||
|
// We'll assume states_[f] = max value in X[f] + 1.
|
||||||
|
states_[f] = dataset[f].max().item<int>() + 1;
|
||||||
|
}
|
||||||
|
// For the class: states_.back() = max(y)+1
|
||||||
|
statesClass_ = dataset[-1].max().item<int>() + 1;
|
||||||
|
|
||||||
|
// Initialize counts
|
||||||
|
classCounts_.resize(statesClass_, 0.0);
|
||||||
|
// p(x_sp = spVal | c)
|
||||||
|
// We'll store these counts in spFeatureCounts_[spVal * statesClass_ + c].
|
||||||
|
spFeatureCounts_.resize(states_[superParent_] * statesClass_, 0.0);
|
||||||
|
|
||||||
|
// For each child ≠ sp, we store p(childVal| c, spVal) in a separate block of childCounts_.
|
||||||
|
// childCounts_ will be sized as sum_{child≠sp} (states_[child] * statesClass_ * states_[sp]).
|
||||||
|
// We also need an offset for each child to index into childCounts_.
|
||||||
|
childOffsets_.resize(nFeatures_, -1);
|
||||||
|
int totalSize = 0;
|
||||||
|
for (int f = 0; f < nFeatures_; f++) {
|
||||||
|
if (f == superParent_) continue; // skip sp
|
||||||
|
childOffsets_[f] = totalSize;
|
||||||
|
// block size for this child's counts: states_[f] * statesClass_ * states_[superParent_]
|
||||||
|
totalSize += (states_[f] * statesClass_ * states_[superParent_]);
|
||||||
|
}
|
||||||
|
childCounts_.resize(totalSize, 0.0);
|
||||||
|
}
|
||||||
|
// --------------------------------------
|
||||||
|
// buildModel
|
||||||
|
// --------------------------------------
|
||||||
|
//
|
||||||
|
// We only store conditional probabilities for:
|
||||||
|
// p(x_sp| c) (the super-parent feature)
|
||||||
|
// p(x_child| c, x_sp) for all child ≠ sp
|
||||||
|
//
|
||||||
|
// --------------------------------------
|
||||||
|
void XSpode::trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
|
||||||
|
{
|
||||||
|
// Accumulate raw counts
|
||||||
|
for (int i = 0; i < m; i++) {
|
||||||
|
std::vector<int> instance(nFeatures_ + 1);
|
||||||
|
for (int f = 0; f < nFeatures_; f++) {
|
||||||
|
instance[f] = dataset[f][i].item<int>();
|
||||||
|
}
|
||||||
|
instance[nFeatures_] = dataset[-1].item<int>();
|
||||||
|
addSample(instance, weights[i].item<double>());
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (smoothing) {
|
||||||
|
case bayesnet::Smoothing_t::ORIGINAL:
|
||||||
|
alpha_ = 1.0 / m;
|
||||||
|
break;
|
||||||
|
case bayesnet::Smoothing_t::LAPLACE:
|
||||||
|
alpha_ = 1.0;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
alpha_ = 0.0; // No smoothing
|
||||||
|
}
|
||||||
|
initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_); // for numerical stability
|
||||||
|
// Convert raw counts to probabilities
|
||||||
|
computeProbabilities();
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------
|
||||||
|
// addSample
|
||||||
|
// --------------------------------------
|
||||||
|
//
|
||||||
|
// instance has size nFeatures_ + 1, with the class at the end.
|
||||||
|
// We add the sample weight to the appropriate counters for each (c, superParentVal, childVal).
|
||||||
|
//
|
||||||
|
void XSpode::addSample(const std::vector<int>& instance, double weight)
|
||||||
|
{
|
||||||
|
if (weight <= 0.0) return;
|
||||||
|
|
||||||
|
int c = instance.back();
|
||||||
|
// (A) increment classCounts
|
||||||
|
classCounts_[c] += weight;
|
||||||
|
|
||||||
|
// (B) increment super-parent counts => p(x_sp | c)
|
||||||
|
int spVal = instance[superParent_];
|
||||||
|
spFeatureCounts_[spVal * statesClass_ + c] += weight;
|
||||||
|
|
||||||
|
// (C) increment child counts => p(childVal | c, x_sp)
|
||||||
|
for (int f = 0; f < nFeatures_; f++) {
|
||||||
|
if (f == superParent_) continue;
|
||||||
|
int childVal = instance[f];
|
||||||
|
int offset = childOffsets_[f];
|
||||||
|
// Compute index in childCounts_.
|
||||||
|
// Layout: [ offset + (spVal * states_[f] + childVal) * statesClass_ + c ]
|
||||||
|
int blockSize = states_[f] * statesClass_;
|
||||||
|
int idx = offset + spVal * blockSize + childVal * statesClass_ + c;
|
||||||
|
childCounts_[idx] += weight;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------
|
||||||
|
// computeProbabilities
|
||||||
|
// --------------------------------------
|
||||||
|
//
|
||||||
|
// Once all samples are added in COUNTS mode, call this to:
|
||||||
|
// p(c)
|
||||||
|
// p(x_sp = spVal | c)
|
||||||
|
// p(x_child = v | c, x_sp = s_sp)
|
||||||
|
//
|
||||||
|
// --------------------------------------
|
||||||
|
void XSpode::computeProbabilities()
|
||||||
|
{
|
||||||
|
double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
|
||||||
|
|
||||||
|
// p(c) => classPriors_
|
||||||
|
classPriors_.resize(statesClass_, 0.0);
|
||||||
|
if (totalCount <= 0.0) {
|
||||||
|
// fallback => uniform
|
||||||
|
double unif = 1.0 / static_cast<double>(statesClass_);
|
||||||
|
for (int c = 0; c < statesClass_; c++) {
|
||||||
|
classPriors_[c] = unif;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int c = 0; c < statesClass_; c++) {
|
||||||
|
classPriors_[c] = (classCounts_[c] + alpha_)
|
||||||
|
/ (totalCount + alpha_ * statesClass_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// p(x_sp | c)
|
||||||
|
spFeatureProbs_.resize(spFeatureCounts_.size());
|
||||||
|
// denominator for spVal * statesClass_ + c is just classCounts_[c] + alpha_ * (#states of sp)
|
||||||
|
int spCard = states_[superParent_];
|
||||||
|
for (int spVal = 0; spVal < spCard; spVal++) {
|
||||||
|
for (int c = 0; c < statesClass_; c++) {
|
||||||
|
double denom = classCounts_[c] + alpha_ * spCard;
|
||||||
|
double num = spFeatureCounts_[spVal * statesClass_ + c] + alpha_;
|
||||||
|
spFeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// p(x_child | c, x_sp)
|
||||||
|
childProbs_.resize(childCounts_.size());
|
||||||
|
for (int f = 0; f < nFeatures_; f++) {
|
||||||
|
if (f == superParent_) continue;
|
||||||
|
int offset = childOffsets_[f];
|
||||||
|
int childCard = states_[f];
|
||||||
|
|
||||||
|
// For each spVal, c, childVal in childCounts_:
|
||||||
|
for (int spVal = 0; spVal < spCard; spVal++) {
|
||||||
|
for (int childVal = 0; childVal < childCard; childVal++) {
|
||||||
|
for (int c = 0; c < statesClass_; c++) {
|
||||||
|
int idx = offset + spVal * (childCard * statesClass_)
|
||||||
|
+ childVal * statesClass_
|
||||||
|
+ c;
|
||||||
|
|
||||||
|
double num = childCounts_[idx] + alpha_;
|
||||||
|
// denominator = spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * (#states of child)
|
||||||
|
double denom = spFeatureCounts_[spVal * statesClass_ + c]
|
||||||
|
+ alpha_ * childCard;
|
||||||
|
childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------
|
||||||
|
// predict_proba
|
||||||
|
// --------------------------------------
|
||||||
|
//
|
||||||
|
// For a single instance x of dimension nFeatures_:
|
||||||
|
// P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp).
|
||||||
|
//
|
||||||
|
// --------------------------------------
|
||||||
|
std::vector<double> XSpode::predict_proba(const std::vector<int>& instance) const
|
||||||
|
{
|
||||||
|
std::vector<double> probs(statesClass_, 0.0);
|
||||||
|
|
||||||
|
// Multiply p(c) × p(x_sp | c)
|
||||||
|
int spVal = instance[superParent_];
|
||||||
|
for (int c = 0; c < statesClass_; c++) {
|
||||||
|
double pc = classPriors_[c];
|
||||||
|
double pSpC = spFeatureProbs_[spVal * statesClass_ + c];
|
||||||
|
probs[c] = pc * pSpC * initializer_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multiply by each child’s probability p(x_child | c, x_sp)
|
||||||
|
for (int feature = 0; feature < nFeatures_; feature++) {
|
||||||
|
if (feature == superParent_) continue; // skip sp
|
||||||
|
int sf = instance[feature];
|
||||||
|
int offset = childOffsets_[feature];
|
||||||
|
int childCard = states_[feature]; // not used directly, but for clarity
|
||||||
|
// Index into childProbs_ = offset + spVal*(childCard*statesClass_) + childVal*statesClass_ + c
|
||||||
|
int base = offset + spVal * (childCard * statesClass_) + sf * statesClass_;
|
||||||
|
for (int c = 0; c < statesClass_; c++) {
|
||||||
|
probs[c] *= childProbs_[base + c];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize
|
||||||
|
normalize(probs);
|
||||||
|
return probs;
|
||||||
|
}
|
||||||
|
std::vector<std::vector<double>> XSpode::predict_proba(const std::vector<std::vector<int>>& test_data)
|
||||||
|
{
|
||||||
|
int test_size = test_data[0].size();
|
||||||
|
int sample_size = test_data.size();
|
||||||
|
auto probabilities = std::vector<std::vector<double>>(test_size, std::vector<double>(statesClass_));
|
||||||
|
|
||||||
|
int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
|
||||||
|
std::vector<std::thread> threads;
|
||||||
|
auto worker = [&](const std::vector<std::vector<int>>& samples, int begin, int chunk, int sample_size, std::vector<std::vector<double>>& predictions) {
|
||||||
|
std::string threadName = "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
|
||||||
|
#if defined(__linux__)
|
||||||
|
pthread_setname_np(pthread_self(), threadName.c_str());
|
||||||
|
#else
|
||||||
|
pthread_setname_np(threadName.c_str());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
std::vector<int> instance(sample_size);
|
||||||
|
for (int sample = begin; sample < begin + chunk; ++sample) {
|
||||||
|
for (int feature = 0; feature < sample_size; ++feature) {
|
||||||
|
instance[feature] = samples[feature][sample];
|
||||||
|
}
|
||||||
|
predictions[sample] = predict_proba(instance);
|
||||||
|
}
|
||||||
|
semaphore_.release();
|
||||||
|
};
|
||||||
|
for (int begin = 0; begin < test_size; begin += chunk_size) {
|
||||||
|
int chunk = std::min(chunk_size, test_size - begin);
|
||||||
|
semaphore_.acquire();
|
||||||
|
threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
|
||||||
|
}
|
||||||
|
for (auto& thread : threads) {
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
return probabilities;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------
|
||||||
|
// predict
|
||||||
|
// --------------------------------------
|
||||||
|
//
|
||||||
|
// Return the class argmax( P(c|x) ).
|
||||||
|
// --------------------------------------
|
||||||
|
int XSpode::predict(const std::vector<int>& instance) const
|
||||||
|
{
|
||||||
|
auto p = predict_proba(instance);
|
||||||
|
return static_cast<int>(std::distance(p.begin(),
|
||||||
|
std::max_element(p.begin(), p.end())));
|
||||||
|
}
|
||||||
|
std::vector<int> XSpode::predict(std::vector<std::vector<int>>& test_data)
|
||||||
|
{
|
||||||
|
if (!fitted) {
|
||||||
|
throw std::logic_error(CLASSIFIER_NOT_FITTED);
|
||||||
|
}
|
||||||
|
auto probabilities = predict_proba(test_data);
|
||||||
|
std::vector<int> predictions(probabilities.size(), 0);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < probabilities.size(); i++) {
|
||||||
|
predictions[i] = std::distance(probabilities[i].begin(), std::max_element(probabilities[i].begin(), probabilities[i].end()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return predictions;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------
|
||||||
|
// Utility: normalize
|
||||||
|
// --------------------------------------
|
||||||
|
void XSpode::normalize(std::vector<double>& v) const
|
||||||
|
{
|
||||||
|
double sum = 0.0;
|
||||||
|
for (auto val : v) { sum += val; }
|
||||||
|
if (sum <= 0.0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (auto& val : v) {
|
||||||
|
val /= sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------
|
||||||
|
// representation of the model
|
||||||
|
// --------------------------------------
|
||||||
|
std::string XSpode::to_string() const
|
||||||
|
{
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << "---- SPODE Model ----" << std::endl
|
||||||
|
<< "nFeatures_ = " << nFeatures_ << std::endl
|
||||||
|
<< "superParent_ = " << superParent_ << std::endl
|
||||||
|
<< "statesClass_ = " << statesClass_ << std::endl
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
oss << "States: [";
|
||||||
|
for (int s : states_) oss << s << " ";
|
||||||
|
oss << "]" << std::endl;
|
||||||
|
oss << "classCounts_: [";
|
||||||
|
for (double c : classCounts_) oss << c << " ";
|
||||||
|
oss << "]" << std::endl;
|
||||||
|
oss << "classPriors_: [";
|
||||||
|
for (double c : classPriors_) oss << c << " ";
|
||||||
|
oss << "]" << std::endl;
|
||||||
|
oss << "spFeatureCounts_: size = " << spFeatureCounts_.size() << std::endl << "[";
|
||||||
|
for (double c : spFeatureCounts_) oss << c << " ";
|
||||||
|
oss << "]" << std::endl;
|
||||||
|
oss << "spFeatureProbs_: size = " << spFeatureProbs_.size() << std::endl << "[";
|
||||||
|
for (double c : spFeatureProbs_) oss << c << " ";
|
||||||
|
oss << "]" << std::endl;
|
||||||
|
oss << "childCounts_: size = " << childCounts_.size() << std::endl << "[";
|
||||||
|
for (double cc : childCounts_) oss << cc << " ";
|
||||||
|
oss << "]" << std::endl;
|
||||||
|
|
||||||
|
for (double cp : childProbs_) oss << cp << " ";
|
||||||
|
oss << "]" << std::endl;
|
||||||
|
oss << "childOffsets_: [";
|
||||||
|
for (int co : childOffsets_) oss << co << " ";
|
||||||
|
oss << "]" << std::endl;
|
||||||
|
oss << "---------------------" << std::endl;
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
int XSpode::getNumberOfNodes() const { return nFeatures_ + 1; }
|
||||||
|
int XSpode::getClassNumStates() const { return statesClass_; }
|
||||||
|
int XSpode::getNFeatures() const { return nFeatures_; }
|
||||||
|
int XSpode::getNumberOfStates() const
|
||||||
|
{
|
||||||
|
return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
|
||||||
|
}
|
||||||
|
int XSpode::getNumberOfEdges() const
|
||||||
|
{
|
||||||
|
return nFeatures_ * (2 * nFeatures_ - 1);
|
||||||
|
}
|
||||||
|
// Mutable access to the per-feature cardinalities (the class is not included).
std::vector<int>& XSpode::getStates()
{
    return states_;
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
79
bayesnet/classifiers/XSPODE.h
Normal file
79
bayesnet/classifiers/XSPODE.h
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
// ***************************************************************
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX-FileType: SOURCE
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
// ***************************************************************
|
||||||
|
|
||||||
|
#ifndef XSPODE_H
|
||||||
|
#define XSPODE_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <numeric>
|
||||||
|
#include <string>
|
||||||
|
#include <cmath>
|
||||||
|
#include <limits>
|
||||||
|
#include <sstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <torch/torch.h>
|
||||||
|
#include "Classifier.h"
|
||||||
|
#include "bayesnet/utils/CountingSemaphore.h"
|
||||||
|
|
||||||
|
namespace bayesnet {
|
||||||
|
|
||||||
|
class XSpode : public Classifier {
|
||||||
|
public:
|
||||||
|
explicit XSpode(int spIndex);
|
||||||
|
std::vector<double> predict_proba(const std::vector<int>& instance) const;
|
||||||
|
std::vector<std::vector<double>> predict_proba(const std::vector<std::vector<int>>& test_data);
|
||||||
|
int predict(const std::vector<int>& instance) const;
|
||||||
|
std::vector<int> predict(std::vector<std::vector<int>>& test_data);
|
||||||
|
void normalize(std::vector<double>& v) const;
|
||||||
|
std::string to_string() const;
|
||||||
|
int statesClass() const;
|
||||||
|
int getNFeatures() const;
|
||||||
|
int getNumberOfNodes() const override;
|
||||||
|
int getNumberOfEdges() const override;
|
||||||
|
int getNumberOfStates() const override;
|
||||||
|
int getClassNumStates() const override;
|
||||||
|
std::vector<int>& getStates();
|
||||||
|
std::vector<std::string> graph(const std::string& title) const override { return std::vector<std::string>({title}); }
|
||||||
|
void fit(std::vector<std::vector<int>>& X, std::vector<int>& y, torch::Tensor& weights_, const Smoothing_t smoothing);
|
||||||
|
protected:
|
||||||
|
void buildModel(const torch::Tensor& weights) override;
|
||||||
|
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
|
||||||
|
private:
|
||||||
|
void addSample(const std::vector<int>& instance, double weight);
|
||||||
|
void computeProbabilities();
|
||||||
|
int superParent_;
|
||||||
|
int nFeatures_;
|
||||||
|
int statesClass_;
|
||||||
|
std::vector<int> states_; // [states_feat0, ..., states_feat(N-1)] (class not included in this array)
|
||||||
|
|
||||||
|
const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted";
|
||||||
|
|
||||||
|
// Class counts
|
||||||
|
std::vector<double> classCounts_; // [c], accumulative
|
||||||
|
std::vector<double> classPriors_; // [c], after normalization
|
||||||
|
|
||||||
|
// For p(x_sp = spVal | c)
|
||||||
|
std::vector<double> spFeatureCounts_; // [spVal * statesClass_ + c]
|
||||||
|
std::vector<double> spFeatureProbs_; // same shape, after normalization
|
||||||
|
|
||||||
|
// For p(x_child = childVal | x_sp = spVal, c)
|
||||||
|
// childCounts_ is big enough to hold all child features except sp:
|
||||||
|
// For each child f, we store childOffsets_[f] as the start index, then
|
||||||
|
// childVal, spVal, c => the data.
|
||||||
|
std::vector<double> childCounts_;
|
||||||
|
std::vector<double> childProbs_;
|
||||||
|
std::vector<int> childOffsets_;
|
||||||
|
|
||||||
|
double alpha_ = 1.0;
|
||||||
|
double initializer_; // for numerical stability
|
||||||
|
CountingSemaphore& semaphore_;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // XSPODE_H
|
@@ -4,7 +4,6 @@
|
|||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
// ***************************************************************
|
// ***************************************************************
|
||||||
#include "Ensemble.h"
|
#include "Ensemble.h"
|
||||||
#include "bayesnet/utils/CountingSemaphore.h"
|
|
||||||
|
|
||||||
namespace bayesnet {
|
namespace bayesnet {
|
||||||
|
|
||||||
|
179
bayesnet/ensembles/XBAODE.cc
Normal file
179
bayesnet/ensembles/XBAODE.cc
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
// ***************************************************************
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
|
||||||
|
// SPDX-FileType: SOURCE
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
// ***************************************************************
|
||||||
|
#include <random>
|
||||||
|
#include <set>
|
||||||
|
#include <functional>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <tuple>
|
||||||
|
#include "XBAODE.h"
|
||||||
|
#include "bayesnet/classifiers/XSPODE.h"
|
||||||
|
#include "bayesnet/utils/TensorUtils.hpp"
|
||||||
|
|
||||||
|
namespace bayesnet {
|
||||||
|
// Registers the hyperparameter names this ensemble accepts.
XBAODE::XBAODE()
{
    validHyperparameters = {
        "alpha_block",
        "order",
        "convergence",
        "convergence_best",
        "bisection",
        "threshold",
        "maxTolerance",
        "predict_voting",
        "select_features"
    };
}
|
||||||
|
void XBAODE::add_model(std::unique_ptr<Classifier> model, double significance)
|
||||||
|
{
|
||||||
|
models.push_back(std::move(model));
|
||||||
|
n_models++;
|
||||||
|
significanceModels.push_back(significance);
|
||||||
|
}
|
||||||
|
void XBAODE::remove_last_model()
|
||||||
|
{
|
||||||
|
models.pop_back();
|
||||||
|
significanceModels.pop_back();
|
||||||
|
n_models--;
|
||||||
|
}
|
||||||
|
std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing)
|
||||||
|
{
|
||||||
|
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
|
||||||
|
std::vector<int> featuresSelected = featureSelection(weights_);
|
||||||
|
for (const int& feature : featuresSelected) {
|
||||||
|
std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
|
||||||
|
model->fit(dataset, features, className, states, weights_, smoothing);
|
||||||
|
add_model(std::move(model), 1.0);
|
||||||
|
}
|
||||||
|
notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
|
||||||
|
return featuresSelected;
|
||||||
|
}
|
||||||
|
// Boosting loop: repeatedly ranks the unused features, fits one XSpode per selected
// feature on the current sample weights, updates the weights from its (or the
// ensemble's) training predictions and stores the model with its amount of say.
// Stops when update_weights reports completion, when validation accuracy fails to
// improve more than maxTolerance times (convergence mode), or when every feature
// has been used.
//
// weights:   not read by this method; training starts from uniform 1/m weights
// smoothing: forwarded to every XSpode fit
void XBAODE::trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
{
    X_train_ = TensorUtils::to_matrix(X_train);
    y_train_ = TensorUtils::to_vector<int>(y_train);
    X_test_ = TensorUtils::to_matrix(X_test);
    y_test_ = TensorUtils::to_vector<int>(y_test);
    // NOTE(review): add_model() appends to significanceModels in step with models,
    // so these n leading entries shift every model's significance by n positions —
    // confirm against how the ensemble pairs models with significances.
    significanceModels.resize(n, 0.0); // n initialized in Classifier.cc
    fitted = true;
    double alpha_t = 0.0;
    torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
    bool finished = false;
    std::vector<int> featuresUsed;
    n_models = 0;
    if (selectFeatures) {
        featuresUsed = initializeModels(smoothing);
        auto ypred = predict(X_train_);
        auto ypred_t = torch::tensor(ypred);
        std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
        // Update significance of the models: replace the provisional entries pushed
        // by initializeModels (one per selected feature) with alpha_t.
        for (std::size_t i = 0; i < featuresUsed.size(); ++i) {
            significanceModels.pop_back();
        }
        for (std::size_t i = 0; i < featuresUsed.size(); ++i) {
            significanceModels.push_back(alpha_t);
        }
        // VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models);
        if (finished) {
            return;
        }
    }
    int numItemsPack = 0; // The counter of the models inserted in the current pack
    // Variables to control the accuracy finish condition
    double priorAccuracy = 0.0;
    double improvement = 1.0;
    double convergence_threshold = 1e-4;
    int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
    // Step 0: Set the finish condition
    // epsilon sub t > 0.5 => inverse the weights_ policy
    // validation error is not decreasing
    // run out of features
    bool ascending = order_algorithm == bayesnet::Orders.ASC;
    std::mt19937 g{ 173 };
    while (!finished) {
        // Step 1: Build ranking with mutual information
        auto ranking = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
        if (order_algorithm == bayesnet::Orders.RAND) {
            std::shuffle(ranking.begin(), ranking.end(), g);
        }
        // Remove used features
        ranking.erase(std::remove_if(ranking.begin(), ranking.end(), [&](auto x)
            { return std::find(featuresUsed.begin(), featuresUsed.end(), x) != featuresUsed.end(); }),
            ranking.end()
        );
        // Pack size: doubles with every non-improving round when bisection is on.
        int k = bisection ? (1 << tolerance) : 1;
        int counter = 0; // The model counter of the current pack
        // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
        while (counter++ < k && ranking.size() > 0) {
            auto feature = ranking[0];
            ranking.erase(ranking.begin());
            std::unique_ptr<Classifier> model;
            model = std::make_unique<XSpode>(feature);
            dynamic_cast<XSpode*>(model.get())->fit(X_train_, y_train_, weights_, smoothing); // using exclusive XSpode fit method
            std::vector<int> ypred;
            if (alpha_block) {
                //
                // Compute the prediction with the current ensemble + model
                //
                // Add the model to the ensemble
                add_model(std::move(model), 1.0);
                // Compute the prediction
                ypred = predict(X_train_);
                // Take the model back before its slot is removed; otherwise it would be
                // destroyed here and a null pointer would be stored below.
                model = std::move(models.back());
                // Remove the model from the ensemble. remove_last_model() already drops
                // the matching significance entry, so no extra pop is needed.
                remove_last_model();
            } else {
                ypred = model->predict(X_train_);
            }
            // Step 3.1: Compute the classifier amount of say
            auto ypred_t = torch::tensor(ypred);
            std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
            // Step 3.4: Store classifier and its accuracy to weigh its future vote
            numItemsPack++;
            featuresUsed.push_back(feature);
            add_model(std::move(model), alpha_t);
            // VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size());
        } // End of the pack
        if (convergence && !finished) {
            auto y_val_predict = predict(X_test);
            double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
            if (priorAccuracy == 0) {
                priorAccuracy = accuracy;
            } else {
                improvement = accuracy - priorAccuracy;
            }
            if (improvement < convergence_threshold) {
                // VLOG_SCOPE_F(3, "  (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance++;
            } else {
                // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
                tolerance = 0; // Reset the counter if the model performs better
                numItemsPack = 0;
            }
            if (convergence_best) {
                // Keep the best accuracy until now as the prior accuracy
                priorAccuracy = std::max(accuracy, priorAccuracy);
            } else {
                // Keep the last accuracy obtained as the prior accuracy
                priorAccuracy = accuracy;
            }
        }
        // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
        finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
    }
    if (tolerance > maxTolerance) {
        if (numItemsPack < n_models) {
            notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
            // Walk back over the last numItemsPack features with signed bounds, so the
            // comparison cannot be promoted to unsigned.
            const int used = static_cast<int>(featuresUsed.size());
            const int firstRemoved = std::max(0, used - numItemsPack);
            for (int i = used - 1; i >= firstRemoved; --i) {
                remove_last_model();
                // NOTE(review): this indexes significanceModels by feature id while
                // add_model()/remove_last_model() keep it in model order — confirm intent.
                significanceModels[featuresUsed[i]] = 0.0;
            }
            // VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features used.", n_models, featuresUsed.size());
        } else {
            notes.push_back("Convergence threshold reached & 0 models eliminated");
            // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
        }
    }
    if (featuresUsed.size() != features.size()) {
        notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
        status = bayesnet::WARNING;
    }
    notes.push_back("Number of models: " + std::to_string(n_models));
    return;
}
|
||||||
|
}
|
36
bayesnet/ensembles/XBAODE.h
Normal file
36
bayesnet/ensembles/XBAODE.h
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
// ***************************************************************
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
|
||||||
|
// SPDX-FileType: SOURCE
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
// ***************************************************************
|
||||||
|
|
||||||
|
#ifndef XBAODE_H
|
||||||
|
#define XBAODE_H
|
||||||
|
#include <vector>
|
||||||
|
#include <cmath>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
|
#include "bayesnet/classifiers/XSPODE.h"
|
||||||
|
#include "Boost.h"
|
||||||
|
|
||||||
|
namespace bayesnet {
|
||||||
|
class XBAODE : public Boost {
|
||||||
|
|
||||||
|
// Hay que hacer un vector de modelos entrenados y hacer un predict ensemble con todos ellos
|
||||||
|
// Probar XA1DE con smooth original y laplace y comprobar diferencias si se pasan pesos a 1 o a 1/m
|
||||||
|
public:
|
||||||
|
XBAODE();
|
||||||
|
std::string getVersion() override { return version; };
|
||||||
|
protected:
|
||||||
|
void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
|
||||||
|
private:
|
||||||
|
void add_model(std::unique_ptr<Classifier> model, double significance);
|
||||||
|
void remove_last_model();
|
||||||
|
std::vector<int> initializeModels(const Smoothing_t smoothing);
|
||||||
|
std::vector<std::vector<int>> X_train_, X_test_;
|
||||||
|
std::vector<int> y_train_, y_test_;
|
||||||
|
std::string version = "0.9.7";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif // XBAODE_H
|
@@ -32,6 +32,14 @@ public:
|
|||||||
cv_.notify_one();
|
cv_.notify_one();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Returns the current value of count_.
// NOTE(review): the value is read without any locking visible here — confirm
// callers only use it as a hint.
uint getCount() const
{
    return count_;
}
|
||||||
|
// Returns max_count_, the limit this semaphore was constructed with.
uint getMaxCount() const
{
    return max_count_;
}
|
||||||
private:
|
private:
|
||||||
CountingSemaphore()
|
CountingSemaphore()
|
||||||
: max_count_(std::max(1u, static_cast<uint>(0.95 * std::thread::hardware_concurrency()))),
|
: max_count_(std::max(1u, static_cast<uint>(0.95 * std::thread::hardware_concurrency()))),
|
||||||
|
51
bayesnet/utils/TensorUtils.hpp
Normal file
51
bayesnet/utils/TensorUtils.hpp
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
#ifndef TENSORUTILS_HPP
|
||||||
|
#define TENSORUTILS_HPP
|
||||||
|
#include <torch/torch.h>
|
||||||
|
#include <vector>
|
||||||
|
namespace bayesnet {
|
||||||
|
class TensorUtils {
|
||||||
|
public:
|
||||||
|
static std::vector<std::vector<int>> to_matrix(const torch::Tensor& X)
|
||||||
|
{
|
||||||
|
// Ensure tensor is contiguous in memory
|
||||||
|
auto X_contig = X.contiguous();
|
||||||
|
|
||||||
|
// Access tensor data pointer directly
|
||||||
|
auto data_ptr = X_contig.data_ptr<int>();
|
||||||
|
|
||||||
|
// IF you are using int64_t as the data type, use the following line
|
||||||
|
//auto data_ptr = X_contig.data_ptr<int64_t>();
|
||||||
|
//std::vector<std::vector<int64_t>> data(X.size(0), std::vector<int64_t>(X.size(1)));
|
||||||
|
|
||||||
|
// Prepare output container
|
||||||
|
std::vector<std::vector<int>> data(X.size(0), std::vector<int>(X.size(1)));
|
||||||
|
|
||||||
|
// Fill the 2D vector in a single loop using pointer arithmetic
|
||||||
|
int rows = X.size(0);
|
||||||
|
int cols = X.size(1);
|
||||||
|
for (int i = 0; i < rows; ++i) {
|
||||||
|
std::copy(data_ptr + i * cols, data_ptr + (i + 1) * cols, data[i].begin());
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
template <typename T>
|
||||||
|
static std::vector<T> to_vector(const torch::Tensor& y)
|
||||||
|
{
|
||||||
|
// Ensure the tensor is contiguous in memory
|
||||||
|
auto y_contig = y.contiguous();
|
||||||
|
|
||||||
|
// Access data pointer
|
||||||
|
auto data_ptr = y_contig.data_ptr<T>();
|
||||||
|
|
||||||
|
// Prepare output container
|
||||||
|
std::vector<T> data(y.size(0));
|
||||||
|
|
||||||
|
// Copy data efficiently
|
||||||
|
std::copy(data_ptr, data_ptr + y.size(0), data.begin());
|
||||||
|
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // TENSORUTILS_HPP
|
Reference in New Issue
Block a user