diff --git a/bayesnet/classifiers/XSPODE.cc b/bayesnet/classifiers/XSPODE.cc
new file mode 100644
index 0000000..91711d2
--- /dev/null
+++ b/bayesnet/classifiers/XSPODE.cc
@@ -0,0 +1,379 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include "XSPODE.h"
+
+namespace bayesnet {
+
+    // --------------------------------------
+    // Constructor
+    // --------------------------------------
+    XSpode::XSpode(int spIndex)
+        : Classifier(Network()),
+        superParent_{ spIndex },
+        nFeatures_{ 0 },
+        statesClass_{ 0 },
+        alpha_{ 1.0 },
+        initializer_{ 1.0 },
+        semaphore_{ CountingSemaphore::getInstance() }
+    {
+    }
+
+    void XSpode::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, torch::Tensor& weights_, const Smoothing_t smoothing)
+    {
+        m = X[0].size();
+        n = X.size();
+        // Pack X (one row per feature) and y into the dataset tensor (class row last),
+        // since buildModel/trainModel read their counts from dataset.
+        dataset = torch::zeros({ n + 1, m }, torch::kInt32);
+        for (int f = 0; f < n; f++) {
+            dataset.index_put_({ f, "..." }, torch::tensor(X[f], torch::kInt32));
+        }
+        dataset.index_put_({ n, "..." }, torch::tensor(y, torch::kInt32));
+        buildModel(weights_);
+        trainModel(weights_, smoothing);
+        fitted = true;
+    }
+
+    // --------------------------------------
+    // buildModel
+    // --------------------------------------
+    // Initialize the storage needed for the super-parent and child feature
+    // counts and probabilities.
+    // --------------------------------------
+    void XSpode::buildModel(const torch::Tensor& weights)
+    {
+        nFeatures_ = n;
+
+        // Derive the number of states for each feature and for the class.
+        // (This is just one approach; adapt it to match your environment.
+        // The cardinalities could also be looked up in a states map by feature name.)
+        // Here we read them directly from the data: states_[f] = max value in X[f] + 1.
+        states_.resize(nFeatures_);
+        for (int f = 0; f < nFeatures_; f++) {
+            states_[f] = dataset[f].max().item<int>() + 1;
+        }
+        // For the class: statesClass_ = max(y) + 1
+        statesClass_ = dataset[-1].max().item<int>() + 1;
+
+        // Initialize counts
+        classCounts_.resize(statesClass_, 0.0);
+        // p(x_sp = spVal | c) is counted in spFeatureCounts_[spVal * statesClass_ + c].
+        spFeatureCounts_.resize(states_[superParent_] * statesClass_, 0.0);
+
+        // For each child ≠ sp, we store p(childVal | c, spVal) in a separate block of childCounts_.
+        // childCounts_ is sized as sum_{child ≠ sp} (states_[child] * statesClass_ * states_[sp]).
+        // We also need an offset for each child to index into childCounts_.
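+        // Worked example with illustrative sizes (not taken from the original source):
+        // with nFeatures_ = 3, states_ = {2, 3, 2}, statesClass_ = 2 and superParent_ = 0,
+        //   f = 1: childOffsets_[1] = 0,  block = 3 * 2 * 2 = 12
+        //   f = 2: childOffsets_[2] = 12, block = 2 * 2 * 2 = 8
+        // so childCounts_ ends up holding 12 + 8 = 20 weighted counts.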
+        childOffsets_.resize(nFeatures_, -1);
+        int totalSize = 0;
+        for (int f = 0; f < nFeatures_; f++) {
+            if (f == superParent_) continue; // skip sp
+            childOffsets_[f] = totalSize;
+            // Block size for this child's counts: states_[f] * statesClass_ * states_[superParent_]
+            totalSize += (states_[f] * statesClass_ * states_[superParent_]);
+        }
+        childCounts_.resize(totalSize, 0.0);
+    }
+
+    // --------------------------------------
+    // trainModel
+    // --------------------------------------
+    // We only store conditional probabilities for:
+    //   p(x_sp | c)          (the super-parent feature)
+    //   p(x_child | c, x_sp) for every child ≠ sp
+    // --------------------------------------
+    void XSpode::trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
+    {
+        // Accumulate raw counts
+        for (int i = 0; i < m; i++) {
+            std::vector<int> instance(nFeatures_ + 1);
+            for (int f = 0; f < nFeatures_; f++) {
+                instance[f] = dataset[f][i].item<int>();
+            }
+            instance[nFeatures_] = dataset[-1][i].item<int>();
+            addSample(instance, weights[i].item<double>());
+        }
+
+        switch (smoothing) {
+            case bayesnet::Smoothing_t::ORIGINAL:
+                alpha_ = 1.0 / m;
+                break;
+            case bayesnet::Smoothing_t::LAPLACE:
+                alpha_ = 1.0;
+                break;
+            default:
+                alpha_ = 0.0; // No smoothing
+        }
+        initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_); // for numerical stability
+        // Convert raw counts to probabilities
+        computeProbabilities();
+    }
+
+    // --------------------------------------
+    // addSample
+    // --------------------------------------
+    // instance has size nFeatures_ + 1, with the class at the end.
+    // We add the sample weight to the appropriate counter for the class,
+    // for (superParentVal, c), and for each (childVal, c, superParentVal).
+    // --------------------------------------
+    void XSpode::addSample(const std::vector<int>& instance, double weight)
+    {
+        if (weight <= 0.0) return;
+
+        int c = instance.back();
+        // (A) increment class counts
+        classCounts_[c] += weight;
+
+        // (B) increment super-parent counts => p(x_sp | c)
+        int spVal = instance[superParent_];
+        spFeatureCounts_[spVal * statesClass_ + c] += weight;
+
+        // (C) increment child counts => p(childVal | c, x_sp)
+        for (int f = 0; f < nFeatures_; f++) {
+            if (f == superParent_) continue;
+            int childVal = instance[f];
+            int offset = childOffsets_[f];
+            // Compute the index in childCounts_.
+            // Layout: [ offset + (spVal * states_[f] + childVal) * statesClass_ + c ]
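+            // Index check with the same illustrative sizes as in buildModel (not from the
+            // original source): with states_[f] = 3, statesClass_ = 2, spVal = 1,
+            // childVal = 2 and c = 0, blockSize = 3 * 2 = 6 and
+            // idx = offset + 1 * 6 + 2 * 2 + 0 = offset + 10.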
+            int blockSize = states_[f] * statesClass_;
+            int idx = offset + spVal * blockSize + childVal * statesClass_ + c;
+            childCounts_[idx] += weight;
+        }
+    }
+
+    // --------------------------------------
+    // computeProbabilities
+    // --------------------------------------
+    // Once all samples have been counted, call this to compute:
+    //   p(c)
+    //   p(x_sp = spVal | c)
+    //   p(x_child = v | c, x_sp = s_sp)
+    // --------------------------------------
+    void XSpode::computeProbabilities()
+    {
+        double totalCount = std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
+
+        // p(c) => classPriors_
+        classPriors_.resize(statesClass_, 0.0);
+        if (totalCount <= 0.0) {
+            // fallback => uniform
+            double unif = 1.0 / static_cast<double>(statesClass_);
+            for (int c = 0; c < statesClass_; c++) {
+                classPriors_[c] = unif;
+            }
+        } else {
+            for (int c = 0; c < statesClass_; c++) {
+                classPriors_[c] = (classCounts_[c] + alpha_)
+                    / (totalCount + alpha_ * statesClass_);
+            }
+        }
+
+        // p(x_sp | c)
+        spFeatureProbs_.resize(spFeatureCounts_.size());
+        // The denominator for entry [spVal * statesClass_ + c] is classCounts_[c] + alpha_ * (#states of sp)
+        int spCard = states_[superParent_];
+        for (int spVal = 0; spVal < spCard; spVal++) {
+            for (int c = 0; c < statesClass_; c++) {
+                double denom = classCounts_[c] + alpha_ * spCard;
+                double num = spFeatureCounts_[spVal * statesClass_ + c] + alpha_;
+                spFeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
+            }
+        }
+
+        // p(x_child | c, x_sp)
+        childProbs_.resize(childCounts_.size());
+        for (int f = 0; f < nFeatures_; f++) {
+            if (f == superParent_) continue;
+            int offset = childOffsets_[f];
+            int childCard = states_[f];
+
+            // For each (spVal, childVal, c) in childCounts_:
+            for (int spVal = 0; spVal < spCard; spVal++) {
+                for (int childVal = 0; childVal < childCard; childVal++) {
+                    for (int c = 0; c < statesClass_; c++) {
+                        int idx = offset + spVal * (childCard * statesClass_)
+                            + childVal * statesClass_
+                            + c;
+                        double num = childCounts_[idx] + alpha_;
+                        // denominator = spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * (#states of child)
+                        double denom = spFeatureCounts_[spVal * statesClass_ + c]
+                            + alpha_ * childCard;
+                        childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
+                    }
+                }
+            }
+        }
+    }
+
+    // --------------------------------------
+    // predict_proba
+    // --------------------------------------
+    //
+    // For a single instance x of dimension nFeatures_:
+    //   P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp)
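+    //
+    // Every product is additionally scaled by initializer_ and renormalized at the end,
+    // so only the ratios between classes matter; equivalently, in log space:
+    //   log P(c | x) = log p(c) + log p(x_sp | c) + Σ(child ≠ sp) log p(x_child | c, x_sp) + const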
+    //
+    // --------------------------------------
+    std::vector<double> XSpode::predict_proba(const std::vector<int>& instance) const
+    {
+        std::vector<double> probs(statesClass_, 0.0);
+
+        // Multiply p(c) × p(x_sp | c)
+        int spVal = instance[superParent_];
+        for (int c = 0; c < statesClass_; c++) {
+            double pc = classPriors_[c];
+            double pSpC = spFeatureProbs_[spVal * statesClass_ + c];
+            probs[c] = pc * pSpC * initializer_;
+        }
+
+        // Multiply by each child's probability p(x_child | c, x_sp)
+        for (int feature = 0; feature < nFeatures_; feature++) {
+            if (feature == superParent_) continue; // skip sp
+            int sf = instance[feature];
+            int offset = childOffsets_[feature];
+            int childCard = states_[feature];
+            // Index into childProbs_ = offset + spVal * (childCard * statesClass_) + childVal * statesClass_ + c
+            int base = offset + spVal * (childCard * statesClass_) + sf * statesClass_;
+            for (int c = 0; c < statesClass_; c++) {
+                probs[c] *= childProbs_[base + c];
+            }
+        }
+
+        // Normalize
+        normalize(probs);
+        return probs;
+    }
+
+    std::vector<std::vector<double>> XSpode::predict_proba(const std::vector<std::vector<int>>& test_data)
+    {
+        int test_size = test_data[0].size();
+        int sample_size = test_data.size();
+        auto probabilities = std::vector<std::vector<double>>(test_size, std::vector<double>(statesClass_));
+
+        int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
+        std::vector<std::thread> threads;
+        auto worker = [&](const std::vector<std::vector<int>>& samples, int begin, int chunk, int sample_size, std::vector<std::vector<double>>& predictions) {
+            std::string threadName = "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
+#if defined(__linux__)
+            pthread_setname_np(pthread_self(), threadName.c_str());
+#else
+            pthread_setname_np(threadName.c_str());
+#endif
+            std::vector<int> instance(sample_size);
+            for (int sample = begin; sample < begin + chunk; ++sample) {
+                for (int feature = 0; feature < sample_size; ++feature) {
+                    instance[feature] = samples[feature][sample];
+                }
+                predictions[sample] = predict_proba(instance);
+            }
+            semaphore_.release();
+        };
+        for (int begin = 0; begin < test_size; begin += chunk_size) {
+            int chunk = std::min(chunk_size, test_size - begin);
+            semaphore_.acquire();
+            threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
+        }
+        for (auto& thread : threads) {
+            thread.join();
+        }
+        return probabilities;
+    }
+
+    // --------------------------------------
+    // predict
+    // --------------------------------------
+    //
+    // Return the class argmax( P(c | x) ).
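+    //
+    // Example: if predict_proba(x) returns {0.2, 0.7, 0.1}, predict(x) returns 1,
+    // the index of the largest posterior (ties resolve to the first maximum).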
+    // --------------------------------------
+    int XSpode::predict(const std::vector<int>& instance) const
+    {
+        auto p = predict_proba(instance);
+        return static_cast<int>(std::distance(p.begin(),
+            std::max_element(p.begin(), p.end())));
+    }
+
+    std::vector<int> XSpode::predict(std::vector<std::vector<int>>& test_data)
+    {
+        if (!fitted) {
+            throw std::logic_error(CLASSIFIER_NOT_FITTED);
+        }
+        auto probabilities = predict_proba(test_data);
+        std::vector<int> predictions(probabilities.size(), 0);
+
+        for (size_t i = 0; i < probabilities.size(); i++) {
+            predictions[i] = std::distance(probabilities[i].begin(), std::max_element(probabilities[i].begin(), probabilities[i].end()));
+        }
+
+        return predictions;
+    }
+
+    // --------------------------------------
+    // Utility: normalize
+    // --------------------------------------
+    void XSpode::normalize(std::vector<double>& v) const
+    {
+        double sum = 0.0;
+        for (auto val : v) { sum += val; }
+        if (sum <= 0.0) {
+            return;
+        }
+        for (auto& val : v) {
+            val /= sum;
+        }
+    }
+
+    // --------------------------------------
+    // Representation of the model
+    // --------------------------------------
+    std::string XSpode::to_string() const
+    {
+        std::ostringstream oss;
+        oss << "---- SPODE Model ----" << std::endl
+            << "nFeatures_   = " << nFeatures_ << std::endl
+            << "superParent_ = " << superParent_ << std::endl
+            << "statesClass_ = " << statesClass_ << std::endl
+            << std::endl;
+        oss << "States: [";
+        for (int s : states_) oss << s << " ";
+        oss << "]" << std::endl;
+        oss << "classCounts_: [";
+        for (double c : classCounts_) oss << c << " ";
+        oss << "]" << std::endl;
+        oss << "classPriors_: [";
+        for (double c : classPriors_) oss << c << " ";
+        oss << "]" << std::endl;
+        oss << "spFeatureCounts_: size = " << spFeatureCounts_.size() << std::endl << "[";
+        for (double c : spFeatureCounts_) oss << c << " ";
+        oss << "]" << std::endl;
+        oss << "spFeatureProbs_: size = " << spFeatureProbs_.size() << std::endl << "[";
+        for (double c : spFeatureProbs_) oss << c << " ";
+        oss << "]" << std::endl;
+        oss << "childCounts_: size = " << childCounts_.size() << std::endl << "[";
+        for (double cc : childCounts_) oss << cc << " ";
+        oss << "]" << std::endl;
+        oss << "childProbs_: size = " << childProbs_.size() << std::endl << "[";
+        for (double cp : childProbs_) oss << cp << " ";
+        oss << "]" << std::endl;
+        oss << "childOffsets_: [";
+        for (int co : childOffsets_) oss << co << " ";
+        oss << "]" << std::endl;
+        oss << "---------------------" << std::endl;
+        return oss.str();
+    }
+
+    int XSpode::getNumberOfNodes() const { return nFeatures_ + 1; }
+    int XSpode::getClassNumStates() const { return statesClass_; }
+    int XSpode::getNFeatures() const { return nFeatures_; }
+    int XSpode::getNumberOfStates() const
+    {
+        return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
+    }
+    int XSpode::getNumberOfEdges() const
+    {
+        return nFeatures_ * (2 * nFeatures_ - 1);
+    }
+    std::vector<int>& XSpode::getStates() { return states_; }
+
+}
diff --git a/bayesnet/classifiers/XSPODE.h b/bayesnet/classifiers/XSPODE.h
new file mode 100644
index 0000000..41301ad
--- /dev/null
+++ b/bayesnet/classifiers/XSPODE.h
@@ -0,0 +1,79 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef XSPODE_H
+#define XSPODE_H
+
+#include <vector>
+#include <string>
+#include <sstream>
+#include <numeric>
+#include <limits>
+#include <algorithm>
+#include <cmath>
+#include <thread>
+#include <stdexcept>
+#include <pthread.h>
+#include <torch/torch.h>
+#include "Classifier.h"
"bayesnet/utils/CountingSemaphore.h" + +namespace bayesnet { + + class XSpode : public Classifier { + public: + explicit XSpode(int spIndex); + std::vector predict_proba(const std::vector& instance) const; + std::vector> predict_proba(const std::vector>& test_data); + int predict(const std::vector& instance) const; + std::vector predict(std::vector>& test_data); + void normalize(std::vector& v) const; + std::string to_string() const; + int statesClass() const; + int getNFeatures() const; + int getNumberOfNodes() const override; + int getNumberOfEdges() const override; + int getNumberOfStates() const override; + int getClassNumStates() const override; + std::vector& getStates(); + std::vector graph(const std::string& title) const override { return std::vector({title}); } + void fit(std::vector>& X, std::vector& y, torch::Tensor& weights_, const Smoothing_t smoothing); + protected: + void buildModel(const torch::Tensor& weights) override; + void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override; + private: + void addSample(const std::vector& instance, double weight); + void computeProbabilities(); + int superParent_; + int nFeatures_; + int statesClass_; + std::vector states_; // [states_feat0, ..., states_feat(N-1)] (class not included in this array) + + const std::string CLASSIFIER_NOT_FITTED = "Classifier has not been fitted"; + + // Class counts + std::vector classCounts_; // [c], accumulative + std::vector classPriors_; // [c], after normalization + + // For p(x_sp = spVal | c) + std::vector spFeatureCounts_; // [spVal * statesClass_ + c] + std::vector spFeatureProbs_; // same shape, after normalization + + // For p(x_child = childVal | x_sp = spVal, c) + // childCounts_ is big enough to hold all child features except sp: + // For each child f, we store childOffsets_[f] as the start index, then + // childVal, spVal, c => the data. 
+        std::vector<double> childCounts_;
+        std::vector<double> childProbs_;
+        std::vector<int> childOffsets_;
+
+        double alpha_ = 1.0;
+        double initializer_; // for numerical stability
+        CountingSemaphore& semaphore_;
+    };
+}
+
+#endif // XSPODE_H
diff --git a/bayesnet/ensembles/Ensemble.cc b/bayesnet/ensembles/Ensemble.cc
index 4b71a16..0b977ff 100644
--- a/bayesnet/ensembles/Ensemble.cc
+++ b/bayesnet/ensembles/Ensemble.cc
@@ -4,7 +4,6 @@
 // SPDX-License-Identifier: MIT
 // ***************************************************************
 #include "Ensemble.h"
-#include "bayesnet/utils/CountingSemaphore.h"
 
 namespace bayesnet {
 
diff --git a/bayesnet/ensembles/XBAODE.cc b/bayesnet/ensembles/XBAODE.cc
new file mode 100644
index 0000000..bc8f657
--- /dev/null
+++ b/bayesnet/ensembles/XBAODE.cc
@@ -0,0 +1,179 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+#include <algorithm>
+#include <cmath>
+#include <memory>
+#include <random>
+#include <tuple>
+#include "XBAODE.h"
+#include "bayesnet/classifiers/XSPODE.h"
+#include "bayesnet/utils/TensorUtils.hpp"
+
+namespace bayesnet {
+    XBAODE::XBAODE()
+    {
+        validHyperparameters = { "alpha_block", "order", "convergence", "convergence_best", "bisection", "threshold", "maxTolerance",
+            "predict_voting", "select_features" };
+    }
+    void XBAODE::add_model(std::unique_ptr<Classifier> model, double significance)
+    {
+        models.push_back(std::move(model));
+        n_models++;
+        significanceModels.push_back(significance);
+    }
+    void XBAODE::remove_last_model()
+    {
+        models.pop_back();
+        significanceModels.pop_back();
+        n_models--;
+    }
+    std::vector<int> XBAODE::initializeModels(const Smoothing_t smoothing)
+    {
+        torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
+        std::vector<int> featuresSelected = featureSelection(weights_);
+        for (const int& feature : featuresSelected) {
+            std::unique_ptr<Classifier> model = std::make_unique<XSpode>(feature);
+            model->fit(dataset, features, className, states, weights_, smoothing);
+            add_model(std::move(model), 1.0);
+        }
+        notes.push_back("Used features in initialization: " + std::to_string(featuresSelected.size()) + " of " + std::to_string(features.size()) + " with " + select_features_algorithm);
+        return featuresSelected;
+    }
+    void XBAODE::trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing)
+    {
+        X_train_ = TensorUtils::to_matrix(X_train);
+        y_train_ = TensorUtils::to_vector<int>(y_train);
+        X_test_ = TensorUtils::to_matrix(X_test);
+        y_test_ = TensorUtils::to_vector<int>(y_test);
+        significanceModels.resize(n, 0.0); // n is initialized in Classifier.cc
+        fitted = true;
+        double alpha_t;
+        torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
+        bool finished = false;
+        std::vector<int> featuresUsed;
+        n_models = 0;
+        if (selectFeatures) {
+            featuresUsed = initializeModels(smoothing);
+            auto ypred = predict(X_train_);
+            auto ypred_t = torch::tensor(ypred);
+            std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
+            // Update the significance of the models selected in initialization
+            for (const int& feature : featuresUsed) {
+                significanceModels.pop_back();
+            }
+            for (const int& feature : featuresUsed) {
+                significanceModels.push_back(alpha_t);
+            }
+            // VLOG_SCOPE_F(1, "SelectFeatures. alpha_t: %f n_models: %d", alpha_t, n_models);
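+            // update_weights comes from Boost and is not shown in this diff; under the usual
+            // SAMME-style AdaBoost update it would return the amount of say
+            //   alpha_t = ln((1 - eps) / eps) + ln(K - 1)
+            // for weighted error eps and K classes, and up-weight the misclassified
+            // samples before renormalizing weights_.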
+            if (finished) {
+                return;
+            }
+        }
+        int numItemsPack = 0; // The counter of the models inserted in the current pack
+        // Variables to control the accuracy finish condition
+        double priorAccuracy = 0.0;
+        double improvement = 1.0;
+        double convergence_threshold = 1e-4;
+        int tolerance = 0; // number of times the accuracy is lower than the convergence_threshold
+        // Step 0: Set the finish conditions
+        //   (1) epsilon sub t > 0.5 => inverse the weights_ policy
+        //   (2) validation error is not decreasing
+        //   (3) run out of features
+        bool ascending = order_algorithm == bayesnet::Orders.ASC;
+        std::mt19937 g{ 173 };
+        while (!finished) {
+            // Step 1: Build ranking with mutual information
+            auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
+            if (order_algorithm == bayesnet::Orders.RAND) {
+                std::shuffle(featureSelection.begin(), featureSelection.end(), g);
+            }
+            // Remove used features
+            featureSelection.erase(remove_if(featureSelection.begin(), featureSelection.end(), [&](auto x)
+                { return std::find(featuresUsed.begin(), featuresUsed.end(), x) != featuresUsed.end();}),
+                featureSelection.end()
+            );
+            int k = bisection ? pow(2, tolerance) : 1;
+            int counter = 0; // The model counter of the current pack
+            // VLOG_SCOPE_F(1, "counter=%d k=%d featureSelection.size: %zu", counter, k, featureSelection.size());
+            while (counter++ < k && featureSelection.size() > 0) {
+                auto feature = featureSelection[0];
+                featureSelection.erase(featureSelection.begin());
+                std::unique_ptr<Classifier> model;
+                model = std::make_unique<XSpode>(feature);
+                dynamic_cast<XSpode*>(model.get())->fit(X_train_, y_train_, weights_, smoothing); // using the exclusive XSpode fit method
+                std::vector<int> ypred;
+                if (alpha_block) {
+                    //
+                    // Compute the prediction with the current ensemble + the candidate model
+                    //
+                    // Add the model to the ensemble
+                    add_model(std::move(model), 1.0);
+                    // Compute the prediction
+                    ypred = predict(X_train_);
+                    // Take the model back and remove it from the ensemble
+                    model = std::move(models.back());
+                    remove_last_model();
+                } else {
+                    ypred = model->predict(X_train_);
+                }
+                // Step 3.1: Compute the classifier amount of say
+                auto ypred_t = torch::tensor(ypred);
+                std::tie(weights_, alpha_t, finished) = update_weights(y_train, ypred_t, weights_);
+                // Step 3.4: Store the classifier and its accuracy to weigh its future vote
+                numItemsPack++;
+                featuresUsed.push_back(feature);
+                add_model(std::move(model), alpha_t);
+                // VLOG_SCOPE_F(2, "finished: %d numItemsPack: %d n_models: %d featuresUsed: %zu", finished, numItemsPack, n_models, featuresUsed.size());
+            } // End of the pack
+            if (convergence && !finished) {
+                auto y_val_predict = predict(X_test);
+                double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
+                if (priorAccuracy == 0) {
+                    priorAccuracy = accuracy;
+                } else {
+                    improvement = accuracy - priorAccuracy;
+                }
+                if (improvement < convergence_threshold) {
+                    // VLOG_SCOPE_F(3, "  (improvement<threshold) tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
+                    tolerance++;
+                } else {
+                    // VLOG_SCOPE_F(3, "* (improvement>=threshold) Reset. tolerance: %d numItemsPack: %d improvement: %f prior: %f current: %f", tolerance, numItemsPack, improvement, priorAccuracy, accuracy);
+                    tolerance = 0; // Reset the counter if the model performs better
+                    numItemsPack = 0;
+                }
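+                // Note: tolerance drives the next pack size when bisection is enabled
+                // (k = 2^tolerance above), so consecutive rounds without improvement
+                // train packs of 1, 2, 4, ... models until maxTolerance stops the loop.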
+                if (convergence_best) {
+                    // Keep the best accuracy so far as the prior accuracy
+                    priorAccuracy = std::max(accuracy, priorAccuracy);
+                } else {
+                    // Keep the last accuracy obtained as the prior accuracy
+                    priorAccuracy = accuracy;
+                }
+            }
+            // VLOG_SCOPE_F(1, "tolerance: %d featuresUsed.size: %zu features.size: %zu", tolerance, featuresUsed.size(), features.size());
+            finished = finished || tolerance > maxTolerance || featuresUsed.size() == features.size();
+        }
+        if (tolerance > maxTolerance) {
+            if (numItemsPack < n_models) {
+                notes.push_back("Convergence threshold reached & " + std::to_string(numItemsPack) + " models eliminated");
+                // VLOG_SCOPE_F(4, "Convergence threshold reached & %d models eliminated of %d", numItemsPack, n_models);
+                for (int i = static_cast<int>(featuresUsed.size()) - 1; i >= static_cast<int>(featuresUsed.size()) - numItemsPack; --i) {
+                    remove_last_model();
+                    significanceModels[featuresUsed[i]] = 0.0;
+                }
+                // VLOG_SCOPE_F(4, "*Convergence threshold %d models left & %d features used.", n_models, featuresUsed.size());
+            } else {
+                notes.push_back("Convergence threshold reached & 0 models eliminated");
+                // VLOG_SCOPE_F(4, "Convergence threshold reached & 0 models eliminated n_models=%d numItemsPack=%d", n_models, numItemsPack);
+            }
+        }
+        if (featuresUsed.size() != features.size()) {
+            notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
+            status = bayesnet::WARNING;
+        }
+        notes.push_back("Number of models: " + std::to_string(n_models));
+        return;
+    }
+}
\ No newline at end of file
diff --git a/bayesnet/ensembles/XBAODE.h b/bayesnet/ensembles/XBAODE.h
new file mode 100644
index 0000000..04dd8c1
--- /dev/null
+++ b/bayesnet/ensembles/XBAODE.h
@@ -0,0 +1,36 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef XBAODE_H
+#define XBAODE_H
+#include <vector>
+#include <string>
+#include <memory>
+
+#include <torch/torch.h>
+#include "bayesnet/classifiers/XSPODE.h"
+#include "Boost.h"
+
+namespace bayesnet {
+    class XBAODE : public Boost {
+        // TODO: keep a vector of trained models and do an ensemble predict with all of them.
+        // TODO: try XA1DE with ORIGINAL and LAPLACE smoothing and check the differences
+        // between initializing the weights to 1 and to 1/m.
+    public:
+        XBAODE();
+        std::string getVersion() override { return version; }
+    protected:
+        void trainModel(const torch::Tensor& weights, const bayesnet::Smoothing_t smoothing) override;
+    private:
+        void add_model(std::unique_ptr<Classifier> model, double significance);
+        void remove_last_model();
+        std::vector<int> initializeModels(const Smoothing_t smoothing);
+        std::vector<std::vector<int>> X_train_, X_test_;
+        std::vector<int> y_train_, y_test_;
+        std::string version = "0.9.7";
+    };
+}
+#endif // XBAODE_H
\ No newline at end of file
diff --git a/bayesnet/utils/CountingSemaphore.h b/bayesnet/utils/CountingSemaphore.h
index 25d1ac7..d7afc69 100644
--- a/bayesnet/utils/CountingSemaphore.h
+++ b/bayesnet/utils/CountingSemaphore.h
@@ -32,6 +32,14 @@ public:
             cv_.notify_one();
         }
     }
+    uint getCount() const
+    {
+        return count_;
+    }
+    uint getMaxCount() const
+    {
+        return max_count_;
+    }
 private:
     CountingSemaphore()
         : max_count_(std::max(1u, static_cast<uint>(0.95 * std::thread::hardware_concurrency()))),
diff --git a/bayesnet/utils/TensorUtils.hpp b/bayesnet/utils/TensorUtils.hpp
new file mode 100644
index 0000000..dffd879
--- /dev/null
+++ b/bayesnet/utils/TensorUtils.hpp
@@ -0,0 +1,51 @@
+#ifndef TENSORUTILS_HPP
+#define TENSORUTILS_HPP
+#include <algorithm>
+#include <vector>
+#include <torch/torch.h>
+namespace bayesnet {
+    class TensorUtils {
+    public:
+        static std::vector<std::vector<int>> to_matrix(const torch::Tensor& X)
+        {
+            // Ensure the tensor is contiguous in memory
+            auto X_contig = X.contiguous();
+
+            // Access the tensor data pointer directly
+            auto data_ptr = X_contig.data_ptr<int>();
+
+            // If you are using int64_t as the data type, use the following lines instead:
+            //auto data_ptr = X_contig.data_ptr<int64_t>();
+            //std::vector<std::vector<int64_t>> data(X.size(0), std::vector<int64_t>(X.size(1)));
+
+            // Prepare the output container
+            std::vector<std::vector<int>> data(X.size(0), std::vector<int>(X.size(1)));
+
+            // Fill the 2D vector in a single loop using pointer arithmetic
+            int rows = X.size(0);
+            int cols = X.size(1);
+            for (int i = 0; i < rows; ++i) {
+                std::copy(data_ptr + i * cols, data_ptr + (i + 1) * cols, data[i].begin());
+            }
+            return data;
+        }
+        template <typename T>
+        static std::vector<T> to_vector(const torch::Tensor& y)
+        {
+            // Ensure the tensor is contiguous in memory
+            auto y_contig = y.contiguous();
+
+            // Access the data pointer
+            auto data_ptr = y_contig.data_ptr<T>();
+
+            // Prepare the output container
+            std::vector<T> data(y.size(0));
+
+            // Copy the data efficiently
+            std::copy(data_ptr, data_ptr + y.size(0), data.begin());
+
+            return data;
+        }
+    };
+}
+
+#endif // TENSORUTILS_HPP
\ No newline at end of file