Initial commit
260
bayesnet/utils/BayesMetrics.cc
Normal file
@@ -0,0 +1,260 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <algorithm>
#include <cassert>
#include <map>
#include <unordered_map>
#include <tuple>
#include "Mst.h"
#include "BayesMetrics.h"
namespace bayesnet {
    // samples is an (n+1) x m tensor used to fit the model
    Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
        : samples(samples)
        , className(className)
        , features(features)
        , classNumStates(classNumStates)
    {
    }
    // samples is an (n+1) x m std::vector used to fit the model
    Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
        : samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
        , className(className)
        , features(features)
        , classNumStates(classNumStates)
    {
        for (int i = 0; i < static_cast<int>(vsamples.size()); ++i) {
            samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32));
        }
        samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
    }
    std::vector<std::pair<int, int>> Metrics::SelectKPairs(const torch::Tensor& weights, std::vector<int>& featuresExcluded, bool ascending, unsigned k)
    {
        // Return the K best feature pairs, ranked by conditional mutual information
        int n = static_cast<int>(features.size());
        // compute scores
        scoresKPairs.clear();
        pairsKBest.clear();
        auto labels = samples.index({ -1, "..." });
        for (int i = 0; i < n - 1; ++i) {
            if (std::find(featuresExcluded.begin(), featuresExcluded.end(), i) != featuresExcluded.end()) {
                continue;
            }
            for (int j = i + 1; j < n; ++j) {
                if (std::find(featuresExcluded.begin(), featuresExcluded.end(), j) != featuresExcluded.end()) {
                    continue;
                }
                auto key = std::make_pair(i, j);
                auto value = conditionalMutualInformation(samples.index({ i, "..." }), samples.index({ j, "..." }), labels, weights);
                scoresKPairs.push_back({ key, value });
            }
        }
        // sort scores
        if (ascending) {
            sort(scoresKPairs.begin(), scoresKPairs.end(), [](auto& a, auto& b)
                { return a.second < b.second; });
        } else {
            sort(scoresKPairs.begin(), scoresKPairs.end(), [](auto& a, auto& b)
                { return a.second > b.second; });
        }
        for (auto& [pairs, score] : scoresKPairs) {
            pairsKBest.push_back(pairs);
        }
        if (k != 0 && k < pairsKBest.size()) {
            if (ascending) {
                // in ascending order the k best scores sit at the end, so drop from the front
                int limit = pairsKBest.size() - k;
                for (int i = 0; i < limit; i++) {
                    pairsKBest.erase(pairsKBest.begin());
                    scoresKPairs.erase(scoresKPairs.begin());
                }
            } else {
                pairsKBest.resize(k);
                scoresKPairs.resize(k);
            }
        }
        return pairsKBest;
    }
    std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
    {
        // Return the K best features, ranked by mutual information with the class
        auto n = features.size();
        if (k == 0) {
            k = n;
        }
        // compute scores
        scoresKBest.clear();
        featuresKBest.clear();
        auto label = samples.index({ -1, "..." });
        for (int i = 0; i < static_cast<int>(n); ++i) {
            scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
            featuresKBest.push_back(i);
        }
        // sort & reduce scores and features
        if (ascending) {
            sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
                { return scoresKBest[i] < scoresKBest[j]; });
            sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
            if (k < n) {
                for (int i = 0; i < static_cast<int>(n - k); ++i) {
                    featuresKBest.erase(featuresKBest.begin());
                    scoresKBest.erase(scoresKBest.begin());
                }
            }
        } else {
            sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
                { return scoresKBest[i] > scoresKBest[j]; });
            sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
            featuresKBest.resize(k);
            scoresKBest.resize(k);
        }
        return featuresKBest;
    }
    std::vector<double> Metrics::getScoresKBest() const
    {
        return scoresKBest;
    }
    std::vector<std::pair<std::pair<int, int>, double>> Metrics::getScoresKPairs() const
    {
        return scoresKPairs;
    }
    torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
    {
        auto result = std::vector<double>();
        auto source = std::vector<std::string>(features);
        source.push_back(className);
        auto combinations = doCombinations(source);
        // Compute class prior
        auto margin = torch::zeros({ classNumStates }, torch::kFloat);
        for (int value = 0; value < classNumStates; ++value) {
            auto mask = samples.index({ -1, "..." }) == value;
            margin[value] = mask.sum().item<double>() / samples.size(1);
        }
        for (auto [first, second] : combinations) {
            int index_first = find(features.begin(), features.end(), first) - features.begin();
            int index_second = find(features.begin(), features.end(), second) - features.begin();
            double accumulated = 0;
            for (int value = 0; value < classNumStates; ++value) {
                auto mask = samples.index({ -1, "..." }) == value;
                auto first_dataset = samples.index({ index_first, mask });
                auto second_dataset = samples.index({ index_second, mask });
                auto weights_dataset = weights.index({ mask });
                auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);
                auto pb = margin[value].item<double>();
                accumulated += pb * mi;
            }
            result.push_back(accumulated);
        }
        long n_vars = source.size();
        auto matrix = torch::zeros({ n_vars, n_vars });
        auto indices = torch::triu_indices(n_vars, n_vars, 1);
        for (int i = 0; i < static_cast<int>(result.size()); ++i) {
            auto x = indices[0][i];
            auto y = indices[1][i];
            matrix[x][y] = result[i];
            matrix[y][x] = result[i];
        }
        return matrix;
    }
    // Measured in nats (natural logarithm, base e)
    // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas, p. 14
    double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
    {
        torch::Tensor counts = feature.bincount(weights);
        double totalWeight = counts.sum().item<double>();
        torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
        torch::Tensor logProbs = torch::log(probs);
        torch::Tensor entropy = -probs * logProbs;
        return entropy.nansum().item<double>();
    }
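    // Editor's note (not part of the commit), a worked example with made-up
    // numbers: for a binary feature with weighted probabilities p = {0.25, 0.75},
    // H = -(0.25 * ln 0.25 + 0.75 * ln 0.75) ≈ 0.5623 nats.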
    // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
    double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
    {
        int numSamples = firstFeature.sizes()[0];
        torch::Tensor featureCounts = secondFeature.bincount(weights);
        std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
        double totalWeight = 0;
        for (auto i = 0; i < numSamples; i++) {
            jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
            totalWeight += weights[i].item<double>();
        }
        if (totalWeight == 0)
            return 0;
        double entropyValue = 0;
        for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
            double p_f = featureCounts[value].item<double>() / totalWeight;
            double entropy_f = 0;
            for (auto& [label, jointCount] : jointCounts[value]) {
                double p_l_f = jointCount / featureCounts[value].item<double>();
                if (p_l_f > 0) {
                    entropy_f -= p_l_f * log(p_l_f);
                }
                // zero-probability cells contribute nothing to the entropy
            }
            entropyValue += p_f * entropy_f;
        }
        return entropyValue;
    }
    // H(X|Y,C) = sum_{y in Y, c in C} p(y,c) H(X|Y=y,C=c)
    double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
    {
        // Ensure the tensors are of the same length
        assert(firstFeature.size(0) == secondFeature.size(0) && firstFeature.size(0) == labels.size(0) && firstFeature.size(0) == weights.size(0));
        // Access the tensors through accessors for easier processing
        auto firstFeatureData = firstFeature.accessor<int, 1>();
        auto secondFeatureData = secondFeature.accessor<int, 1>();
        auto labelsData = labels.accessor<int, 1>();
        auto weightsData = weights.accessor<double, 1>();
        int numSamples = firstFeature.size(0);
        // Maps for joint and marginal counts
        std::map<std::tuple<int, int, int>, double> jointCount;
        std::map<std::tuple<int, int>, double> marginalCount;
        // Compute joint and marginal counts; the marginal is over (y, c) so that
        // the ratio below yields p(x | y, c), matching H(X|Y,C) above
        for (int i = 0; i < numSamples; ++i) {
            auto keyJoint = std::make_tuple(firstFeatureData[i], labelsData[i], secondFeatureData[i]);
            auto keyMarginal = std::make_tuple(secondFeatureData[i], labelsData[i]);

            jointCount[keyJoint] += weightsData[i];
            marginalCount[keyMarginal] += weightsData[i];
        }
        // Total weight sum
        double totalWeight = torch::sum(weights).item<double>();
        if (totalWeight == 0)
            return 0;
        // Compute the conditional entropy
        double conditionalEntropy = 0.0;
        for (const auto& [keyJoint, jointFreq] : jointCount) {
            auto [x, c, y] = keyJoint;
            auto keyMarginal = std::make_tuple(y, c);
            double p_x_given_yc = jointFreq / marginalCount[keyMarginal];
            if (p_x_given_yc > 0) {
                conditionalEntropy -= (jointFreq / totalWeight) * std::log(p_x_given_yc);
            }
        }
        return conditionalEntropy;
    }
    // I(X;Y) = H(Y) - H(Y|X) ; I(X;Y) >= 0
    double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
    {
        return std::max(entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights), 0.0);
    }
    // I(X;Y|C) = H(X|C) - H(X|Y,C) >= 0
    double Metrics::conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights)
    {
        return std::max(conditionalEntropy(firstFeature, labels, weights) - conditionalEntropy(firstFeature, secondFeature, labels, weights), 0.0);
    }
    /*
    Compute the maximum spanning tree, taking the weights as distances
    and the indices of the weight matrix as the nodes of this square matrix,
    using Kruskal's algorithm
    */
    std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root)
    {
        auto mst = MST(features, weights, root);
        return mst.maximumSpanningTree();
    }
}
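Editor's note: a minimal usage sketch of Metrics, not part of the commit; the dataset values, feature names, and class name below are made up for illustration.

#include <iostream>
#include "BayesMetrics.h"

int main()
{
    // Three binary features (rows) over five samples, plus the class labels
    std::vector<std::vector<int>> X = { { 0, 1, 0, 1, 1 }, { 1, 1, 0, 0, 1 }, { 0, 0, 1, 1, 0 } };
    std::vector<int> y = { 0, 1, 0, 1, 1 };
    auto metrics = bayesnet::Metrics(X, y, { "f0", "f1", "f2" }, "class", 2);
    // Uniform sample weights; kDouble because conditionalEntropy reads a double accessor
    auto weights = torch::full({ 5 }, 1.0 / 5.0, torch::kDouble);
    // Indices of the two features with the highest mutual information with the class
    for (auto f : metrics.SelectKBestWeighted(weights, false, 2))
        std::cout << "selected feature " << f << "\n";
}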
62
bayesnet/utils/BayesMetrics.h
Normal file
@@ -0,0 +1,62 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef BAYESNET_METRICS_H
#define BAYESNET_METRICS_H
#include <vector>
#include <string>
#include <torch/torch.h>
namespace bayesnet {
    class Metrics {
    public:
        Metrics() = default;
        Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
        Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
        std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
        std::vector<std::pair<int, int>> SelectKPairs(const torch::Tensor& weights, std::vector<int>& featuresExcluded, bool ascending = false, unsigned k = 0);
        std::vector<double> getScoresKBest() const;
        std::vector<std::pair<std::pair<int, int>, double>> getScoresKPairs() const;
        double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
        double conditionalMutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
        torch::Tensor conditionalEdge(const torch::Tensor& weights);
        std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
        // Measured in nats (natural logarithm, base e)
        // Elements of Information Theory, 2nd Edition, Thomas M. Cover, Joy A. Thomas, p. 14
        double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
        double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& labels, const torch::Tensor& weights);
    protected:
        torch::Tensor samples; // (n+1) x m torch::Tensor used to fit the model, where samples[-1] is the y vector
        std::string className;
        std::vector<std::string> features;
        template <class T>
        std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
        {
            // Enumerate all unordered pairs in input order, e.g. {a,b,c} -> (a,b), (a,c), (b,c)
            std::vector<std::pair<T, T>> result;
            for (size_t i = 0; i + 1 < source.size(); ++i) {
                T temp = source[i];
                for (size_t j = i + 1; j < source.size(); ++j) {
                    result.push_back({ temp, source[j] });
                }
            }
            return result;
        }
        template <class T>
        T pop_first(std::vector<T>& v)
        {
            T temp = v[0];
            v.erase(v.begin());
            return temp;
        }
    private:
        int classNumStates = 0;
        std::vector<double> scoresKBest;
        std::vector<int> featuresKBest; // sorted indices of the features
        std::vector<std::pair<int, int>> pairsKBest; // sorted indices of the pairs
        std::vector<std::pair<std::pair<int, int>, double>> scoresKPairs;
        double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
    };
}
#endif
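Editor's note: a companion sketch for SelectKPairs, not part of the commit; it reuses the hypothetical metrics and weights objects from the sketch above and ranks feature pairs by I(Xi;Xj|C).

    std::vector<int> excluded;                       // no features excluded
    auto pairs = metrics.SelectKPairs(weights, excluded, false, 3);
    for (const auto& [ij, score] : metrics.getScoresKPairs())
        std::cout << "(" << ij.first << ", " << ij.second << ") -> " << score << "\n";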
54
bayesnet/utils/CountingSemaphore.h
Normal file
@@ -0,0 +1,54 @@
#ifndef COUNTING_SEMAPHORE_H
#define COUNTING_SEMAPHORE_H
#include <mutex>
#include <condition_variable>
#include <algorithm>
#include <thread>

class CountingSemaphore {
public:
    static CountingSemaphore& getInstance()
    {
        static CountingSemaphore instance;
        return instance;
    }
    // Delete copy constructor and assignment operator
    CountingSemaphore(const CountingSemaphore&) = delete;
    CountingSemaphore& operator=(const CountingSemaphore&) = delete;
    void acquire()
    {
        std::unique_lock<std::mutex> lock(mtx_);
        cv_.wait(lock, [this]() { return count_ > 0; });
        --count_;
    }
    void release()
    {
        std::lock_guard<std::mutex> lock(mtx_);
        ++count_;
        if (count_ <= max_count_) {
            cv_.notify_one();
        }
    }
    uint getCount() const
    {
        return count_;
    }
    uint getMaxCount() const
    {
        return max_count_;
    }
private:
    CountingSemaphore()
        : max_count_(std::max(1u, static_cast<uint>(0.95 * std::thread::hardware_concurrency()))),
        count_(max_count_)
    {
    }
    std::mutex mtx_;
    std::condition_variable cv_;
    const uint max_count_;
    uint count_;
};
#endif
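Editor's note: a minimal usage sketch, not part of the commit, throttling worker threads to the hardware budget the singleton computes.

#include <vector>
#include <thread>
#include "CountingSemaphore.h"

void work(int id)
{
    auto& sem = CountingSemaphore::getInstance();
    sem.acquire();              // blocks while all slots are taken
    // ... CPU-bound work goes here ...
    sem.release();
}

int main()
{
    std::vector<std::thread> pool;
    for (int i = 0; i < 16; ++i)
        pool.emplace_back(work, i);
    for (auto& t : pool)
        t.join();
}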
120
bayesnet/utils/Mst.cc
Normal file
@@ -0,0 +1,120 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <algorithm>
#include <sstream>
#include <vector>
#include <list>
#include "Mst.h"
/*
    Based on the code from https://www.softwaretestinghelp.com/minimum-spanning-tree-tutorial/
*/

namespace bayesnet {
    Graph::Graph(int V) : V(V), parent(std::vector<int>(V))
    {
        for (int i = 0; i < V; i++)
            parent[i] = i;
        G.clear();
        T.clear();
    }
    void Graph::addEdge(int u, int v, float wt)
    {
        G.push_back({ wt, { u, v } });
    }
    int Graph::find_set(int i)
    {
        // If i is the parent of itself
        if (i == parent[i])
            return i;
        // else recursively find the parent of i
        return find_set(parent[i]);
    }
    void Graph::union_set(int u, int v)
    {
        parent[u] = parent[v];
    }
    void Graph::kruskal_algorithm()
    {
        // sort the edges in decreasing order of weight
        stable_sort(G.begin(), G.end(), [](const auto& left, const auto& right) { return left.first > right.first; });
        for (int i = 0; i < static_cast<int>(G.size()); i++) {
            int uSt = find_set(G[i].second.first);
            int vEd = find_set(G[i].second.second);
            if (uSt != vEd) {
                T.push_back(G[i]); // add to the mst vector
                union_set(uSt, vEd);
            }
        }
    }

    void MST::insertElement(std::list<int>& variables, int variable)
    {
        if (std::find(variables.begin(), variables.end(), variable) == variables.end()) {
            variables.push_front(variable);
        }
    }

    std::vector<std::pair<int, int>> MST::reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
    {
        // Create the edges of a DAG from the MST
        // replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted
        auto result = std::vector<std::pair<int, int>>();
        auto visited = std::vector<int>();
        auto nextVariables = std::list<int>();
        nextVariables.push_front(root_original);
        while (nextVariables.size() > 0) {
            int root = nextVariables.front();
            nextVariables.pop_front();
            for (int i = 0; i < static_cast<int>(T.size()); ++i) {
                auto [weight, edge] = T[i];
                auto [from, to] = edge;
                if (from == root || to == root) {
                    // store indices in decreasing order so they can be erased safely below
                    visited.insert(visited.begin(), i);
                    if (from == root) {
                        result.push_back({ from, to });
                        insertElement(nextVariables, to);
                    } else {
                        result.push_back({ to, from });
                        insertElement(nextVariables, from);
                    }
                }
            }
            // Remove visited
            for (int i = 0; i < static_cast<int>(visited.size()); ++i) {
                T.erase(T.begin() + visited[i]);
            }
            visited.clear();
        }
        if (T.size() > 0) {
            for (int i = 0; i < static_cast<int>(T.size()); ++i) {
                auto [weight, edge] = T[i];
                auto [from, to] = edge;
                result.push_back({ from, to });
            }
        }
        return result;
    }

    MST::MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root) : features(features), weights(weights), root(root) {}
    std::vector<std::pair<int, int>> MST::maximumSpanningTree()
    {
        auto num_features = static_cast<int>(features.size());
        Graph g(num_features);
        // Make a complete graph
        for (int i = 0; i < num_features - 1; ++i) {
            for (int j = i + 1; j < num_features; ++j) {
                g.addEdge(i, j, weights[i][j].item<float>());
            }
        }
        g.kruskal_algorithm();
        auto mst = g.get_mst();
        return reorder(mst, root);
    }

}
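Editor's note: a minimal sketch of how MST is driven, not part of the commit; the symmetric weight matrix below is an arbitrary example.

#include <iostream>
#include "Mst.h"

int main()
{
    std::vector<std::string> feats = { "a", "b", "c" };
    // Symmetric 3x3 weight matrix; only the upper triangle is read
    auto w = torch::tensor({ { 0.0f, 0.9f, 0.1f },
                             { 0.9f, 0.0f, 0.4f },
                             { 0.1f, 0.4f, 0.0f } });
    auto mst = bayesnet::MST(feats, w, 0);   // root the tree at feature 0
    for (auto [u, v] : mst.maximumSpanningTree())
        std::cout << u << " -> " << v << "\n"; // prints "0 -> 1" then "1 -> 2"
}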
40
bayesnet/utils/Mst.h
Normal file
@@ -0,0 +1,40 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef MST_H
#define MST_H
#include <vector>
#include <string>
#include <list>
#include <torch/torch.h>
namespace bayesnet {
    class MST {
    public:
        MST() = default;
        MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
        void insertElement(std::list<int>& variables, int variable);
        std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original);
        std::vector<std::pair<int, int>> maximumSpanningTree();
    private:
        torch::Tensor weights;
        std::vector<std::string> features;
        int root = 0;
    };
    class Graph {
    public:
        explicit Graph(int V);
        void addEdge(int u, int v, float wt);
        int find_set(int i);
        void union_set(int u, int v);
        void kruskal_algorithm();
        std::vector<std::pair<float, std::pair<int, int>>> get_mst() { return T; }
    private:
        int V; // number of nodes in the graph
        std::vector<std::pair<float, std::pair<int, int>>> G; // edge list of the graph
        std::vector<std::pair<float, std::pair<int, int>>> T; // edge list of the mst
        std::vector<int> parent;
    };
}
#endif
51
bayesnet/utils/TensorUtils.h
Normal file
@@ -0,0 +1,51 @@
#ifndef TENSORUTILS_H
#define TENSORUTILS_H
#include <algorithm>
#include <torch/torch.h>
#include <vector>
namespace bayesnet {
    class TensorUtils {
    public:
        static std::vector<std::vector<int>> to_matrix(const torch::Tensor& X)
        {
            // Ensure the tensor is contiguous in memory
            auto X_contig = X.contiguous();

            // Access the tensor data pointer directly
            auto data_ptr = X_contig.data_ptr<int>();

            // If you are using int64_t as the data type, use the following lines instead
            //auto data_ptr = X_contig.data_ptr<int64_t>();
            //std::vector<std::vector<int64_t>> data(X.size(0), std::vector<int64_t>(X.size(1)));

            // Prepare the output container
            std::vector<std::vector<int>> data(X.size(0), std::vector<int>(X.size(1)));

            // Fill the 2D vector row by row using pointer arithmetic
            int rows = X.size(0);
            int cols = X.size(1);
            for (int i = 0; i < rows; ++i) {
                std::copy(data_ptr + i * cols, data_ptr + (i + 1) * cols, data[i].begin());
            }
            return data;
        }
        template <typename T>
        static std::vector<T> to_vector(const torch::Tensor& y)
        {
            // Ensure the tensor is contiguous in memory
            auto y_contig = y.contiguous();

            // Access the data pointer
            auto data_ptr = y_contig.data_ptr<T>();

            // Prepare the output container
            std::vector<T> data(y.size(0));

            // Copy the data efficiently
            std::copy(data_ptr, data_ptr + y.size(0), data.begin());

            return data;
        }
    };
}
#endif // TENSORUTILS_H
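Editor's note: a small sketch of the two helpers, not part of the commit; note that to_matrix expects a kInt32 tensor, matching its data_ptr<int> call.

#include <iostream>
#include "TensorUtils.h"

int main()
{
    auto X = torch::tensor({ { 1, 2, 3 }, { 4, 5, 6 } }, torch::kInt32);
    auto rows = bayesnet::TensorUtils::to_matrix(X);           // 2x3 vector of vectors
    auto flat = bayesnet::TensorUtils::to_vector<int>(X.flatten());
    std::cout << rows[1][2] << " " << flat.size() << "\n";     // prints "6 6"
}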
44
bayesnet/utils/bayesnetUtils.cc
Normal file
@@ -0,0 +1,44 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include <algorithm>
#include <numeric>
#include "bayesnetUtils.h"
namespace bayesnet {
    // Return the indices in descending order of their values
    std::vector<int> argsort(std::vector<double>& nums)
    {
        int n = nums.size();
        std::vector<int> indices(n);
        std::iota(indices.begin(), indices.end(), 0);
        std::sort(indices.begin(), indices.end(), [&nums](int i, int j) { return nums[i] > nums[j]; });
        return indices;
    }
    std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor)
    {
        // convert an mxn tensor to an mxn std::vector
        std::vector<std::vector<double>> result;
        // Iterate over the rows
        for (int i = 0; i < dtensor.size(0); ++i) {
            auto row_tensor = dtensor.index({ i, "..." });
            auto row = std::vector<double>(row_tensor.data_ptr<float>(), row_tensor.data_ptr<float>() + dtensor.size(1));
            result.push_back(row);
        }
        return result;
    }
    torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector, bool transpose)
    {
        // convert an nxm std::vector to an mxn tensor if transpose is true
        long int m = transpose ? vector[0].size() : vector.size();
        long int n = transpose ? vector.size() : vector[0].size();
        auto tensor = torch::zeros({ m, n }, torch::kInt32);
        for (int i = 0; i < m; ++i) {
            for (int j = 0; j < n; ++j) {
                tensor[i][j] = transpose ? vector[j][i] : vector[i][j];
            }
        }
        return tensor;
    }
}
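Editor's note: a quick sketch of argsort and vectorToTensor, not part of the commit; the values are arbitrary.

#include <iostream>
#include "bayesnetUtils.h"

int main()
{
    std::vector<double> scores = { 0.2, 0.9, 0.5 };
    for (auto i : bayesnet::argsort(scores))
        std::cout << i << " ";                 // prints "1 2 0" (descending)
    std::cout << "\n";
    std::vector<std::vector<int>> data = { { 1, 2 }, { 3, 4 }, { 5, 6 } };
    auto t = bayesnet::vectorToTensor(data);   // transpose defaults to true -> 2x3
    std::cout << t.sizes() << "\n";
}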
16
bayesnet/utils/bayesnetUtils.h
Normal file
@@ -0,0 +1,16 @@
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#ifndef BAYESNET_UTILS_H
#define BAYESNET_UTILS_H
#include <vector>
#include <torch/torch.h>
namespace bayesnet {
    std::vector<int> argsort(std::vector<double>& nums);
    std::vector<std::vector<double>> tensorToVectorDouble(torch::Tensor& dtensor);
    torch::Tensor vectorToTensor(std::vector<std::vector<int>>& vector, bool transpose = true);
}
#endif // BAYESNET_UTILS_H