27 KiB
27 KiB
<html lang="en">
<head>
</head>
</html>
LCOV - code coverage report | ||||||||||||||||||||||
![]() | ||||||||||||||||||||||
|
||||||||||||||||||||||
![]() |
Line data Source code 1 : // *************************************************************** 2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez 3 : // SPDX-FileType: SOURCE 4 : // SPDX-License-Identifier: MIT 5 : // *************************************************************** 6 : 7 : #include "Mst.h" 8 : #include "BayesMetrics.h" 9 : namespace bayesnet { 10 : //samples is n+1xm tensor used to fit the model 11 3957 : Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates) 12 3957 : : samples(samples) 13 3957 : , features(features) 14 3957 : , className(className) 15 3957 : , classNumStates(classNumStates) 16 : { 17 3957 : } 18 : //samples is n+1xm std::vector used to fit the model 19 176 : Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates) 20 176 : : features(features) 21 176 : , className(className) 22 176 : , classNumStates(classNumStates) 23 352 : , samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32)) 24 : { 25 1408 : for (int i = 0; i < vsamples.size(); ++i) { 26 4928 : samples.index_put_({ i, "..." }, torch::tensor(vsamples[i], torch::kInt32)); 27 : } 28 704 : samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32)); 29 1584 : } 30 1099 : std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k) 31 : { 32 : // Return the K Best features 33 1099 : auto n = features.size(); 34 1099 : if (k == 0) { 35 0 : k = n; 36 : } 37 : // compute scores 38 1099 : scoresKBest.clear(); 39 1099 : featuresKBest.clear(); 40 3297 : auto label = samples.index({ -1, "..." }); 41 37425 : for (int i = 0; i < n; ++i) { 42 108978 : scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights)); 43 36326 : featuresKBest.push_back(i); 44 : } 45 : // sort & reduce scores and features 46 1099 : if (ascending) { 47 245 : sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j) 48 5931 : { return scoresKBest[i] < scoresKBest[j]; }); 49 245 : sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>()); 50 245 : if (k < n) { 51 308 : for (int i = 0; i < n - k; ++i) { 52 220 : featuresKBest.erase(featuresKBest.begin()); 53 220 : scoresKBest.erase(scoresKBest.begin()); 54 : } 55 : } 56 : } else { 57 854 : sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j) 58 168709 : { return scoresKBest[i] > scoresKBest[j]; }); 59 854 : sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>()); 60 854 : featuresKBest.resize(k); 61 854 : scoresKBest.resize(k); 62 : } 63 2198 : return featuresKBest; 64 38524 : } 65 88 : std::vector<double> Metrics::getScoresKBest() const 66 : { 67 88 : return scoresKBest; 68 : } 69 : 70 374 : torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights) 71 : { 72 374 : auto result = std::vector<double>(); 73 374 : auto source = std::vector<std::string>(features); 74 374 : source.push_back(className); 75 374 : auto combinations = doCombinations(source); 76 : // Compute class prior 77 374 : auto margin = torch::zeros({ classNumStates }, torch::kFloat); 78 2024 : for (int value = 0; value < classNumStates; ++value) { 79 6600 : auto mask = samples.index({ -1, "..." }) == value; 80 1650 : margin[value] = mask.sum().item<double>() / samples.size(1); 81 1650 : } 82 10098 : for (auto [first, second] : combinations) { 83 9724 : int index_first = find(features.begin(), features.end(), first) - features.begin(); 84 9724 : int index_second = find(features.begin(), features.end(), second) - features.begin(); 85 9724 : double accumulated = 0; 86 57640 : for (int value = 0; value < classNumStates; ++value) { 87 191664 : auto mask = samples.index({ -1, "..." }) == value; 88 143748 : auto first_dataset = samples.index({ index_first, mask }); 89 143748 : auto second_dataset = samples.index({ index_second, mask }); 90 95832 : auto weights_dataset = weights.index({ mask }); 91 95832 : auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset); 92 47916 : auto pb = margin[value].item<double>(); 93 47916 : accumulated += pb * mi; 94 47916 : } 95 9724 : result.push_back(accumulated); 96 9724 : } 97 374 : long n_vars = source.size(); 98 374 : auto matrix = torch::zeros({ n_vars, n_vars }); 99 374 : auto indices = torch::triu_indices(n_vars, n_vars, 1); 100 10098 : for (auto i = 0; i < result.size(); ++i) { 101 9724 : auto x = indices[0][i]; 102 9724 : auto y = indices[1][i]; 103 9724 : matrix[x][y] = result[i]; 104 9724 : matrix[y][x] = result[i]; 105 9724 : } 106 748 : return matrix; 107 241604 : } 108 : // To use in Python 109 0 : std::vector<float> Metrics::conditionalEdgeWeights(std::vector<float>& weights_) 110 : { 111 0 : const torch::Tensor weights = torch::tensor(weights_); 112 0 : auto matrix = conditionalEdge(weights); 113 0 : std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel()); 114 0 : return v; 115 0 : } 116 101565 : double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights) 117 : { 118 101565 : torch::Tensor counts = feature.bincount(weights); 119 101565 : double totalWeight = counts.sum().item<double>(); 120 101565 : torch::Tensor probs = counts.to(torch::kFloat) / totalWeight; 121 101565 : torch::Tensor logProbs = torch::log(probs); 122 101565 : torch::Tensor entropy = -probs * logProbs; 123 203130 : return entropy.nansum().item<double>(); 124 101565 : } 125 : // H(Y|X) = sum_{x in X} p(x) H(Y|X=x) 126 91263 : double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights) 127 : { 128 91263 : int numSamples = firstFeature.sizes()[0]; 129 91263 : torch::Tensor featureCounts = secondFeature.bincount(weights); 130 91263 : std::unordered_map<int, std::unordered_map<int, double>> jointCounts; 131 91263 : double totalWeight = 0; 132 11715815 : for (auto i = 0; i < numSamples; i++) { 133 11624552 : jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>(); 134 11624552 : totalWeight += weights[i].item<float>(); 135 : } 136 91263 : if (totalWeight == 0) 137 0 : return 0; 138 91263 : double entropyValue = 0; 139 311456 : for (int value = 0; value < featureCounts.sizes()[0]; ++value) { 140 220193 : double p_f = featureCounts[value].item<double>() / totalWeight; 141 220193 : double entropy_f = 0; 142 655015 : for (auto& [label, jointCount] : jointCounts[value]) { 143 434822 : double p_l_f = jointCount / featureCounts[value].item<double>(); 144 434822 : if (p_l_f > 0) { 145 434822 : entropy_f -= p_l_f * log(p_l_f); 146 : } else { 147 0 : entropy_f = 0; 148 : } 149 : } 150 220193 : entropyValue += p_f * entropy_f; 151 : } 152 91263 : return entropyValue; 153 91263 : } 154 : // I(X;Y) = H(Y) - H(Y|X) 155 91263 : double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights) 156 : { 157 91263 : return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights); 158 : } 159 : /* 160 : Compute the maximum spanning tree considering the weights as distances 161 : and the indices of the weights as nodes of this square matrix using 162 : Kruskal algorithm 163 : */ 164 319 : std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root) 165 : { 166 319 : auto mst = MST(features, weights, root); 167 638 : return mst.maximumSpanningTree(); 168 319 : } 169 : } |
![]() |
Generated by: LCOV version 2.0-1 |
</html>