LCOV - coverage.info - bayesnet/utils/BayesMetrics.cc

LCOV - code coverage report

Current view:	top level - bayesnet/utils - BayesMetrics.cc (source / functions)		Coverage	Total	Hit
Test:	coverage.info	Lines:	92.5 %	120	111
Test Date:	2024-04-29 20:48:03	Functions:	91.7 %	12	11

            Line data    Source code

       1              : // ***************************************************************
       2              : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
       3              : // SPDX-FileType: SOURCE
       4              : // SPDX-License-Identifier: MIT
       5              : // ***************************************************************
       6              : 
       7              : #include "Mst.h"
       8              : #include "BayesMetrics.h"
       9              : namespace bayesnet {
      10              :     //samples is n+1xm tensor used to fit the model
      11         3957 :     Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
      12         3957 :         : samples(samples)
      13         3957 :         , features(features)
      14         3957 :         , className(className)
      15         3957 :         , classNumStates(classNumStates)
      16              :     {
      17         3957 :     }
      18              :     //samples is n+1xm std::vector used to fit the model
      19          176 :     Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
      20          176 :         : features(features)
      21          176 :         , className(className)
      22          176 :         , classNumStates(classNumStates)
      23          352 :         , samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
      24              :     {
      25         1408 :         for (int i = 0; i < vsamples.size(); ++i) {
      26         4928 :             samples.index_put_({ i,  "..." }, torch::tensor(vsamples[i], torch::kInt32));
      27              :         }
      28          704 :         samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
      29         1584 :     }
      30         1099 :     std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
      31              :     {
      32              :         // Return the K Best features 
      33         1099 :         auto n = features.size();
      34         1099 :         if (k == 0) {
      35            0 :             k = n;
      36              :         }
      37              :         // compute scores
      38         1099 :         scoresKBest.clear();
      39         1099 :         featuresKBest.clear();
      40         3297 :         auto label = samples.index({ -1, "..." });
      41        37425 :         for (int i = 0; i < n; ++i) {
      42       108978 :             scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
      43        36326 :             featuresKBest.push_back(i);
      44              :         }
      45              :         // sort & reduce scores and features
      46         1099 :         if (ascending) {
      47          245 :             sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
      48         5931 :                 { return scoresKBest[i] < scoresKBest[j]; });
      49          245 :             sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
      50          245 :             if (k < n) {
      51          308 :                 for (int i = 0; i < n - k; ++i) {
      52          220 :                     featuresKBest.erase(featuresKBest.begin());
      53          220 :                     scoresKBest.erase(scoresKBest.begin());
      54              :                 }
      55              :             }
      56              :         } else {
      57          854 :             sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
      58       168709 :                 { return scoresKBest[i] > scoresKBest[j]; });
      59          854 :             sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
      60          854 :             featuresKBest.resize(k);
      61          854 :             scoresKBest.resize(k);
      62              :         }
      63         2198 :         return featuresKBest;
      64        38524 :     }
      65           88 :     std::vector<double> Metrics::getScoresKBest() const
      66              :     {
      67           88 :         return scoresKBest;
      68              :     }
      69              : 
      70          374 :     torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
      71              :     {
      72          374 :         auto result = std::vector<double>();
      73          374 :         auto source = std::vector<std::string>(features);
      74          374 :         source.push_back(className);
      75          374 :         auto combinations = doCombinations(source);
      76              :         // Compute class prior
      77          374 :         auto margin = torch::zeros({ classNumStates }, torch::kFloat);
      78         2024 :         for (int value = 0; value < classNumStates; ++value) {
      79         6600 :             auto mask = samples.index({ -1,  "..." }) == value;
      80         1650 :             margin[value] = mask.sum().item<double>() / samples.size(1);
      81         1650 :         }
      82        10098 :         for (auto [first, second] : combinations) {
      83         9724 :             int index_first = find(features.begin(), features.end(), first) - features.begin();
      84         9724 :             int index_second = find(features.begin(), features.end(), second) - features.begin();
      85         9724 :             double accumulated = 0;
      86        57640 :             for (int value = 0; value < classNumStates; ++value) {
      87       191664 :                 auto mask = samples.index({ -1, "..." }) == value;
      88       143748 :                 auto first_dataset = samples.index({ index_first, mask });
      89       143748 :                 auto second_dataset = samples.index({ index_second, mask });
      90        95832 :                 auto weights_dataset = weights.index({ mask });
      91        95832 :                 auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);
      92        47916 :                 auto pb = margin[value].item<double>();
      93        47916 :                 accumulated += pb * mi;
      94        47916 :             }
      95         9724 :             result.push_back(accumulated);
      96         9724 :         }
      97          374 :         long n_vars = source.size();
      98          374 :         auto matrix = torch::zeros({ n_vars, n_vars });
      99          374 :         auto indices = torch::triu_indices(n_vars, n_vars, 1);
     100        10098 :         for (auto i = 0; i < result.size(); ++i) {
     101         9724 :             auto x = indices[0][i];
     102         9724 :             auto y = indices[1][i];
     103         9724 :             matrix[x][y] = result[i];
     104         9724 :             matrix[y][x] = result[i];
     105         9724 :         }
     106          748 :         return matrix;
     107       241604 :     }
     108              :     // To use in Python
     109            0 :     std::vector<float> Metrics::conditionalEdgeWeights(std::vector<float>& weights_)
     110              :     {
     111            0 :         const torch::Tensor weights = torch::tensor(weights_);
     112            0 :         auto matrix = conditionalEdge(weights);
     113            0 :         std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
     114            0 :         return v;
     115            0 :     }
     116       101565 :     double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
     117              :     {
     118       101565 :         torch::Tensor counts = feature.bincount(weights);
     119       101565 :         double totalWeight = counts.sum().item<double>();
     120       101565 :         torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
     121       101565 :         torch::Tensor logProbs = torch::log(probs);
     122       101565 :         torch::Tensor entropy = -probs * logProbs;
     123       203130 :         return entropy.nansum().item<double>();
     124       101565 :     }
     125              :     // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
     126        91263 :     double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     127              :     {
     128        91263 :         int numSamples = firstFeature.sizes()[0];
     129        91263 :         torch::Tensor featureCounts = secondFeature.bincount(weights);
     130        91263 :         std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
     131        91263 :         double totalWeight = 0;
     132     11715815 :         for (auto i = 0; i < numSamples; i++) {
     133     11624552 :             jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
     134     11624552 :             totalWeight += weights[i].item<float>();
     135              :         }
     136        91263 :         if (totalWeight == 0)
     137            0 :             return 0;
     138        91263 :         double entropyValue = 0;
     139       311456 :         for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
     140       220193 :             double p_f = featureCounts[value].item<double>() / totalWeight;
     141       220193 :             double entropy_f = 0;
     142       655015 :             for (auto& [label, jointCount] : jointCounts[value]) {
     143       434822 :                 double p_l_f = jointCount / featureCounts[value].item<double>();
     144       434822 :                 if (p_l_f > 0) {
     145       434822 :                     entropy_f -= p_l_f * log(p_l_f);
     146              :                 } else {
     147            0 :                     entropy_f = 0;
     148              :                 }
     149              :             }
     150       220193 :             entropyValue += p_f * entropy_f;
     151              :         }
     152        91263 :         return entropyValue;
     153        91263 :     }
     154              :     // I(X;Y) = H(Y) - H(Y|X)
     155        91263 :     double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     156              :     {
     157        91263 :         return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
     158              :     }
     159              :     /*
     160              :     Compute the maximum spanning tree considering the weights as distances
     161              :     and the indices of the weights as nodes of this square matrix using
     162              :     Kruskal algorithm
     163              :     */
     164          319 :     std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root)
     165              :     {
     166          319 :         auto mst = MST(features, weights, root);
     167          638 :         return mst.maximumSpanningTree();
     168          319 :     }
     169              : }

Generated by: LCOV version 2.0-1