LCOV - coverage.info - bayesnet/utils/BayesMetrics.cc

LCOV - code coverage report

Current view:	top level - bayesnet/utils - BayesMetrics.cc (source / functions)		Coverage	Total	Hit
Test:	coverage.info	Lines:	97.4 %	114	111
Test Date:	2024-04-30 20:26:57	Functions:	100.0 %	11	11

            Line data    Source code

       1              : // ***************************************************************
       2              : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
       3              : // SPDX-FileType: SOURCE
       4              : // SPDX-License-Identifier: MIT
       5              : // ***************************************************************
       6              : 
       7              : #include "Mst.h"
       8              : #include "BayesMetrics.h"
       9              : namespace bayesnet {
      10              :     //samples is n+1xm tensor used to fit the model
      11          744 :     Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
      12          744 :         : samples(samples)
      13          744 :         , className(className)
      14          744 :         , features(features)
      15          744 :         , classNumStates(classNumStates)
      16              :     {
      17          744 :     }
      18              :     //samples is n+1xm std::vector used to fit the model
      19           32 :     Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
      20           32 :         : samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
      21           32 :         , className(className)
      22           32 :         , features(features)
      23           32 :         , classNumStates(classNumStates)
      24              :     {
      25          256 :         for (int i = 0; i < vsamples.size(); ++i) {
      26          896 :             samples.index_put_({ i,  "..." }, torch::tensor(vsamples[i], torch::kInt32));
      27              :         }
      28          128 :         samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
      29          288 :     }
      30          230 :     std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
      31              :     {
      32              :         // Return the K Best features 
      33          230 :         auto n = features.size();
      34          230 :         if (k == 0) {
      35            0 :             k = n;
      36              :         }
      37              :         // compute scores
      38          230 :         scoresKBest.clear();
      39          230 :         featuresKBest.clear();
      40          690 :         auto label = samples.index({ -1, "..." });
      41         5192 :         for (int i = 0; i < n; ++i) {
      42        14886 :             scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
      43         4962 :             featuresKBest.push_back(i);
      44              :         }
      45              :         // sort & reduce scores and features
      46          230 :         if (ascending) {
      47           38 :             sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
      48          906 :                 { return scoresKBest[i] < scoresKBest[j]; });
      49           38 :             sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
      50           38 :             if (k < n) {
      51           56 :                 for (int i = 0; i < n - k; ++i) {
      52           40 :                     featuresKBest.erase(featuresKBest.begin());
      53           40 :                     scoresKBest.erase(scoresKBest.begin());
      54              :                 }
      55              :             }
      56              :         } else {
      57          192 :             sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
      58        32404 :                 { return scoresKBest[i] > scoresKBest[j]; });
      59          192 :             sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
      60          192 :             featuresKBest.resize(k);
      61          192 :             scoresKBest.resize(k);
      62              :         }
      63          460 :         return featuresKBest;
      64         5422 :     }
      65           16 :     std::vector<double> Metrics::getScoresKBest() const
      66              :     {
      67           16 :         return scoresKBest;
      68              :     }
      69              : 
      70           68 :     torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
      71              :     {
      72           68 :         auto result = std::vector<double>();
      73           68 :         auto source = std::vector<std::string>(features);
      74           68 :         source.push_back(className);
      75           68 :         auto combinations = doCombinations(source);
      76              :         // Compute class prior
      77           68 :         auto margin = torch::zeros({ classNumStates }, torch::kFloat);
      78          368 :         for (int value = 0; value < classNumStates; ++value) {
      79         1200 :             auto mask = samples.index({ -1,  "..." }) == value;
      80          300 :             margin[value] = mask.sum().item<double>() / samples.size(1);
      81          300 :         }
      82         1836 :         for (auto [first, second] : combinations) {
      83         1768 :             int index_first = find(features.begin(), features.end(), first) - features.begin();
      84         1768 :             int index_second = find(features.begin(), features.end(), second) - features.begin();
      85         1768 :             double accumulated = 0;
      86        10480 :             for (int value = 0; value < classNumStates; ++value) {
      87        34848 :                 auto mask = samples.index({ -1, "..." }) == value;
      88        26136 :                 auto first_dataset = samples.index({ index_first, mask });
      89        26136 :                 auto second_dataset = samples.index({ index_second, mask });
      90        17424 :                 auto weights_dataset = weights.index({ mask });
      91        17424 :                 auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);
      92         8712 :                 auto pb = margin[value].item<double>();
      93         8712 :                 accumulated += pb * mi;
      94         8712 :             }
      95         1768 :             result.push_back(accumulated);
      96         1768 :         }
      97           68 :         long n_vars = source.size();
      98           68 :         auto matrix = torch::zeros({ n_vars, n_vars });
      99           68 :         auto indices = torch::triu_indices(n_vars, n_vars, 1);
     100         1836 :         for (auto i = 0; i < result.size(); ++i) {
     101         1768 :             auto x = indices[0][i];
     102         1768 :             auto y = indices[1][i];
     103         1768 :             matrix[x][y] = result[i];
     104         1768 :             matrix[y][x] = result[i];
     105         1768 :         }
     106          136 :         return matrix;
     107        43928 :     }
     108        16480 :     double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
     109              :     {
     110        16480 :         torch::Tensor counts = feature.bincount(weights);
     111        16480 :         double totalWeight = counts.sum().item<double>();
     112        16480 :         torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
     113        16480 :         torch::Tensor logProbs = torch::log(probs);
     114        16480 :         torch::Tensor entropy = -probs * logProbs;
     115        32960 :         return entropy.nansum().item<double>();
     116        16480 :     }
     117              :     // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
     118        14836 :     double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     119              :     {
     120        14836 :         int numSamples = firstFeature.sizes()[0];
     121        14836 :         torch::Tensor featureCounts = secondFeature.bincount(weights);
     122        14836 :         std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
     123        14836 :         double totalWeight = 0;
     124      2946924 :         for (auto i = 0; i < numSamples; i++) {
     125      2932088 :             jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
     126      2932088 :             totalWeight += weights[i].item<float>();
     127              :         }
     128        14836 :         if (totalWeight == 0)
     129            0 :             return 0;
     130        14836 :         double entropyValue = 0;
     131        73754 :         for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
     132        58918 :             double p_f = featureCounts[value].item<double>() / totalWeight;
     133        58918 :             double entropy_f = 0;
     134       198966 :             for (auto& [label, jointCount] : jointCounts[value]) {
     135       140048 :                 double p_l_f = jointCount / featureCounts[value].item<double>();
     136       140048 :                 if (p_l_f > 0) {
     137       140048 :                     entropy_f -= p_l_f * log(p_l_f);
     138              :                 } else {
     139            0 :                     entropy_f = 0;
     140              :                 }
     141              :             }
     142        58918 :             entropyValue += p_f * entropy_f;
     143              :         }
     144        14836 :         return entropyValue;
     145        14836 :     }
     146              :     // I(X;Y) = H(Y) - H(Y|X)
     147        14836 :     double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     148              :     {
     149        14836 :         return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
     150              :     }
     151              :     /*
     152              :     Compute the maximum spanning tree considering the weights as distances
     153              :     and the indices of the weights as nodes of this square matrix using
     154              :     Kruskal algorithm
     155              :     */
     156           58 :     std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root)
     157              :     {
     158           58 :         auto mst = MST(features, weights, root);
     159          116 :         return mst.maximumSpanningTree();
     160           58 :     }
     161              : }

Generated by: LCOV version 2.0-1