LCOV - coverage.info - bayesnet/utils/BayesMetrics.cc

LCOV - code coverage report

Current view:	top level - bayesnet/utils - BayesMetrics.cc (source / functions)		Coverage	Total	Hit
Test:	coverage.info	Lines:	97.4 %	114	111
Test Date:	2024-04-30 13:59:18	Functions:	100.0 %	11	11

            Line data    Source code

       1              : // ***************************************************************
       2              : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
       3              : // SPDX-FileType: SOURCE
       4              : // SPDX-License-Identifier: MIT
       5              : // ***************************************************************
       6              : 
       7              : #include "Mst.h"
       8              : #include "BayesMetrics.h"
       9              : namespace bayesnet {
      10              :     //samples is n+1xm tensor used to fit the model
      11         2248 :     Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
      12         2248 :         : samples(samples)
      13         2248 :         , className(className)
      14         2248 :         , features(features)
      15         2248 :         , classNumStates(classNumStates)
      16              :     {
      17         2248 :     }
      18              :     //samples is n+1xm std::vector used to fit the model
      19           96 :     Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
      20           96 :         : samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
      21           96 :         , className(className)
      22           96 :         , features(features)
      23           96 :         , classNumStates(classNumStates)
      24              :     {
      25          768 :         for (int i = 0; i < vsamples.size(); ++i) {
      26         2688 :             samples.index_put_({ i,  "..." }, torch::tensor(vsamples[i], torch::kInt32));
      27              :         }
      28          384 :         samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
      29          864 :     }
      30          690 :     std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
      31              :     {
      32              :         // Return the K Best features 
      33          690 :         auto n = features.size();
      34          690 :         if (k == 0) {
      35            0 :             k = n;
      36              :         }
      37              :         // compute scores
      38          690 :         scoresKBest.clear();
      39          690 :         featuresKBest.clear();
      40         2070 :         auto label = samples.index({ -1, "..." });
      41        15576 :         for (int i = 0; i < n; ++i) {
      42        44658 :             scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
      43        14886 :             featuresKBest.push_back(i);
      44              :         }
      45              :         // sort & reduce scores and features
      46          690 :         if (ascending) {
      47          114 :             sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
      48         2718 :                 { return scoresKBest[i] < scoresKBest[j]; });
      49          114 :             sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
      50          114 :             if (k < n) {
      51          168 :                 for (int i = 0; i < n - k; ++i) {
      52          120 :                     featuresKBest.erase(featuresKBest.begin());
      53          120 :                     scoresKBest.erase(scoresKBest.begin());
      54              :                 }
      55              :             }
      56              :         } else {
      57          576 :             sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
      58        97212 :                 { return scoresKBest[i] > scoresKBest[j]; });
      59          576 :             sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
      60          576 :             featuresKBest.resize(k);
      61          576 :             scoresKBest.resize(k);
      62              :         }
      63         1380 :         return featuresKBest;
      64        16266 :     }
      65           48 :     std::vector<double> Metrics::getScoresKBest() const
      66              :     {
      67           48 :         return scoresKBest;
      68              :     }
      69              : 
      70          204 :     torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
      71              :     {
      72          204 :         auto result = std::vector<double>();
      73          204 :         auto source = std::vector<std::string>(features);
      74          204 :         source.push_back(className);
      75          204 :         auto combinations = doCombinations(source);
      76              :         // Compute class prior
      77          204 :         auto margin = torch::zeros({ classNumStates }, torch::kFloat);
      78         1104 :         for (int value = 0; value < classNumStates; ++value) {
      79         3600 :             auto mask = samples.index({ -1,  "..." }) == value;
      80          900 :             margin[value] = mask.sum().item<double>() / samples.size(1);
      81          900 :         }
      82         5508 :         for (auto [first, second] : combinations) {
      83         5304 :             int index_first = find(features.begin(), features.end(), first) - features.begin();
      84         5304 :             int index_second = find(features.begin(), features.end(), second) - features.begin();
      85         5304 :             double accumulated = 0;
      86        31440 :             for (int value = 0; value < classNumStates; ++value) {
      87       104544 :                 auto mask = samples.index({ -1, "..." }) == value;
      88        78408 :                 auto first_dataset = samples.index({ index_first, mask });
      89        78408 :                 auto second_dataset = samples.index({ index_second, mask });
      90        52272 :                 auto weights_dataset = weights.index({ mask });
      91        52272 :                 auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);
      92        26136 :                 auto pb = margin[value].item<double>();
      93        26136 :                 accumulated += pb * mi;
      94        26136 :             }
      95         5304 :             result.push_back(accumulated);
      96         5304 :         }
      97          204 :         long n_vars = source.size();
      98          204 :         auto matrix = torch::zeros({ n_vars, n_vars });
      99          204 :         auto indices = torch::triu_indices(n_vars, n_vars, 1);
     100         5508 :         for (auto i = 0; i < result.size(); ++i) {
     101         5304 :             auto x = indices[0][i];
     102         5304 :             auto y = indices[1][i];
     103         5304 :             matrix[x][y] = result[i];
     104         5304 :             matrix[y][x] = result[i];
     105         5304 :         }
     106          408 :         return matrix;
     107       131784 :     }
     108        50295 :     double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
     109              :     {
     110        50295 :         torch::Tensor counts = feature.bincount(weights);
     111        50295 :         double totalWeight = counts.sum().item<double>();
     112        50295 :         torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
     113        50295 :         torch::Tensor logProbs = torch::log(probs);
     114        50295 :         torch::Tensor entropy = -probs * logProbs;
     115       100590 :         return entropy.nansum().item<double>();
     116        50295 :     }
     117              :     // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
     118        44793 :     double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     119              :     {
     120        44793 :         int numSamples = firstFeature.sizes()[0];
     121        44793 :         torch::Tensor featureCounts = secondFeature.bincount(weights);
     122        44793 :         std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
     123        44793 :         double totalWeight = 0;
     124      8954403 :         for (auto i = 0; i < numSamples; i++) {
     125      8909610 :             jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
     126      8909610 :             totalWeight += weights[i].item<float>();
     127              :         }
     128        44793 :         if (totalWeight == 0)
     129            0 :             return 0;
     130        44793 :         double entropyValue = 0;
     131       222747 :         for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
     132       177954 :             double p_f = featureCounts[value].item<double>() / totalWeight;
     133       177954 :             double entropy_f = 0;
     134       601680 :             for (auto& [label, jointCount] : jointCounts[value]) {
     135       423726 :                 double p_l_f = jointCount / featureCounts[value].item<double>();
     136       423726 :                 if (p_l_f > 0) {
     137       423726 :                     entropy_f -= p_l_f * log(p_l_f);
     138              :                 } else {
     139            0 :                     entropy_f = 0;
     140              :                 }
     141              :             }
     142       177954 :             entropyValue += p_f * entropy_f;
     143              :         }
     144        44793 :         return entropyValue;
     145        44793 :     }
     146              :     // I(X;Y) = H(Y) - H(Y|X)
     147        44793 :     double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     148              :     {
     149        44793 :         return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
     150              :     }
     151              :     /*
     152              :     Compute the maximum spanning tree considering the weights as distances
     153              :     and the indices of the weights as nodes of this square matrix using
     154              :     Kruskal algorithm
     155              :     */
     156          174 :     std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root)
     157              :     {
     158          174 :         auto mst = MST(features, weights, root);
     159          348 :         return mst.maximumSpanningTree();
     160          174 :     }
     161              : }

Generated by: LCOV version 2.0-1