LCOV - BayesNet Coverage Report - bayesnet/utils/BayesMetrics.cc

LCOV - code coverage report

Current view:	top level - bayesnet/utils - BayesMetrics.cc (source / functions)		Coverage	Total	Hit
Test:	BayesNet Coverage Report	Lines:	98.2 %	114	112
Test Date:	2024-05-06 17:54:04	Functions:	100.0 %	11	11
Legend:	Lines: hit not hit

            Line data    Source code

       1              : // ***************************************************************
       2              : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
       3              : // SPDX-FileType: SOURCE
       4              : // SPDX-License-Identifier: MIT
       5              : // ***************************************************************
       6              : 
       7              : #include "Mst.h"
       8              : #include "BayesMetrics.h"
       9              : namespace bayesnet {
      10              :     //samples is n+1xm tensor used to fit the model
      11         2123 :     Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
      12         2123 :         : samples(samples)
      13         2123 :         , className(className)
      14         2123 :         , features(features)
      15         2123 :         , classNumStates(classNumStates)
      16              :     {
      17         2123 :     }
      18              :     //samples is n+1xm std::vector used to fit the model
      19           96 :     Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
      20           96 :         : samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
      21           96 :         , className(className)
      22           96 :         , features(features)
      23           96 :         , classNumStates(classNumStates)
      24              :     {
      25          768 :         for (int i = 0; i < vsamples.size(); ++i) {
      26         2688 :             samples.index_put_({ i,  "..." }, torch::tensor(vsamples[i], torch::kInt32));
      27              :         }
      28          384 :         samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
      29          864 :     }
      30          478 :     std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
      31              :     {
      32              :         // Return the K Best features 
      33          478 :         auto n = features.size();
      34          478 :         if (k == 0) {
      35            6 :             k = n;
      36              :         }
      37              :         // compute scores
      38          478 :         scoresKBest.clear();
      39          478 :         featuresKBest.clear();
      40         1434 :         auto label = samples.index({ -1, "..." });
      41        10522 :         for (int i = 0; i < n; ++i) {
      42        30132 :             scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
      43        10044 :             featuresKBest.push_back(i);
      44              :         }
      45              :         // sort & reduce scores and features
      46          478 :         if (ascending) {
      47           94 :             sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
      48         2088 :                 { return scoresKBest[i] < scoresKBest[j]; });
      49           94 :             sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
      50           94 :             if (k < n) {
      51          154 :                 for (int i = 0; i < n - k; ++i) {
      52          110 :                     featuresKBest.erase(featuresKBest.begin());
      53          110 :                     scoresKBest.erase(scoresKBest.begin());
      54              :                 }
      55              :             }
      56              :         } else {
      57          384 :             sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
      58        64808 :                 { return scoresKBest[i] > scoresKBest[j]; });
      59          384 :             sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
      60          384 :             featuresKBest.resize(k);
      61          384 :             scoresKBest.resize(k);
      62              :         }
      63          956 :         return featuresKBest;
      64        11000 :     }
      65           48 :     std::vector<double> Metrics::getScoresKBest() const
      66              :     {
      67           48 :         return scoresKBest;
      68              :     }
      69              : 
      70          152 :     torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
      71              :     {
      72          152 :         auto result = std::vector<double>();
      73          152 :         auto source = std::vector<std::string>(features);
      74          152 :         source.push_back(className);
      75          152 :         auto combinations = doCombinations(source);
      76              :         // Compute class prior
      77          152 :         auto margin = torch::zeros({ classNumStates }, torch::kFloat);
      78          828 :         for (int value = 0; value < classNumStates; ++value) {
      79         2704 :             auto mask = samples.index({ -1,  "..." }) == value;
      80          676 :             margin[value] = mask.sum().item<double>() / samples.size(1);
      81          676 :         }
      82         4164 :         for (auto [first, second] : combinations) {
      83         4012 :             int index_first = find(features.begin(), features.end(), first) - features.begin();
      84         4012 :             int index_second = find(features.begin(), features.end(), second) - features.begin();
      85         4012 :             double accumulated = 0;
      86        23820 :             for (int value = 0; value < classNumStates; ++value) {
      87        79232 :                 auto mask = samples.index({ -1, "..." }) == value;
      88        59424 :                 auto first_dataset = samples.index({ index_first, mask });
      89        59424 :                 auto second_dataset = samples.index({ index_second, mask });
      90        39616 :                 auto weights_dataset = weights.index({ mask });
      91        39616 :                 auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);
      92        19808 :                 auto pb = margin[value].item<double>();
      93        19808 :                 accumulated += pb * mi;
      94        19808 :             }
      95         4012 :             result.push_back(accumulated);
      96         4012 :         }
      97          152 :         long n_vars = source.size();
      98          152 :         auto matrix = torch::zeros({ n_vars, n_vars });
      99          152 :         auto indices = torch::triu_indices(n_vars, n_vars, 1);
     100         4164 :         for (auto i = 0; i < result.size(); ++i) {
     101         4012 :             auto x = indices[0][i];
     102         4012 :             auto y = indices[1][i];
     103         4012 :             matrix[x][y] = result[i];
     104         4012 :             matrix[y][x] = result[i];
     105         4012 :         }
     106          304 :         return matrix;
     107        99868 :     }
     108        41732 :     double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
     109              :     {
     110        41732 :         torch::Tensor counts = feature.bincount(weights);
     111        41732 :         double totalWeight = counts.sum().item<double>();
     112        41732 :         torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
     113        41732 :         torch::Tensor logProbs = torch::log(probs);
     114        41732 :         torch::Tensor entropy = -probs * logProbs;
     115        83464 :         return entropy.nansum().item<double>();
     116        41732 :     }
     117              :     // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
     118        34276 :     double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     119              :     {
     120        34276 :         int numSamples = firstFeature.sizes()[0];
     121        34276 :         torch::Tensor featureCounts = secondFeature.bincount(weights);
     122        34276 :         std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
     123        34276 :         double totalWeight = 0;
     124      6993324 :         for (auto i = 0; i < numSamples; i++) {
     125      6959048 :             jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
     126      6959048 :             totalWeight += weights[i].item<float>();
     127              :         }
     128        34276 :         if (totalWeight == 0)
     129            0 :             return 0;
     130        34276 :         double entropyValue = 0;
     131       168251 :         for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
     132       133975 :             double p_f = featureCounts[value].item<double>() / totalWeight;
     133       133975 :             double entropy_f = 0;
     134       454356 :             for (auto& [label, jointCount] : jointCounts[value]) {
     135       320381 :                 double p_l_f = jointCount / featureCounts[value].item<double>();
     136       320381 :                 if (p_l_f > 0) {
     137       320381 :                     entropy_f -= p_l_f * log(p_l_f);
     138              :                 } else {
     139            0 :                     entropy_f = 0;
     140              :                 }
     141              :             }
     142       133975 :             entropyValue += p_f * entropy_f;
     143              :         }
     144        34276 :         return entropyValue;
     145        34276 :     }
     146              :     // I(X;Y) = H(Y) - H(Y|X)
     147        34276 :     double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     148              :     {
     149        34276 :         return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
     150              :     }
     151              :     /*
     152              :     Compute the maximum spanning tree considering the weights as distances
     153              :     and the indices of the weights as nodes of this square matrix using
     154              :     Kruskal algorithm
     155              :     */
     156          148 :     std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root)
     157              :     {
     158          148 :         auto mst = MST(features, weights, root);
     159          296 :         return mst.maximumSpanningTree();
     160          148 :     }
     161              : }

Generated by: LCOV version 2.0-1