LCOV - coverage.info - bayesnet/utils/BayesMetrics.cc

LCOV - code coverage report

Current view:	top level - bayesnet/utils - BayesMetrics.cc (source / functions)		Coverage	Total	Hit
Test:	coverage.info	Lines:	92.5 %	120	111
Test Date:	2024-04-21 17:30:26	Functions:	91.7 %	12	11

            Line data    Source code

       1              : // ***************************************************************
       2              : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
       3              : // SPDX-FileType: SOURCE
       4              : // SPDX-License-Identifier: MIT
       5              : // ***************************************************************
       6              : 
       7              : #include "Mst.h"
       8              : #include "BayesMetrics.h"
       9              : namespace bayesnet {
      10              :     //samples is n+1xm tensor used to fit the model
      11          338 :     Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
      12          338 :         : samples(samples)
      13          338 :         , features(features)
      14          338 :         , className(className)
      15          338 :         , classNumStates(classNumStates)
      16              :     {
      17          338 :     }
      18              :     //samples is n+1xm std::vector used to fit the model
      19           16 :     Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
      20           16 :         : features(features)
      21           16 :         , className(className)
      22           16 :         , classNumStates(classNumStates)
      23           32 :         , samples(torch::zeros({ static_cast<int>(vsamples.size() + 1), static_cast<int>(vsamples[0].size()) }, torch::kInt32))
      24              :     {
      25          128 :         for (int i = 0; i < vsamples.size(); ++i) {
      26          448 :             samples.index_put_({ i,  "..." }, torch::tensor(vsamples[i], torch::kInt32));
      27              :         }
      28           64 :         samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
      29          144 :     }
      30           90 :     std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
      31              :     {
      32              :         // Return the K Best features 
      33           90 :         auto n = features.size();
      34           90 :         if (k == 0) {
      35            0 :             k = n;
      36              :         }
      37              :         // compute scores
      38           90 :         scoresKBest.clear();
      39           90 :         featuresKBest.clear();
      40          270 :         auto label = samples.index({ -1, "..." });
      41         2837 :         for (int i = 0; i < n; ++i) {
      42         8241 :             scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
      43         2747 :             featuresKBest.push_back(i);
      44              :         }
      45              :         // sort & reduce scores and features
      46           90 :         if (ascending) {
      47           19 :             sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
      48          453 :                 { return scoresKBest[i] < scoresKBest[j]; });
      49           19 :             sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
      50           19 :             if (k < n) {
      51           28 :                 for (int i = 0; i < n - k; ++i) {
      52           20 :                     featuresKBest.erase(featuresKBest.begin());
      53           20 :                     scoresKBest.erase(scoresKBest.begin());
      54              :                 }
      55              :             }
      56              :         } else {
      57           71 :             sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
      58        12619 :                 { return scoresKBest[i] > scoresKBest[j]; });
      59           71 :             sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
      60           71 :             featuresKBest.resize(k);
      61           71 :             scoresKBest.resize(k);
      62              :         }
      63          180 :         return featuresKBest;
      64         2927 :     }
      65            8 :     std::vector<double> Metrics::getScoresKBest() const
      66              :     {
      67            8 :         return scoresKBest;
      68              :     }
      69              : 
      70           34 :     torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
      71              :     {
      72           34 :         auto result = std::vector<double>();
      73           34 :         auto source = std::vector<std::string>(features);
      74           34 :         source.push_back(className);
      75           34 :         auto combinations = doCombinations(source);
      76              :         // Compute class prior
      77           34 :         auto margin = torch::zeros({ classNumStates }, torch::kFloat);
      78          184 :         for (int value = 0; value < classNumStates; ++value) {
      79          600 :             auto mask = samples.index({ -1,  "..." }) == value;
      80          150 :             margin[value] = mask.sum().item<double>() / samples.size(1);
      81          150 :         }
      82          918 :         for (auto [first, second] : combinations) {
      83          884 :             int index_first = find(features.begin(), features.end(), first) - features.begin();
      84          884 :             int index_second = find(features.begin(), features.end(), second) - features.begin();
      85          884 :             double accumulated = 0;
      86         5240 :             for (int value = 0; value < classNumStates; ++value) {
      87        17424 :                 auto mask = samples.index({ -1, "..." }) == value;
      88        13068 :                 auto first_dataset = samples.index({ index_first, mask });
      89        13068 :                 auto second_dataset = samples.index({ index_second, mask });
      90         8712 :                 auto weights_dataset = weights.index({ mask });
      91         8712 :                 auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);
      92         4356 :                 auto pb = margin[value].item<double>();
      93         4356 :                 accumulated += pb * mi;
      94         4356 :             }
      95          884 :             result.push_back(accumulated);
      96          884 :         }
      97           34 :         long n_vars = source.size();
      98           34 :         auto matrix = torch::zeros({ n_vars, n_vars });
      99           34 :         auto indices = torch::triu_indices(n_vars, n_vars, 1);
     100          918 :         for (auto i = 0; i < result.size(); ++i) {
     101          884 :             auto x = indices[0][i];
     102          884 :             auto y = indices[1][i];
     103          884 :             matrix[x][y] = result[i];
     104          884 :             matrix[y][x] = result[i];
     105          884 :         }
     106           68 :         return matrix;
     107        21964 :     }
     108              :     // To use in Python
     109            0 :     std::vector<float> Metrics::conditionalEdgeWeights(std::vector<float>& weights_)
     110              :     {
     111            0 :         const torch::Tensor weights = torch::tensor(weights_);
     112            0 :         auto matrix = conditionalEdge(weights);
     113            0 :         std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
     114            0 :         return v;
     115            0 :     }
     116         8506 :     double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
     117              :     {
     118         8506 :         torch::Tensor counts = feature.bincount(weights);
     119         8506 :         double totalWeight = counts.sum().item<double>();
     120         8506 :         torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
     121         8506 :         torch::Tensor logProbs = torch::log(probs);
     122         8506 :         torch::Tensor entropy = -probs * logProbs;
     123        17012 :         return entropy.nansum().item<double>();
     124         8506 :     }
     125              :     // H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
     126         7684 :     double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     127              :     {
     128         7684 :         int numSamples = firstFeature.sizes()[0];
     129         7684 :         torch::Tensor featureCounts = secondFeature.bincount(weights);
     130         7684 :         std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
     131         7684 :         double totalWeight = 0;
     132       991784 :         for (auto i = 0; i < numSamples; i++) {
     133       984100 :             jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
     134       984100 :             totalWeight += weights[i].item<float>();
     135              :         }
     136         7684 :         if (totalWeight == 0)
     137            0 :             return 0;
     138         7684 :         double entropyValue = 0;
     139        26852 :         for (int value = 0; value < featureCounts.sizes()[0]; ++value) {
     140        19168 :             double p_f = featureCounts[value].item<double>() / totalWeight;
     141        19168 :             double entropy_f = 0;
     142        57129 :             for (auto& [label, jointCount] : jointCounts[value]) {
     143        37961 :                 double p_l_f = jointCount / featureCounts[value].item<double>();
     144        37961 :                 if (p_l_f > 0) {
     145        37961 :                     entropy_f -= p_l_f * log(p_l_f);
     146              :                 } else {
     147            0 :                     entropy_f = 0;
     148              :                 }
     149              :             }
     150        19168 :             entropyValue += p_f * entropy_f;
     151              :         }
     152         7684 :         return entropyValue;
     153         7684 :     }
     154              :     // I(X;Y) = H(Y) - H(Y|X)
     155         7684 :     double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
     156              :     {
     157         7684 :         return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
     158              :     }
     159              :     /*
     160              :     Compute the maximum spanning tree considering the weights as distances
     161              :     and the indices of the weights as nodes of this square matrix using
     162              :     Kruskal algorithm
     163              :     */
     164           29 :     std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root)
     165              :     {
     166           29 :         auto mst = MST(features, weights, root);
     167           58 :         return mst.maximumSpanningTree();
     168           29 :     }
     169              : }

Generated by: LCOV version 2.0-1