LCOV - code coverage report
Current view: top level - bayesnet/feature_selection - FeatureSelect.cc (source / functions) Coverage Total Hit
Test: coverage.info Lines: 100.0 % 44 44
Test Date: 2024-04-30 13:59:18 Functions: 100.0 % 8 8

            Line data    Source code
       1              : // ***************************************************************
       2              : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
       3              : // SPDX-FileType: SOURCE
       4              : // SPDX-License-Identifier: MIT
       5              : // ***************************************************************
       6              : 
       7              : #include <limits>
       8              : #include "bayesnet/utils/bayesnetUtils.h"
       9              : #include "FeatureSelect.h"
      10              : namespace bayesnet {
      11          154 :     FeatureSelect::FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
      12          154 :         Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights)
      13              : 
      14              :     {
      15          154 :     }
      16          108 :     void FeatureSelect::initialize()
      17              :     {
      18          108 :         selectedFeatures.clear();
      19          108 :         selectedScores.clear();
      20          108 :     }
      21         2751 :     double FeatureSelect::symmetricalUncertainty(int a, int b)
      22              :     {
      23              :         /*
      24              :         Compute symmetrical uncertainty: normalize the information gain (mutual
      25              :         information) by the entropies of the two features in order to compensate
      26              :         for the bias toward high-cardinality features. The result lies in [0, 1].
      27              :         (https://www.sciencedirect.com/science/article/pii/S0020025519303603)
      28              :         */
      29         8253 :         auto x = samples.index({ a, "..." });
      30         8253 :         auto y = samples.index({ b, "..." });
      31         2751 :         auto mu = mutualInformation(x, y, weights);
      32         2751 :         auto hx = entropy(x, weights);
      33         2751 :         auto hy = entropy(y, weights);
      34         2751 :         return 2.0 * mu / (hx + hy);
      35         8253 :     }
      36          108 :     void FeatureSelect::computeSuLabels()
      37              :     {
      38              :         // Compute Symmetrical Uncertainty between features and labels
      39              :         // https://en.wikipedia.org/wiki/Symmetric_uncertainty
      40          906 :         for (int i = 0; i < features.size(); ++i) {
      41          798 :             suLabels.push_back(symmetricalUncertainty(i, -1));
      42              :         }
      43          108 :     }
      44         6499 :     double FeatureSelect::computeSuFeatures(const int firstFeature, const int secondFeature)
      45              :     {
      46              :         // Compute Symmetrical Uncertainty between features
      47              :         // https://en.wikipedia.org/wiki/Symmetric_uncertainty
      48              :         try {
      49         6499 :             return suFeatures.at({ firstFeature, secondFeature });
      50              :         }
      51         1953 :         catch (const std::out_of_range& e) {
      52         1953 :             double result = symmetricalUncertainty(firstFeature, secondFeature);
      53         1953 :             suFeatures[{firstFeature, secondFeature}] = result;
      54         1953 :             return result;
      55         1953 :         }
      56              :     }
      57         1047 :     double FeatureSelect::computeMeritCFS()
      58              :     {
      59         1047 :         double rcf = 0;
      60         4816 :         for (auto feature : selectedFeatures) {
      61         3769 :             rcf += suLabels[feature];
      62              :         }
      63         1047 :         double rff = 0;
      64         1047 :         int n = selectedFeatures.size();
      65         6907 :         for (const auto& item : doCombinations(selectedFeatures)) {
      66         5860 :             rff += computeSuFeatures(item.first, item.second);
      67         1047 :         }
      68         1047 :         return rcf / sqrt(n + (n * n - n) * rff);
      69              :     }
      70          116 :     std::vector<int> FeatureSelect::getFeatures() const
      71              :     {
      72          116 :         if (!fitted) {
      73            8 :             throw std::runtime_error("FeatureSelect not fitted");
      74              :         }
      75          108 :         return selectedFeatures;
      76              :     }
      77          116 :     std::vector<double> FeatureSelect::getScores() const
      78              :     {
      79          116 :         if (!fitted) {
      80            8 :             throw std::runtime_error("FeatureSelect not fitted");
      81              :         }
      82          108 :         return selectedScores;
      83              :     }
      84              : }
        

Generated by: LCOV version 2.0-1