Files
BayesNet/html/bayesnet/feature_selection/FeatureSelect.cc.gcov.html

14 KiB

<html lang="en"> <head> </head>
LCOV - code coverage report
Current view: top level - bayesnet/feature_selection - FeatureSelect.cc (source / functions) Coverage Total Hit
Test: coverage.info Lines: 100.0 % 44 44
Test Date: 2024-04-30 20:26:57 Functions: 100.0 % 8 8

            Line data    Source code
       1              : // ***************************************************************
       2              : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
       3              : // SPDX-FileType: SOURCE
       4              : // SPDX-License-Identifier: MIT
       5              : // ***************************************************************
       6              : 
       7              : #include <limits>
       8              : #include "bayesnet/utils/bayesnetUtils.h"
       9              : #include "FeatureSelect.h"
      10              : namespace bayesnet {
      11           46 :     FeatureSelect::FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
      12           46 :         Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights)
      13              : 
      14              :     {
      15           46 :     }
      16           32 :     void FeatureSelect::initialize()
      17              :     {
      18           32 :         selectedFeatures.clear();
      19           32 :         selectedScores.clear();
      20           32 :     }
      21          822 :     double FeatureSelect::symmetricalUncertainty(int a, int b)
      22              :     {
      23              :         /*
      24              :         Compute symmetrical uncertainty. Normalize* information gain (mutual
      25              :         information) with the entropies of the features in order to compensate for
      26              :         the bias due to high cardinality features. *Range [0, 1]
      27              :         (https://www.sciencedirect.com/science/article/pii/S0020025519303603)
      28              :         */
      29         2466 :         auto x = samples.index({ a, "..." });
      30         2466 :         auto y = samples.index({ b, "..." });
      31          822 :         auto mu = mutualInformation(x, y, weights);
      32          822 :         auto hx = entropy(x, weights);
      33          822 :         auto hy = entropy(y, weights);
      34          822 :         return 2.0 * mu / (hx + hy);
      35         2466 :     }
      36           32 :     void FeatureSelect::computeSuLabels()
      37              :     {
      38              :         // Compute Symmetrical Uncertainty between features and labels
      39              :         // https://en.wikipedia.org/wiki/Symmetric_uncertainty
      40          270 :         for (int i = 0; i < features.size(); ++i) {
      41          238 :             suLabels.push_back(symmetricalUncertainty(i, -1));
      42              :         }
      43           32 :     }
      44         1960 :     double FeatureSelect::computeSuFeatures(const int firstFeature, const int secondFeature)
      45              :     {
      46              :         // Compute Symmetrical Uncertainty between features
      47              :         // https://en.wikipedia.org/wiki/Symmetric_uncertainty
      48              :         try {
      49         1960 :             return suFeatures.at({ firstFeature, secondFeature });
      50              :         }
      51          584 :         catch (const std::out_of_range& e) {
      52          584 :             double result = symmetricalUncertainty(firstFeature, secondFeature);
      53          584 :             suFeatures[{firstFeature, secondFeature}] = result;
      54          584 :             return result;
      55          584 :         }
      56              :     }
      57          316 :     double FeatureSelect::computeMeritCFS()
      58              :     {
      59          316 :         double rcf = 0;
      60         1454 :         for (auto feature : selectedFeatures) {
      61         1138 :             rcf += suLabels[feature];
      62              :         }
      63          316 :         double rff = 0;
      64          316 :         int n = selectedFeatures.size();
      65         2086 :         for (const auto& item : doCombinations(selectedFeatures)) {
      66         1770 :             rff += computeSuFeatures(item.first, item.second);
      67          316 :         }
      68          316 :         return rcf / sqrt(n + (n * n - n) * rff);
      69              :     }
      70           36 :     std::vector<int> FeatureSelect::getFeatures() const
      71              :     {
      72           36 :         if (!fitted) {
      73            4 :             throw std::runtime_error("FeatureSelect not fitted");
      74              :         }
      75           32 :         return selectedFeatures;
      76              :     }
      77           36 :     std::vector<double> FeatureSelect::getScores() const
      78              :     {
      79           36 :         if (!fitted) {
      80            4 :             throw std::runtime_error("FeatureSelect not fitted");
      81              :         }
      82           32 :         return selectedScores;
      83              :     }
      84              : }
        

Generated by: LCOV version 2.0-1

</html>