Line data Source code
1 : // ***************************************************************
2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
3 : // SPDX-FileType: SOURCE
4 : // SPDX-License-Identifier: MIT
5 : // ***************************************************************
6 :
7 : #include <limits>
8 : #include "bayesnet/utils/bayesnetUtils.h"
9 : #include "FeatureSelect.h"
10 : namespace bayesnet {
11 154 : FeatureSelect::FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
12 154 : Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights)
13 :
14 : {
15 154 : }
16 108 : void FeatureSelect::initialize()
17 : {
18 108 : selectedFeatures.clear();
19 108 : selectedScores.clear();
20 108 : }
21 2751 : double FeatureSelect::symmetricalUncertainty(int a, int b)
22 : {
23 : /*
24 : Compute symmetrical uncertainty. Normalize* information gain (mutual
25 : information) with the entropies of the features in order to compensate
26 : the bias due to high cardinality features. *Range [0, 1]
27 : (https://www.sciencedirect.com/science/article/pii/S0020025519303603)
28 : */
29 8253 : auto x = samples.index({ a, "..." });
30 8253 : auto y = samples.index({ b, "..." });
31 2751 : auto mu = mutualInformation(x, y, weights);
32 2751 : auto hx = entropy(x, weights);
33 2751 : auto hy = entropy(y, weights);
34 2751 : return 2.0 * mu / (hx + hy);
35 8253 : }
36 108 : void FeatureSelect::computeSuLabels()
37 : {
38 : // Compute Simmetrical Uncertainty between features and labels
39 : // https://en.wikipedia.org/wiki/Symmetric_uncertainty
40 906 : for (int i = 0; i < features.size(); ++i) {
41 798 : suLabels.push_back(symmetricalUncertainty(i, -1));
42 : }
43 108 : }
44 6499 : double FeatureSelect::computeSuFeatures(const int firstFeature, const int secondFeature)
45 : {
46 : // Compute Simmetrical Uncertainty between features
47 : // https://en.wikipedia.org/wiki/Symmetric_uncertainty
48 : try {
49 6499 : return suFeatures.at({ firstFeature, secondFeature });
50 : }
51 1953 : catch (const std::out_of_range& e) {
52 1953 : double result = symmetricalUncertainty(firstFeature, secondFeature);
53 1953 : suFeatures[{firstFeature, secondFeature}] = result;
54 1953 : return result;
55 1953 : }
56 : }
57 1047 : double FeatureSelect::computeMeritCFS()
58 : {
59 1047 : double rcf = 0;
60 4816 : for (auto feature : selectedFeatures) {
61 3769 : rcf += suLabels[feature];
62 : }
63 1047 : double rff = 0;
64 1047 : int n = selectedFeatures.size();
65 6907 : for (const auto& item : doCombinations(selectedFeatures)) {
66 5860 : rff += computeSuFeatures(item.first, item.second);
67 1047 : }
68 1047 : return rcf / sqrt(n + (n * n - n) * rff);
69 : }
70 116 : std::vector<int> FeatureSelect::getFeatures() const
71 : {
72 116 : if (!fitted) {
73 8 : throw std::runtime_error("FeatureSelect not fitted");
74 : }
75 108 : return selectedFeatures;
76 : }
77 116 : std::vector<double> FeatureSelect::getScores() const
78 : {
79 116 : if (!fitted) {
80 8 : throw std::runtime_error("FeatureSelect not fitted");
81 : }
82 108 : return selectedScores;
83 : }
84 : }
|