Line data Source code
1 : // ***************************************************************
2 : // SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
3 : // SPDX-FileType: SOURCE
4 : // SPDX-License-Identifier: MIT
5 : // ***************************************************************
6 :
7 : #include <limits>
8 : #include "bayesnet/utils/bayesnetUtils.h"
9 : #include "CFS.h"
10 : namespace bayesnet {
11 70 : void CFS::fit()
12 : {
13 70 : initialize();
14 70 : computeSuLabels();
15 70 : auto featureOrder = argsort(suLabels); // sort descending order
16 70 : auto continueCondition = true;
17 70 : auto feature = featureOrder[0];
18 70 : selectedFeatures.push_back(feature);
19 70 : selectedScores.push_back(suLabels[feature]);
20 70 : featureOrder.erase(featureOrder.begin());
21 398 : while (continueCondition) {
22 328 : double merit = std::numeric_limits<double>::lowest();
23 328 : int bestFeature = -1;
24 1929 : for (auto feature : featureOrder) {
25 1601 : selectedFeatures.push_back(feature);
26 : // Compute merit with selectedFeatures
27 1601 : auto meritNew = computeMeritCFS();
28 1601 : if (meritNew > merit) {
29 663 : merit = meritNew;
30 663 : bestFeature = feature;
31 : }
32 1601 : selectedFeatures.pop_back();
33 : }
34 328 : if (bestFeature == -1) {
35 : // meritNew has to be nan due to constant features
36 0 : break;
37 : }
38 328 : selectedFeatures.push_back(bestFeature);
39 328 : selectedScores.push_back(merit);
40 328 : featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), bestFeature), featureOrder.end());
41 328 : continueCondition = computeContinueCondition(featureOrder);
42 : }
43 70 : fitted = true;
44 70 : }
45 328 : bool CFS::computeContinueCondition(const std::vector<int>& featureOrder)
46 : {
47 328 : if (selectedFeatures.size() == maxFeatures || featureOrder.size() == 0) {
48 11 : return false;
49 : }
50 317 : if (selectedScores.size() >= 5) {
51 : /*
52 : "To prevent the best first search from exploring the entire
53 : feature subset search space, a stopping criterion is imposed.
54 : The search will terminate if five consecutive fully expanded
55 : subsets show no improvement over the current best subset."
56 : as stated in Mark A.Hall Thesis
57 : */
58 118 : double item_ant = std::numeric_limits<double>::lowest();
59 118 : int num = 0;
60 118 : std::vector<double> lastFive(selectedScores.end() - 5, selectedScores.end());
61 472 : for (auto item : lastFive) {
62 413 : if (item_ant == std::numeric_limits<double>::lowest()) {
63 118 : item_ant = item;
64 : }
65 413 : if (item > item_ant) {
66 59 : break;
67 : } else {
68 354 : num++;
69 354 : item_ant = item;
70 : }
71 : }
72 118 : if (num == 5) {
73 59 : return false;
74 : }
75 118 : }
76 258 : return true;
77 : }
78 : }
|