BayesNet/bayesnet/feature_selection/CFS.cc

78 lines
2.9 KiB
C++
Raw Normal View History

2024-04-11 16:02:49 +00:00
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************
2023-10-11 19:17:26 +00:00
#include <algorithm>
#include <limits>
#include "bayesnet/utils/bayesnetUtils.h"
#include "CFS.h"
2023-10-11 19:17:26 +00:00
namespace bayesnet {
void CFS::fit()
{
initialize();
2023-10-11 19:17:26 +00:00
computeSuLabels();
auto featureOrder = argsort(suLabels); // sort descending order
auto continueCondition = true;
auto feature = featureOrder[0];
selectedFeatures.push_back(feature);
selectedScores.push_back(suLabels[feature]);
2024-04-02 20:53:00 +00:00
featureOrder.erase(featureOrder.begin());
2023-10-11 19:17:26 +00:00
while (continueCondition) {
2023-11-08 17:45:35 +00:00
double merit = std::numeric_limits<double>::lowest();
2023-10-11 19:17:26 +00:00
int bestFeature = -1;
for (auto feature : featureOrder) {
selectedFeatures.push_back(feature);
// Compute merit with selectedFeatures
auto meritNew = computeMeritCFS();
2023-10-11 19:17:26 +00:00
if (meritNew > merit) {
merit = meritNew;
bestFeature = feature;
}
selectedFeatures.pop_back();
2023-10-11 19:17:26 +00:00
}
2023-10-13 10:29:25 +00:00
if (bestFeature == -1) {
2023-10-13 11:46:22 +00:00
// meritNew has to be nan due to constant features
break;
2023-10-13 10:29:25 +00:00
}
selectedFeatures.push_back(bestFeature);
selectedScores.push_back(merit);
2023-10-13 10:29:25 +00:00
featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), bestFeature), featureOrder.end());
2023-10-11 19:17:26 +00:00
continueCondition = computeContinueCondition(featureOrder);
}
fitted = true;
}
2023-11-08 17:45:35 +00:00
bool CFS::computeContinueCondition(const std::vector<int>& featureOrder)
2023-10-11 19:17:26 +00:00
{
if (selectedFeatures.size() == maxFeatures || featureOrder.size() == 0) {
2023-10-11 19:17:26 +00:00
return false;
}
if (selectedScores.size() >= 5) {
2023-10-11 19:17:26 +00:00
/*
"To prevent the best first search from exploring the entire
feature subset search space, a stopping criterion is imposed.
The search will terminate if five consecutive fully expanded
subsets show no improvement over the current best subset."
as stated in Mark A.Hall Thesis
*/
2023-11-08 17:45:35 +00:00
double item_ant = std::numeric_limits<double>::lowest();
2023-10-11 19:17:26 +00:00
int num = 0;
2023-11-08 17:45:35 +00:00
std::vector<double> lastFive(selectedScores.end() - 5, selectedScores.end());
2023-10-11 19:17:26 +00:00
for (auto item : lastFive) {
2023-11-08 17:45:35 +00:00
if (item_ant == std::numeric_limits<double>::lowest()) {
2023-10-11 19:17:26 +00:00
item_ant = item;
}
if (item > item_ant) {
break;
} else {
num++;
item_ant = item;
}
}
if (num == 5) {
return false;
}
}
return true;
}
}