Add L1FS feature selection
This commit is contained in:
@@ -20,7 +20,7 @@
|
||||
#include "bayesnet/ensembles/AODELd.h"
|
||||
#include "bayesnet/ensembles/BoostAODE.h"
|
||||
|
||||
const std::string ACTUAL_VERSION = "1.1.1";
|
||||
const std::string ACTUAL_VERSION = "1.1.2";
|
||||
|
||||
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||
{
|
||||
|
@@ -12,6 +12,7 @@
|
||||
#include "bayesnet/feature_selection/CFS.h"
|
||||
#include "bayesnet/feature_selection/FCBF.h"
|
||||
#include "bayesnet/feature_selection/IWSS.h"
|
||||
#include "bayesnet/feature_selection/L1FS.h"
|
||||
#include "TestUtils.h"
|
||||
|
||||
bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector, double threshold, int max_features = 0)
|
||||
@@ -23,14 +24,16 @@ bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector,
|
||||
return new bayesnet::FCBF(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
|
||||
} else if (selector == "IWSS") {
|
||||
return new bayesnet::IWSS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
|
||||
} else if (selector == "L1FS") {
|
||||
// For L1FS, threshold is used as alpha parameter
|
||||
return new bayesnet::L1FS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
TEST_CASE("Features Selected", "[FeatureSelection]")
|
||||
{
|
||||
// std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
|
||||
std::string file_name = GENERATE("ecoli");
|
||||
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
|
||||
|
||||
auto raw = RawDatasets(file_name, true);
|
||||
|
||||
@@ -48,14 +51,19 @@ TEST_CASE("Features Selected", "[FeatureSelection]")
|
||||
{ {"glass", "FCBF" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.304911, 0.302109, 0.281621, 0.253297} } },
|
||||
{ {"iris", "FCBF"}, {{ 3, 2 }, {0.870521, 0.816401} }},
|
||||
{ {"ecoli", "FCBF"}, {{ 5, 0, 1, 4, 2 }, {0.512319, 0.350406, 0.260905, 0.203132, 0.11229} }},
|
||||
{ {"diabetes", "FCBF"}, {{ 1, 5, 7, 6 }, {0.132858, 0.083191, 0.0480135, 0.0224186} }}
|
||||
{ {"diabetes", "FCBF"}, {{ 1, 5, 7, 6 }, {0.132858, 0.083191, 0.0480135, 0.0224186} }},
|
||||
{ {"glass", "L1FS" }, { { 2, 3, 5}, { 0.365513, 0.304911, 0.302109 } } },
|
||||
{ {"iris", "L1FS"}, {{ 3, 2, 1, 0 }, { 0.570928, 0.37569, 0.0774792, 0.00835904 }}},
|
||||
{ {"ecoli", "L1FS"}, {{ 0, 1, 6, 5, 2, 3 }, {0.490179, 0.365944, 0.291177, 0.199171, 0.0400928, 0.0192575} }},
|
||||
{ {"diabetes", "L1FS"}, {{ 1, 5, 4 }, {0.132858, 0.083191, 0.0486187} }}
|
||||
};
|
||||
double threshold;
|
||||
std::string selector;
|
||||
std::vector<std::pair<std::string, double>> selectors = {
|
||||
{ "CFS", 0.0 },
|
||||
{ "IWSS", 0.1 },
|
||||
{ "FCBF", 1e-7 }
|
||||
{ "FCBF", 1e-7 },
|
||||
{ "L1FS", 0.01 }
|
||||
};
|
||||
for (const auto item : selectors) {
|
||||
selector = item.first; threshold = item.second;
|
||||
@@ -77,17 +85,144 @@ TEST_CASE("Features Selected", "[FeatureSelection]")
|
||||
delete featureSelector;
|
||||
}
|
||||
}
|
||||
SECTION("Test L1FS")
|
||||
{
|
||||
bayesnet::L1FS* featureSelector = new bayesnet::L1FS(
|
||||
raw.dataset, raw.features, raw.className,
|
||||
raw.features.size(), raw.classNumStates, raw.weights,
|
||||
0.01, 1000, 1e-4, true
|
||||
);
|
||||
featureSelector->fit();
|
||||
|
||||
std::vector<int> selected_features = featureSelector->getFeatures();
|
||||
std::vector<double> selected_scores = featureSelector->getScores();
|
||||
|
||||
// Check if features are selected
|
||||
REQUIRE(selected_features.size() > 0);
|
||||
REQUIRE(selected_scores.size() == selected_features.size());
|
||||
|
||||
// Scores should be non-negative (absolute coefficient values)
|
||||
for (double score : selected_scores) {
|
||||
REQUIRE(score >= 0.0);
|
||||
}
|
||||
|
||||
// Scores should be in descending order
|
||||
// std::cout << file_name << " " << selected_features << std::endl << "{";
|
||||
for (size_t i = 1; i < selected_scores.size(); i++) {
|
||||
// std::cout << selected_scores[i - 1] << ", ";
|
||||
REQUIRE(selected_scores[i - 1] >= selected_scores[i]);
|
||||
}
|
||||
// std::cout << selected_scores[selected_scores.size() - 1];
|
||||
// std::cout << "}" << std::endl;
|
||||
delete featureSelector;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("L1FS Features Selected", "[FeatureSelection]")
|
||||
{
|
||||
auto raw = RawDatasets("ecoli", true);
|
||||
|
||||
SECTION("Test L1FS with different alpha values")
|
||||
{
|
||||
std::vector<double> alphas = { 0.01, 0.1, 0.5 };
|
||||
|
||||
for (double alpha : alphas) {
|
||||
bayesnet::L1FS* featureSelector = new bayesnet::L1FS(
|
||||
raw.dataset, raw.features, raw.className,
|
||||
raw.features.size(), raw.classNumStates, raw.weights,
|
||||
alpha, 1000, 1e-4, true
|
||||
);
|
||||
featureSelector->fit();
|
||||
|
||||
INFO("Alpha: " << alpha);
|
||||
|
||||
std::vector<int> selected_features = featureSelector->getFeatures();
|
||||
std::vector<double> selected_scores = featureSelector->getScores();
|
||||
|
||||
// Higher alpha should lead to fewer features
|
||||
REQUIRE(selected_features.size() > 0);
|
||||
REQUIRE(selected_features.size() <= raw.features.size());
|
||||
REQUIRE(selected_scores.size() == selected_features.size());
|
||||
|
||||
// Scores should be non-negative (absolute coefficient values)
|
||||
for (double score : selected_scores) {
|
||||
REQUIRE(score >= 0.0);
|
||||
}
|
||||
|
||||
// Scores should be in descending order
|
||||
for (size_t i = 1; i < selected_scores.size(); i++) {
|
||||
REQUIRE(selected_scores[i - 1] >= selected_scores[i]);
|
||||
}
|
||||
|
||||
delete featureSelector;
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Test L1FS with max features limit")
|
||||
{
|
||||
int max_features = 2;
|
||||
bayesnet::L1FS* featureSelector = new bayesnet::L1FS(
|
||||
raw.dataset, raw.features, raw.className,
|
||||
max_features, raw.classNumStates, raw.weights,
|
||||
0.1, 1000, 1e-4, true
|
||||
);
|
||||
featureSelector->fit();
|
||||
|
||||
std::vector<int> selected_features = featureSelector->getFeatures();
|
||||
REQUIRE(selected_features.size() <= max_features);
|
||||
|
||||
delete featureSelector;
|
||||
}
|
||||
|
||||
SECTION("Test L1FS getCoefficients method")
|
||||
{
|
||||
bayesnet::L1FS* featureSelector = new bayesnet::L1FS(
|
||||
raw.dataset, raw.features, raw.className,
|
||||
raw.features.size(), raw.classNumStates, raw.weights,
|
||||
0.1, 1000, 1e-4, true
|
||||
);
|
||||
|
||||
// Should throw before fitting
|
||||
REQUIRE_THROWS_AS(featureSelector->getCoefficients(), std::runtime_error);
|
||||
REQUIRE_THROWS_WITH(featureSelector->getCoefficients(), "L1FS not fitted");
|
||||
|
||||
featureSelector->fit();
|
||||
|
||||
// Should work after fitting
|
||||
auto coefficients = featureSelector->getCoefficients();
|
||||
REQUIRE(coefficients.size() == raw.features.size());
|
||||
|
||||
delete featureSelector;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Oddities", "[FeatureSelection]")
|
||||
{
|
||||
auto raw = RawDatasets("iris", true);
|
||||
|
||||
// FCBF Limits
|
||||
REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7");
|
||||
|
||||
// IWSS Limits
|
||||
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]");
|
||||
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]");
|
||||
|
||||
// L1FS Limits
|
||||
REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -0.1), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -0.1), "Alpha (regularization strength) must be non-negative");
|
||||
|
||||
REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 0), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 0), "Maximum iterations must be positive");
|
||||
|
||||
REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, 0.0), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, 0.0), "Tolerance must be positive");
|
||||
|
||||
REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, -1e-4), std::invalid_argument);
|
||||
REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, -1e-4), "Tolerance must be positive");
|
||||
|
||||
// Not fitted error
|
||||
auto selector = build_selector(raw, "CFS", 0);
|
||||
const std::string message = "FeatureSelect not fitted";
|
||||
@@ -97,6 +232,7 @@ TEST_CASE("Oddities", "[FeatureSelection]")
|
||||
REQUIRE_THROWS_WITH(selector->getScores(), message);
|
||||
delete selector;
|
||||
}
|
||||
|
||||
TEST_CASE("Test threshold limits", "[FeatureSelection]")
|
||||
{
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
@@ -113,4 +249,77 @@ TEST_CASE("Test threshold limits", "[FeatureSelection]")
|
||||
selector->fit();
|
||||
REQUIRE(selector->getFeatures().size() == 5);
|
||||
delete selector;
|
||||
|
||||
// L1FS with different alpha values
|
||||
selector = build_selector(raw, "L1FS", 0.01); // Low alpha - more features
|
||||
selector->fit();
|
||||
int num_features_low_alpha = selector->getFeatures().size();
|
||||
delete selector;
|
||||
|
||||
selector = build_selector(raw, "L1FS", 0.9); // High alpha - fewer features
|
||||
selector->fit();
|
||||
int num_features_high_alpha = selector->getFeatures().size();
|
||||
REQUIRE(num_features_high_alpha <= num_features_low_alpha);
|
||||
delete selector;
|
||||
|
||||
// L1FS with max features limit
|
||||
selector = build_selector(raw, "L1FS", 0.01, 4);
|
||||
selector->fit();
|
||||
REQUIRE(selector->getFeatures().size() <= 4);
|
||||
delete selector;
|
||||
}
|
||||
|
||||
TEST_CASE("L1FS Regression vs Classification", "[FeatureSelection]")
|
||||
{
|
||||
SECTION("Regression Task")
|
||||
{
|
||||
auto raw = RawDatasets("diabetes", true);
|
||||
// diabetes dataset should be treated as regression (classNumStates > 2)
|
||||
bayesnet::L1FS* l1fs = new bayesnet::L1FS(
|
||||
raw.dataset, raw.features, raw.className,
|
||||
raw.features.size(), raw.classNumStates, raw.weights,
|
||||
0.1, 1000, 1e-4, true
|
||||
);
|
||||
l1fs->fit();
|
||||
|
||||
auto features = l1fs->getFeatures();
|
||||
REQUIRE(features.size() > 0);
|
||||
|
||||
delete l1fs;
|
||||
}
|
||||
|
||||
SECTION("Binary Classification Task")
|
||||
{
|
||||
// Create a simple binary classification dataset
|
||||
int n_samples = 100;
|
||||
int n_features = 5;
|
||||
|
||||
torch::Tensor X = torch::randn({ n_features, n_samples });
|
||||
torch::Tensor y = (X[0] + X[2] > 0).to(torch::kFloat32);
|
||||
torch::Tensor samples = torch::cat({ X, y.unsqueeze(0) }, 0);
|
||||
|
||||
std::vector<std::string> features;
|
||||
for (int i = 0; i < n_features; ++i) {
|
||||
features.push_back("feature_" + std::to_string(i));
|
||||
}
|
||||
|
||||
torch::Tensor weights = torch::ones({ n_samples });
|
||||
|
||||
bayesnet::L1FS* l1fs = new bayesnet::L1FS(
|
||||
samples, features, "target",
|
||||
n_features, 2, weights, // 2 states = binary classification
|
||||
0.1, 1000, 1e-4, true
|
||||
);
|
||||
l1fs->fit();
|
||||
|
||||
auto selected_features = l1fs->getFeatures();
|
||||
REQUIRE(selected_features.size() > 0);
|
||||
|
||||
// Features 0 and 2 should be among the top selected
|
||||
bool has_feature_0 = std::find(selected_features.begin(), selected_features.end(), 0) != selected_features.end();
|
||||
bool has_feature_2 = std::find(selected_features.begin(), selected_features.end(), 2) != selected_features.end();
|
||||
REQUIRE((has_feature_0 || has_feature_2));
|
||||
|
||||
delete l1fs;
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user