Add L1FS feature selection
This commit is contained in:
@@ -14,10 +14,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
- Fix CFS metric expression in the FeatureSelection class.
|
- Fix CFS metric expression in the FeatureSelection class.
|
||||||
- Fix the vcpkg configuration in building the library.
|
- Fix the vcpkg configuration in building the library.
|
||||||
- Fix the sample app to use the vcpkg configuration.
|
- Fix the sample app to use the vcpkg configuration.
|
||||||
- Add predict_proba method to all Ld classifiers.
|
|
||||||
- Refactor the computeCPT method in the Node class with libtorch vectorized operations.
|
- Refactor the computeCPT method in the Node class with libtorch vectorized operations.
|
||||||
- Refactor the sample to use local discretization models.
|
- Refactor the sample to use local discretization models.
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Add predict_proba method to all Ld classifiers.
|
||||||
|
- Add L1FS feature selection methods to the FeatureSelection class.
|
||||||
|
|
||||||
## [1.1.0] - 2025-04-27
|
## [1.1.0] - 2025-04-27
|
||||||
|
|
||||||
### Internal
|
### Internal
|
||||||
|
279
bayesnet/feature_selection/L1FS.cc
Normal file
279
bayesnet/feature_selection/L1FS.cc
Normal file
@@ -0,0 +1,279 @@
|
|||||||
|
// ***************************************************************
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
|
||||||
|
// SPDX-FileType: SOURCE
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
// ***************************************************************
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cmath>
|
||||||
|
#include <numeric>
|
||||||
|
#include "bayesnet/utils/bayesnetUtils.h"
|
||||||
|
#include "L1FS.h"
|
||||||
|
|
||||||
|
namespace bayesnet {
|
||||||
|
using namespace torch::indexing;
|
||||||
|
|
||||||
|
/**
 * Builds an L1-regularized feature selector.
 *
 * The dataset description is forwarded unchanged to FeatureSelect; the
 * optimizer settings are stored and then validated in the order
 * alpha, maxIter, tolerance.
 *
 * @throws std::invalid_argument when alpha is negative, when maxIter is
 *         smaller than 1, or when tolerance is not strictly positive.
 */
L1FS::L1FS(const torch::Tensor& samples,
    const std::vector<std::string>& features,
    const std::string& className,
    const int maxFeatures,
    const int classNumStates,
    const torch::Tensor& weights,
    const double alpha,
    const int maxIter,
    const double tolerance,
    const bool fitIntercept)
    : FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights),
    alpha(alpha), maxIter(maxIter), tolerance(tolerance), fitIntercept(fitIntercept)
{
    // Single place that raises the argument errors below.
    const auto reject = [](const char* reason) {
        throw std::invalid_argument(reason);
    };

    if (alpha < 0) reject("Alpha (regularization strength) must be non-negative");
    if (maxIter < 1) reject("Maximum iterations must be positive");
    if (tolerance <= 0) reject("Tolerance must be positive");

    // Task selection: more than two class states, or zero states, goes down
    // the Lasso (regression) path; every other value uses the logistic path.
    const bool manyStates = classNumStates > 2;
    const bool noStates = classNumStates == 0;
    isRegression = manyStates || noStates;
}
|
||||||
|
|
||||||
|
void L1FS::fit()
|
||||||
|
{
|
||||||
|
initialize();
|
||||||
|
|
||||||
|
// Prepare data
|
||||||
|
int n_samples = samples.size(1);
|
||||||
|
int n_features = features.size();
|
||||||
|
|
||||||
|
// Extract features (all rows except last)
|
||||||
|
auto X = samples.index({ Slice(0, n_features), Slice() }).t().contiguous();
|
||||||
|
|
||||||
|
// Extract labels (last row)
|
||||||
|
auto y = samples.index({ -1, Slice() }).contiguous();
|
||||||
|
|
||||||
|
// Convert to float for numerical operations
|
||||||
|
X = X.to(torch::kFloat32);
|
||||||
|
y = y.to(torch::kFloat32);
|
||||||
|
|
||||||
|
// Normalize features for better convergence
|
||||||
|
auto X_mean = X.mean(0);
|
||||||
|
auto X_std = X.std(0);
|
||||||
|
X_std = torch::where(X_std == 0, torch::ones_like(X_std), X_std);
|
||||||
|
X = (X - X_mean) / X_std;
|
||||||
|
|
||||||
|
if (isRegression) {
|
||||||
|
// Normalize y for regression
|
||||||
|
auto y_mean = y.mean();
|
||||||
|
auto y_std = y.std();
|
||||||
|
if (y_std.item<double>() > 0) {
|
||||||
|
y = (y - y_mean) / y_std;
|
||||||
|
}
|
||||||
|
fitLasso(X, y, weights);
|
||||||
|
} else {
|
||||||
|
// For binary classification
|
||||||
|
fitL1Logistic(X, y, weights);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Select features based on non-zero coefficients
|
||||||
|
std::vector<std::pair<int, double>> featureImportance;
|
||||||
|
for (int i = 0; i < n_features; ++i) {
|
||||||
|
double coef_magnitude = std::abs(coefficients[i]);
|
||||||
|
if (coef_magnitude > 1e-10) { // Threshold for numerical zero
|
||||||
|
featureImportance.push_back({ i, coef_magnitude });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If all coefficients are zero (high regularization), select based on original feature-class correlation
|
||||||
|
if (featureImportance.empty() && maxFeatures > 0) {
|
||||||
|
// Compute SU with labels as fallback
|
||||||
|
computeSuLabels();
|
||||||
|
auto featureOrder = argsort(suLabels);
|
||||||
|
|
||||||
|
// Select top features by SU score
|
||||||
|
int numToSelect = std::min(static_cast<int>(featureOrder.size()),
|
||||||
|
std::min(maxFeatures, 3)); // At most 3 features as fallback
|
||||||
|
|
||||||
|
for (int i = 0; i < numToSelect; ++i) {
|
||||||
|
selectedFeatures.push_back(featureOrder[i]);
|
||||||
|
selectedScores.push_back(suLabels[featureOrder[i]]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Sort by importance (absolute coefficient value)
|
||||||
|
std::sort(featureImportance.begin(), featureImportance.end(),
|
||||||
|
[](const auto& a, const auto& b) { return a.second > b.second; });
|
||||||
|
|
||||||
|
// Select top features up to maxFeatures
|
||||||
|
int numToSelect = std::min(static_cast<int>(featureImportance.size()),
|
||||||
|
maxFeatures);
|
||||||
|
|
||||||
|
for (int i = 0; i < numToSelect; ++i) {
|
||||||
|
selectedFeatures.push_back(featureImportance[i].first);
|
||||||
|
selectedScores.push_back(featureImportance[i].second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fitted = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Fits a weighted Lasso model by cyclic coordinate descent with
 * soft-thresholding and stores one weight per column of X in `coefficients`.
 *
 * @param X             float tensor, one row per sample and one column per feature
 * @param y             float tensor with one target value per sample
 * @param sampleWeights per-sample weights; converted to float32 before use
 *
 * The loop stops after maxIter sweeps, or earlier once the largest change of
 * any coefficient (and of the intercept, when fitted) in a sweep is below
 * `tolerance`. The intercept is a local quantity and is not stored.
 */
void L1FS::fitLasso(const torch::Tensor& X, const torch::Tensor& y,
    const torch::Tensor& sampleWeights)
{
    const int n_features = static_cast<int>(X.size(1));

    // assign() rather than resize(): resize() leaves existing elements
    // untouched, so a repeated fit would start from stale coefficients.
    coefficients.assign(n_features, 0.0);
    double intercept = 0.0;

    // Ensure consistent types
    const torch::Tensor weights = sampleWeights.to(torch::kFloat32);
    const double weightSum = weights.sum().item<float>();

    // Residuals of the current model: y - intercept - X * coefficients
    torch::Tensor residuals = y.clone();
    if (fitIntercept) {
        intercept = (y * weights).sum().item<float>() / weightSum;
        residuals = y - intercept;
    }

    // Weighted squared norm of every column, computed once.
    std::vector<double> featureNorms(n_features);
    for (int j = 0; j < n_features; ++j) {
        const auto Xj = X.index({ Slice(), j });
        featureNorms[j] = (Xj * Xj * weights).sum().item<float>();
    }

    for (int iter = 0; iter < maxIter; ++iter) {
        double maxChange = 0.0;

        for (int j = 0; j < n_features; ++j) {
            // A column with zero weighted norm (e.g. a constant feature after
            // centring) cannot explain anything. Dividing by its norm would
            // give 0/0 = NaN and poison every residual, so its coefficient
            // is left at zero.
            if (!(featureNorms[j] > 0.0)) {
                continue;
            }

            const auto Xj = X.index({ Slice(), j });
            const double oldCoef = coefficients[j];

            // Residuals with feature j's current contribution added back.
            const torch::Tensor partialResiduals = residuals + oldCoef * Xj;

            // Weighted correlation of feature j with those residuals.
            const double rho = (Xj * partialResiduals * weights).sum().item<float>();

            coefficients[j] = softThreshold(rho, alpha) / featureNorms[j];
            residuals = partialResiduals - coefficients[j] * Xj;

            maxChange = std::max(maxChange, std::abs(coefficients[j] - oldCoef));
        }

        if (fitIntercept) {
            // The residuals already have the current intercept removed, so
            // their weighted mean is the correction to apply, not the new
            // intercept itself.
            const double shift = (residuals * weights).sum().item<float>() / weightSum;
            intercept += shift;
            residuals = residuals - shift;
            maxChange = std::max(maxChange, std::abs(shift));
        }

        // Check convergence
        if (maxChange < tolerance) {
            break;
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Fits an L1-penalized logistic model by proximal gradient descent and
 * copies the final weights into `coefficients`.
 *
 * @param X             float tensor, one row per sample and one column per feature
 * @param y             float tensor with one label per sample
 * @param sampleWeights per-sample weights; converted to float32 before use
 *
 * Each iteration takes a gradient step on the weighted logistic loss and
 * soft-thresholds every coefficient by (step size * alpha). The step size
 * starts at 0.01 and is multiplied by 0.9 whenever the iteration index is a
 * multiple of 100 (index 0 included). The loop ends after maxIter
 * iterations or once the largest coefficient change is below `tolerance`.
 */
void L1FS::fitL1Logistic(const torch::Tensor& X, const torch::Tensor& y,
    const torch::Tensor& sampleWeights)
{
    int sampleCount = X.size(0);
    int featureCount = X.size(1);

    torch::Tensor beta = torch::zeros({ featureCount }, torch::kFloat32);
    double bias = 0.0;

    // Ensure consistent types
    torch::Tensor w = sampleWeights.to(torch::kFloat32);

    double step = 0.01;

    for (int it = 0; it < maxIter; ++it) {
        // Current predicted probabilities
        torch::Tensor logits = X.matmul(beta);
        if (fitIntercept) {
            logits = logits + bias;
        }
        torch::Tensor probabilities = sigmoid(logits);

        // Weighted prediction error, shared by both gradients below.
        torch::Tensor error = probabilities - y;
        torch::Tensor weightedError = error * w;

        // NOTE(review): the gradient is divided by the sample count whatever
        // the weights sum to — confirm this is the intended scaling.
        torch::Tensor gradient = X.t().matmul(weightedError) / sampleCount;

        // Plain gradient step, then the proximal (soft-threshold) step.
        torch::Tensor candidate = beta - step * gradient;
        const double shrinkage = step * alpha;
        for (int j = 0; j < featureCount; ++j) {
            candidate[j] = softThreshold(candidate[j].item<float>(), shrinkage);
        }

        if (fitIntercept) {
            double biasGradient = weightedError.sum().item<float>() / sampleCount;
            bias -= step * biasGradient;
        }

        // Largest coefficient movement in this iteration.
        double movement = (candidate - beta).abs().max().item<float>();
        beta = candidate;
        if (movement < tolerance) {
            break;
        }

        // Step-size decay
        if (it % 100 == 0) {
            step *= 0.9;
        }
    }

    // Store final coefficients
    coefficients.resize(featureCount);
    for (int j = 0; j < featureCount; ++j) {
        coefficients[j] = beta[j].item<float>();
    }
}
|
||||||
|
|
||||||
|
double L1FS::softThreshold(double x, double lambda) const
|
||||||
|
{
|
||||||
|
if (x > lambda) {
|
||||||
|
return x - lambda;
|
||||||
|
} else if (x < -lambda) {
|
||||||
|
return x + lambda;
|
||||||
|
} else {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Element-wise logistic function, 1 / (1 + exp(-z)).
 *
 * @param z input tensor
 * @return tensor holding the logistic function of every element of z
 */
torch::Tensor L1FS::sigmoid(const torch::Tensor& z) const
{
    const torch::Tensor denominator = 1.0 + torch::exp(-z);
    return 1.0 / denominator;
}
|
||||||
|
|
||||||
|
std::vector<double> L1FS::getCoefficients() const
|
||||||
|
{
|
||||||
|
if (!fitted) {
|
||||||
|
throw std::runtime_error("L1FS not fitted");
|
||||||
|
}
|
||||||
|
return coefficients;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace bayesnet
|
83
bayesnet/feature_selection/L1FS.h
Normal file
83
bayesnet/feature_selection/L1FS.h
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
// ***************************************************************
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
|
||||||
|
// SPDX-FileType: SOURCE
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
// ***************************************************************
|
||||||
|
|
||||||
|
#ifndef L1FS_H
|
||||||
|
#define L1FS_H
|
||||||
|
#include <torch/torch.h>
|
||||||
|
#include <vector>
|
||||||
|
#include "bayesnet/feature_selection/FeatureSelect.h"
|
||||||
|
|
||||||
|
namespace bayesnet {
|
||||||
|
/**
|
||||||
|
* L1-Regularized Feature Selection (L1FS)
|
||||||
|
*
|
||||||
|
* This class implements feature selection using L1-regularized linear models.
|
||||||
|
* For classification tasks, it uses one-vs-rest logistic regression with L1 penalty.
|
||||||
|
* For regression tasks, it uses Lasso regression.
|
||||||
|
*
|
||||||
|
* The L1 penalty induces sparsity in the model coefficients, effectively
|
||||||
|
* performing feature selection by setting irrelevant feature weights to zero.
|
||||||
|
*/
|
||||||
|
class L1FS : public FeatureSelect {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Constructor for L1FS
|
||||||
|
* @param samples n+1xm tensor where samples[-1] is the target variable
|
||||||
|
* @param features vector of feature names
|
||||||
|
* @param className name of the class/target variable
|
||||||
|
* @param maxFeatures maximum number of features to select (0 = all)
|
||||||
|
* @param classNumStates number of states for classification (ignored for regression)
|
||||||
|
* @param weights sample weights
|
||||||
|
* @param alpha L1 regularization strength (higher = more sparsity)
|
||||||
|
* @param maxIter maximum iterations for optimization
|
||||||
|
* @param tolerance convergence tolerance
|
||||||
|
* @param fitIntercept whether to fit an intercept term
|
||||||
|
*/
|
||||||
|
L1FS(const torch::Tensor& samples,
|
||||||
|
const std::vector<std::string>& features,
|
||||||
|
const std::string& className,
|
||||||
|
const int maxFeatures,
|
||||||
|
const int classNumStates,
|
||||||
|
const torch::Tensor& weights,
|
||||||
|
const double alpha = 1.0,
|
||||||
|
const int maxIter = 1000,
|
||||||
|
const double tolerance = 1e-4,
|
||||||
|
const bool fitIntercept = true);
|
||||||
|
|
||||||
|
virtual ~L1FS() {};
|
||||||
|
|
||||||
|
void fit() override;
|
||||||
|
|
||||||
|
// Get the learned coefficients for each feature
|
||||||
|
std::vector<double> getCoefficients() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
double alpha; // L1 regularization strength
|
||||||
|
int maxIter; // Maximum iterations for optimization
|
||||||
|
double tolerance; // Convergence tolerance
|
||||||
|
bool fitIntercept; // Whether to fit intercept
|
||||||
|
bool isRegression; // Task type (regression vs classification)
|
||||||
|
|
||||||
|
std::vector<double> coefficients; // Learned coefficients
|
||||||
|
|
||||||
|
// Coordinate descent for Lasso regression
|
||||||
|
void fitLasso(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& sampleWeights);
|
||||||
|
|
||||||
|
// Proximal gradient descent for L1-regularized logistic regression
|
||||||
|
void fitL1Logistic(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& sampleWeights);
|
||||||
|
|
||||||
|
// Soft thresholding operator for L1 regularization
|
||||||
|
double softThreshold(double x, double lambda) const;
|
||||||
|
|
||||||
|
// Logistic function
|
||||||
|
torch::Tensor sigmoid(const torch::Tensor& z) const;
|
||||||
|
|
||||||
|
// Compute logistic loss
|
||||||
|
double logisticLoss(const torch::Tensor& X, const torch::Tensor& y,
|
||||||
|
const torch::Tensor& coef, const torch::Tensor& sampleWeights) const;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
#endif
|
@@ -20,7 +20,7 @@
|
|||||||
#include "bayesnet/ensembles/AODELd.h"
|
#include "bayesnet/ensembles/AODELd.h"
|
||||||
#include "bayesnet/ensembles/BoostAODE.h"
|
#include "bayesnet/ensembles/BoostAODE.h"
|
||||||
|
|
||||||
const std::string ACTUAL_VERSION = "1.1.1";
|
const std::string ACTUAL_VERSION = "1.1.2";
|
||||||
|
|
||||||
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
|
||||||
{
|
{
|
||||||
|
@@ -12,6 +12,7 @@
|
|||||||
#include "bayesnet/feature_selection/CFS.h"
|
#include "bayesnet/feature_selection/CFS.h"
|
||||||
#include "bayesnet/feature_selection/FCBF.h"
|
#include "bayesnet/feature_selection/FCBF.h"
|
||||||
#include "bayesnet/feature_selection/IWSS.h"
|
#include "bayesnet/feature_selection/IWSS.h"
|
||||||
|
#include "bayesnet/feature_selection/L1FS.h"
|
||||||
#include "TestUtils.h"
|
#include "TestUtils.h"
|
||||||
|
|
||||||
bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector, double threshold, int max_features = 0)
|
bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector, double threshold, int max_features = 0)
|
||||||
@@ -23,14 +24,16 @@ bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector,
|
|||||||
return new bayesnet::FCBF(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
|
return new bayesnet::FCBF(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
|
||||||
} else if (selector == "IWSS") {
|
} else if (selector == "IWSS") {
|
||||||
return new bayesnet::IWSS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
|
return new bayesnet::IWSS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
|
||||||
|
} else if (selector == "L1FS") {
|
||||||
|
// For L1FS, threshold is used as alpha parameter
|
||||||
|
return new bayesnet::L1FS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Features Selected", "[FeatureSelection]")
|
TEST_CASE("Features Selected", "[FeatureSelection]")
|
||||||
{
|
{
|
||||||
// std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
|
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
|
||||||
std::string file_name = GENERATE("ecoli");
|
|
||||||
|
|
||||||
auto raw = RawDatasets(file_name, true);
|
auto raw = RawDatasets(file_name, true);
|
||||||
|
|
||||||
@@ -48,14 +51,19 @@ TEST_CASE("Features Selected", "[FeatureSelection]")
|
|||||||
{ {"glass", "FCBF" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.304911, 0.302109, 0.281621, 0.253297} } },
|
{ {"glass", "FCBF" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.304911, 0.302109, 0.281621, 0.253297} } },
|
||||||
{ {"iris", "FCBF"}, {{ 3, 2 }, {0.870521, 0.816401} }},
|
{ {"iris", "FCBF"}, {{ 3, 2 }, {0.870521, 0.816401} }},
|
||||||
{ {"ecoli", "FCBF"}, {{ 5, 0, 1, 4, 2 }, {0.512319, 0.350406, 0.260905, 0.203132, 0.11229} }},
|
{ {"ecoli", "FCBF"}, {{ 5, 0, 1, 4, 2 }, {0.512319, 0.350406, 0.260905, 0.203132, 0.11229} }},
|
||||||
{ {"diabetes", "FCBF"}, {{ 1, 5, 7, 6 }, {0.132858, 0.083191, 0.0480135, 0.0224186} }}
|
{ {"diabetes", "FCBF"}, {{ 1, 5, 7, 6 }, {0.132858, 0.083191, 0.0480135, 0.0224186} }},
|
||||||
|
{ {"glass", "L1FS" }, { { 2, 3, 5}, { 0.365513, 0.304911, 0.302109 } } },
|
||||||
|
{ {"iris", "L1FS"}, {{ 3, 2, 1, 0 }, { 0.570928, 0.37569, 0.0774792, 0.00835904 }}},
|
||||||
|
{ {"ecoli", "L1FS"}, {{ 0, 1, 6, 5, 2, 3 }, {0.490179, 0.365944, 0.291177, 0.199171, 0.0400928, 0.0192575} }},
|
||||||
|
{ {"diabetes", "L1FS"}, {{ 1, 5, 4 }, {0.132858, 0.083191, 0.0486187} }}
|
||||||
};
|
};
|
||||||
double threshold;
|
double threshold;
|
||||||
std::string selector;
|
std::string selector;
|
||||||
std::vector<std::pair<std::string, double>> selectors = {
|
std::vector<std::pair<std::string, double>> selectors = {
|
||||||
{ "CFS", 0.0 },
|
{ "CFS", 0.0 },
|
||||||
{ "IWSS", 0.1 },
|
{ "IWSS", 0.1 },
|
||||||
{ "FCBF", 1e-7 }
|
{ "FCBF", 1e-7 },
|
||||||
|
{ "L1FS", 0.01 }
|
||||||
};
|
};
|
||||||
for (const auto item : selectors) {
|
for (const auto item : selectors) {
|
||||||
selector = item.first; threshold = item.second;
|
selector = item.first; threshold = item.second;
|
||||||
@@ -77,17 +85,144 @@ TEST_CASE("Features Selected", "[FeatureSelection]")
|
|||||||
delete featureSelector;
|
delete featureSelector;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
SECTION("Test L1FS")
|
||||||
|
{
|
||||||
|
bayesnet::L1FS* featureSelector = new bayesnet::L1FS(
|
||||||
|
raw.dataset, raw.features, raw.className,
|
||||||
|
raw.features.size(), raw.classNumStates, raw.weights,
|
||||||
|
0.01, 1000, 1e-4, true
|
||||||
|
);
|
||||||
|
featureSelector->fit();
|
||||||
|
|
||||||
|
std::vector<int> selected_features = featureSelector->getFeatures();
|
||||||
|
std::vector<double> selected_scores = featureSelector->getScores();
|
||||||
|
|
||||||
|
// Check if features are selected
|
||||||
|
REQUIRE(selected_features.size() > 0);
|
||||||
|
REQUIRE(selected_scores.size() == selected_features.size());
|
||||||
|
|
||||||
|
// Scores should be non-negative (absolute coefficient values)
|
||||||
|
for (double score : selected_scores) {
|
||||||
|
REQUIRE(score >= 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scores should be in descending order
|
||||||
|
// std::cout << file_name << " " << selected_features << std::endl << "{";
|
||||||
|
for (size_t i = 1; i < selected_scores.size(); i++) {
|
||||||
|
// std::cout << selected_scores[i - 1] << ", ";
|
||||||
|
REQUIRE(selected_scores[i - 1] >= selected_scores[i]);
|
||||||
|
}
|
||||||
|
// std::cout << selected_scores[selected_scores.size() - 1];
|
||||||
|
// std::cout << "}" << std::endl;
|
||||||
|
delete featureSelector;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks the basic contract of L1FS on the ecoli dataset: non-empty, bounded,
// non-negative and descending scores, the max-features cap, and
// getCoefficients() before and after fit().
TEST_CASE("L1FS Features Selected", "[FeatureSelection]")
{
    auto raw = RawDatasets("ecoli", true);
    const int all_features = static_cast<int>(raw.features.size());

    SECTION("Test L1FS with different alpha values")
    {
        const std::vector<double> alphas = { 0.01, 0.1, 0.5 };

        for (const double alpha : alphas) {
            // Automatic storage: a failing REQUIRE leaves the scope by
            // exception, and the selector is still destroyed.
            bayesnet::L1FS featureSelector(
                raw.dataset, raw.features, raw.className,
                all_features, raw.classNumStates, raw.weights,
                alpha, 1000, 1e-4, true
            );
            featureSelector.fit();

            INFO("Alpha: " << alpha);

            const std::vector<int> selected_features = featureSelector.getFeatures();
            const std::vector<double> selected_scores = featureSelector.getScores();

            // Every alpha must yield a non-empty selection that fits in the feature set
            REQUIRE(selected_features.size() > 0);
            REQUIRE(selected_features.size() <= raw.features.size());
            REQUIRE(selected_scores.size() == selected_features.size());

            // Scores should be non-negative (absolute coefficient values)
            for (const double score : selected_scores) {
                REQUIRE(score >= 0.0);
            }

            // Scores should be in descending order
            for (size_t i = 1; i < selected_scores.size(); i++) {
                REQUIRE(selected_scores[i - 1] >= selected_scores[i]);
            }
        }
    }

    SECTION("Test L1FS with max features limit")
    {
        const int max_features = 2;
        bayesnet::L1FS featureSelector(
            raw.dataset, raw.features, raw.className,
            max_features, raw.classNumStates, raw.weights,
            0.1, 1000, 1e-4, true
        );
        featureSelector.fit();

        const std::vector<int> selected_features = featureSelector.getFeatures();
        // Cast keeps the comparison unsigned on both sides.
        REQUIRE(selected_features.size() <= static_cast<size_t>(max_features));
    }

    SECTION("Test L1FS getCoefficients method")
    {
        bayesnet::L1FS featureSelector(
            raw.dataset, raw.features, raw.className,
            all_features, raw.classNumStates, raw.weights,
            0.1, 1000, 1e-4, true
        );

        // Should throw before fitting
        REQUIRE_THROWS_AS(featureSelector.getCoefficients(), std::runtime_error);
        REQUIRE_THROWS_WITH(featureSelector.getCoefficients(), "L1FS not fitted");

        featureSelector.fit();

        // Should work after fitting
        const auto coefficients = featureSelector.getCoefficients();
        REQUIRE(coefficients.size() == raw.features.size());
    }
}
|
||||||
|
|
||||||
TEST_CASE("Oddities", "[FeatureSelection]")
|
TEST_CASE("Oddities", "[FeatureSelection]")
|
||||||
{
|
{
|
||||||
auto raw = RawDatasets("iris", true);
|
auto raw = RawDatasets("iris", true);
|
||||||
|
|
||||||
// FCBF Limits
|
// FCBF Limits
|
||||||
REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument);
|
REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument);
|
||||||
REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7");
|
REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7");
|
||||||
|
|
||||||
|
// IWSS Limits
|
||||||
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument);
|
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument);
|
||||||
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]");
|
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]");
|
||||||
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument);
|
REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument);
|
||||||
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]");
|
REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]");
|
||||||
|
|
||||||
|
// L1FS Limits
|
||||||
|
REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -0.1), std::invalid_argument);
|
||||||
|
REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -0.1), "Alpha (regularization strength) must be non-negative");
|
||||||
|
|
||||||
|
REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 0), std::invalid_argument);
|
||||||
|
REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 0), "Maximum iterations must be positive");
|
||||||
|
|
||||||
|
REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, 0.0), std::invalid_argument);
|
||||||
|
REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, 0.0), "Tolerance must be positive");
|
||||||
|
|
||||||
|
REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, -1e-4), std::invalid_argument);
|
||||||
|
REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, -1e-4), "Tolerance must be positive");
|
||||||
|
|
||||||
// Not fitted error
|
// Not fitted error
|
||||||
auto selector = build_selector(raw, "CFS", 0);
|
auto selector = build_selector(raw, "CFS", 0);
|
||||||
const std::string message = "FeatureSelect not fitted";
|
const std::string message = "FeatureSelect not fitted";
|
||||||
@@ -97,6 +232,7 @@ TEST_CASE("Oddities", "[FeatureSelection]")
|
|||||||
REQUIRE_THROWS_WITH(selector->getScores(), message);
|
REQUIRE_THROWS_WITH(selector->getScores(), message);
|
||||||
delete selector;
|
delete selector;
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Test threshold limits", "[FeatureSelection]")
|
TEST_CASE("Test threshold limits", "[FeatureSelection]")
|
||||||
{
|
{
|
||||||
auto raw = RawDatasets("diabetes", true);
|
auto raw = RawDatasets("diabetes", true);
|
||||||
@@ -113,4 +249,77 @@ TEST_CASE("Test threshold limits", "[FeatureSelection]")
|
|||||||
selector->fit();
|
selector->fit();
|
||||||
REQUIRE(selector->getFeatures().size() == 5);
|
REQUIRE(selector->getFeatures().size() == 5);
|
||||||
delete selector;
|
delete selector;
|
||||||
|
|
||||||
|
// L1FS with different alpha values
|
||||||
|
selector = build_selector(raw, "L1FS", 0.01); // Low alpha - more features
|
||||||
|
selector->fit();
|
||||||
|
int num_features_low_alpha = selector->getFeatures().size();
|
||||||
|
delete selector;
|
||||||
|
|
||||||
|
selector = build_selector(raw, "L1FS", 0.9); // High alpha - fewer features
|
||||||
|
selector->fit();
|
||||||
|
int num_features_high_alpha = selector->getFeatures().size();
|
||||||
|
REQUIRE(num_features_high_alpha <= num_features_low_alpha);
|
||||||
|
delete selector;
|
||||||
|
|
||||||
|
// L1FS with max features limit
|
||||||
|
selector = build_selector(raw, "L1FS", 0.01, 4);
|
||||||
|
selector->fit();
|
||||||
|
REQUIRE(selector->getFeatures().size() <= 4);
|
||||||
|
delete selector;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exercises both model paths of L1FS: whichever path raw.classNumStates
// selects for the diabetes dataset, and the logistic path on a synthetic
// two-state problem.
TEST_CASE("L1FS Regression vs Classification", "[FeatureSelection]")
{
    SECTION("Regression Task")
    {
        auto raw = RawDatasets("diabetes", true);
        // NOTE(review): the original comment states that diabetes is treated
        // as regression (classNumStates > 2) — confirm against the dataset.
        bayesnet::L1FS l1fs(
            raw.dataset, raw.features, raw.className,
            static_cast<int>(raw.features.size()), raw.classNumStates, raw.weights,
            0.1, 1000, 1e-4, true
        );
        l1fs.fit();

        const auto features = l1fs.getFeatures();
        REQUIRE(features.size() > 0);
    }

    SECTION("Binary Classification Task")
    {
        // Fixed seed so the synthetic dataset, and therefore the outcome of
        // this test, is the same on every run.
        torch::manual_seed(17);

        // Create a simple binary classification dataset
        const int n_samples = 100;
        const int n_features = 5;

        torch::Tensor X = torch::randn({ n_features, n_samples });
        // The label depends on features 0 and 2 only.
        torch::Tensor y = (X[0] + X[2] > 0).to(torch::kFloat32);
        torch::Tensor samples = torch::cat({ X, y.unsqueeze(0) }, 0);

        std::vector<std::string> features;
        for (int i = 0; i < n_features; ++i) {
            features.push_back("feature_" + std::to_string(i));
        }

        torch::Tensor weights = torch::ones({ n_samples });

        // Automatic storage: a failing REQUIRE cannot leak the selector.
        bayesnet::L1FS l1fs(
            samples, features, "target",
            n_features, 2, weights, // 2 states = binary classification
            0.1, 1000, 1e-4, true
        );
        l1fs.fit();

        const auto selected_features = l1fs.getFeatures();
        REQUIRE(selected_features.size() > 0);

        // At least one of the informative features (0 or 2) must be selected
        const bool has_feature_0 = std::find(selected_features.begin(), selected_features.end(), 0) != selected_features.end();
        const bool has_feature_2 = std::find(selected_features.begin(), selected_features.end(), 2) != selected_features.end();
        REQUIRE((has_feature_0 || has_feature_2));
    }
}
|
Reference in New Issue
Block a user