diff --git a/CHANGELOG.md b/CHANGELOG.md
index d80b82f..f31ad10 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,10 +14,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fix CFS metric expression in the FeatureSelection class.
 - Fix the vcpkg configuration in building the library.
 - Fix the sample app to use the vcpkg configuration.
-- Add predict_proba method to all Ld classifiers.
 - Refactor the computeCPT method in the Node class with libtorch vectorized operations.
 - Refactor the sample to use local discretization models.
 
+### Added
+
+- Add predict_proba method to all Ld classifiers.
+- Add L1FS feature selection methods to the FeatureSelection class.
+
 ## [1.1.0] - 2025-04-27
 
 ### Internal
diff --git a/bayesnet/feature_selection/L1FS.cc b/bayesnet/feature_selection/L1FS.cc
new file mode 100644
index 0000000..f328328
--- /dev/null
+++ b/bayesnet/feature_selection/L1FS.cc
@@ -0,0 +1,279 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#include <algorithm>
+#include <cmath>
+#include <stdexcept>
+#include "bayesnet/utils/bayesnetUtils.h"
+#include "L1FS.h"
+
+namespace bayesnet {
+    using namespace torch::indexing;
+
+    L1FS::L1FS(const torch::Tensor& samples,
+        const std::vector<std::string>& features,
+        const std::string& className,
+        const int maxFeatures,
+        const int classNumStates,
+        const torch::Tensor& weights,
+        const double alpha,
+        const int maxIter,
+        const double tolerance,
+        const bool fitIntercept)
+        : FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights),
+        alpha(alpha), maxIter(maxIter), tolerance(tolerance), fitIntercept(fitIntercept)
+    {
+        if (alpha < 0) {
+            throw std::invalid_argument("Alpha (regularization strength) must be non-negative");
+        }
+        if (maxIter < 1) {
+            throw std::invalid_argument("Maximum iterations must be positive");
+        }
+        if (tolerance <= 0) {
+            throw std::invalid_argument("Tolerance must be positive");
+        }
+
+        // Determine if this is a regression or classification task
+        // For simplicity, assume binary classification if classNumStates == 2
+        // and regression otherwise (this heuristic can be refined if needed)
+        isRegression = (classNumStates > 2 || classNumStates == 0);
+    }
+
+    void L1FS::fit()
+    {
+        initialize();
+
+        // Prepare data
+        int n_samples = samples.size(1);
+        int n_features = features.size();
+
+        // Extract features (all rows except last)
+        auto X = samples.index({ Slice(0, n_features), Slice() }).t().contiguous();
+
+        // Extract labels (last row)
+        auto y = samples.index({ -1, Slice() }).contiguous();
+
+        // Convert to float for numerical operations
+        X = X.to(torch::kFloat32);
+        y = y.to(torch::kFloat32);
+
+        // Normalize features for better convergence
+        auto X_mean = X.mean(0);
+        auto X_std = X.std(0);
+        X_std = torch::where(X_std == 0, torch::ones_like(X_std), X_std);
+        X = (X - X_mean) / X_std;
+
+        if (isRegression) {
+            // Normalize y for regression
+            auto y_mean = y.mean();
+            auto y_std = y.std();
+            if (y_std.item<double>() > 0) {
+                y = (y - y_mean) / y_std;
+            }
+            fitLasso(X, y, weights);
+        } else {
+            // For binary classification
+            fitL1Logistic(X, y, weights);
+        }
+
+        // Select features based on non-zero coefficients
+        std::vector<std::pair<int, double>> featureImportance;
+        for (int i = 0; i < n_features; ++i) {
+            double coef_magnitude = std::abs(coefficients[i]);
+            if (coef_magnitude > 1e-10) { // Threshold for numerical zero
+                featureImportance.push_back({ i, coef_magnitude });
+            }
+        }
+
+        // If all coefficients are zero (high regularization), select based on original feature-class correlation
+        if (featureImportance.empty() && maxFeatures > 0) {
+            // Compute SU with labels as fallback
+            computeSuLabels();
+            auto featureOrder = argsort(suLabels);
+
+            // Select top features by SU score
+            int numToSelect = std::min(static_cast<int>(featureOrder.size()),
+                std::min(maxFeatures, 3)); // At most 3 features as fallback
+
+            for (int i = 0; i < numToSelect; ++i) {
+                selectedFeatures.push_back(featureOrder[i]);
+                selectedScores.push_back(suLabels[featureOrder[i]]);
+            }
+        } else {
+            // Sort by importance (absolute coefficient value)
+            std::sort(featureImportance.begin(), featureImportance.end(),
+                [](const auto& a, const auto& b) { return a.second > b.second; });
+
+            // Select top features up to maxFeatures
+            int numToSelect = std::min(static_cast<int>(featureImportance.size()),
+                maxFeatures);
+
+            for (int i = 0; i < numToSelect; ++i) {
+                selectedFeatures.push_back(featureImportance[i].first);
+                selectedScores.push_back(featureImportance[i].second);
+            }
+        }
+
+        fitted = true;
+    }
+
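+    // Editorial note (hedged reading of the code below): fitLasso implements
+    // cyclic coordinate descent for the weighted Lasso objective
+    //     min_beta  (1/2) * sum_i w_i * (y_i - x_i . beta)^2 + alpha * ||beta||_1
+    // Each coordinate update has a closed form:
+    //     rho_j  = sum_i w_i * x_ij * r_i^(j)   (r^(j): residuals with feature j's contribution added back)
+    //     beta_j = S(rho_j, alpha) / (sum_i w_i * x_ij^2)
+    // where S is the soft-thresholding operator implemented by softThreshold().
+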
+    void L1FS::fitLasso(const torch::Tensor& X, const torch::Tensor& y,
+        const torch::Tensor& sampleWeights)
+    {
+        int n_samples = X.size(0);
+        int n_features = X.size(1);
+
+        // Initialize coefficients
+        coefficients.resize(n_features, 0.0);
+        double intercept = 0.0;
+
+        // Ensure consistent types
+        torch::Tensor weights = sampleWeights.to(torch::kFloat32);
+
+        // Coordinate descent for Lasso
+        torch::Tensor residuals = y.clone();
+        if (fitIntercept) {
+            intercept = (y * weights).sum().item<double>() / weights.sum().item<double>();
+            residuals = y - intercept;
+        }
+
+        // Precompute feature norms
+        std::vector<double> featureNorms(n_features);
+        for (int j = 0; j < n_features; ++j) {
+            auto Xj = X.index({ Slice(), j });
+            featureNorms[j] = (Xj * Xj * weights).sum().item<double>();
+        }
+
+        // Coordinate descent iterations
+        for (int iter = 0; iter < maxIter; ++iter) {
+            double maxChange = 0.0;
+
+            // Update each coordinate
+            for (int j = 0; j < n_features; ++j) {
+                auto Xj = X.index({ Slice(), j });
+
+                // Compute partial residuals (excluding feature j)
+                torch::Tensor partialResiduals = residuals + coefficients[j] * Xj;
+
+                // Compute rho (correlation with residuals)
+                double rho = (Xj * partialResiduals * weights).sum().item<double>();
+
+                // Soft thresholding
+                double oldCoef = coefficients[j];
+                coefficients[j] = softThreshold(rho, alpha) / featureNorms[j];
+
+                // Update residuals
+                residuals = partialResiduals - coefficients[j] * Xj;
+
+                maxChange = std::max(maxChange, std::abs(coefficients[j] - oldCoef));
+            }
+
+            // Update intercept if needed
+            if (fitIntercept) {
+                double oldIntercept = intercept;
+                intercept = (residuals * weights).sum().item<double>() /
+                    weights.sum().item<double>();
+                residuals = residuals - (intercept - oldIntercept);
+                maxChange = std::max(maxChange, std::abs(intercept - oldIntercept));
+            }
+
+            // Check convergence
+            if (maxChange < tolerance) {
+                break;
+            }
+        }
+    }
+
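+    // Editorial note (hedged reading of the code below): fitL1Logistic is
+    // proximal gradient descent (ISTA) on the weighted logistic loss: a plain
+    // gradient step on the smooth part,
+    //     beta <- beta - eta * grad,   grad = X^T ((sigmoid(X beta) - y) * w) / n,
+    // followed by the proximal operator of the L1 term, i.e. componentwise
+    // soft thresholding with parameter eta * alpha (eta = learningRate).
+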
+    void L1FS::fitL1Logistic(const torch::Tensor& X, const torch::Tensor& y,
+        const torch::Tensor& sampleWeights)
+    {
+        int n_samples = X.size(0);
+        int n_features = X.size(1);
+
+        // Initialize coefficients
+        torch::Tensor coef = torch::zeros({ n_features }, torch::kFloat32);
+        double intercept = 0.0;
+
+        // Ensure consistent types
+        torch::Tensor weights = sampleWeights.to(torch::kFloat32);
+
+        // Learning rate (can be adaptive)
+        double learningRate = 0.01;
+
+        // Proximal gradient descent
+        for (int iter = 0; iter < maxIter; ++iter) {
+            // Compute predictions
+            torch::Tensor linearPred = X.matmul(coef);
+            if (fitIntercept) {
+                linearPred = linearPred + intercept;
+            }
+            torch::Tensor pred = sigmoid(linearPred);
+
+            // Compute gradient
+            torch::Tensor diff = pred - y;
+            torch::Tensor grad = X.t().matmul(diff * weights) / n_samples;
+
+            // Gradient descent step
+            torch::Tensor coef_new = coef - learningRate * grad;
+
+            // Proximal step (soft thresholding)
+            for (int j = 0; j < n_features; ++j) {
+                coef_new[j] = softThreshold(coef_new[j].item<double>(),
+                    learningRate * alpha);
+            }
+
+            // Update intercept if needed
+            if (fitIntercept) {
+                double grad_intercept = (diff * weights).sum().item<double>() / n_samples;
+                intercept -= learningRate * grad_intercept;
+            }
+
+            // Check convergence
+            double change = (coef_new - coef).abs().max().item<double>();
+            coef = coef_new;
+
+            if (change < tolerance) {
+                break;
+            }
+
+            // Adaptive learning rate (optional)
+            if (iter % 100 == 0) {
+                learningRate *= 0.9;
+            }
+        }
+
+        // Store final coefficients
+        coefficients.resize(n_features);
+        for (int j = 0; j < n_features; ++j) {
+            coefficients[j] = coef[j].item<double>();
+        }
+    }
+
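+    // Editorial note: softThreshold computes S(x, lambda) = sign(x) * max(|x| - lambda, 0);
+    // e.g. S(3.0, 1.0) = 2.0 and S(-0.5, 1.0) = 0.0. Coefficients whose magnitude
+    // stays below lambda collapse to exactly zero, which is what prunes features.
+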
+    double L1FS::softThreshold(double x, double lambda) const
+    {
+        if (x > lambda) {
+            return x - lambda;
+        } else if (x < -lambda) {
+            return x + lambda;
+        } else {
+            return 0.0;
+        }
+    }
+
+    torch::Tensor L1FS::sigmoid(const torch::Tensor& z) const
+    {
+        return 1.0 / (1.0 + torch::exp(-z));
+    }
+
+    std::vector<double> L1FS::getCoefficients() const
+    {
+        if (!fitted) {
+            throw std::runtime_error("L1FS not fitted");
+        }
+        return coefficients;
+    }
+
+} // namespace bayesnet
\ No newline at end of file
diff --git a/bayesnet/feature_selection/L1FS.h b/bayesnet/feature_selection/L1FS.h
new file mode 100644
index 0000000..0cdefee
--- /dev/null
+++ b/bayesnet/feature_selection/L1FS.h
@@ -0,0 +1,83 @@
+// ***************************************************************
+// SPDX-FileCopyrightText: Copyright 2025 Ricardo Montañana Gómez
+// SPDX-FileType: SOURCE
+// SPDX-License-Identifier: MIT
+// ***************************************************************
+
+#ifndef L1FS_H
+#define L1FS_H
+#include <torch/torch.h>
+#include <vector>
+#include "bayesnet/feature_selection/FeatureSelect.h"
+
+namespace bayesnet {
+    /**
+     * L1-Regularized Feature Selection (L1FS)
+     *
+     * This class implements feature selection using L1-regularized linear models.
+     * For binary classification tasks (classNumStates == 2), it uses logistic
+     * regression with an L1 penalty; any other target is treated as a regression
+     * task and fitted with Lasso regression.
+     *
+     * The L1 penalty induces sparsity in the model coefficients, effectively
+     * performing feature selection by setting irrelevant feature weights to zero.
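+     *
+     * Editorial usage sketch (hedged; getFeatures()/getScores() come from the
+     * FeatureSelect base class, argument values are illustrative only):
+     *
+     *     bayesnet::L1FS fs(samples, features, className,
+     *                       features.size(), classNumStates, weights, 0.1);
+     *     fs.fit();
+     *     auto idx = fs.getFeatures();  // indices sorted by |coefficient|
+     *     auto imp = fs.getScores();    // matching |coefficient| magnitudes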
+     */
+    class L1FS : public FeatureSelect {
+    public:
+        /**
+         * Constructor for L1FS
+         * @param samples (n+1) x m tensor where samples[-1] is the target variable
+         * @param features vector of feature names
+         * @param className name of the class/target variable
+         * @param maxFeatures maximum number of features to select (0 = all)
+         * @param classNumStates number of states for classification (ignored for regression)
+         * @param weights sample weights
+         * @param alpha L1 regularization strength (higher = more sparsity)
+         * @param maxIter maximum iterations for optimization
+         * @param tolerance convergence tolerance
+         * @param fitIntercept whether to fit an intercept term
+         */
+        L1FS(const torch::Tensor& samples,
+            const std::vector<std::string>& features,
+            const std::string& className,
+            const int maxFeatures,
+            const int classNumStates,
+            const torch::Tensor& weights,
+            const double alpha = 1.0,
+            const int maxIter = 1000,
+            const double tolerance = 1e-4,
+            const bool fitIntercept = true);
+
+        virtual ~L1FS() {};
+
+        void fit() override;
+
+        // Get the learned coefficients for each feature
+        std::vector<double> getCoefficients() const;
+
+    private:
+        double alpha;       // L1 regularization strength
+        int maxIter;        // Maximum iterations for optimization
+        double tolerance;   // Convergence tolerance
+        bool fitIntercept;  // Whether to fit intercept
+        bool isRegression;  // Task type (regression vs classification)
+
+        std::vector<double> coefficients;  // Learned coefficients
+
+        // Coordinate descent for Lasso regression
+        void fitLasso(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& sampleWeights);
+
+        // Proximal gradient descent for L1-regularized logistic regression
+        void fitL1Logistic(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& sampleWeights);
+
+        // Soft thresholding operator for L1 regularization
+        double softThreshold(double x, double lambda) const;
+
+        // Logistic function
+        torch::Tensor sigmoid(const torch::Tensor& z) const;
+
+        // Compute logistic loss
+        double logisticLoss(const torch::Tensor& X, const torch::Tensor& y,
+            const torch::Tensor& coef, const torch::Tensor& sampleWeights) const;
+    };
+}
+#endif
\ No newline at end of file
diff --git a/tests/TestBayesModels.cc b/tests/TestBayesModels.cc
index 450a5cb..cdf3f25 100644
--- a/tests/TestBayesModels.cc
+++ b/tests/TestBayesModels.cc
@@ -20,7 +20,7 @@
 #include "bayesnet/ensembles/AODELd.h"
 #include "bayesnet/ensembles/BoostAODE.h"
 
-const std::string ACTUAL_VERSION = "1.1.1";
+const std::string ACTUAL_VERSION = "1.1.2";
 
 TEST_CASE("Test Bayesian Classifiers score & version", "[Models]")
 {
diff --git a/tests/TestFeatureSelection.cc b/tests/TestFeatureSelection.cc
index 1276fae..e145f10 100644
--- a/tests/TestFeatureSelection.cc
+++ b/tests/TestFeatureSelection.cc
@@ -12,6 +12,7 @@
 #include "bayesnet/feature_selection/CFS.h"
 #include "bayesnet/feature_selection/FCBF.h"
 #include "bayesnet/feature_selection/IWSS.h"
+#include "bayesnet/feature_selection/L1FS.h"
 #include "TestUtils.h"
 
 bayesnet::FeatureSelect* build_selector(RawDatasets& raw, std::string selector, double threshold, int max_features = 0)
@@ -23,14 +24,16 @@
         return new bayesnet::FCBF(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
     } else if (selector == "IWSS") {
         return new bayesnet::IWSS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
+    } else if (selector == "L1FS") {
+        // For L1FS, threshold is used as alpha parameter
+        return new bayesnet::L1FS(raw.dataset, raw.features, raw.className, max_features, raw.classNumStates, raw.weights, threshold);
     }
     return nullptr;
 }
 
 TEST_CASE("Features Selected", "[FeatureSelection]")
 {
-    // std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
-    std::string file_name = GENERATE("ecoli");
+    std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
 
     auto raw = RawDatasets(file_name, true);
 
@@ -48,14 +51,19 @@
     { {"glass", "FCBF" }, { { 2, 3, 5, 7, 6 }, {0.365513, 0.304911, 0.302109, 0.281621, 0.253297} } },
     { {"iris", "FCBF"}, {{ 3, 2 }, {0.870521, 0.816401} }},
     { {"ecoli", "FCBF"}, {{ 5, 0, 1, 4, 2 }, {0.512319, 0.350406, 0.260905, 0.203132, 0.11229} }},
-    { {"diabetes", "FCBF"}, {{ 1, 5, 7, 6 }, {0.132858, 0.083191, 0.0480135, 0.0224186} }}
+    { {"diabetes", "FCBF"}, {{ 1, 5, 7, 6 }, {0.132858, 0.083191, 0.0480135, 0.0224186} }},
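+    // Editorial note (tentative): the glass L1FS scores below coincide with the
+    // glass FCBF symmetrical-uncertainty scores above, which suggests that
+    // alpha = 0.01 zeroes every coefficient on that dataset and L1FS takes its
+    // SU-ranking fallback path (see L1FS::fit()).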
+    { {"glass", "L1FS" }, { { 2, 3, 5}, { 0.365513, 0.304911, 0.302109 } } },
+    { {"iris", "L1FS"}, {{ 3, 2, 1, 0 }, { 0.570928, 0.37569, 0.0774792, 0.00835904 }}},
+    { {"ecoli", "L1FS"}, {{ 0, 1, 6, 5, 2, 3 }, {0.490179, 0.365944, 0.291177, 0.199171, 0.0400928, 0.0192575} }},
+    { {"diabetes", "L1FS"}, {{ 1, 5, 4 }, {0.132858, 0.083191, 0.0486187} }}
     };
     double threshold;
     std::string selector;
     std::vector<std::pair<std::string, double>> selectors = {
         { "CFS", 0.0 },
         { "IWSS", 0.1 },
-        { "FCBF", 1e-7 }
+        { "FCBF", 1e-7 },
+        { "L1FS", 0.01 }
     };
     for (const auto item : selectors) {
         selector = item.first; threshold = item.second;
@@ -77,17 +85,144 @@
             delete featureSelector;
         }
     }
+    SECTION("Test L1FS")
+    {
+        bayesnet::L1FS* featureSelector = new bayesnet::L1FS(
+            raw.dataset, raw.features, raw.className,
+            raw.features.size(), raw.classNumStates, raw.weights,
+            0.01, 1000, 1e-4, true
+        );
+        featureSelector->fit();
+
+        std::vector<int> selected_features = featureSelector->getFeatures();
+        std::vector<double> selected_scores = featureSelector->getScores();
+
+        // Check if features are selected
+        REQUIRE(selected_features.size() > 0);
+        REQUIRE(selected_scores.size() == selected_features.size());
+
+        // Scores should be non-negative (absolute coefficient values)
+        for (double score : selected_scores) {
+            REQUIRE(score >= 0.0);
+        }
+
+        // Scores should be in descending order
+        // std::cout << file_name << " " << selected_features << std::endl << "{";
+        for (size_t i = 1; i < selected_scores.size(); i++) {
+            // std::cout << selected_scores[i - 1] << ", ";
+            REQUIRE(selected_scores[i - 1] >= selected_scores[i]);
+        }
+        // std::cout << selected_scores[selected_scores.size() - 1];
+        // std::cout << "}" << std::endl;
+        delete featureSelector;
+    }
 }
+
+TEST_CASE("L1FS Features Selected", "[FeatureSelection]")
+{
+    auto raw = RawDatasets("ecoli", true);
+
+    SECTION("Test L1FS with different alpha values")
+    {
+        std::vector<double> alphas = { 0.01, 0.1, 0.5 };
+
+        for (double alpha : alphas) {
+            bayesnet::L1FS* featureSelector = new bayesnet::L1FS(
+                raw.dataset, raw.features, raw.className,
+                raw.features.size(), raw.classNumStates, raw.weights,
+                alpha, 1000, 1e-4, true
+            );
+            featureSelector->fit();
+
+            INFO("Alpha: " << alpha);
+
+            std::vector<int> selected_features = featureSelector->getFeatures();
+            std::vector<double> selected_scores = featureSelector->getScores();
+
+            // Higher alpha should lead to fewer features
+            REQUIRE(selected_features.size() > 0);
+            REQUIRE(selected_features.size() <= raw.features.size());
+            REQUIRE(selected_scores.size() == selected_features.size());
+
+            // Scores should be non-negative (absolute coefficient values)
+            for (double score : selected_scores) {
+                REQUIRE(score >= 0.0);
+            }
+
+            // Scores should be in descending order
+            for (size_t i = 1; i < selected_scores.size(); i++) {
+                REQUIRE(selected_scores[i - 1] >= selected_scores[i]);
+            }
+
+            delete featureSelector;
+        }
+    }
+
+    SECTION("Test L1FS with max features limit")
+    {
+        int max_features = 2;
+        bayesnet::L1FS* featureSelector = new bayesnet::L1FS(
+            raw.dataset, raw.features, raw.className,
+            max_features, raw.classNumStates, raw.weights,
+            0.1, 1000, 1e-4, true
+        );
+        featureSelector->fit();
+
+        std::vector<int> selected_features = featureSelector->getFeatures();
+        REQUIRE(selected_features.size() <= max_features);
+
+        delete featureSelector;
+    }
+
+    SECTION("Test L1FS getCoefficients method")
+    {
+        bayesnet::L1FS* featureSelector = new bayesnet::L1FS(
+            raw.dataset, raw.features, raw.className,
+            raw.features.size(), raw.classNumStates, raw.weights,
+            0.1, 1000, 1e-4, true
+        );
+
+        // Should throw before fitting
+        REQUIRE_THROWS_AS(featureSelector->getCoefficients(), std::runtime_error);
+        REQUIRE_THROWS_WITH(featureSelector->getCoefficients(), "L1FS not fitted");
+
+        featureSelector->fit();
+
+        // Should work after fitting
+        auto coefficients = featureSelector->getCoefficients();
+        REQUIRE(coefficients.size() == raw.features.size());
+
+        delete featureSelector;
+    }
+}
+
 TEST_CASE("Oddities", "[FeatureSelection]")
 {
     auto raw = RawDatasets("iris", true);
+    // FCBF Limits
     REQUIRE_THROWS_AS(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), std::invalid_argument);
     REQUIRE_THROWS_WITH(bayesnet::FCBF(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1e-8), "Threshold cannot be less than 1e-7");
+
+    // IWSS Limits
     REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), std::invalid_argument);
     REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -1e4), "Threshold has to be in [0, 0.5]");
     REQUIRE_THROWS_AS(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), std::invalid_argument);
     REQUIRE_THROWS_WITH(bayesnet::IWSS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 0.501), "Threshold has to be in [0, 0.5]");
+
+    // L1FS Limits
+    REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -0.1), std::invalid_argument);
+    REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, -0.1), "Alpha (regularization strength) must be non-negative");
+
+    REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 0), std::invalid_argument);
+    REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 0), "Maximum iterations must be positive");
+
+    REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, 0.0), std::invalid_argument);
+    REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, 0.0), "Tolerance must be positive");
+
+    REQUIRE_THROWS_AS(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, -1e-4), std::invalid_argument);
+    REQUIRE_THROWS_WITH(bayesnet::L1FS(raw.dataset, raw.features, raw.className, raw.features.size(), raw.classNumStates, raw.weights, 1.0, 1000, -1e-4), "Tolerance must be positive");
+    // Not fitted error
     auto selector = build_selector(raw, "CFS", 0);
     const std::string message = "FeatureSelect not fitted";
@@ -97,6 +232,7 @@
     REQUIRE_THROWS_WITH(selector->getScores(), message);
     delete selector;
 }
+
 TEST_CASE("Test threshold limits", "[FeatureSelection]")
 {
     auto raw = RawDatasets("diabetes", true);
@@ -113,4 +249,77 @@
     selector->fit();
     REQUIRE(selector->getFeatures().size() == 5);
     delete selector;
+
+    // L1FS with different alpha values
+    selector = build_selector(raw, "L1FS", 0.01); // Low alpha - more features
+    selector->fit();
+    int num_features_low_alpha = selector->getFeatures().size();
+    delete selector;
+
+    selector = build_selector(raw, "L1FS", 0.9); // High alpha - fewer features
+    selector->fit();
+    int num_features_high_alpha = selector->getFeatures().size();
+    REQUIRE(num_features_high_alpha <= num_features_low_alpha);
+    delete selector;
+
+    // L1FS with max features limit
+    selector = build_selector(raw, "L1FS", 0.01, 4);
+    selector->fit();
+    REQUIRE(selector->getFeatures().size() <= 4);
+    delete selector;
+}
+
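+// Editorial note: this case exercises the task heuristic in the L1FS constructor,
+// isRegression = (classNumStates > 2 || classNumStates == 0). diabetes routes to
+// Lasso, while the synthetic two-state target below routes to L1 logistic
+// regression. By the L1FS convention, the samples tensor holds features in its
+// first n_features rows and the target in the last row, hence the
+// cat({ X, y.unsqueeze(0) }, 0) below.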
+TEST_CASE("L1FS Regression vs Classification", "[FeatureSelection]")
+{
+    SECTION("Regression Task")
+    {
+        auto raw = RawDatasets("diabetes", true);
+        // diabetes dataset should be treated as regression (classNumStates > 2)
+        bayesnet::L1FS* l1fs = new bayesnet::L1FS(
+            raw.dataset, raw.features, raw.className,
+            raw.features.size(), raw.classNumStates, raw.weights,
+            0.1, 1000, 1e-4, true
+        );
+        l1fs->fit();
+
+        auto features = l1fs->getFeatures();
+        REQUIRE(features.size() > 0);
+
+        delete l1fs;
+    }
+
+    SECTION("Binary Classification Task")
+    {
+        // Create a simple binary classification dataset
+        int n_samples = 100;
+        int n_features = 5;
+
+        torch::Tensor X = torch::randn({ n_features, n_samples });
+        torch::Tensor y = (X[0] + X[2] > 0).to(torch::kFloat32);
+        torch::Tensor samples = torch::cat({ X, y.unsqueeze(0) }, 0);
+
+        std::vector<std::string> features;
+        for (int i = 0; i < n_features; ++i) {
+            features.push_back("feature_" + std::to_string(i));
+        }
+
+        torch::Tensor weights = torch::ones({ n_samples });
+
+        bayesnet::L1FS* l1fs = new bayesnet::L1FS(
+            samples, features, "target",
+            n_features, 2, weights, // 2 states = binary classification
+            0.1, 1000, 1e-4, true
+        );
+        l1fs->fit();
+
+        auto selected_features = l1fs->getFeatures();
+        REQUIRE(selected_features.size() > 0);
+
+        // Features 0 and 2 should be among the top selected
+        bool has_feature_0 = std::find(selected_features.begin(), selected_features.end(), 0) != selected_features.end();
+        bool has_feature_2 = std::find(selected_features.begin(), selected_features.end(), 2) != selected_features.end();
+        REQUIRE((has_feature_0 || has_feature_2));
+
+        delete l1fs;
+    }
 }
\ No newline at end of file