Add constant features treatment

Update readme
Update Setup and __init__
2025-08-17 08:35:52 +00:00 · 2023-10-13 13:35:24 +02:00 · 2022-05-19 17:52:54 +02:00 · 2022-05-19 17:46:00 +02:00 · 2022-05-19 17:36:59 +02:00 · 2022-05-19 17:27:40 +02:00
16 changed files with 230 additions and 37 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -12,11 +12,13 @@ jobs:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [macos-latest, ubuntu-latest]
-        python: [3.8]
+        os: [ubuntu-latest]
+        python: ["3.10"]

    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
      - name: Set up Python ${{ matrix.python }}
        uses: actions/setup-python@v2
        with:
@@ -26,14 +28,37 @@ jobs:
          pip install -q --upgrade pip
          pip install -q cython
          pip install -q numpy
-          pip install -q git+git://github.com/doctorado-ml/mdlp
-          pip install -q -r requirements.txt
-          pip install -q --upgrade codecov coverage black flake8 codacy-coverage
+          pip install -q git+https://github.com/doctorado-ml/mdlp
+          pip install -q -r requirements/dev.txt
+          pip install -q --upgrade codecov coverage black flake8 codacy-coverage unittest-xml-reporting
      - name: Lint
        run: |
          black --check --diff mufs
          flake8 --count mufs
      - name: Tests & coverage
        run: |
-          coverage run -m unittest -v mufs.tests
+          mkdir .report
+          coverage run -m xmlrunner -v mufs.tests -o .report
+          coverage xml -i -o .report/coverage.xml
          coverage report -m --fail-under=100
+      - name: Get project version
+        run: echo "project_version=$(git describe --tags --abbrev=0)" >> $GITHUB_ENV
+      - name: Override Coverage Source Path for Sonar
+        run: sed -i 's/\/home\/runner\/work\/mufs\/mufs\//\/github\/workspace\//g' .report/coverage.xml
+      - name: SonarQube scanner
+        uses: sonarsource/sonarqube-scan-action@master
+        env:
+          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+          SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
+        with:
+          args: >
+            -Dsonar.projectVersion=${{ env.project_version }}
+            -Dsonar.python.coverage.reportPaths=.report/coverage.xml
+            -Dsonar.python.xunit.reportPath=.report/TEST*
+        # Fail the job when the Quality Gate is red. This would typically be
+        # used to block a deployment that does not meet the quality criteria.
+      - name: Quality Gate
+        uses: sonarsource/sonarqube-quality-gate-action@master
+        timeout-minutes: 5
+        env:
+          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,12 +1,12 @@
 repos:
  - repo: https://github.com/ambv/black
-    rev: 20.8b1
+    rev: 22.3.0
    hooks:
      - id: black
        exclude: ".virtual_documents"
        language_version: python3.8
  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.8.4
+    rev: 3.9.2
    hooks:
      - id: flake8
        exclude: ".virtual_documents"
@@ -16,7 +16,7 @@ repos:
  #     - id: mypy
  #       # args: [--strict, --ignore-missing-imports]
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.4.0
+    rev: v4.2.0
    hooks:
      - id: trailing-whitespace
      - id: check-case-conflict
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 SHELL := /bin/bash
 .DEFAULT_GOAL := help
-.PHONY: coverage deps help lint push test doc build
+.PHONY: coverage deps help lint push test build

 coverage:  ## Run tests with coverage
 	coverage erase
@@ -26,9 +26,6 @@ build:  ## Build package
 	rm -fr build/*
 	python setup.py sdist bdist_wheel

-doc-clean:  ## Update documentation
-	make -C docs --makefile=Makefile clean
-
 help: ## Show help message
 	@IFS=$$'\n' ; \
 	help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
--- a/README.md
+++ b/README.md
@@ -1,6 +1,10 @@
 ![CI](https://github.com/Doctorado-ML/mufs/workflows/CI/badge.svg)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/66ad727eb13e4c7a8816db1e44d994a7)](https://www.codacy.com/gh/Doctorado-ML/mufs/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/mufs&utm_campaign=Badge_Grade)
 [![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/Doctorado-ML/mufs.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/Doctorado-ML/mufs/context:python)
+[![PyPI version](https://badge.fury.io/py/MUFS.svg)](https://badge.fury.io/py/MUFS)
+[![Technical Debt](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=mufs&metric=sqale_index&token=1119a3bfd4025d50ef3009a44c600c16670ee31a)](https://haystack.rmontanana.es:25000/dashboard?id=mufs)
+[![Quality Gate Status](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=mufs&metric=alert_status&token=1119a3bfd4025d50ef3009a44c600c16670ee31a)](https://haystack.rmontanana.es:25000/dashboard?id=mufs)
+![Python 3.8+](https://img.shields.io/badge/python-3.8%2B-brightgreen)

 # MUFS

@@ -15,3 +19,7 @@ Proceedings, Twentieth International Conference on Machine Learning. ed. / T. Fa
 ### Correlation-based Feature Selection

 Hall, M. A. (1999), 'Correlation-based Feature Selection for Machine Learning'.
+
+### IWSS
+
+Based on: P. Bermejo, J. A. Gamez and J. M. Puerta, "Incremental Wrapper-based subset Selection with replacement: An advantageous alternative to sequential forward selection," 2009 IEEE Symposium on Computational Intelligence and Data Mining, 2009, pp. 367-374, doi: 10.1109/CIDM.2009.4938673.
--- a/mufs/Selection.py
+++ b/mufs/Selection.py
@@ -3,6 +3,7 @@ from sys import float_info
 from itertools import combinations
 import numpy as np
 from .Metrics import Metrics
+from ._version import __version__


 class MUFS:
@@ -26,7 +27,7 @@ class MUFS:
    """

    def __init__(self, max_features=None, discrete=True):
-        self._max_features = max_features
+        self.max_features = max_features
        self._discrete = discrete
        self.symmetrical_uncertainty = (
            Metrics.symmetrical_uncertainty
@@ -40,6 +41,11 @@ class MUFS:
        )
        self._fitted = False

+    @staticmethod
+    def version() -> str:
+        """Return the version of the package."""
+        return __version__
+
    def _initialize(self, X, y):
        """Initialize the attributes so support multiple calls using same
        object
@@ -53,8 +59,10 @@ class MUFS:
        """
        self.X_ = X
        self.y_ = y
-        if self._max_features is None:
+        if self.max_features is None:
            self._max_features = X.shape[1]
+        else:
+            self._max_features = self.max_features
        self._result = None
        self._scores = []
        self._su_labels = None
@@ -105,7 +113,9 @@ class MUFS:

    def _compute_merit(self, features):
        """Compute the merit function for cfs algorithms
-
+           "Good feature subsets contain features highly correlated with
+           (predictive of) the class, yet uncorrelated with (not predictive of)
+           each other"
        Parameters
        ----------
        features : list
@@ -124,7 +134,7 @@ class MUFS:
        k = len(features)
        for pair in list(combinations(features, 2)):
            rff += self._compute_su_features(*pair)
-        return rcf / sqrt(k + (k ** 2 - k) * rff)
+        return rcf / sqrt(k + (k**2 - k) * rff)

    def cfs(self, X, y):
        """Correlation-based Feature Selection
@@ -162,6 +172,10 @@ class MUFS:
                    id_selected = idx
                    merit = merit_new
                candidates.pop()
+            if id_selected is None:
+                # No more features to add: all merits are NaN because of
+                # constant features
+                break
            candidates.append(feature_order[id_selected])
            self._scores.append(merit)
            del feature_order[id_selected]
@@ -264,3 +278,58 @@ class MUFS:
            list of scores of the features selected
        """
        return self._scores if self._fitted else []
+
+    def iwss(self, X, y, threshold):
+        """Incremental Wrapper Subset Selection
+
+        Parameters
+        ----------
+        X : np.array
+            array of features
+        y : np.array
+            vector of labels
+        threshold : float
+            threshold to select relevant features
+
+        Returns
+        -------
+        self
+            self
+        Raises
+        ------
+        ValueError
+            if the threshold is less than 0
+            or greater than 0.5
+
+        """
+        if threshold < 0 or threshold > 0.5:
+            raise ValueError(
+                "Threshold cannot be less than 0 or greater than 0.5"
+            )
+        self._initialize(X, y)
+        s_list = self._compute_su_labels()
+        feature_order = (-s_list).argsort()
+        features = feature_order.copy().tolist()
+        candidates = []
+        # Add first and second features to result
+        first_feature = features.pop(0)
+        candidates.append(first_feature)
+        self._scores.append(s_list[first_feature])
+        candidates.append(features.pop(0))
+        merit = self._compute_merit(candidates)
+        self._scores.append(merit)
+        for feature in features:
+            candidates.append(feature)
+            merit_new = self._compute_merit(candidates)
+            delta = abs(merit - merit_new) / merit if merit != 0.0 else 0.0
+            if merit_new > merit or delta < threshold:
+                if merit_new > merit:
+                    merit = merit_new
+                self._scores.append(merit_new)
+            else:
+                candidates.pop()
+                break
+            if len(candidates) == self._max_features:
+                break
+        self._result = candidates
+        return self
--- a/mufs/__init__.py
+++ b/mufs/__init__.py
@@ -1,9 +1,8 @@
 from .Selection import MUFS

-__version__ = "0.1.1"
 __author__ = "Ricardo Montañana Gómez"
 __author_email__ = "Ricardo.Montanana@alu.uclm.es"
-__copyright__ = "Copyright 2021, Ricardo Montañana Gómez"
+__copyright__ = "Copyright 2021-2022, Ricardo Montañana Gómez"
 __license__ = "MIT License"

 __all__ = ["MUFS"]
--- a/mufs/_version.py
+++ b/mufs/_version.py
@@ -0,0 +1 @@
+__version__ = "0.1.3"
--- a/mufs/tests/MUFS_test.py
+++ b/mufs/tests/MUFS_test.py
@@ -1,11 +1,14 @@
 import unittest
+import os
+import pandas as pd
+import numpy as np
 from mdlp import MDLP
 from sklearn.datasets import load_wine, load_iris
-
 from ..Selection import MUFS
+from .._version import __version__


-class MUFS_test(unittest.TestCase):
+class MUFSTest(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        mdlp = MDLP(random_state=1)
@@ -15,6 +18,11 @@ class MUFS_test(unittest.TestCase):
        mdlp = MDLP(random_state=1)
        self.X_i = mdlp.fit_transform(self.X_ic, self.y_i).astype("int64")

+    def test_version(self):
+        """Check package version."""
+        mufs = MUFS()
+        self.assertEqual(__version__, mufs.version())
+
    def assertListAlmostEqual(self, list1, list2, tol=7):
        self.assertEqual(len(list1), len(list2))
        for a, b in zip(list1, list2):
@@ -32,7 +40,7 @@ class MUFS_test(unittest.TestCase):
    def test_csf_wine(self):
        mufs = MUFS()
        expected = [6, 12, 9, 4, 10, 0]
-        self.assertListAlmostEqual(
+        self.assertListEqual(
            expected, mufs.cfs(self.X_w, self.y_w).get_results()
        )
        expected = [
@@ -78,7 +86,7 @@ class MUFS_test(unittest.TestCase):
        mufs = MUFS()
        expected = [3, 2, 0, 1]
        computed = mufs.cfs(self.X_i, self.y_i).get_results()
-        self.assertListAlmostEqual(expected, computed)
+        self.assertListEqual(expected, computed)
        expected = [
            0.870521418179061,
            0.8968651482682227,
@@ -148,3 +156,46 @@ class MUFS_test(unittest.TestCase):
            0.44518278979085646,
        ]
        self.assertListAlmostEqual(expected, mufs.get_scores())
+
+    def test_iwss_wine(self):
+        mufs = MUFS()
+        expected = [6, 9, 12]
+        self.assertListEqual(
+            expected, mufs.iwss(self.X_w, self.y_w, 0.2).get_results()
+        )
+        expected = [0.5218299405215557, 0.5947822876110085, 0.4877384978817362]
+        self.assertListAlmostEqual(expected, mufs.get_scores())
+
+    def test_iwss_wine_max_features(self):
+        mufs = MUFS(max_features=3)
+        expected = [6, 9, 12]
+        self.assertListEqual(
+            expected, mufs.iwss(self.X_w, self.y_w, 0.4).get_results()
+        )
+        expected = [0.5218299405215557, 0.5947822876110085, 0.4877384978817362]
+        self.assertListAlmostEqual(expected, mufs.get_scores())
+
+    def test_iwss_exception(self):
+        mufs = MUFS()
+        with self.assertRaises(ValueError):
+            mufs.iwss(self.X_w, self.y_w, 0.51)
+        with self.assertRaises(ValueError):
+            mufs.iwss(self.X_w, self.y_w, -0.01)
+
+    def test_iwss_better_merit_condition(self):
+        folder = os.path.dirname(os.path.abspath(__file__))
+        data = pd.read_csv(
+            os.path.join(folder, "balloons_R.dat"),
+            sep="\t",
+            index_col=0,
+        )
+        X = data.drop("clase", axis=1).to_numpy()
+        y = data["clase"].to_numpy()
+        mufs = MUFS()
+        expected = [0, 2, 3, 1]
+        self.assertListEqual(expected, mufs.iwss(X, y, 0.3).get_results())
+
+    def test_iwss_empty(self):
+        mufs = MUFS()
+        X = np.delete(self.X_i, [0, 1], 1)
+        self.assertListEqual(mufs.iwss(X, self.y_i, 0.3).get_results(), [1, 0])
--- a/mufs/tests/Metrics_test.py
+++ b/mufs/tests/Metrics_test.py
@@ -6,7 +6,7 @@ from mdlp import MDLP
 from ..Selection import Metrics


-class Metrics_test(unittest.TestCase):
+class MetricsTest(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        mdlp = MDLP(random_state=1)
--- a/mufs/tests/__init__.py
+++ b/mufs/tests/__init__.py
@@ -1,4 +1,4 @@
-from .MUFS_test import MUFS_test
-from .Metrics_test import Metrics_test
+from .MUFS_test import MUFSTest
+from .Metrics_test import MetricsTest

-__all__ = ["MUFS_test", "Metrics_test"]
+__all__ = ["MUFSTest", "MetricsTest"]
--- a/mufs/tests/balloons_R.dat
+++ b/mufs/tests/balloons_R.dat
@@ -0,0 +1,17 @@
+	f1	f2	f3	f4	clase
+1	0.968246	-0.968246	0.968246	0.968246	1
+2	0.968246	-0.968246	0.968246	-0.968246	1
+3	0.968246	-0.968246	-0.968246	0.968246	1
+4	0.968246	-0.968246	-0.968246	-0.968246	1
+5	0.968246	0.968246	0.968246	0.968246	1
+6	0.968246	0.968246	0.968246	-0.968246	0
+7	0.968246	0.968246	-0.968246	0.968246	0
+8	0.968246	0.968246	-0.968246	-0.968246	0
+9	-0.968246	-0.968246	0.968246	0.968246	1
+10	-0.968246	-0.968246	0.968246	-0.968246	0
+11	-0.968246	-0.968246	-0.968246	0.968246	0
+12	-0.968246	-0.968246	-0.968246	-0.968246	0
+13	-0.968246	0.968246	0.968246	0.968246	1
+14	-0.968246	0.968246	0.968246	-0.968246	0
+15	-0.968246	0.968246	-0.968246	0.968246	0
+16	-0.968246	0.968246	-0.968246	-0.968246	0
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -0,0 +1,3 @@
+-r production.txt
+mdlp
+pandas
--- a/requirements/production.txt
+++ b/requirements/production.txt
@@ -1,2 +1 @@
 scikit-learn>0.24
-mdlp
--- a/sample.py
+++ b/sample.py
@@ -1,4 +1,5 @@
 import warnings
+import time
 from mufs import MUFS
 from mufs.Metrics import Metrics
 from stree import Stree
@@ -26,16 +27,26 @@ for i in range(n):
 # Classification
 warnings.filterwarnings("ignore")
 print("CFS")
+now = time.time()
 cfs_f = mufsc.cfs(X, y).get_results()
-print(cfs_f)
+time_cfs = time.time() - now
+print(cfs_f, "items: ", len(cfs_f), f"time: {time_cfs:.3f} seconds")
 print("FCBF")
-fcfb_f = mufsc.fcbf(X, y, 5e-2).get_results()
-print(fcfb_f, len(fcfb_f))
+now = time.time()
+fcbf_f = mufsc.fcbf(X, y, 0.07).get_results()
+time_fcbf = time.time() - now
+print(fcbf_f, "items: ", len(fcbf_f), f"time: {time_fcbf:.3f} seconds")
+now = time.time()
+print("IWSS")
+iwss_f = mufsc.iwss(X, y, 0.5).get_results()
+time_iwss = time.time() - now
+print(iwss_f, "items: ", len(iwss_f), f"time: {time_iwss:.3f} seconds")
 print("X.shape=", X.shape)
 clf = Stree(random_state=0)
 print("Accuracy whole dataset", clf.fit(X, y).score(X, y))
 clf = Stree(random_state=0)
 print("Accuracy cfs", clf.fit(X[:, cfs_f], y).score(X[:, cfs_f], y))
 clf = Stree(random_state=0)
-subf = fcfb_f
-print("Accuracy fcfb", clf.fit(X[:, subf], y).score(X[:, subf], y))
+print("Accuracy fcfb", clf.fit(X[:, fcbf_f], y).score(X[:, fcbf_f], y))
+clf = Stree(random_state=0)
+print("Accuracy iwss", clf.fit(X[:, iwss_f], y).score(X[:, iwss_f], y))
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,4 @@
+import os
 import setuptools


@@ -6,9 +7,10 @@ def readme():
        return f.read()


-def get_data(field: str):
+def get_data(field):
    item = ""
-    with open("mufs/__init__.py") as f:
+    file_name = "_version.py" if field == "version" else "__init__.py"
+    with open(os.path.join("mufs", file_name)) as f:
        for line in f.readlines():
            if line.startswith(f"__{field}__"):
                delim = '"' if '"' in line else "'"
@@ -19,6 +21,11 @@ def get_data(field: str):
    return item


+def get_requirements():
+    with open("requirements/production.txt") as f:
+        return f.read().splitlines()
+
+
 setuptools.setup(
    name="MUFS",
    version=get_data("version"),
@@ -38,11 +45,13 @@ setuptools.setup(
        "Development Status :: 4 - Beta",
        "License :: OSI Approved :: " + get_data("license"),
        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
        "Natural Language :: English",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Intended Audience :: Science/Research",
    ],
-    install_requires=["scikit-learn"],
+    install_requires=get_requirements(),
    test_suite="mufs.tests",
    zip_safe=False,
 )
--- a/sonar-project.properties
+++ b/sonar-project.properties
@@ -0,0 +1,4 @@
+sonar.projectKey=mufs
+sonar.sourceEncoding=UTF-8
+sonar.sources=.
+sonar.python.version=3.8, 3.9, 3.10
Author	SHA1	Message	Date
Ricardo Montañana	0fdd754050	Add constant features treatment	2023-10-13 13:35:24 +02:00
Ricardo Montañana	7035cc4edc	Update readme	2022-05-19 17:52:54 +02:00
Ricardo Montañana	edc8816041	Update Setup and __init__	2022-05-19 17:46:00 +02:00
Ricardo Montañana	20db8c5745	Add version to _version file, method and test	2022-05-19 17:36:59 +02:00
Ricardo Montañana	a9384685fe	Clean main.yml	2022-05-19 17:27:40 +02:00
Ricardo Montañana	86aaf23dd9	Patch main.yml	2022-05-19 17:18:45 +02:00
Ricardo Montañana	9395e8cc23	Patch main.yml	2022-05-19 17:08:44 +02:00
Ricardo Montañana	5723da9535	Patch main.yml	2022-05-19 16:46:25 +02:00
Ricardo Montañana	fb4ed468b0	Patch main.yml	2022-05-19 16:37:19 +02:00
Ricardo Montañana	57334a0b74	Patch main.yml	2022-05-19 16:31:37 +02:00
Ricardo Montañana	c47f69847e	Patch main.yml	2022-05-19 16:22:31 +02:00
Ricardo Montañana	4532309309	Patch main.yml	2022-05-19 16:09:36 +02:00
Ricardo Montañana	aa53e3dbc0	update main.yml	2022-05-19 14:22:10 +02:00
Ricardo Montañana	2861e22c57	Update main using checkout@v3	2022-05-19 12:53:14 +02:00
Ricardo Montañana	e0acd6d239	Update main.yml	2022-05-19 12:30:29 +02:00
Ricardo Montañana	3d98a39d4b	Update sonar.sources	2022-05-19 11:57:44 +02:00
Ricardo Montañana	1a4de38328	Update sonar.project.properties	2022-05-19 11:52:23 +02:00
Ricardo Montañana	a9c40f1fb7	Fix issue in gh action	2022-05-19 11:46:48 +02:00
Ricardo Montañana	81da48ec31	Fix format issue	2022-05-19 11:41:09 +02:00
Ricardo Montañana	2548ab8533	Update formatter version	2022-05-19 11:37:45 +02:00
Ricardo Montañana	08cade5dec	Add sonarqube scanner to gh actions	2022-05-19 11:21:55 +02:00
Ricardo Montañana	0a13f5e5eb	Update main.yml requirements	2022-05-19 01:20:11 +02:00
Ricardo Montañana	a0f172ac13	Update version number and sample	2021-10-28 14:30:28 +02:00
Ricardo Montañana Gómez	cfb37d2f6c	Merge pull request #3 from Doctorado-ML/Add-IWSS-(#2 ) Add iwss (#2)	2021-10-28 12:39:57 +02:00
Ricardo Montañana	5d1720c9ae	Update ci file	2021-10-28 12:22:21 +02:00
Ricardo Montañana	1c5f1977e5	Complete iwss based implementation (#2 )	2021-10-28 11:55:40 +02:00
Ricardo Montañana	27f8a370c5	Begin IWSS implementation Update requirements Create requirements for dev	2021-10-10 19:06:57 +02:00
Ricardo Montañana Gómez	9d74bc8a70	Add package version badge to README	2021-08-17 12:02:15 +02:00