Complete implementation of both algorithms

Check results. Complete coverage tests.
2025-08-15 15:45:53 +00:00 · 2021-05-25 11:59:24 +02:00
parent 70560506f1
commit 17d44080f5
6 changed files with 112 additions and 52 deletions
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,6 +1,6 @@
 [run]
 branch = True
-source = cfs
+source = mfs

 [report]
 exclude_lines =
@@ -10,4 +10,4 @@ exclude_lines =
    if __name__ == .__main__.:
 ignore_errors = True
 omit =
-    cfs/__init__.py
+    mfs/__init__.py
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,35 +1,36 @@
 repos:
  - repo: https://github.com/ambv/black
-    rev: stable
+    rev: 20.8b1
    hooks:
      - id: black
+        exclude: ".virtual_documents"
        language_version: python3.8
  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.8.4
    hooks:
      - id: flake8
-  #-   repo: https://github.com/pre-commit/mirrors-mypy
-  #    rev: 'v0.782'  # Use the sha / tag you want to point at
-  #    hooks:
-  #    -   id: mypy
-  #        args: [--strict]
+        exclude: ".virtual_documents"
+  # - repo: https://github.com/pre-commit/mirrors-mypy
+  #   rev: "v0.790" # Use the sha / tag you want to point at
+  #   hooks:
+  #     - id: mypy
+  #       # args: [--strict, --ignore-missing-imports]
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.3.0
+    rev: v3.4.0
    hooks:
      - id: trailing-whitespace
      - id: check-case-conflict
      - id: check-ast
      - id: trailing-whitespace
-
  - repo: local
    hooks:
-      - id: tests
-        name: tests
+      - id: unittest
+        name: unittest
+        entry: python -m coverage run -m unittest discover
        language: system
-        entry: coverage run -m unittest
        pass_filenames: false
      - id: coverage
        name: coverage
+        entry: python -m coverage report -m --fail-under=100
        language: system
-        entry: coverage report -m --fail-under=100
        pass_filenames: false
--- a/6
+++ b/6
@@ -11,9 +11,9 @@ deps:  ## Install dependencies
 	pip install -r requirements.txt

 lint:  ## Lint and static-check
-	black cfs
-	flake8 cfs
-	mypy cfs
+	black mfs
+	flake8 mfs
+	mypy mfs

 push:  ## Push code with tags
 	git push && git push --tags
--- a/mfs/Selection.py
+++ b/mfs/Selection.py
@@ -1,4 +1,4 @@
-from math import log
+from math import log, sqrt
 from sys import float_info
 from itertools import combinations
 import numpy as np
@@ -145,7 +145,7 @@ class MFS:
        k = len(features)
        for pair in list(combinations(features, 2)):
            rff += self._compute_su_features(*pair)
-        return rcf / ((k ** 2 - k) * rff)
+        return rcf / sqrt(k + (k ** 2 - k) * rff)

    def cfs(self, X, y):
        """CFS forward best first heuristic search
@@ -161,34 +161,41 @@ class MFS:
        self.X_ = X
        self.y_ = y
        s_list = self._compute_su_labels()
-        # Descending orders
+        # Descending order
        feature_order = (-s_list).argsort().tolist()
-        merit = float_info.min
-        exit_condition = 0
+        continue_condition = True
        candidates = []
        # start with the best feature (max symmetrical uncertainty wrt label)
        first_candidate = feature_order.pop(0)
        candidates.append(first_candidate)
        self._scores.append(s_list[first_candidate])
-        while exit_condition < 5:  # as proposed in the original algorithm
-            id_selected = -1
+        while continue_condition:
+            merit = float_info.min
+            id_selected = None
            for idx, feature in enumerate(feature_order):
                candidates.append(feature)
                merit_new = self._compute_merit(candidates)
                if merit_new > merit:
                    id_selected = idx
                    merit = merit_new
-                    exit_condition = 0
                candidates.pop()
-            if id_selected == -1:
-                exit_condition += 1
-            else:
-                candidates.append(feature_order[id_selected])
-                self._scores.append(merit_new)
-                del feature_order[id_selected]
+            candidates.append(feature_order[id_selected])
+            self._scores.append(merit)
+            del feature_order[id_selected]
            if len(feature_order) == 0:
                # Force leaving the loop
-                exit_condition = 5
+                continue_condition = False
+            if len(self._scores) >= 5:
+                item_ant = -1
+                for item in self._scores[-5:]:
+                    if item_ant == -1:
+                        item_ant = item
+                    if item > item_ant:
+                        break
+                    else:
+                        item_ant = item
+                else:
+                    continue_condition = False
        self._result = candidates
        return self

@@ -213,7 +220,6 @@ class MFS:
                break
            # Remove redundant features
            for index_q in feature_dup:
-                # test if feature(index_q) su with feature(index_p) is
                su_pq = self._compute_su_features(index_p, index_q)
                if su_pq >= s_list[index_q]:
                    # remove feature from list
--- a/mfs/tests/MFS_test.py
+++ b/mfs/tests/MFS_test.py
@@ -1,6 +1,6 @@
 import unittest
 from mdlp import MDLP
-from sklearn.datasets import load_wine
+from sklearn.datasets import load_wine, load_iris

 from ..Selection import MFS

@@ -9,33 +9,53 @@ class MFS_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        mdlp = MDLP(random_state=1)
-        X, self.y = load_wine(return_X_y=True)
-        self.X = mdlp.fit_transform(X, self.y).astype("int64")
-        self.m, self.n = self.X.shape
-
-    # @classmethod
-    # def setup(cls):
-    #     pass
+        X, self.y_w = load_wine(return_X_y=True)
+        self.X_w = mdlp.fit_transform(X, self.y_w).astype("int64")
+        X, self.y_i = load_iris(return_X_y=True)
+        mdlp = MDLP(random_state=1)
+        self.X_i = mdlp.fit_transform(X, self.y_i).astype("int64")

    def test_initialize(self):
        mfs = MFS()
-        mfs.fcbs(self.X, self.y, 0.05)
+        mfs.fcbs(self.X_w, self.y_w, 0.05)
        mfs._initialize()
        self.assertIsNone(mfs.get_results())
        self.assertListEqual([], mfs.get_scores())
        self.assertDictEqual({}, mfs._su_features)
        self.assertIsNone(mfs._su_labels)

-    def test_csf(self):
+    def test_csf_wine(self):
        mfs = MFS()
-        expected = [6, 4]
-        self.assertListEqual(expected, mfs.cfs(self.X, self.y).get_results())
-        expected = [0.5218299405215557, 2.4168234005280964]
+        expected = [6, 12, 9, 4, 10, 0]
+        self.assertListEqual(
+            expected, mfs.cfs(self.X_w, self.y_w).get_results()
+        )
+        expected = [
+            0.5218299405215557,
+            0.602513857132804,
+            0.4877384978817362,
+            0.3743688234383051,
+            0.28795671854246285,
+            0.2309165735173175,
+        ]
        self.assertListEqual(expected, mfs.get_scores())

-    def test_fcbs(self):
+    def test_csf_iris(self):
        mfs = MFS()
-        computed = mfs.fcbs(self.X, self.y, threshold=0.05).get_results()
+        expected = [3, 2, 0, 1]
+        computed = mfs.cfs(self.X_i, self.y_i).get_results()
+        self.assertListEqual(expected, computed)
+        expected = [
+            0.870521418179061,
+            0.8968651482682227,
+            0.5908278453318913,
+            0.40371971570693366,
+        ]
+        self.assertListEqual(expected, mfs.get_scores())
+
+    def test_fcbs_wine(self):
+        mfs = MFS()
+        computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.05).get_results()
        expected = [6, 9, 12, 0, 11, 4]
        self.assertListEqual(expected, computed)
        expected = [
@@ -47,3 +67,36 @@ class MFS_test(unittest.TestCase):
            0.24972405134844652,
        ]
        self.assertListEqual(expected, mfs.get_scores())
+
+    def test_fcbs_iris(self):
+        mfs = MFS()
+        computed = mfs.fcbs(self.X_i, self.y_i, threshold=0.05).get_results()
+        expected = [3, 2]
+        self.assertListEqual(expected, computed)
+        expected = [0.870521418179061, 0.810724587460511]
+        self.assertListEqual(expected, mfs.get_scores())
+
+    def test_compute_su_labels(self):
+        mfs = MFS()
+        mfs.fcbs(self.X_i, self.y_i, threshold=0.05)
+        expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061]
+        self.assertListEqual(expected, mfs._compute_su_labels().tolist())
+        mfs._su_labels = [1, 2, 3, 4]
+        self.assertListEqual([1, 2, 3, 4], mfs._compute_su_labels())
+
+    def test_invalid_threshold(self):
+        mfs = MFS()
+        with self.assertRaises(ValueError):
+            mfs.fcbs(self.X_i, self.y_i, threshold=1e-5)
+
+    def test_fcbs_exit_threshold(self):
+        mfs = MFS()
+        computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.4).get_results()
+        expected = [6, 9, 12]
+        self.assertListEqual(expected, computed)
+        expected = [
+            0.5218299405215557,
+            0.46224298637417455,
+            0.44518278979085646,
+        ]
+        self.assertListEqual(expected, mfs.get_scores())
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@ def readme():

 def get_data(field: str):
    item = ""
-    with open("stree/__init__.py") as f:
+    with open("mfs/__init__.py") as f:
        for line in f.readlines():
            if line.startswith(f"__{field}__"):
                delim = '"' if '"' in line else "'"
@@ -27,9 +27,9 @@ setuptools.setup(
    long_description=readme(),
    long_description_content_type="text/markdown",
    packages=setuptools.find_packages(),
-    url="https://github.com/Doctorado-ML/cfs#cfs",
+    url="https://github.com/Doctorado-ML/mfs#mfs",
    project_urls={
-        "Code": "https://github.com/Doctorado-ML/cfs",
+        "Code": "https://github.com/Doctorado-ML/mfs",
    },
    author=get_data("author"),
    author_email=get_data("author_email"),
@@ -43,6 +43,6 @@ setuptools.setup(
        "Intended Audience :: Science/Research",
    ],
    install_requires=["scikit-learn"],
-    test_suite="cfs.tests",
+    test_suite="mfs.tests",
    zip_safe=False,
 )