From 17d44080f57d52f27350417ac4661ebfc81dc3d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Tue, 25 May 2021 11:59:24 +0200
Subject: [PATCH] Complete implementation of both algorithms; check results;
 complete coverage tests

---
 .coveragerc             |  4 +-
 .pre-commit-config.yaml | 25 +++++++------
 Makefile                |  6 +--
 mfs/Selection.py        | 38 +++++++++++--------
 mfs/tests/MFS_test.py   | 83 +++++++++++++++++++++++++++++++++--------
 setup.py                |  8 ++--
 6 files changed, 112 insertions(+), 52 deletions(-)

diff --git a/.coveragerc b/.coveragerc
index b08bab6..6e37457 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,6 +1,6 @@
 [run]
 branch = True
-source = cfs
+source = mfs
 
 [report]
 exclude_lines =
@@ -10,4 +10,4 @@ exclude_lines =
     if __name__ == .__main__.:
 ignore_errors = True
 omit =
-    cfs/__init__.py
\ No newline at end of file
+    mfs/__init__.py
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7152d26..2a87625 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,35 +1,36 @@
 repos:
   - repo: https://github.com/ambv/black
-    rev: stable
+    rev: 20.8b1
     hooks:
       - id: black
+        exclude: ".virtual_documents"
         language_version: python3.8
   - repo: https://gitlab.com/pycqa/flake8
     rev: 3.8.4
     hooks:
       - id: flake8
-  #-   repo: https://github.com/pre-commit/mirrors-mypy
-  #    rev: 'v0.782'  # Use the sha / tag you want to point at
-  #    hooks:
-  #    -   id: mypy
-  #        args: [--strict]
+        exclude: ".virtual_documents"
+  # - repo: https://github.com/pre-commit/mirrors-mypy
+  #   rev: "v0.790" # Use the sha / tag you want to point at
+  #   hooks:
+  #     - id: mypy
+  #       # args: [--strict, --ignore-missing-imports]
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.3.0
+    rev: v3.4.0
     hooks:
       - id: trailing-whitespace
       - id: check-case-conflict
       - id: check-ast
       - id: trailing-whitespace
-
   - repo: local
     hooks:
-      - id: tests
-        name: tests
+      - id: unittest
+        name: unittest
+        entry: python -m coverage run -m unittest discover
         language: system
-        entry: coverage run -m unittest
         pass_filenames: false
       - id: coverage
         name: coverage
+        entry: python -m coverage report -m --fail-under=100
         language: system
-        entry: coverage report -m --fail-under=100
         pass_filenames: false
diff --git a/Makefile b/Makefile
index 100c82b..489c37c 100644
--- a/Makefile
+++ b/Makefile
@@ -11,9 +11,9 @@ deps:  ## Install dependencies
 	pip install -r requirements.txt
 
 lint:  ## Lint and static-check
-	black cfs
-	flake8 cfs
-	mypy cfs
+	black mfs
+	flake8 mfs
+	mypy mfs
 
 push:  ## Push code with tags
 	git push && git push --tags
diff --git a/mfs/Selection.py b/mfs/Selection.py
index d13e02f..c265a98 100755
--- a/mfs/Selection.py
+++ b/mfs/Selection.py
@@ -1,4 +1,4 @@
-from math import log
+from math import log, sqrt
 from sys import float_info
 from itertools import combinations
 import numpy as np
@@ -145,7 +145,7 @@ class MFS:
         k = len(features)
         for pair in list(combinations(features, 2)):
             rff += self._compute_su_features(*pair)
-        return rcf / ((k ** 2 - k) * rff)
+        return rcf / sqrt(k + (k ** 2 - k) * rff)
 
     def cfs(self, X, y):
         """CFS forward best first heuristic search
@@ -161,34 +161,41 @@ class MFS:
         self.X_ = X
         self.y_ = y
         s_list = self._compute_su_labels()
-        # Descending orders
+        # Descending order
         feature_order = (-s_list).argsort().tolist()
-        merit = float_info.min
-        exit_condition = 0
+        continue_condition = True
         candidates = []
         # start with the best feature (max symmetrical uncertainty wrt label)
         first_candidate = feature_order.pop(0)
         candidates.append(first_candidate)
         self._scores.append(s_list[first_candidate])
-        while exit_condition < 5:  # as proposed in the original algorithm
-            id_selected = -1
+        while continue_condition:
+            merit = float_info.min
+            id_selected = None
             for idx, feature in enumerate(feature_order):
                 candidates.append(feature)
                 merit_new = self._compute_merit(candidates)
                 if merit_new > merit:
                     id_selected = idx
                     merit = merit_new
-                    exit_condition = 0
                 candidates.pop()
-            if id_selected == -1:
-                exit_condition += 1
-            else:
-                candidates.append(feature_order[id_selected])
-                self._scores.append(merit_new)
-                del feature_order[id_selected]
+            candidates.append(feature_order[id_selected])
+            self._scores.append(merit)
+            del feature_order[id_selected]
             if len(feature_order) == 0:
                 # Force leaving the loop
-                exit_condition = 5
+                continue_condition = False
+            if len(self._scores) >= 5:
+                item_ant = -1
+                for item in self._scores[-5:]:
+                    if item_ant == -1:
+                        item_ant = item
+                    if item > item_ant:
+                        break
+                    else:
+                        item_ant = item
+                else:
+                    continue_condition = False
         self._result = candidates
         return self
 
@@ -213,7 +220,6 @@ class MFS:
                 break
             # Remove redundant features
             for index_q in feature_dup:
-                # test if feature(index_q) su with feature(index_p) is
                 su_pq = self._compute_su_features(index_p, index_q)
                 if su_pq >= s_list[index_q]:
                     # remove feature from list
diff --git a/mfs/tests/MFS_test.py b/mfs/tests/MFS_test.py
index 2991ad2..1203116 100755
--- a/mfs/tests/MFS_test.py
+++ b/mfs/tests/MFS_test.py
@@ -1,6 +1,6 @@
 import unittest
 from mdlp import MDLP
-from sklearn.datasets import load_wine
+from sklearn.datasets import load_wine, load_iris
 
 from ..Selection import MFS
 
@@ -9,33 +9,53 @@ class MFS_test(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         mdlp = MDLP(random_state=1)
-        X, self.y = load_wine(return_X_y=True)
-        self.X = mdlp.fit_transform(X, self.y).astype("int64")
-        self.m, self.n = self.X.shape
-
-    # @classmethod
-    # def setup(cls):
-    #     pass
+        X, self.y_w = load_wine(return_X_y=True)
+        self.X_w = mdlp.fit_transform(X, self.y_w).astype("int64")
+        X, self.y_i = load_iris(return_X_y=True)
+        mdlp = MDLP(random_state=1)
+        self.X_i = mdlp.fit_transform(X, self.y_i).astype("int64")
 
     def test_initialize(self):
         mfs = MFS()
-        mfs.fcbs(self.X, self.y, 0.05)
+        mfs.fcbs(self.X_w, self.y_w, 0.05)
         mfs._initialize()
         self.assertIsNone(mfs.get_results())
         self.assertListEqual([], mfs.get_scores())
         self.assertDictEqual({}, mfs._su_features)
         self.assertIsNone(mfs._su_labels)
 
-    def test_csf(self):
+    def test_csf_wine(self):
         mfs = MFS()
-        expected = [6, 4]
-        self.assertListEqual(expected, mfs.cfs(self.X, self.y).get_results())
-        expected = [0.5218299405215557, 2.4168234005280964]
+        expected = [6, 12, 9, 4, 10, 0]
+        self.assertListEqual(
+            expected, mfs.cfs(self.X_w, self.y_w).get_results()
+        )
+        expected = [
+            0.5218299405215557,
+            0.602513857132804,
+            0.4877384978817362,
+            0.3743688234383051,
+            0.28795671854246285,
+            0.2309165735173175,
+        ]
         self.assertListEqual(expected, mfs.get_scores())
 
-    def test_fcbs(self):
+    def test_csf_iris(self):
         mfs = MFS()
-        computed = mfs.fcbs(self.X, self.y, threshold=0.05).get_results()
+        expected = [3, 2, 0, 1]
+        computed = mfs.cfs(self.X_i, self.y_i).get_results()
+        self.assertListEqual(expected, computed)
+        expected = [
+            0.870521418179061,
+            0.8968651482682227,
+            0.5908278453318913,
+            0.40371971570693366,
+        ]
+        self.assertListEqual(expected, mfs.get_scores())
+
+    def test_fcbs_wine(self):
+        mfs = MFS()
+        computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.05).get_results()
         expected = [6, 9, 12, 0, 11, 4]
         self.assertListEqual(expected, computed)
         expected = [
@@ -47,3 +67,36 @@ class MFS_test(unittest.TestCase):
             0.24972405134844652,
         ]
         self.assertListEqual(expected, mfs.get_scores())
+
+    def test_fcbs_iris(self):
+        mfs = MFS()
+        computed = mfs.fcbs(self.X_i, self.y_i, threshold=0.05).get_results()
+        expected = [3, 2]
+        self.assertListEqual(expected, computed)
+        expected = [0.870521418179061, 0.810724587460511]
+        self.assertListEqual(expected, mfs.get_scores())
+
+    def test_compute_su_labels(self):
+        mfs = MFS()
+        mfs.fcbs(self.X_i, self.y_i, threshold=0.05)
+        expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061]
+        self.assertListEqual(expected, mfs._compute_su_labels().tolist())
+        mfs._su_labels = [1, 2, 3, 4]
+        self.assertListEqual([1, 2, 3, 4], mfs._compute_su_labels())
+
+    def test_invalid_threshold(self):
+        mfs = MFS()
+        with self.assertRaises(ValueError):
+            mfs.fcbs(self.X_i, self.y_i, threshold=1e-5)
+
+    def test_fcbs_exit_threshold(self):
+        mfs = MFS()
+        computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.4).get_results()
+        expected = [6, 9, 12]
+        self.assertListEqual(expected, computed)
+        expected = [
+            0.5218299405215557,
+            0.46224298637417455,
+            0.44518278979085646,
+        ]
+        self.assertListEqual(expected, mfs.get_scores())
diff --git a/setup.py b/setup.py
index 59e6c1e..ef24e0d 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@ def readme():
 
 def get_data(field: str):
     item = ""
-    with open("stree/__init__.py") as f:
+    with open("mfs/__init__.py") as f:
         for line in f.readlines():
             if line.startswith(f"__{field}__"):
                 delim = '"' if '"' in line else "'"
@@ -27,9 +27,9 @@ setuptools.setup(
     long_description=readme(),
     long_description_content_type="text/markdown",
     packages=setuptools.find_packages(),
-    url="https://github.com/Doctorado-ML/cfs#cfs",
+    url="https://github.com/Doctorado-ML/mfs#mfs",
     project_urls={
-        "Code": "https://github.com/Doctorado-ML/cfs",
+        "Code": "https://github.com/Doctorado-ML/mfs",
     },
     author=get_data("author"),
     author_email=get_data("author_email"),
@@ -43,6 +43,6 @@ setuptools.setup(
         "Intended Audience :: Science/Research",
     ],
     install_requires=["scikit-learn"],
-    test_suite="cfs.tests",
+    test_suite="mfs.tests",
     zip_safe=False,
 )