From 17d44080f57d52f27350417ac4661ebfc81dc3d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Tue, 25 May 2021 11:59:24 +0200 Subject: [PATCH] Complete implementation of both algorithms Check results Complete coverage tests --- .coveragerc | 4 +- .pre-commit-config.yaml | 25 +++++++------ Makefile | 6 +-- mfs/Selection.py | 38 +++++++++++-------- mfs/tests/MFS_test.py | 83 +++++++++++++++++++++++++++++++++-------- setup.py | 8 ++-- 6 files changed, 112 insertions(+), 52 deletions(-) diff --git a/.coveragerc b/.coveragerc index b08bab6..6e37457 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,6 +1,6 @@ [run] branch = True -source = cfs +source = mfs [report] exclude_lines = @@ -10,4 +10,4 @@ exclude_lines = if __name__ == .__main__.: ignore_errors = True omit = - cfs/__init__.py \ No newline at end of file + mfs/__init__.py \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7152d26..2a87625 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,35 +1,36 @@ repos: - repo: https://github.com/ambv/black - rev: stable + rev: 20.8b1 hooks: - id: black + exclude: ".virtual_documents" language_version: python3.8 - repo: https://gitlab.com/pycqa/flake8 rev: 3.8.4 hooks: - id: flake8 - #- repo: https://github.com/pre-commit/mirrors-mypy - # rev: 'v0.782' # Use the sha / tag you want to point at - # hooks: - # - id: mypy - # args: [--strict] + exclude: ".virtual_documents" + # - repo: https://github.com/pre-commit/mirrors-mypy + # rev: "v0.790" # Use the sha / tag you want to point at + # hooks: + # - id: mypy + # # args: [--strict, --ignore-missing-imports] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.3.0 + rev: v3.4.0 hooks: - id: trailing-whitespace - id: check-case-conflict - id: check-ast - id: trailing-whitespace - - repo: local hooks: - - id: tests - name: tests + - id: unittest + name: unittest + entry: python -m coverage run -m unittest discover language: system - entry: coverage run -m unittest pass_filenames: false - id: coverage name: coverage + entry: python -m coverage report -m --fail-under=100 language: system - entry: coverage report -m --fail-under=100 pass_filenames: false diff --git a/Makefile b/Makefile index 100c82b..489c37c 100644 --- a/Makefile +++ b/Makefile @@ -11,9 +11,9 @@ deps: ## Install dependencies pip install -r requirements.txt lint: ## Lint and static-check - black cfs - flake8 cfs - mypy cfs + black mfs + flake8 mfs + mypy mfs push: ## Push code with tags git push && git push --tags diff --git a/mfs/Selection.py b/mfs/Selection.py index d13e02f..c265a98 100755 --- a/mfs/Selection.py +++ b/mfs/Selection.py @@ -1,4 +1,4 @@ -from math import log +from math import log, sqrt from sys import float_info from itertools import combinations import numpy as np @@ -145,7 +145,7 @@ class MFS: k = len(features) for pair in list(combinations(features, 2)): rff += self._compute_su_features(*pair) - return rcf / ((k ** 2 - k) * rff) + return rcf / sqrt(k + (k ** 2 - k) * rff) def cfs(self, X, y): """CFS forward best first heuristic search @@ -161,34 +161,41 @@ class MFS: self.X_ = X self.y_ = y s_list = self._compute_su_labels() - # Descending orders + # Descending order feature_order = (-s_list).argsort().tolist() - merit = float_info.min - exit_condition = 0 + continue_condition = True candidates = [] # start with the best feature (max symmetrical uncertainty wrt label) first_candidate = feature_order.pop(0) candidates.append(first_candidate) self._scores.append(s_list[first_candidate]) - while exit_condition < 5: # as proposed in the original algorithm - id_selected = -1 + while continue_condition: + merit = float_info.min + id_selected = None for idx, feature in enumerate(feature_order): candidates.append(feature) merit_new = self._compute_merit(candidates) if merit_new > merit: id_selected = idx merit = merit_new - exit_condition = 0 candidates.pop() - if id_selected == -1: - exit_condition += 1 - else: - candidates.append(feature_order[id_selected]) - self._scores.append(merit_new) - del feature_order[id_selected] + candidates.append(feature_order[id_selected]) + self._scores.append(merit) + del feature_order[id_selected] if len(feature_order) == 0: # Force leaving the loop - exit_condition = 5 + continue_condition = False + if len(self._scores) >= 5: + item_ant = -1 + for item in self._scores[-5:]: + if item_ant == -1: + item_ant = item + if item > item_ant: + break + else: + item_ant = item + else: + continue_condition = False self._result = candidates return self @@ -213,7 +220,6 @@ class MFS: break # Remove redundant features for index_q in feature_dup: - # test if feature(index_q) su with feature(index_p) is su_pq = self._compute_su_features(index_p, index_q) if su_pq >= s_list[index_q]: # remove feature from list diff --git a/mfs/tests/MFS_test.py b/mfs/tests/MFS_test.py index 2991ad2..1203116 100755 --- a/mfs/tests/MFS_test.py +++ b/mfs/tests/MFS_test.py @@ -1,6 +1,6 @@ import unittest from mdlp import MDLP -from sklearn.datasets import load_wine +from sklearn.datasets import load_wine, load_iris from ..Selection import MFS @@ -9,33 +9,53 @@ class MFS_test(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) mdlp = MDLP(random_state=1) - X, self.y = load_wine(return_X_y=True) - self.X = mdlp.fit_transform(X, self.y).astype("int64") - self.m, self.n = self.X.shape - - # @classmethod - # def setup(cls): - # pass + X, self.y_w = load_wine(return_X_y=True) + self.X_w = mdlp.fit_transform(X, self.y_w).astype("int64") + X, self.y_i = load_iris(return_X_y=True) + mdlp = MDLP(random_state=1) + self.X_i = mdlp.fit_transform(X, self.y_i).astype("int64") def test_initialize(self): mfs = MFS() - mfs.fcbs(self.X, self.y, 0.05) + mfs.fcbs(self.X_w, self.y_w, 0.05) mfs._initialize() self.assertIsNone(mfs.get_results()) self.assertListEqual([], mfs.get_scores()) self.assertDictEqual({}, mfs._su_features) self.assertIsNone(mfs._su_labels) - def test_csf(self): + def test_csf_wine(self): mfs = MFS() - expected = [6, 4] - self.assertListEqual(expected, mfs.cfs(self.X, self.y).get_results()) - expected = [0.5218299405215557, 2.4168234005280964] + expected = [6, 12, 9, 4, 10, 0] + self.assertListEqual( + expected, mfs.cfs(self.X_w, self.y_w).get_results() + ) + expected = [ + 0.5218299405215557, + 0.602513857132804, + 0.4877384978817362, + 0.3743688234383051, + 0.28795671854246285, + 0.2309165735173175, + ] self.assertListEqual(expected, mfs.get_scores()) - def test_fcbs(self): + def test_csf_iris(self): mfs = MFS() - computed = mfs.fcbs(self.X, self.y, threshold=0.05).get_results() + expected = [3, 2, 0, 1] + computed = mfs.cfs(self.X_i, self.y_i).get_results() + self.assertListEqual(expected, computed) + expected = [ + 0.870521418179061, + 0.8968651482682227, + 0.5908278453318913, + 0.40371971570693366, + ] + self.assertListEqual(expected, mfs.get_scores()) + + def test_fcbs_wine(self): + mfs = MFS() + computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.05).get_results() expected = [6, 9, 12, 0, 11, 4] self.assertListEqual(expected, computed) expected = [ @@ -47,3 +67,36 @@ class MFS_test(unittest.TestCase): 0.24972405134844652, ] self.assertListEqual(expected, mfs.get_scores()) + + def test_fcbs_iris(self): + mfs = MFS() + computed = mfs.fcbs(self.X_i, self.y_i, threshold=0.05).get_results() + expected = [3, 2] + self.assertListEqual(expected, computed) + expected = [0.870521418179061, 0.810724587460511] + self.assertListEqual(expected, mfs.get_scores()) + + def test_compute_su_labels(self): + mfs = MFS() + mfs.fcbs(self.X_i, self.y_i, threshold=0.05) + expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061] + self.assertListEqual(expected, mfs._compute_su_labels().tolist()) + mfs._su_labels = [1, 2, 3, 4] + self.assertListEqual([1, 2, 3, 4], mfs._compute_su_labels()) + + def test_invalid_threshold(self): + mfs = MFS() + with self.assertRaises(ValueError): + mfs.fcbs(self.X_i, self.y_i, threshold=1e-5) + + def test_fcbs_exit_threshold(self): + mfs = MFS() + computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.4).get_results() + expected = [6, 9, 12] + self.assertListEqual(expected, computed) + expected = [ + 0.5218299405215557, + 0.46224298637417455, + 0.44518278979085646, + ] + self.assertListEqual(expected, mfs.get_scores()) diff --git a/setup.py b/setup.py index 59e6c1e..ef24e0d 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ def readme(): def get_data(field: str): item = "" - with open("stree/__init__.py") as f: + with open("mfs/__init__.py") as f: for line in f.readlines(): if line.startswith(f"__{field}__"): delim = '"' if '"' in line else "'" @@ -27,9 +27,9 @@ setuptools.setup( long_description=readme(), long_description_content_type="text/markdown", packages=setuptools.find_packages(), - url="https://github.com/Doctorado-ML/cfs#cfs", + url="https://github.com/Doctorado-ML/mfs#mfs", project_urls={ - "Code": "https://github.com/Doctorado-ML/cfs", + "Code": "https://github.com/Doctorado-ML/mfs", }, author=get_data("author"), author_email=get_data("author_email"), @@ -43,6 +43,6 @@ setuptools.setup( "Intended Audience :: Science/Research", ], install_requires=["scikit-learn"], - test_suite="cfs.tests", + test_suite="mfs.tests", zip_safe=False, )