Complete implementation of both algorithms

Check the results of both algorithms
Complete the tests to reach full coverage
This commit is contained in:
2021-05-25 11:59:24 +02:00
parent 70560506f1
commit 17d44080f5
6 changed files with 112 additions and 52 deletions

View File

@@ -1,6 +1,6 @@
[run] [run]
branch = True branch = True
source = cfs source = mfs
[report] [report]
exclude_lines = exclude_lines =
@@ -10,4 +10,4 @@ exclude_lines =
if __name__ == .__main__.: if __name__ == .__main__.:
ignore_errors = True ignore_errors = True
omit = omit =
cfs/__init__.py mfs/__init__.py

View File

@@ -1,35 +1,36 @@
repos: repos:
- repo: https://github.com/ambv/black - repo: https://github.com/ambv/black
rev: stable rev: 20.8b1
hooks: hooks:
- id: black - id: black
exclude: ".virtual_documents"
language_version: python3.8 language_version: python3.8
- repo: https://gitlab.com/pycqa/flake8 - repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4 rev: 3.8.4
hooks: hooks:
- id: flake8 - id: flake8
#- repo: https://github.com/pre-commit/mirrors-mypy exclude: ".virtual_documents"
# rev: 'v0.782' # Use the sha / tag you want to point at # - repo: https://github.com/pre-commit/mirrors-mypy
# hooks: # rev: "v0.790" # Use the sha / tag you want to point at
# - id: mypy # hooks:
# args: [--strict] # - id: mypy
# # args: [--strict, --ignore-missing-imports]
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.3.0 rev: v3.4.0
hooks: hooks:
- id: trailing-whitespace - id: trailing-whitespace
- id: check-case-conflict - id: check-case-conflict
- id: check-ast - id: check-ast
- id: trailing-whitespace - id: trailing-whitespace
- repo: local - repo: local
hooks: hooks:
- id: tests - id: unittest
name: tests name: unittest
entry: python -m coverage run -m unittest discover
language: system language: system
entry: coverage run -m unittest
pass_filenames: false pass_filenames: false
- id: coverage - id: coverage
name: coverage name: coverage
entry: python -m coverage report -m --fail-under=100
language: system language: system
entry: coverage report -m --fail-under=100
pass_filenames: false pass_filenames: false

View File

@@ -11,9 +11,9 @@ deps: ## Install dependencies
pip install -r requirements.txt pip install -r requirements.txt
lint: ## Lint and static-check lint: ## Lint and static-check
black cfs black mfs
flake8 cfs flake8 mfs
mypy cfs mypy mfs
push: ## Push code with tags push: ## Push code with tags
git push && git push --tags git push && git push --tags

View File

@@ -1,4 +1,4 @@
from math import log from math import log, sqrt
from sys import float_info from sys import float_info
from itertools import combinations from itertools import combinations
import numpy as np import numpy as np
@@ -145,7 +145,7 @@ class MFS:
k = len(features) k = len(features)
for pair in list(combinations(features, 2)): for pair in list(combinations(features, 2)):
rff += self._compute_su_features(*pair) rff += self._compute_su_features(*pair)
return rcf / ((k ** 2 - k) * rff) return rcf / sqrt(k + (k ** 2 - k) * rff)
def cfs(self, X, y): def cfs(self, X, y):
"""CFS forward best first heuristic search """CFS forward best first heuristic search
@@ -161,34 +161,41 @@ class MFS:
self.X_ = X self.X_ = X
self.y_ = y self.y_ = y
s_list = self._compute_su_labels() s_list = self._compute_su_labels()
# Descending orders # Descending order
feature_order = (-s_list).argsort().tolist() feature_order = (-s_list).argsort().tolist()
merit = float_info.min continue_condition = True
exit_condition = 0
candidates = [] candidates = []
# start with the best feature (max symmetrical uncertainty wrt label) # start with the best feature (max symmetrical uncertainty wrt label)
first_candidate = feature_order.pop(0) first_candidate = feature_order.pop(0)
candidates.append(first_candidate) candidates.append(first_candidate)
self._scores.append(s_list[first_candidate]) self._scores.append(s_list[first_candidate])
while exit_condition < 5: # as proposed in the original algorithm while continue_condition:
id_selected = -1 merit = float_info.min
id_selected = None
for idx, feature in enumerate(feature_order): for idx, feature in enumerate(feature_order):
candidates.append(feature) candidates.append(feature)
merit_new = self._compute_merit(candidates) merit_new = self._compute_merit(candidates)
if merit_new > merit: if merit_new > merit:
id_selected = idx id_selected = idx
merit = merit_new merit = merit_new
exit_condition = 0
candidates.pop() candidates.pop()
if id_selected == -1: candidates.append(feature_order[id_selected])
exit_condition += 1 self._scores.append(merit)
else: del feature_order[id_selected]
candidates.append(feature_order[id_selected])
self._scores.append(merit_new)
del feature_order[id_selected]
if len(feature_order) == 0: if len(feature_order) == 0:
# Force leaving the loop # Force leaving the loop
exit_condition = 5 continue_condition = False
if len(self._scores) >= 5:
item_ant = -1
for item in self._scores[-5:]:
if item_ant == -1:
item_ant = item
if item > item_ant:
break
else:
item_ant = item
else:
continue_condition = False
self._result = candidates self._result = candidates
return self return self
@@ -213,7 +220,6 @@ class MFS:
break break
# Remove redundant features # Remove redundant features
for index_q in feature_dup: for index_q in feature_dup:
# test if feature(index_q) su with feature(index_p) is
su_pq = self._compute_su_features(index_p, index_q) su_pq = self._compute_su_features(index_p, index_q)
if su_pq >= s_list[index_q]: if su_pq >= s_list[index_q]:
# remove feature from list # remove feature from list

View File

@@ -1,6 +1,6 @@
import unittest import unittest
from mdlp import MDLP from mdlp import MDLP
from sklearn.datasets import load_wine from sklearn.datasets import load_wine, load_iris
from ..Selection import MFS from ..Selection import MFS
@@ -9,33 +9,53 @@ class MFS_test(unittest.TestCase):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
mdlp = MDLP(random_state=1) mdlp = MDLP(random_state=1)
X, self.y = load_wine(return_X_y=True) X, self.y_w = load_wine(return_X_y=True)
self.X = mdlp.fit_transform(X, self.y).astype("int64") self.X_w = mdlp.fit_transform(X, self.y_w).astype("int64")
self.m, self.n = self.X.shape X, self.y_i = load_iris(return_X_y=True)
mdlp = MDLP(random_state=1)
# @classmethod self.X_i = mdlp.fit_transform(X, self.y_i).astype("int64")
# def setup(cls):
# pass
def test_initialize(self): def test_initialize(self):
mfs = MFS() mfs = MFS()
mfs.fcbs(self.X, self.y, 0.05) mfs.fcbs(self.X_w, self.y_w, 0.05)
mfs._initialize() mfs._initialize()
self.assertIsNone(mfs.get_results()) self.assertIsNone(mfs.get_results())
self.assertListEqual([], mfs.get_scores()) self.assertListEqual([], mfs.get_scores())
self.assertDictEqual({}, mfs._su_features) self.assertDictEqual({}, mfs._su_features)
self.assertIsNone(mfs._su_labels) self.assertIsNone(mfs._su_labels)
def test_csf(self): def test_csf_wine(self):
mfs = MFS() mfs = MFS()
expected = [6, 4] expected = [6, 12, 9, 4, 10, 0]
self.assertListEqual(expected, mfs.cfs(self.X, self.y).get_results()) self.assertListEqual(
expected = [0.5218299405215557, 2.4168234005280964] expected, mfs.cfs(self.X_w, self.y_w).get_results()
)
expected = [
0.5218299405215557,
0.602513857132804,
0.4877384978817362,
0.3743688234383051,
0.28795671854246285,
0.2309165735173175,
]
self.assertListEqual(expected, mfs.get_scores()) self.assertListEqual(expected, mfs.get_scores())
def test_fcbs(self): def test_csf_iris(self):
mfs = MFS() mfs = MFS()
computed = mfs.fcbs(self.X, self.y, threshold=0.05).get_results() expected = [3, 2, 0, 1]
computed = mfs.cfs(self.X_i, self.y_i).get_results()
self.assertListEqual(expected, computed)
expected = [
0.870521418179061,
0.8968651482682227,
0.5908278453318913,
0.40371971570693366,
]
self.assertListEqual(expected, mfs.get_scores())
def test_fcbs_wine(self):
mfs = MFS()
computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.05).get_results()
expected = [6, 9, 12, 0, 11, 4] expected = [6, 9, 12, 0, 11, 4]
self.assertListEqual(expected, computed) self.assertListEqual(expected, computed)
expected = [ expected = [
@@ -47,3 +67,36 @@ class MFS_test(unittest.TestCase):
0.24972405134844652, 0.24972405134844652,
] ]
self.assertListEqual(expected, mfs.get_scores()) self.assertListEqual(expected, mfs.get_scores())
def test_fcbs_iris(self):
mfs = MFS()
computed = mfs.fcbs(self.X_i, self.y_i, threshold=0.05).get_results()
expected = [3, 2]
self.assertListEqual(expected, computed)
expected = [0.870521418179061, 0.810724587460511]
self.assertListEqual(expected, mfs.get_scores())
def test_compute_su_labels(self):
mfs = MFS()
mfs.fcbs(self.X_i, self.y_i, threshold=0.05)
expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061]
self.assertListEqual(expected, mfs._compute_su_labels().tolist())
mfs._su_labels = [1, 2, 3, 4]
self.assertListEqual([1, 2, 3, 4], mfs._compute_su_labels())
def test_invalid_threshold(self):
mfs = MFS()
with self.assertRaises(ValueError):
mfs.fcbs(self.X_i, self.y_i, threshold=1e-5)
def test_fcbs_exit_threshold(self):
mfs = MFS()
computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.4).get_results()
expected = [6, 9, 12]
self.assertListEqual(expected, computed)
expected = [
0.5218299405215557,
0.46224298637417455,
0.44518278979085646,
]
self.assertListEqual(expected, mfs.get_scores())

View File

@@ -8,7 +8,7 @@ def readme():
def get_data(field: str): def get_data(field: str):
item = "" item = ""
with open("stree/__init__.py") as f: with open("mfs/__init__.py") as f:
for line in f.readlines(): for line in f.readlines():
if line.startswith(f"__{field}__"): if line.startswith(f"__{field}__"):
delim = '"' if '"' in line else "'" delim = '"' if '"' in line else "'"
@@ -27,9 +27,9 @@ setuptools.setup(
long_description=readme(), long_description=readme(),
long_description_content_type="text/markdown", long_description_content_type="text/markdown",
packages=setuptools.find_packages(), packages=setuptools.find_packages(),
url="https://github.com/Doctorado-ML/cfs#cfs", url="https://github.com/Doctorado-ML/mfs#mfs",
project_urls={ project_urls={
"Code": "https://github.com/Doctorado-ML/cfs", "Code": "https://github.com/Doctorado-ML/mfs",
}, },
author=get_data("author"), author=get_data("author"),
author_email=get_data("author_email"), author_email=get_data("author_email"),
@@ -43,6 +43,6 @@ setuptools.setup(
"Intended Audience :: Science/Research", "Intended Audience :: Science/Research",
], ],
install_requires=["scikit-learn"], install_requires=["scikit-learn"],
test_suite="cfs.tests", test_suite="mfs.tests",
zip_safe=False, zip_safe=False,
) )