mirror of
https://github.com/Doctorado-ML/mufs.git
synced 2025-08-16 08:05:56 +00:00
Complete implementation of both algorithms
Check results. Complete coverage tests.
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
[run]
|
[run]
|
||||||
branch = True
|
branch = True
|
||||||
source = cfs
|
source = mfs
|
||||||
|
|
||||||
[report]
|
[report]
|
||||||
exclude_lines =
|
exclude_lines =
|
||||||
@@ -10,4 +10,4 @@ exclude_lines =
|
|||||||
if __name__ == .__main__.:
|
if __name__ == .__main__.:
|
||||||
ignore_errors = True
|
ignore_errors = True
|
||||||
omit =
|
omit =
|
||||||
cfs/__init__.py
|
mfs/__init__.py
|
@@ -1,35 +1,36 @@
|
|||||||
repos:
|
repos:
|
||||||
- repo: https://github.com/ambv/black
|
- repo: https://github.com/ambv/black
|
||||||
rev: stable
|
rev: 20.8b1
|
||||||
hooks:
|
hooks:
|
||||||
- id: black
|
- id: black
|
||||||
|
exclude: ".virtual_documents"
|
||||||
language_version: python3.8
|
language_version: python3.8
|
||||||
- repo: https://gitlab.com/pycqa/flake8
|
- repo: https://gitlab.com/pycqa/flake8
|
||||||
rev: 3.8.4
|
rev: 3.8.4
|
||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
#- repo: https://github.com/pre-commit/mirrors-mypy
|
exclude: ".virtual_documents"
|
||||||
# rev: 'v0.782' # Use the sha / tag you want to point at
|
# - repo: https://github.com/pre-commit/mirrors-mypy
|
||||||
# hooks:
|
# rev: "v0.790" # Use the sha / tag you want to point at
|
||||||
# - id: mypy
|
# hooks:
|
||||||
# args: [--strict]
|
# - id: mypy
|
||||||
|
# # args: [--strict, --ignore-missing-imports]
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
rev: v3.3.0
|
rev: v3.4.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: trailing-whitespace
|
- id: trailing-whitespace
|
||||||
- id: check-case-conflict
|
- id: check-case-conflict
|
||||||
- id: check-ast
|
- id: check-ast
|
||||||
- id: trailing-whitespace
|
- id: trailing-whitespace
|
||||||
|
|
||||||
- repo: local
|
- repo: local
|
||||||
hooks:
|
hooks:
|
||||||
- id: tests
|
- id: unittest
|
||||||
name: tests
|
name: unittest
|
||||||
|
entry: python -m coverage run -m unittest discover
|
||||||
language: system
|
language: system
|
||||||
entry: coverage run -m unittest
|
|
||||||
pass_filenames: false
|
pass_filenames: false
|
||||||
- id: coverage
|
- id: coverage
|
||||||
name: coverage
|
name: coverage
|
||||||
|
entry: python -m coverage report -m --fail-under=100
|
||||||
language: system
|
language: system
|
||||||
entry: coverage report -m --fail-under=100
|
|
||||||
pass_filenames: false
|
pass_filenames: false
|
||||||
|
6
Makefile
6
Makefile
@@ -11,9 +11,9 @@ deps: ## Install dependencies
|
|||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
|
|
||||||
lint: ## Lint and static-check
|
lint: ## Lint and static-check
|
||||||
black cfs
|
black mfs
|
||||||
flake8 cfs
|
flake8 mfs
|
||||||
mypy cfs
|
mypy mfs
|
||||||
|
|
||||||
push: ## Push code with tags
|
push: ## Push code with tags
|
||||||
git push && git push --tags
|
git push && git push --tags
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
from math import log
|
from math import log, sqrt
|
||||||
from sys import float_info
|
from sys import float_info
|
||||||
from itertools import combinations
|
from itertools import combinations
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -145,7 +145,7 @@ class MFS:
|
|||||||
k = len(features)
|
k = len(features)
|
||||||
for pair in list(combinations(features, 2)):
|
for pair in list(combinations(features, 2)):
|
||||||
rff += self._compute_su_features(*pair)
|
rff += self._compute_su_features(*pair)
|
||||||
return rcf / ((k ** 2 - k) * rff)
|
return rcf / sqrt(k + (k ** 2 - k) * rff)
|
||||||
|
|
||||||
def cfs(self, X, y):
|
def cfs(self, X, y):
|
||||||
"""CFS forward best first heuristic search
|
"""CFS forward best first heuristic search
|
||||||
@@ -161,34 +161,41 @@ class MFS:
|
|||||||
self.X_ = X
|
self.X_ = X
|
||||||
self.y_ = y
|
self.y_ = y
|
||||||
s_list = self._compute_su_labels()
|
s_list = self._compute_su_labels()
|
||||||
# Descending orders
|
# Descending order
|
||||||
feature_order = (-s_list).argsort().tolist()
|
feature_order = (-s_list).argsort().tolist()
|
||||||
merit = float_info.min
|
continue_condition = True
|
||||||
exit_condition = 0
|
|
||||||
candidates = []
|
candidates = []
|
||||||
# start with the best feature (max symmetrical uncertainty wrt label)
|
# start with the best feature (max symmetrical uncertainty wrt label)
|
||||||
first_candidate = feature_order.pop(0)
|
first_candidate = feature_order.pop(0)
|
||||||
candidates.append(first_candidate)
|
candidates.append(first_candidate)
|
||||||
self._scores.append(s_list[first_candidate])
|
self._scores.append(s_list[first_candidate])
|
||||||
while exit_condition < 5: # as proposed in the original algorithm
|
while continue_condition:
|
||||||
id_selected = -1
|
merit = float_info.min
|
||||||
|
id_selected = None
|
||||||
for idx, feature in enumerate(feature_order):
|
for idx, feature in enumerate(feature_order):
|
||||||
candidates.append(feature)
|
candidates.append(feature)
|
||||||
merit_new = self._compute_merit(candidates)
|
merit_new = self._compute_merit(candidates)
|
||||||
if merit_new > merit:
|
if merit_new > merit:
|
||||||
id_selected = idx
|
id_selected = idx
|
||||||
merit = merit_new
|
merit = merit_new
|
||||||
exit_condition = 0
|
|
||||||
candidates.pop()
|
candidates.pop()
|
||||||
if id_selected == -1:
|
candidates.append(feature_order[id_selected])
|
||||||
exit_condition += 1
|
self._scores.append(merit)
|
||||||
else:
|
del feature_order[id_selected]
|
||||||
candidates.append(feature_order[id_selected])
|
|
||||||
self._scores.append(merit_new)
|
|
||||||
del feature_order[id_selected]
|
|
||||||
if len(feature_order) == 0:
|
if len(feature_order) == 0:
|
||||||
# Force leaving the loop
|
# Force leaving the loop
|
||||||
exit_condition = 5
|
continue_condition = False
|
||||||
|
if len(self._scores) >= 5:
|
||||||
|
item_ant = -1
|
||||||
|
for item in self._scores[-5:]:
|
||||||
|
if item_ant == -1:
|
||||||
|
item_ant = item
|
||||||
|
if item > item_ant:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
item_ant = item
|
||||||
|
else:
|
||||||
|
continue_condition = False
|
||||||
self._result = candidates
|
self._result = candidates
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@@ -213,7 +220,6 @@ class MFS:
|
|||||||
break
|
break
|
||||||
# Remove redundant features
|
# Remove redundant features
|
||||||
for index_q in feature_dup:
|
for index_q in feature_dup:
|
||||||
# test if feature(index_q) su with feature(index_p) is
|
|
||||||
su_pq = self._compute_su_features(index_p, index_q)
|
su_pq = self._compute_su_features(index_p, index_q)
|
||||||
if su_pq >= s_list[index_q]:
|
if su_pq >= s_list[index_q]:
|
||||||
# remove feature from list
|
# remove feature from list
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
import unittest
|
import unittest
|
||||||
from mdlp import MDLP
|
from mdlp import MDLP
|
||||||
from sklearn.datasets import load_wine
|
from sklearn.datasets import load_wine, load_iris
|
||||||
|
|
||||||
from ..Selection import MFS
|
from ..Selection import MFS
|
||||||
|
|
||||||
@@ -9,33 +9,53 @@ class MFS_test(unittest.TestCase):
|
|||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
mdlp = MDLP(random_state=1)
|
mdlp = MDLP(random_state=1)
|
||||||
X, self.y = load_wine(return_X_y=True)
|
X, self.y_w = load_wine(return_X_y=True)
|
||||||
self.X = mdlp.fit_transform(X, self.y).astype("int64")
|
self.X_w = mdlp.fit_transform(X, self.y_w).astype("int64")
|
||||||
self.m, self.n = self.X.shape
|
X, self.y_i = load_iris(return_X_y=True)
|
||||||
|
mdlp = MDLP(random_state=1)
|
||||||
# @classmethod
|
self.X_i = mdlp.fit_transform(X, self.y_i).astype("int64")
|
||||||
# def setup(cls):
|
|
||||||
# pass
|
|
||||||
|
|
||||||
def test_initialize(self):
|
def test_initialize(self):
|
||||||
mfs = MFS()
|
mfs = MFS()
|
||||||
mfs.fcbs(self.X, self.y, 0.05)
|
mfs.fcbs(self.X_w, self.y_w, 0.05)
|
||||||
mfs._initialize()
|
mfs._initialize()
|
||||||
self.assertIsNone(mfs.get_results())
|
self.assertIsNone(mfs.get_results())
|
||||||
self.assertListEqual([], mfs.get_scores())
|
self.assertListEqual([], mfs.get_scores())
|
||||||
self.assertDictEqual({}, mfs._su_features)
|
self.assertDictEqual({}, mfs._su_features)
|
||||||
self.assertIsNone(mfs._su_labels)
|
self.assertIsNone(mfs._su_labels)
|
||||||
|
|
||||||
def test_csf(self):
|
def test_csf_wine(self):
|
||||||
mfs = MFS()
|
mfs = MFS()
|
||||||
expected = [6, 4]
|
expected = [6, 12, 9, 4, 10, 0]
|
||||||
self.assertListEqual(expected, mfs.cfs(self.X, self.y).get_results())
|
self.assertListEqual(
|
||||||
expected = [0.5218299405215557, 2.4168234005280964]
|
expected, mfs.cfs(self.X_w, self.y_w).get_results()
|
||||||
|
)
|
||||||
|
expected = [
|
||||||
|
0.5218299405215557,
|
||||||
|
0.602513857132804,
|
||||||
|
0.4877384978817362,
|
||||||
|
0.3743688234383051,
|
||||||
|
0.28795671854246285,
|
||||||
|
0.2309165735173175,
|
||||||
|
]
|
||||||
self.assertListEqual(expected, mfs.get_scores())
|
self.assertListEqual(expected, mfs.get_scores())
|
||||||
|
|
||||||
def test_fcbs(self):
|
def test_csf_iris(self):
|
||||||
mfs = MFS()
|
mfs = MFS()
|
||||||
computed = mfs.fcbs(self.X, self.y, threshold=0.05).get_results()
|
expected = [3, 2, 0, 1]
|
||||||
|
computed = mfs.cfs(self.X_i, self.y_i).get_results()
|
||||||
|
self.assertListEqual(expected, computed)
|
||||||
|
expected = [
|
||||||
|
0.870521418179061,
|
||||||
|
0.8968651482682227,
|
||||||
|
0.5908278453318913,
|
||||||
|
0.40371971570693366,
|
||||||
|
]
|
||||||
|
self.assertListEqual(expected, mfs.get_scores())
|
||||||
|
|
||||||
|
def test_fcbs_wine(self):
|
||||||
|
mfs = MFS()
|
||||||
|
computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.05).get_results()
|
||||||
expected = [6, 9, 12, 0, 11, 4]
|
expected = [6, 9, 12, 0, 11, 4]
|
||||||
self.assertListEqual(expected, computed)
|
self.assertListEqual(expected, computed)
|
||||||
expected = [
|
expected = [
|
||||||
@@ -47,3 +67,36 @@ class MFS_test(unittest.TestCase):
|
|||||||
0.24972405134844652,
|
0.24972405134844652,
|
||||||
]
|
]
|
||||||
self.assertListEqual(expected, mfs.get_scores())
|
self.assertListEqual(expected, mfs.get_scores())
|
||||||
|
|
||||||
|
def test_fcbs_iris(self):
|
||||||
|
mfs = MFS()
|
||||||
|
computed = mfs.fcbs(self.X_i, self.y_i, threshold=0.05).get_results()
|
||||||
|
expected = [3, 2]
|
||||||
|
self.assertListEqual(expected, computed)
|
||||||
|
expected = [0.870521418179061, 0.810724587460511]
|
||||||
|
self.assertListEqual(expected, mfs.get_scores())
|
||||||
|
|
||||||
|
def test_compute_su_labels(self):
|
||||||
|
mfs = MFS()
|
||||||
|
mfs.fcbs(self.X_i, self.y_i, threshold=0.05)
|
||||||
|
expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061]
|
||||||
|
self.assertListEqual(expected, mfs._compute_su_labels().tolist())
|
||||||
|
mfs._su_labels = [1, 2, 3, 4]
|
||||||
|
self.assertListEqual([1, 2, 3, 4], mfs._compute_su_labels())
|
||||||
|
|
||||||
|
def test_invalid_threshold(self):
|
||||||
|
mfs = MFS()
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
mfs.fcbs(self.X_i, self.y_i, threshold=1e-5)
|
||||||
|
|
||||||
|
def test_fcbs_exit_threshold(self):
|
||||||
|
mfs = MFS()
|
||||||
|
computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.4).get_results()
|
||||||
|
expected = [6, 9, 12]
|
||||||
|
self.assertListEqual(expected, computed)
|
||||||
|
expected = [
|
||||||
|
0.5218299405215557,
|
||||||
|
0.46224298637417455,
|
||||||
|
0.44518278979085646,
|
||||||
|
]
|
||||||
|
self.assertListEqual(expected, mfs.get_scores())
|
||||||
|
8
setup.py
8
setup.py
@@ -8,7 +8,7 @@ def readme():
|
|||||||
|
|
||||||
def get_data(field: str):
|
def get_data(field: str):
|
||||||
item = ""
|
item = ""
|
||||||
with open("stree/__init__.py") as f:
|
with open("mfs/__init__.py") as f:
|
||||||
for line in f.readlines():
|
for line in f.readlines():
|
||||||
if line.startswith(f"__{field}__"):
|
if line.startswith(f"__{field}__"):
|
||||||
delim = '"' if '"' in line else "'"
|
delim = '"' if '"' in line else "'"
|
||||||
@@ -27,9 +27,9 @@ setuptools.setup(
|
|||||||
long_description=readme(),
|
long_description=readme(),
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
packages=setuptools.find_packages(),
|
packages=setuptools.find_packages(),
|
||||||
url="https://github.com/Doctorado-ML/cfs#cfs",
|
url="https://github.com/Doctorado-ML/mfs#mfs",
|
||||||
project_urls={
|
project_urls={
|
||||||
"Code": "https://github.com/Doctorado-ML/cfs",
|
"Code": "https://github.com/Doctorado-ML/mfs",
|
||||||
},
|
},
|
||||||
author=get_data("author"),
|
author=get_data("author"),
|
||||||
author_email=get_data("author_email"),
|
author_email=get_data("author_email"),
|
||||||
@@ -43,6 +43,6 @@ setuptools.setup(
|
|||||||
"Intended Audience :: Science/Research",
|
"Intended Audience :: Science/Research",
|
||||||
],
|
],
|
||||||
install_requires=["scikit-learn"],
|
install_requires=["scikit-learn"],
|
||||||
test_suite="cfs.tests",
|
test_suite="mfs.tests",
|
||||||
zip_safe=False,
|
zip_safe=False,
|
||||||
)
|
)
|
||||||
|
Reference in New Issue
Block a user