Add max_features to MFS to help STree integration

This commit is contained in:
2021-05-29 01:24:37 +02:00
parent ee5020e6d9
commit 39fbdf73a7
2 changed files with 75 additions and 39 deletions

View File

@@ -109,10 +109,16 @@ class MFS:
Correlated Feature Selection as in "Correlation-based Feature Selection for
Machine Learning" by Mark A. Hall
Parameters
----------
max_features: int
The maximum number of features to return
"""
def __init__(self):
def __init__(self, max_features):
self._initialize()
self._max_features = max_features
def _initialize(self):
"""Initialize the attributes so support multiple calls using same
@@ -180,8 +186,8 @@ class MFS:
"""
# lgtm has already recognized that this is a false positive
rcf = self._su_labels[
features
].sum() # lgtm [py/hash-unhashable-value]
features # lgtm [py/hash-unhashable-value]
].sum()
rff = 0.0
k = len(features)
for pair in list(combinations(features, 2)):
@@ -229,7 +235,10 @@ class MFS:
candidates.append(feature_order[id_selected])
self._scores.append(merit)
del feature_order[id_selected]
if len(feature_order) == 0:
if (
len(feature_order) == 0
or len(candidates) == self._max_features
):
# Force leaving the loop
continue_condition = False
if len(self._scores) >= 5:
@@ -253,7 +262,7 @@ class MFS:
self._result = candidates
return self
def fcbs(self, X, y, threshold):
def fcbf(self, X, y, threshold):
"""Fast Correlation-Based Filter
Parameters
@@ -273,10 +282,10 @@ class MFS:
Raises
------
ValueError
if the threshold is less than a selected value of 1e-4
if the threshold is less than a selected value of 1e-7
"""
if threshold < 1e-4:
raise ValueError("Threshold cannot be less than 1e-4")
if threshold < 1e-7:
raise ValueError("Threshold cannot be less than 1e-7")
self._initialize()
self.X_ = X
self.y_ = y
@@ -301,6 +310,8 @@ class MFS:
s_list[index_q] = 0.0
self._result.append(index_p)
self._scores.append(s_list[index_p])
if len(self._result) == self._max_features:
break
return self
def get_results(self):

View File

@@ -21,8 +21,8 @@ class MFS_test(unittest.TestCase):
self.assertAlmostEqual(a, b, tol)
def test_initialize(self):
mfs = MFS()
mfs.fcbs(self.X_w, self.y_w, 0.05)
mfs = MFS(max_features=100)
mfs.fcbf(self.X_w, self.y_w, 0.05)
mfs._initialize()
self.assertIsNone(mfs.get_results())
self.assertListEqual([], mfs.get_scores())
@@ -30,7 +30,7 @@ class MFS_test(unittest.TestCase):
self.assertIsNone(mfs._su_labels)
def test_csf_wine(self):
mfs = MFS()
mfs = MFS(max_features=100)
expected = [6, 12, 9, 4, 10, 0]
self.assertListAlmostEqual(
expected, mfs.cfs(self.X_w, self.y_w).get_results()
@@ -45,8 +45,21 @@ class MFS_test(unittest.TestCase):
]
self.assertListAlmostEqual(expected, mfs.get_scores())
def test_csf_max_features(self):
mfs = MFS(max_features=3)
expected = [6, 12, 9]
self.assertListAlmostEqual(
expected, mfs.cfs(self.X_w, self.y_w).get_results()
)
expected = [
0.5218299405215557,
0.602513857132804,
0.4877384978817362,
]
self.assertListAlmostEqual(expected, mfs.get_scores())
def test_csf_iris(self):
mfs = MFS()
mfs = MFS(max_features=100)
expected = [3, 2, 0, 1]
computed = mfs.cfs(self.X_i, self.y_i).get_results()
self.assertListAlmostEqual(expected, computed)
@@ -58,9 +71,9 @@ class MFS_test(unittest.TestCase):
]
self.assertListAlmostEqual(expected, mfs.get_scores())
def test_fcbs_wine(self):
mfs = MFS()
computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.05).get_results()
def test_fcbf_wine(self):
mfs = MFS(max_features=100)
computed = mfs.fcbf(self.X_w, self.y_w, threshold=0.05).get_results()
expected = [6, 9, 12, 0, 11, 4]
self.assertListAlmostEqual(expected, computed)
expected = [
@@ -73,30 +86,42 @@ class MFS_test(unittest.TestCase):
]
self.assertListAlmostEqual(expected, mfs.get_scores())
def test_fcbs_iris(self):
mfs = MFS()
computed = mfs.fcbs(self.X_i, self.y_i, threshold=0.05).get_results()
expected = [3, 2]
self.assertListAlmostEqual(expected, computed)
expected = [0.870521418179061, 0.810724587460511]
self.assertListAlmostEqual(expected, mfs.get_scores())
def test_compute_su_labels(self):
mfs = MFS()
mfs.fcbs(self.X_i, self.y_i, threshold=0.05)
expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061]
self.assertListAlmostEqual(expected, mfs._compute_su_labels().tolist())
mfs._su_labels = [1, 2, 3, 4]
self.assertListAlmostEqual([1, 2, 3, 4], mfs._compute_su_labels())
def test_invalid_threshold(self):
mfs = MFS()
with self.assertRaises(ValueError):
mfs.fcbs(self.X_i, self.y_i, threshold=1e-5)
def test_fcbs_exit_threshold(self):
mfs = MFS()
computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.4).get_results()
def test_fcbf_max_features(self):
mfs = MFS(max_features=3)
computed = mfs.fcbf(self.X_w, self.y_w, threshold=0.05).get_results()
expected = [6, 9, 12]
self.assertListAlmostEqual(expected, computed)
expected = [
0.5218299405215557,
0.46224298637417455,
0.44518278979085646,
]
self.assertListAlmostEqual(expected, mfs.get_scores())
def test_fcbf_iris(self):
mfs = MFS(max_features=100)
computed = mfs.fcbf(self.X_i, self.y_i, threshold=0.05).get_results()
expected = [3, 2]
self.assertListAlmostEqual(expected, computed)
expected = [0.870521418179061, 0.810724587460511]
self.assertListAlmostEqual(expected, mfs.get_scores())
def test_compute_su_labels(self):
mfs = MFS(max_features=100)
mfs.fcbf(self.X_i, self.y_i, threshold=0.05)
expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061]
self.assertListAlmostEqual(expected, mfs._compute_su_labels().tolist())
mfs._su_labels = [1, 2, 3, 4]
self.assertListAlmostEqual([1, 2, 3, 4], mfs._compute_su_labels())
def test_invalid_threshold(self):
mfs = MFS(max_features=100)
with self.assertRaises(ValueError):
mfs.fcbf(self.X_i, self.y_i, threshold=1e-15)
def test_fcbf_exit_threshold(self):
mfs = MFS(max_features=100)
computed = mfs.fcbf(self.X_w, self.y_w, threshold=0.4).get_results()
expected = [6, 9, 12]
self.assertListAlmostEqual(expected, computed)
expected = [