Add max_features to MFS to help STree integration

This commit is contained in:
2021-05-29 01:24:37 +02:00
parent ee5020e6d9
commit 39fbdf73a7
2 changed files with 75 additions and 39 deletions

View File

@@ -109,10 +109,16 @@ class MFS:
Correlated Feature Selection as in "Correlation-based Feature Selection for Correlated Feature Selection as in "Correlation-based Feature Selection for
Machine Learning" by Mark A. Hall Machine Learning" by Mark A. Hall
Parameters
----------
max_features: int
The maximum number of features to return
""" """
def __init__(self): def __init__(self, max_features):
self._initialize() self._initialize()
self._max_features = max_features
def _initialize(self): def _initialize(self):
"""Initialize the attributes so support multiple calls using same """Initialize the attributes so support multiple calls using same
@@ -180,8 +186,8 @@ class MFS:
""" """
# lgtm has already recognized that this is a false positive # lgtm has already recognized that this is a false positive
rcf = self._su_labels[ rcf = self._su_labels[
features features # lgtm [py/hash-unhashable-value]
].sum() # lgtm [py/hash-unhashable-value] ].sum()
rff = 0.0 rff = 0.0
k = len(features) k = len(features)
for pair in list(combinations(features, 2)): for pair in list(combinations(features, 2)):
@@ -229,7 +235,10 @@ class MFS:
candidates.append(feature_order[id_selected]) candidates.append(feature_order[id_selected])
self._scores.append(merit) self._scores.append(merit)
del feature_order[id_selected] del feature_order[id_selected]
if len(feature_order) == 0: if (
len(feature_order) == 0
or len(candidates) == self._max_features
):
# Force leaving the loop # Force leaving the loop
continue_condition = False continue_condition = False
if len(self._scores) >= 5: if len(self._scores) >= 5:
@@ -253,7 +262,7 @@ class MFS:
self._result = candidates self._result = candidates
return self return self
def fcbs(self, X, y, threshold): def fcbf(self, X, y, threshold):
"""Fast Correlation-Based Filter """Fast Correlation-Based Filter
Parameters Parameters
@@ -273,10 +282,10 @@ class MFS:
Raises Raises
------ ------
ValueError ValueError
if the threshold is less than a selected value of 1e-4 if the threshold is less than a selected value of 1e-7
""" """
if threshold < 1e-4: if threshold < 1e-7:
raise ValueError("Threshold cannot be less than 1e-4") raise ValueError("Threshold cannot be less than 1e-7")
self._initialize() self._initialize()
self.X_ = X self.X_ = X
self.y_ = y self.y_ = y
@@ -301,6 +310,8 @@ class MFS:
s_list[index_q] = 0.0 s_list[index_q] = 0.0
self._result.append(index_p) self._result.append(index_p)
self._scores.append(s_list[index_p]) self._scores.append(s_list[index_p])
if len(self._result) == self._max_features:
break
return self return self
def get_results(self): def get_results(self):

View File

@@ -21,8 +21,8 @@ class MFS_test(unittest.TestCase):
self.assertAlmostEqual(a, b, tol) self.assertAlmostEqual(a, b, tol)
def test_initialize(self): def test_initialize(self):
mfs = MFS() mfs = MFS(max_features=100)
mfs.fcbs(self.X_w, self.y_w, 0.05) mfs.fcbf(self.X_w, self.y_w, 0.05)
mfs._initialize() mfs._initialize()
self.assertIsNone(mfs.get_results()) self.assertIsNone(mfs.get_results())
self.assertListEqual([], mfs.get_scores()) self.assertListEqual([], mfs.get_scores())
@@ -30,7 +30,7 @@ class MFS_test(unittest.TestCase):
self.assertIsNone(mfs._su_labels) self.assertIsNone(mfs._su_labels)
def test_csf_wine(self): def test_csf_wine(self):
mfs = MFS() mfs = MFS(max_features=100)
expected = [6, 12, 9, 4, 10, 0] expected = [6, 12, 9, 4, 10, 0]
self.assertListAlmostEqual( self.assertListAlmostEqual(
expected, mfs.cfs(self.X_w, self.y_w).get_results() expected, mfs.cfs(self.X_w, self.y_w).get_results()
@@ -45,8 +45,21 @@ class MFS_test(unittest.TestCase):
] ]
self.assertListAlmostEqual(expected, mfs.get_scores()) self.assertListAlmostEqual(expected, mfs.get_scores())
def test_csf_max_features(self):
mfs = MFS(max_features=3)
expected = [6, 12, 9]
self.assertListAlmostEqual(
expected, mfs.cfs(self.X_w, self.y_w).get_results()
)
expected = [
0.5218299405215557,
0.602513857132804,
0.4877384978817362,
]
self.assertListAlmostEqual(expected, mfs.get_scores())
def test_csf_iris(self): def test_csf_iris(self):
mfs = MFS() mfs = MFS(max_features=100)
expected = [3, 2, 0, 1] expected = [3, 2, 0, 1]
computed = mfs.cfs(self.X_i, self.y_i).get_results() computed = mfs.cfs(self.X_i, self.y_i).get_results()
self.assertListAlmostEqual(expected, computed) self.assertListAlmostEqual(expected, computed)
@@ -58,9 +71,9 @@ class MFS_test(unittest.TestCase):
] ]
self.assertListAlmostEqual(expected, mfs.get_scores()) self.assertListAlmostEqual(expected, mfs.get_scores())
def test_fcbs_wine(self): def test_fcbf_wine(self):
mfs = MFS() mfs = MFS(max_features=100)
computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.05).get_results() computed = mfs.fcbf(self.X_w, self.y_w, threshold=0.05).get_results()
expected = [6, 9, 12, 0, 11, 4] expected = [6, 9, 12, 0, 11, 4]
self.assertListAlmostEqual(expected, computed) self.assertListAlmostEqual(expected, computed)
expected = [ expected = [
@@ -73,30 +86,42 @@ class MFS_test(unittest.TestCase):
] ]
self.assertListAlmostEqual(expected, mfs.get_scores()) self.assertListAlmostEqual(expected, mfs.get_scores())
def test_fcbs_iris(self): def test_fcbf_max_features(self):
mfs = MFS() mfs = MFS(max_features=3)
computed = mfs.fcbs(self.X_i, self.y_i, threshold=0.05).get_results() computed = mfs.fcbf(self.X_w, self.y_w, threshold=0.05).get_results()
expected = [3, 2] expected = [6, 9, 12]
self.assertListAlmostEqual(expected, computed) self.assertListAlmostEqual(expected, computed)
expected = [0.870521418179061, 0.810724587460511] expected = [
self.assertListAlmostEqual(expected, mfs.get_scores()) 0.5218299405215557,
0.46224298637417455,
def test_compute_su_labels(self): 0.44518278979085646,
mfs = MFS() ]
mfs.fcbs(self.X_i, self.y_i, threshold=0.05) self.assertListAlmostEqual(expected, mfs.get_scores())
expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061]
self.assertListAlmostEqual(expected, mfs._compute_su_labels().tolist()) def test_fcbf_iris(self):
mfs._su_labels = [1, 2, 3, 4] mfs = MFS(max_features=100)
self.assertListAlmostEqual([1, 2, 3, 4], mfs._compute_su_labels()) computed = mfs.fcbf(self.X_i, self.y_i, threshold=0.05).get_results()
expected = [3, 2]
def test_invalid_threshold(self): self.assertListAlmostEqual(expected, computed)
mfs = MFS() expected = [0.870521418179061, 0.810724587460511]
with self.assertRaises(ValueError): self.assertListAlmostEqual(expected, mfs.get_scores())
mfs.fcbs(self.X_i, self.y_i, threshold=1e-5)
def test_compute_su_labels(self):
def test_fcbs_exit_threshold(self): mfs = MFS(max_features=100)
mfs = MFS() mfs.fcbf(self.X_i, self.y_i, threshold=0.05)
computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.4).get_results() expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061]
self.assertListAlmostEqual(expected, mfs._compute_su_labels().tolist())
mfs._su_labels = [1, 2, 3, 4]
self.assertListAlmostEqual([1, 2, 3, 4], mfs._compute_su_labels())
def test_invalid_threshold(self):
mfs = MFS(max_features=100)
with self.assertRaises(ValueError):
mfs.fcbf(self.X_i, self.y_i, threshold=1e-15)
def test_fcbf_exit_threshold(self):
mfs = MFS(max_features=100)
computed = mfs.fcbf(self.X_w, self.y_w, threshold=0.4).get_results()
expected = [6, 9, 12] expected = [6, 9, 12]
self.assertListAlmostEqual(expected, computed) self.assertListAlmostEqual(expected, computed)
expected = [ expected = [