mirror of
https://github.com/Doctorado-ML/mufs.git
synced 2025-08-17 08:35:52 +00:00
Begin IWSS implementation
Update requirements Create requirements for dev
This commit is contained in:
@@ -17,3 +17,7 @@ Proceedings, Twentieth International Conference on Machine Learning. ed. / T. Fa
|
|||||||
### Correlation-based Feature Selection
|
### Correlation-based Feature Selection
|
||||||
|
|
||||||
Hall, M. A. (1999), 'Correlation-based Feature Selection for Machine Learning'.
|
Hall, M. A. (1999), 'Correlation-based Feature Selection for Machine Learning'.
|
||||||
|
|
||||||
|
### IWSS
|
||||||
|
|
||||||
|
Based on: P. Bermejo, J. A. Gamez and J. M. Puerta, "Incremental Wrapper-based subset Selection with replacement: An advantageous alternative to sequential forward selection," 2009 IEEE Symposium on Computational Intelligence and Data Mining, 2009, pp. 367-374, doi: 10.1109/CIDM.2009.4938673.
|
||||||
|
@@ -26,7 +26,7 @@ class MUFS:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, max_features=None, discrete=True):
|
def __init__(self, max_features=None, discrete=True):
|
||||||
self._max_features = max_features
|
self.max_features = max_features
|
||||||
self._discrete = discrete
|
self._discrete = discrete
|
||||||
self.symmetrical_uncertainty = (
|
self.symmetrical_uncertainty = (
|
||||||
Metrics.symmetrical_uncertainty
|
Metrics.symmetrical_uncertainty
|
||||||
@@ -53,8 +53,10 @@ class MUFS:
|
|||||||
"""
|
"""
|
||||||
self.X_ = X
|
self.X_ = X
|
||||||
self.y_ = y
|
self.y_ = y
|
||||||
if self._max_features is None:
|
if self.max_features is None:
|
||||||
self._max_features = X.shape[1]
|
self._max_features = X.shape[1]
|
||||||
|
else:
|
||||||
|
self._max_features = self.max_features
|
||||||
self._result = None
|
self._result = None
|
||||||
self._scores = []
|
self._scores = []
|
||||||
self._su_labels = None
|
self._su_labels = None
|
||||||
@@ -105,7 +107,9 @@ class MUFS:
|
|||||||
|
|
||||||
def _compute_merit(self, features):
|
def _compute_merit(self, features):
|
||||||
"""Compute the merit function for cfs algorithms
|
"""Compute the merit function for cfs algorithms
|
||||||
|
"Good feature subsets contain features highly correlated with
|
||||||
|
(predictive of) the class, yet uncorrelated with (not predictive of)
|
||||||
|
each other"
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
features : list
|
features : list
|
||||||
@@ -264,3 +268,57 @@ class MUFS:
|
|||||||
list of scores of the features selected
|
list of scores of the features selected
|
||||||
"""
|
"""
|
||||||
return self._scores if self._fitted else []
|
return self._scores if self._fitted else []
|
||||||
|
|
||||||
|
def iwss(self, X, y, threshold):
    """Incremental Wrapper Subset Selection

    Features are visited in decreasing order of the scores returned by
    ``_compute_su_labels``. The first two ranked features are taken
    unconditionally; every later feature is tentatively added and kept
    only if the merit of the enlarged subset improves on the reference
    merit, or differs from it (relatively) by less than ``threshold``.
    The number of selected features never exceeds the configured
    maximum number of features.

    Parameters
    ----------
    X : np.array
        array of features
    y : np.array
        vector of labels
    threshold : float
        threshold to select relevant features

    Returns
    -------
    self
        the selector itself, so calls can be chained

    Raises
    ------
    ValueError
        if the threshold is less than 0 or greater than 0.5

    """
    # Written as a chained comparison so that NaN is rejected as well.
    if not 0 <= threshold <= 0.5:
        raise ValueError(
            "Threshold cannot be less than 0 or greater than 0.5"
        )
    self._initialize(X, y)
    s_list = self._compute_su_labels()
    # Negating the scores makes argsort rank the best feature first.
    features = (-s_list).argsort().tolist()
    candidates = []
    merit = None
    for feature in features:
        # Checked before adding anything so the limit also holds for
        # the first two features and for datasets with few columns.
        if len(candidates) >= self._max_features:
            break
        candidates.append(feature)
        if len(candidates) == 1:
            # The best ranked feature is scored by its own ranking value.
            self._scores.append(s_list[feature])
            continue
        merit_new = self._compute_merit(candidates)
        if merit is None:
            # The second feature is always kept; its merit becomes the
            # reference the remaining features are compared against.
            merit = merit_new
            self._scores.append(merit)
            continue
        delta = abs(merit - merit_new) / merit if merit != 0.0 else 0.0
        if merit_new > merit or delta < threshold:
            # The reference merit only moves when it actually improves.
            if merit_new > merit:
                merit = merit_new
            self._scores.append(merit_new)
        else:
            candidates.pop()
    self._result = candidates
    return self
|
||||||
|
@@ -32,7 +32,7 @@ class MUFS_test(unittest.TestCase):
|
|||||||
def test_csf_wine(self):
|
def test_csf_wine(self):
|
||||||
mufs = MUFS()
|
mufs = MUFS()
|
||||||
expected = [6, 12, 9, 4, 10, 0]
|
expected = [6, 12, 9, 4, 10, 0]
|
||||||
self.assertListAlmostEqual(
|
self.assertListEqual(
|
||||||
expected, mufs.cfs(self.X_w, self.y_w).get_results()
|
expected, mufs.cfs(self.X_w, self.y_w).get_results()
|
||||||
)
|
)
|
||||||
expected = [
|
expected = [
|
||||||
@@ -78,7 +78,7 @@ class MUFS_test(unittest.TestCase):
|
|||||||
mufs = MUFS()
|
mufs = MUFS()
|
||||||
expected = [3, 2, 0, 1]
|
expected = [3, 2, 0, 1]
|
||||||
computed = mufs.cfs(self.X_i, self.y_i).get_results()
|
computed = mufs.cfs(self.X_i, self.y_i).get_results()
|
||||||
self.assertListAlmostEqual(expected, computed)
|
self.assertListEqual(expected, computed)
|
||||||
expected = [
|
expected = [
|
||||||
0.870521418179061,
|
0.870521418179061,
|
||||||
0.8968651482682227,
|
0.8968651482682227,
|
||||||
@@ -148,3 +148,44 @@ class MUFS_test(unittest.TestCase):
|
|||||||
0.44518278979085646,
|
0.44518278979085646,
|
||||||
]
|
]
|
||||||
self.assertListAlmostEqual(expected, mufs.get_scores())
|
self.assertListAlmostEqual(expected, mufs.get_scores())
|
||||||
|
|
||||||
|
def test_iwss_wine(self):
    """Check the features and scores IWSS yields on the X_w / y_w fixture.

    Uses a threshold of 0.2 and the default number of features.
    """
    # iwss returns the selector itself, so the fitted object can be
    # queried for both the selected features and their scores.
    selector = MUFS().iwss(self.X_w, self.y_w, 0.2)
    self.assertListEqual([6, 9, 12], selector.get_results())
    expected_scores = [
        0.5218299405215557,
        0.5947822876110085,
        0.4877384978817362,
    ]
    self.assertListAlmostEqual(expected_scores, selector.get_scores())
|
||||||
|
|
||||||
|
def test_iwss_wine_max_features(self):
    """Check IWSS on the X_w / y_w fixture when capped at three features.

    Uses a threshold of 0.4 together with max_features=3.
    """
    # iwss returns the selector itself, so the fitted object can be
    # queried for both the selected features and their scores.
    selector = MUFS(max_features=3).iwss(self.X_w, self.y_w, 0.4)
    self.assertListEqual([6, 9, 12], selector.get_results())
    expected_scores = [
        0.5218299405215557,
        0.5947822876110085,
        0.4877384978817362,
    ]
    self.assertListAlmostEqual(expected_scores, selector.get_scores())
|
||||||
|
|
||||||
|
def test_iwss_exception(self):
    """IWSS must raise ValueError for thresholds above 0.5 or below 0."""
    selector = MUFS()
    # One value just above the upper bound, one just below the lower.
    for invalid_threshold in (0.51, -0.01):
        with self.assertRaises(ValueError):
            selector.iwss(self.X_w, self.y_w, invalid_threshold)
|
||||||
|
|
||||||
|
def test_iwss_better_merit_condition(self):
    """Check the features IWSS selects on the balloons_R.dat dataset.

    The dataset is read from the folder that contains this test module
    and the selection runs with a threshold of 0.3.
    """
    import os
    import pandas as pd

    here = os.path.dirname(os.path.abspath(__file__))
    dataset_path = os.path.join(here, "balloons_R.dat")
    # Tab-separated file whose first column is used as the row index.
    frame = pd.read_csv(dataset_path, sep="\t", index_col=0)
    labels = frame["clase"].to_numpy()
    samples = frame.drop("clase", axis=1).to_numpy()
    computed = MUFS().iwss(samples, labels, 0.3).get_results()
    self.assertListEqual([0, 2, 3, 1], computed)
|
||||||
|
17
mufs/tests/balloons_R.dat
Executable file
17
mufs/tests/balloons_R.dat
Executable file
@@ -0,0 +1,17 @@
|
|||||||
|
f1 f2 f3 f4 clase
|
||||||
|
1 0.968246 -0.968246 0.968246 0.968246 1
|
||||||
|
2 0.968246 -0.968246 0.968246 -0.968246 1
|
||||||
|
3 0.968246 -0.968246 -0.968246 0.968246 1
|
||||||
|
4 0.968246 -0.968246 -0.968246 -0.968246 1
|
||||||
|
5 0.968246 0.968246 0.968246 0.968246 1
|
||||||
|
6 0.968246 0.968246 0.968246 -0.968246 0
|
||||||
|
7 0.968246 0.968246 -0.968246 0.968246 0
|
||||||
|
8 0.968246 0.968246 -0.968246 -0.968246 0
|
||||||
|
9 -0.968246 -0.968246 0.968246 0.968246 1
|
||||||
|
10 -0.968246 -0.968246 0.968246 -0.968246 0
|
||||||
|
11 -0.968246 -0.968246 -0.968246 0.968246 0
|
||||||
|
12 -0.968246 -0.968246 -0.968246 -0.968246 0
|
||||||
|
13 -0.968246 0.968246 0.968246 0.968246 1
|
||||||
|
14 -0.968246 0.968246 0.968246 -0.968246 0
|
||||||
|
15 -0.968246 0.968246 -0.968246 0.968246 0
|
||||||
|
16 -0.968246 0.968246 -0.968246 -0.968246 0
|
3
requirements/dev.txt
Normal file
3
requirements/dev.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
-r production.txt
|
||||||
|
mdlp
|
||||||
|
pandas
|
@@ -1,2 +1 @@
|
|||||||
scikit-learn>0.24
|
scikit-learn>0.24
|
||||||
mdlp
|
|
Reference in New Issue
Block a user