From 27f8a370c5f74a03f181f9de7b4c1759c1b6d9ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Sun, 10 Oct 2021 19:06:57 +0200
Subject: [PATCH] Begin IWSS implementation Update requirements Create
 requirements for dev

---
 README.md                                     |  4 ++
 mufs/Selection.py                             | 64 ++++++++++++++++++-
 mufs/tests/MUFS_test.py                       | 45 ++++++++++++-
 mufs/tests/balloons_R.dat                     | 17 +++++
 requirements/dev.txt                          |  3 +
 .../production.txt                            |  1 -
 6 files changed, 128 insertions(+), 6 deletions(-)
 create mode 100755 mufs/tests/balloons_R.dat
 create mode 100644 requirements/dev.txt
 rename requirements.txt => requirements/production.txt (81%)

diff --git a/README.md b/README.md
index 6fccbd7..8d8cdb1 100644
--- a/README.md
+++ b/README.md
@@ -17,3 +17,7 @@ Proceedings, Twentieth International Conference on Machine Learning. ed. / T. Fa
 ### Correlation-based Feature Selection
 
 Hall, M. A. (1999), 'Correlation-based Feature Selection for Machine Learning'.
+
+### IWSS
+
+Based on: P. Bermejo, J. A. Gamez and J. M. Puerta, "Incremental Wrapper-based subset Selection with replacement: An advantageous alternative to sequential forward selection," 2009 IEEE Symposium on Computational Intelligence and Data Mining, 2009, pp. 367-374, doi: 10.1109/CIDM.2009.4938673.
diff --git a/mufs/Selection.py b/mufs/Selection.py
index 7783450..7f96003 100755
--- a/mufs/Selection.py
+++ b/mufs/Selection.py
@@ -26,7 +26,7 @@ class MUFS:
     """
 
     def __init__(self, max_features=None, discrete=True):
-        self._max_features = max_features
+        self.max_features = max_features
         self._discrete = discrete
         self.symmetrical_uncertainty = (
             Metrics.symmetrical_uncertainty
@@ -53,8 +53,10 @@ class MUFS:
         """
         self.X_ = X
         self.y_ = y
-        if self._max_features is None:
+        if self.max_features is None:
             self._max_features = X.shape[1]
+        else:
+            self._max_features = self.max_features
         self._result = None
         self._scores = []
         self._su_labels = None
@@ -105,7 +107,9 @@ class MUFS:
 
     def _compute_merit(self, features):
         """Compute the merit function for cfs algorithms
-
+           "Good feature subsets contain features highly correlated with
+           (predictive of) the class, yet uncorrelated with (not predictive of)
+           each other"
         Parameters
         ----------
         features : list
@@ -264,3 +268,57 @@ class MUFS:
             list of scores of the features selected
         """
         return self._scores if self._fitted else []
+
+    def iwss(self, X, y, threshold):
+        """Incremental Wrapper Subset Selection
+
+        Features are tried in decreasing order of _compute_su_labels score;
+        one is kept if merit grows or changes relatively less than threshold.
+
+        Parameters
+        ----------
+        X : np.array
+            array of features
+        y : np.array
+            vector of labels
+        threshold : float
+            maximum relative change in merit tolerated to keep a feature
+
+        Returns
+        -------
+        self
+            self
+        Raises
+        ------
+        ValueError
+            if the threshold is less than 0 or greater than 0.5
+        """
+        if threshold < 0 or threshold > 0.5:
+            raise ValueError(
+                "Threshold cannot be less than 0 or greater than 0.5"
+            )
+        self._initialize(X, y)
+        s_list = self._compute_su_labels()
+        features = (-s_list).argsort().tolist()
+        # The top ranked feature is always kept, with its ranking score
+        candidates = [features.pop(0)]
+        self._scores.append(s_list[candidates[0]])
+        if features and self._max_features > 1:
+            # The second ranked feature is accepted unconditionally
+            candidates.append(features.pop(0))
+            merit = self._compute_merit(candidates)
+            self._scores.append(merit)
+        for feature in features:
+            # Checked before adding so that max_features < 3 is honoured
+            if len(candidates) >= self._max_features:
+                break
+            candidates.append(feature)
+            merit_new = self._compute_merit(candidates)
+            delta = abs(merit - merit_new) / merit if merit != 0.0 else 0.0
+            if merit_new > merit or delta < threshold:
+                merit = max(merit, merit_new)
+                self._scores.append(merit_new)
+            else:
+                candidates.pop()
+        self._result = candidates
+        return self
diff --git a/mufs/tests/MUFS_test.py b/mufs/tests/MUFS_test.py
index 82a0dbd..312620d 100755
--- a/mufs/tests/MUFS_test.py
+++ b/mufs/tests/MUFS_test.py
@@ -32,7 +32,7 @@ class MUFS_test(unittest.TestCase):
     def test_csf_wine(self):
         mufs = MUFS()
         expected = [6, 12, 9, 4, 10, 0]
-        self.assertListAlmostEqual(
+        self.assertListEqual(
             expected, mufs.cfs(self.X_w, self.y_w).get_results()
         )
         expected = [
@@ -78,7 +78,7 @@ class MUFS_test(unittest.TestCase):
         mufs = MUFS()
         expected = [3, 2, 0, 1]
         computed = mufs.cfs(self.X_i, self.y_i).get_results()
-        self.assertListAlmostEqual(expected, computed)
+        self.assertListEqual(expected, computed)
         expected = [
             0.870521418179061,
             0.8968651482682227,
@@ -148,3 +148,44 @@ class MUFS_test(unittest.TestCase):
             0.44518278979085646,
         ]
         self.assertListAlmostEqual(expected, mufs.get_scores())
+
+    def test_iwss_wine(self):
+        """IWSS with threshold 0.2 on the self.X_w / self.y_w data"""
+        selector = MUFS().iwss(self.X_w, self.y_w, 0.2)
+        # Selected features, in the order in which they were accepted
+        self.assertListEqual([6, 9, 12], selector.get_results())
+        # One score per selected feature
+        scores = [0.5218299405215557, 0.5947822876110085, 0.4877384978817362]
+        self.assertListAlmostEqual(scores, selector.get_scores())
+
+    def test_iwss_wine_max_features(self):
+        """IWSS limited to three features with threshold 0.4"""
+        selector = MUFS(max_features=3).iwss(self.X_w, self.y_w, 0.4)
+        # The selection holds exactly max_features elements
+        self.assertListEqual([6, 9, 12], selector.get_results())
+        # Scores recorded for the three selected features
+        scores = [0.5218299405215557, 0.5947822876110085, 0.4877384978817362]
+        self.assertListAlmostEqual(scores, selector.get_scores())
+
+    def test_iwss_exception(self):
+        """Thresholds outside the [0, 0.5] interval raise ValueError"""
+        mufs = MUFS()
+        for threshold in (0.51, -0.01):
+            with self.assertRaises(ValueError):
+                mufs.iwss(self.X_w, self.y_w, threshold)
+
+    def test_iwss_better_merit_condition(self):
+        """IWSS with threshold 0.3 on the data stored in balloons_R.dat"""
+        import os
+        import pandas as pd
+
+        # The data file is looked up in the folder of this test module
+        here = os.path.dirname(os.path.abspath(__file__))
+        path = os.path.join(here, "balloons_R.dat")
+        # Tab separated; the first column is used as the row index
+        frame = pd.read_csv(path, sep="\t", index_col=0)
+        # Every column but "clase" is a feature, "clase" is the label
+        X = frame.drop("clase", axis=1).to_numpy()
+        y = frame["clase"].to_numpy()
+        selector = MUFS().iwss(X, y, 0.3)
+        self.assertListEqual([0, 2, 3, 1], selector.get_results())
diff --git a/mufs/tests/balloons_R.dat b/mufs/tests/balloons_R.dat
new file mode 100755
index 0000000..1579461
--- /dev/null
+++ b/mufs/tests/balloons_R.dat
@@ -0,0 +1,17 @@
+	f1	f2	f3	f4	clase
+1	0.968246	-0.968246	0.968246	0.968246	1
+2	0.968246	-0.968246	0.968246	-0.968246	1
+3	0.968246	-0.968246	-0.968246	0.968246	1
+4	0.968246	-0.968246	-0.968246	-0.968246	1
+5	0.968246	0.968246	0.968246	0.968246	1
+6	0.968246	0.968246	0.968246	-0.968246	0
+7	0.968246	0.968246	-0.968246	0.968246	0
+8	0.968246	0.968246	-0.968246	-0.968246	0
+9	-0.968246	-0.968246	0.968246	0.968246	1
+10	-0.968246	-0.968246	0.968246	-0.968246	0
+11	-0.968246	-0.968246	-0.968246	0.968246	0
+12	-0.968246	-0.968246	-0.968246	-0.968246	0
+13	-0.968246	0.968246	0.968246	0.968246	1
+14	-0.968246	0.968246	0.968246	-0.968246	0
+15	-0.968246	0.968246	-0.968246	0.968246	0
+16	-0.968246	0.968246	-0.968246	-0.968246	0
diff --git a/requirements/dev.txt b/requirements/dev.txt
new file mode 100644
index 0000000..ed21e03
--- /dev/null
+++ b/requirements/dev.txt
@@ -0,0 +1,3 @@
+-r production.txt
+mdlp
+pandas
diff --git a/requirements.txt b/requirements/production.txt
similarity index 81%
rename from requirements.txt
rename to requirements/production.txt
index 30eef1a..163f1bc 100644
--- a/requirements.txt
+++ b/requirements/production.txt
@@ -1,2 +1 @@
 scikit-learn>0.24
-mdlp
\ No newline at end of file