Complete implementation of both algorithms

Check results. Complete coverage tests.
2025-08-17 08:35:52 +00:00 · 2021-05-25 11:59:24 +02:00
parent 70560506f1
commit 17d44080f5
6 changed files with 112 additions and 52 deletions
--- a/mfs/Selection.py
+++ b/mfs/Selection.py
@@ -1,4 +1,4 @@
-from math import log
+from math import log, sqrt
 from sys import float_info
 from itertools import combinations
 import numpy as np
@@ -145,7 +145,7 @@ class MFS:
        k = len(features)
        for pair in list(combinations(features, 2)):
            rff += self._compute_su_features(*pair)
-        return rcf / ((k ** 2 - k) * rff)
+        return rcf / sqrt(k + (k ** 2 - k) * rff)

    def cfs(self, X, y):
        """CFS forward best first heuristic search
@@ -161,34 +161,41 @@ class MFS:
        self.X_ = X
        self.y_ = y
        s_list = self._compute_su_labels()
-        # Descending orders
+        # Descending order
        feature_order = (-s_list).argsort().tolist()
-        merit = float_info.min
-        exit_condition = 0
+        continue_condition = True
        candidates = []
        # start with the best feature (max symmetrical uncertainty wrt label)
        first_candidate = feature_order.pop(0)
        candidates.append(first_candidate)
        self._scores.append(s_list[first_candidate])
-        while exit_condition < 5:  # as proposed in the original algorithm
-            id_selected = -1
+        while continue_condition:
+            merit = float_info.min
+            id_selected = None
            for idx, feature in enumerate(feature_order):
                candidates.append(feature)
                merit_new = self._compute_merit(candidates)
                if merit_new > merit:
                    id_selected = idx
                    merit = merit_new
-                    exit_condition = 0
                candidates.pop()
-            if id_selected == -1:
-                exit_condition += 1
-            else:
-                candidates.append(feature_order[id_selected])
-                self._scores.append(merit_new)
-                del feature_order[id_selected]
+            candidates.append(feature_order[id_selected])
+            self._scores.append(merit)
+            del feature_order[id_selected]
            if len(feature_order) == 0:
                # Force leaving the loop
-                exit_condition = 5
+                continue_condition = False
+            if len(self._scores) >= 5:
+                item_ant = -1
+                for item in self._scores[-5:]:
+                    if item_ant == -1:
+                        item_ant = item
+                    if item > item_ant:
+                        break
+                    else:
+                        item_ant = item
+                else:
+                    continue_condition = False
        self._result = candidates
        return self

@@ -213,7 +220,6 @@ class MFS:
                break
            # Remove redundant features
            for index_q in feature_dup:
-                # test if feature(index_q) su with feature(index_p) is
                su_pq = self._compute_su_features(index_p, index_q)
                if su_pq >= s_list[index_q]:
                    # remove feature from list
--- a/mfs/tests/MFS_test.py
+++ b/mfs/tests/MFS_test.py
@@ -1,6 +1,6 @@
 import unittest
 from mdlp import MDLP
-from sklearn.datasets import load_wine
+from sklearn.datasets import load_wine, load_iris

 from ..Selection import MFS

@@ -9,33 +9,53 @@ class MFS_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        mdlp = MDLP(random_state=1)
-        X, self.y = load_wine(return_X_y=True)
-        self.X = mdlp.fit_transform(X, self.y).astype("int64")
-        self.m, self.n = self.X.shape
-
-    # @classmethod
-    # def setup(cls):
-    #     pass
+        X, self.y_w = load_wine(return_X_y=True)
+        self.X_w = mdlp.fit_transform(X, self.y_w).astype("int64")
+        X, self.y_i = load_iris(return_X_y=True)
+        mdlp = MDLP(random_state=1)
+        self.X_i = mdlp.fit_transform(X, self.y_i).astype("int64")

    def test_initialize(self):
        mfs = MFS()
-        mfs.fcbs(self.X, self.y, 0.05)
+        mfs.fcbs(self.X_w, self.y_w, 0.05)
        mfs._initialize()
        self.assertIsNone(mfs.get_results())
        self.assertListEqual([], mfs.get_scores())
        self.assertDictEqual({}, mfs._su_features)
        self.assertIsNone(mfs._su_labels)

-    def test_csf(self):
+    def test_csf_wine(self):
        mfs = MFS()
-        expected = [6, 4]
-        self.assertListEqual(expected, mfs.cfs(self.X, self.y).get_results())
-        expected = [0.5218299405215557, 2.4168234005280964]
+        expected = [6, 12, 9, 4, 10, 0]
+        self.assertListEqual(
+            expected, mfs.cfs(self.X_w, self.y_w).get_results()
+        )
+        expected = [
+            0.5218299405215557,
+            0.602513857132804,
+            0.4877384978817362,
+            0.3743688234383051,
+            0.28795671854246285,
+            0.2309165735173175,
+        ]
        self.assertListEqual(expected, mfs.get_scores())

-    def test_fcbs(self):
+    def test_csf_iris(self):
        mfs = MFS()
-        computed = mfs.fcbs(self.X, self.y, threshold=0.05).get_results()
+        expected = [3, 2, 0, 1]
+        computed = mfs.cfs(self.X_i, self.y_i).get_results()
+        self.assertListEqual(expected, computed)
+        expected = [
+            0.870521418179061,
+            0.8968651482682227,
+            0.5908278453318913,
+            0.40371971570693366,
+        ]
+        self.assertListEqual(expected, mfs.get_scores())
+
+    def test_fcbs_wine(self):
+        mfs = MFS()
+        computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.05).get_results()
        expected = [6, 9, 12, 0, 11, 4]
        self.assertListEqual(expected, computed)
        expected = [
@@ -47,3 +67,36 @@ class MFS_test(unittest.TestCase):
            0.24972405134844652,
        ]
        self.assertListEqual(expected, mfs.get_scores())
+
+    def test_fcbs_iris(self):
+        mfs = MFS()
+        computed = mfs.fcbs(self.X_i, self.y_i, threshold=0.05).get_results()
+        expected = [3, 2]
+        self.assertListEqual(expected, computed)
+        expected = [0.870521418179061, 0.810724587460511]
+        self.assertListEqual(expected, mfs.get_scores())
+
+    def test_compute_su_labels(self):
+        mfs = MFS()
+        mfs.fcbs(self.X_i, self.y_i, threshold=0.05)
+        expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061]
+        self.assertListEqual(expected, mfs._compute_su_labels().tolist())
+        mfs._su_labels = [1, 2, 3, 4]
+        self.assertListEqual([1, 2, 3, 4], mfs._compute_su_labels())
+
+    def test_invalid_threshold(self):
+        mfs = MFS()
+        with self.assertRaises(ValueError):
+            mfs.fcbs(self.X_i, self.y_i, threshold=1e-5)
+
+    def test_fcbs_exit_threshold(self):
+        mfs = MFS()
+        computed = mfs.fcbs(self.X_w, self.y_w, threshold=0.4).get_results()
+        expected = [6, 9, 12]
+        self.assertListEqual(expected, computed)
+        expected = [
+            0.5218299405215557,
+            0.46224298637417455,
+            0.44518278979085646,
+        ]
+        self.assertListEqual(expected, mfs.get_scores())