From bf678df159f4434c903650e1c1a53bfd92dd3a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?= Date: Fri, 29 Oct 2021 12:59:03 +0200 Subject: [PATCH] (#46) Implement true random feature selection (#48) * (#46) Implement true random feature selection --- stree/Splitter.py | 29 ++++++++++++++++++++++++++++- stree/tests/Splitter_test.py | 13 +++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/stree/Splitter.py b/stree/Splitter.py index 2df3f57..acf737e 100644 --- a/stree/Splitter.py +++ b/stree/Splitter.py @@ -273,6 +273,7 @@ class Splitter: if feature_select not in [ "random", + "trandom", "best", "mutual", "cfs", @@ -280,7 +281,8 @@ class Splitter: "iwss", ]: raise ValueError( - "splitter must be in {random, best, mutual, cfs, fcbf, iwss} " + "splitter must be in {random, trandom, best, mutual, cfs, " + "fcbf, iwss} " f"got ({feature_select})" ) self.criterion_function = getattr(self, f"_{self._criterion}") @@ -312,6 +314,31 @@ class Splitter: features_sets = self._generate_spaces(n_features, max_features) return self._select_best_set(dataset, labels, features_sets) + @staticmethod + def _fs_trandom( + dataset: np.array, labels: np.array, max_features: int + ) -> tuple: + """Return a random feature set combination + + Parameters + ---------- + dataset : np.array + array of samples + labels : np.array + labels of the dataset + max_features : int + number of features of the subspace + (< number of features in dataset) + + Returns + ------- + tuple + indices of the features selected + """ + # Random feature reduction + n_features = dataset.shape[1] + return tuple(sorted(random.sample(range(n_features), max_features))) + @staticmethod def _fs_best( dataset: np.array, labels: np.array, max_features: int diff --git a/stree/tests/Splitter_test.py b/stree/tests/Splitter_test.py index b360801..13d4756 100644 --- a/stree/tests/Splitter_test.py +++ b/stree/tests/Splitter_test.py @@ -297,3 +297,16 @@ class 
Splitter_test(unittest.TestCase): Xs, computed = tcl.get_subspace(X, y, rs) self.assertListEqual(expected, list(computed)) self.assertListEqual(X[:, expected].tolist(), Xs.tolist()) + + def test_get_trandom_subspaces(self): + results = [ + (4, [3, 7, 9, 12]), + (6, [0, 1, 2, 8, 15, 18]), + (7, [1, 2, 4, 8, 10, 12, 13]), + ] + for rs, expected in results: + X, y = load_dataset(n_features=20, n_informative=7) + tcl = self.build(feature_select="trandom", random_state=rs) + Xs, computed = tcl.get_subspace(X, y, rs) + self.assertListEqual(expected, list(computed)) + self.assertListEqual(X[:, expected].tolist(), Xs.tolist())