From bf678df159f4434c903650e1c1a53bfd92dd3a45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana=20G=C3=B3mez?=
 <rmontanana@users.noreply.github.com>
Date: Fri, 29 Oct 2021 12:59:03 +0200
Subject: [PATCH] (#46) Implement true random feature selection (#48)

* (#46) Implement true random feature selection
---
 stree/Splitter.py            | 29 ++++++++++++++++++++++++++++-
 stree/tests/Splitter_test.py | 13 +++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/stree/Splitter.py b/stree/Splitter.py
index 2df3f57..acf737e 100644
--- a/stree/Splitter.py
+++ b/stree/Splitter.py
@@ -273,6 +273,7 @@ class Splitter:
 
         if feature_select not in [
             "random",
+            "trandom",
             "best",
             "mutual",
             "cfs",
@@ -280,7 +281,8 @@ class Splitter:
             "iwss",
         ]:
             raise ValueError(
-                "splitter must be in {random, best, mutual, cfs, fcbf, iwss} "
+                "splitter must be in {random, trandom, best, mutual, cfs, "
+                "fcbf, iwss} "
                 f"got ({feature_select})"
             )
         self.criterion_function = getattr(self, f"_{self._criterion}")
@@ -312,6 +314,31 @@ class Splitter:
         features_sets = self._generate_spaces(n_features, max_features)
         return self._select_best_set(dataset, labels, features_sets)
 
+    @staticmethod
+    def _fs_trandom(
+        dataset: np.array, labels: np.array, max_features: int
+    ) -> tuple:
+        """Return a random combination of feature indices
+
+        Parameters
+        ----------
+        dataset : np.array
+            array of samples
+        labels : np.array
+            labels of the dataset (not used by this selector)
+        max_features : int
+            number of features of the subspace
+            (must not exceed the number of columns in dataset)
+
+        Returns
+        -------
+        tuple
+            indices of the features selected, in ascending order
+        """
+        # Draw max_features distinct column indices without replacement
+        n_features = dataset.shape[1]
+        return tuple(sorted(random.sample(range(n_features), max_features)))
+
     @staticmethod
     def _fs_best(
         dataset: np.array, labels: np.array, max_features: int
diff --git a/stree/tests/Splitter_test.py b/stree/tests/Splitter_test.py
index b360801..13d4756 100644
--- a/stree/tests/Splitter_test.py
+++ b/stree/tests/Splitter_test.py
@@ -297,3 +297,16 @@ class Splitter_test(unittest.TestCase):
             Xs, computed = tcl.get_subspace(X, y, rs)
             self.assertListEqual(expected, list(computed))
             self.assertListEqual(X[:, expected].tolist(), Xs.tolist())
+
+    def test_get_trandom_subspaces(self):
+        results = [  # (rs, expected indices); len(expected) == rs
+            (4, [3, 7, 9, 12]),
+            (6, [0, 1, 2, 8, 15, 18]),
+            (7, [1, 2, 4, 8, 10, 12, 13]),
+        ]
+        for rs, expected in results:
+            X, y = load_dataset(n_features=20, n_informative=7)
+            tcl = self.build(feature_select="trandom", random_state=rs)
+            Xs, computed = tcl.get_subspace(X, y, rs)  # rs reused as 3rd arg
+            self.assertListEqual(expected, list(computed))
+            self.assertListEqual(X[:, expected].tolist(), Xs.tolist())