mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-16 16:06:01 +00:00
* (#46) Implement true random feature selection
This commit is contained in:
committed by
GitHub
parent
36b08b1bcf
commit
bf678df159
@@ -273,6 +273,7 @@ class Splitter:
|
|||||||
|
|
||||||
if feature_select not in [
|
if feature_select not in [
|
||||||
"random",
|
"random",
|
||||||
|
"trandom",
|
||||||
"best",
|
"best",
|
||||||
"mutual",
|
"mutual",
|
||||||
"cfs",
|
"cfs",
|
||||||
@@ -280,7 +281,8 @@ class Splitter:
|
|||||||
"iwss",
|
"iwss",
|
||||||
]:
|
]:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"splitter must be in {random, best, mutual, cfs, fcbf, iwss} "
|
"splitter must be in {random, trandom, best, mutual, cfs, "
|
||||||
|
"fcbf, iwss} "
|
||||||
f"got ({feature_select})"
|
f"got ({feature_select})"
|
||||||
)
|
)
|
||||||
self.criterion_function = getattr(self, f"_{self._criterion}")
|
self.criterion_function = getattr(self, f"_{self._criterion}")
|
||||||
@@ -312,6 +314,31 @@ class Splitter:
|
|||||||
features_sets = self._generate_spaces(n_features, max_features)
|
features_sets = self._generate_spaces(n_features, max_features)
|
||||||
return self._select_best_set(dataset, labels, features_sets)
|
return self._select_best_set(dataset, labels, features_sets)
|
||||||
|
|
||||||
|
@staticmethod
def _fs_trandom(
    dataset: np.ndarray, labels: np.ndarray, max_features: int
) -> tuple:
    """Return a random feature set combination

    A single subset of feature indices is drawn uniformly at random,
    without replacement, using the standard library ``random`` module.

    Parameters
    ----------
    dataset : np.ndarray
        array of samples; only its second dimension (the number of
        features) is read
    labels : np.ndarray
        labels of the dataset (not read by this strategy; presumably
        kept so the signature matches the other ``_fs_*`` selectors)
    max_features : int
        number of features of the subspace
        (<= number of features in dataset)

    Returns
    -------
    tuple
        indices of the features selected, in ascending order

    Raises
    ------
    ValueError
        if max_features is negative or greater than the number of
        features in dataset
    """
    n_features = dataset.shape[1]
    # Fail with an explicit message instead of the generic
    # "Sample larger than population or is negative" of random.sample;
    # the exception type is the same one random.sample would raise.
    if not 0 <= max_features <= n_features:
        raise ValueError(
            f"max_features must be in [0, {n_features}] "
            f"got ({max_features})"
        )
    # Random feature reduction
    return tuple(sorted(random.sample(range(n_features), max_features)))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _fs_best(
|
def _fs_best(
|
||||||
dataset: np.array, labels: np.array, max_features: int
|
dataset: np.array, labels: np.array, max_features: int
|
||||||
|
@@ -297,3 +297,16 @@ class Splitter_test(unittest.TestCase):
|
|||||||
Xs, computed = tcl.get_subspace(X, y, rs)
|
Xs, computed = tcl.get_subspace(X, y, rs)
|
||||||
self.assertListEqual(expected, list(computed))
|
self.assertListEqual(expected, list(computed))
|
||||||
self.assertListEqual(X[:, expected].tolist(), Xs.tolist())
|
self.assertListEqual(X[:, expected].tolist(), Xs.tolist())
|
||||||
|
|
||||||
|
def test_get_trandom_subspaces(self):
    """Check the feature subsets produced with feature_select="trandom".

    Each case uses one integer both as the splitter's random_state and
    as the third argument of get_subspace, and compares the returned
    feature indices and the returned sub-matrix with the expected ones.
    """
    # key: value used as random_state and as third get_subspace argument
    # value: feature indices expected for that value
    cases = {
        4: [3, 7, 9, 12],
        6: [0, 1, 2, 8, 15, 18],
        7: [1, 2, 4, 8, 10, 12, 13],
    }
    for seed, wanted in cases.items():
        X, y = load_dataset(n_features=20, n_informative=7)
        splitter = self.build(feature_select="trandom", random_state=seed)
        subspace, indices = splitter.get_subspace(X, y, seed)
        self.assertListEqual(wanted, list(indices))
        # the returned matrix must hold exactly the selected columns
        self.assertListEqual(X[:, wanted].tolist(), subspace.tolist())
|
||||||
|
Reference in New Issue
Block a user