Mirror of https://github.com/Doctorado-ML/Odte.git (synced 2025-07-11 08:12:06 +00:00)

Commit 19543b48fa "add max_features working" (parent 98a28cd271)

odte/Odte.py: 44 changed lines
@@ -53,8 +53,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
     def _initialize_random(self) -> np.random.mtrand.RandomState:
         if self.random_state is None:
             return np.random.mtrand._rand
-        else:
-            return np.random.RandomState(self.random_state)
+        return np.random.RandomState(self.random_state)
 
     @staticmethod
     def _initialize_sample_weight(
@@ -62,8 +61,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
     ) -> np.array:
         if sample_weight is None:
             return np.ones((n_samples,), dtype=np.float64)
-        else:
-            return sample_weight.copy()
+        return sample_weight.copy()
 
     def _validate_estimator(self):
         """Check the estimator and set the base_estimator_ attribute."""
@@ -77,10 +75,9 @@ class Odte(BaseEnsemble, ClassifierMixin):
         # Check parameters are Ok.
         if self.n_estimators < 3:
             raise ValueError(
-                f"n_estimators must be greater than 3... got (n_estimators=\
-                {self.n_estimators:f})"
+                f"n_estimators must be greater than 2 but got (n_estimators=\
+                {self.n_estimators})"
             )
-        # the rest of parameters are checked in estimator
         check_classification_targets(y)
         X, y = check_X_y(X, y)
         sample_weight = _check_sample_weight(
@@ -90,12 +87,13 @@ class Odte(BaseEnsemble, ClassifierMixin):
         # Initialize computed parameters
         # Build the estimator
         self.n_features_in_ = X.shape[1]
-        self.n_features = X.shape[1]
+        self.n_features_ = X.shape[1]
         self.max_features_ = self._initialize_max_features()
         self._validate_estimator()
         self.classes_, y = np.unique(y, return_inverse=True)
         self.n_classes_ = self.classes_.shape[0]
         self.estimators_ = []
+        self.subspaces_ = []
         self._train(X, y, sample_weight)
         return self
 
@@ -109,16 +107,19 @@ class Odte(BaseEnsemble, ClassifierMixin):
         for _ in range(self.n_estimators):
             # Build clf
             clf = clone(self.base_estimator_)
-            # clf.set_params(**self.estimator_params)
             self.estimators_.append(clf)
             # bootstrap
             indices = random_box.randint(0, n_samples, boot_samples)
             # update weights with the chosen samples
             weights_update = np.bincount(indices, minlength=n_samples)
-            features = self.get_subspace(X, y)
+            features = self._get_random_subspace(X, y)
+            self.subspaces_.append(features)
             current_weights = weights * weights_update
             # train the classifier
-            clf.fit(X[indices, features], y[indices], current_weights[indices])
+            bootstrap = X[indices, :]
+            clf.fit(
+                bootstrap[:, features], y[indices], current_weights[indices]
+            )
 
     def _get_bootstrap_n_samples(self, n_samples) -> int:
         if self.max_samples is None:
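The switch from the one-step fancy index to the two-step row/column selection matters because numpy pairs index arrays element-wise instead of taking a sub-matrix. A minimal sketch with toy arrays (not taken from the repository) showing the difference:

import numpy as np

X = np.arange(20).reshape(5, 4)   # 5 samples, 4 features
indices = np.array([0, 2, 4])     # bootstrap row indices
features = np.array([1, 3])       # chosen feature subspace

# One-step fancy indexing pairs the two index arrays element-wise, so this
# raises an IndexError here: shapes (3,) and (2,) cannot broadcast together.
# X[indices, features]

# Selecting rows first and columns second yields the intended (3, 2) sub-matrix.
bootstrap = X[indices, :]
print(bootstrap[:, features])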
@@ -157,7 +158,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
         elif self.max_features is None:
             max_features = self.n_features_
         elif isinstance(self.max_features, int):
-            max_features = self.max_features
+            max_features = abs(self.max_features)
         else:  # float
             if self.max_features > 0.0:
                 max_features = max(
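For reference, a hypothetical standalone helper (not the library's actual implementation) that mirrors the resolution behaviour the unit tests further down expect for the different max_features values:

import math

def resolve_max_features(max_features, n_features: int) -> int:
    # Hypothetical sketch mirroring the values the tests expect:
    # 4 -> 4, 0.4 of 16 -> 6, 1.0 or None -> 16, "auto"/"sqrt"/"log2" of 16 -> 4.
    if isinstance(max_features, str):
        if max_features in ("auto", "sqrt"):
            return max(1, int(math.sqrt(n_features)))
        if max_features == "log2":
            return max(1, int(math.log2(n_features)))
        raise ValueError(f"Invalid value for max_features: {max_features}")
    if max_features is None:
        return n_features
    if isinstance(max_features, int):
        return abs(max_features)
    if isinstance(max_features, float) and 0.0 < max_features <= 1.0:
        return max(1, int(max_features * n_features))
    raise ValueError(f"Invalid value for max_features: {max_features}")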
@@ -171,7 +172,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
             )
         return max_features
 
-    def _get_subspaces_set(
+    def _get_random_subspace(
         self, dataset: np.array, labels: np.array
     ) -> np.array:
         features = range(dataset.shape[1])
@ -182,12 +183,6 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
|||||||
else:
|
else:
|
||||||
return features_sets[0]
|
return features_sets[0]
|
||||||
|
|
||||||
def get_subspace(self, dataset: np.array, labels: np.array) -> list:
|
|
||||||
"""Return the best subspace to build a tree
|
|
||||||
"""
|
|
||||||
indices = self._get_subspaces_set(dataset, labels)
|
|
||||||
return dataset[:, indices], indices
|
|
||||||
|
|
||||||
def predict(self, X: np.array) -> np.array:
|
def predict(self, X: np.array) -> np.array:
|
||||||
proba = self.predict_proba(X)
|
proba = self.predict_proba(X)
|
||||||
return self.classes_.take((np.argmax(proba, axis=1)), axis=0)
|
return self.classes_.take((np.argmax(proba, axis=1)), axis=0)
|
||||||
@@ -196,10 +191,15 @@ class Odte(BaseEnsemble, ClassifierMixin):
         check_is_fitted(self, ["estimators_"])
         # Input validation
         X = check_array(X)
-        for tree in self.estimators_:
+        if self.n_features_ != X.shape[1]:
+            raise ValueError("Number of features of the model must "
+                             "match the input. Model n_features is {0} and "
+                             "input n_features is {1}."
+                             "".format(self.n_features_, X.shape[1]))
+        for tree, features in zip(self.estimators_, self.subspaces_):
             n_samples = X.shape[0]
             result = np.zeros((n_samples, self.n_classes_))
-            predictions = tree.predict(X)
+            predictions = tree.predict(X[:, features])
             for i in range(n_samples):
                 result[i, predictions[i]] += 1
         return result
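As a side note, a hypothetical standalone version of this voting loop, with result created once so that every fitted estimator contributes a vote, could look as follows (names are illustrative, not the library's API):

import numpy as np

def ensemble_votes(estimators, subspaces, X, n_classes: int) -> np.ndarray:
    # Illustrative helper: each fitted tree votes on its own feature subspace,
    # and the votes are accumulated in a single (n_samples, n_classes) matrix.
    # Assumes each tree returns integer class indices in range(n_classes).
    result = np.zeros((X.shape[0], n_classes))
    for tree, features in zip(estimators, subspaces):
        predictions = tree.predict(X[:, features])
        result[np.arange(X.shape[0]), predictions] += 1
    return result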
@@ -207,8 +207,6 @@ class Odte(BaseEnsemble, ClassifierMixin):
     def score(
         self, X: np.array, y: np.array, sample_weight: np.array = None
     ) -> float:
-        # todo
-        check_is_fitted(self, ["estimators_"])
         check_classification_targets(y)
         X, y = check_X_y(X, y)
         y_pred = self.predict(X).reshape(y.shape)
@@ -37,6 +37,28 @@ class Odte_test(unittest.TestCase):
             computed = tclf._initialize_sample_weight(value, m)
             self.assertListEqual(expected.tolist(), computed.tolist())
 
+    def test_initialize_max_feature(self):
+        expected_values = [
+            [0, 4, 10, 11],
+            [0, 2, 3, 5, 14, 15],
+            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+            [0, 4, 10, 11],
+            [0, 4, 10, 11],
+            [0, 4, 10, 11],
+        ]
+        X, y = load_dataset(
+            random_state=self._random_state, n_features=16, n_samples=10
+        )
+        for max_features in [4, 0.4, 1.0, None, "auto", "sqrt", "log2"]:
+            tclf = Odte(
+                random_state=self._random_state, max_features=max_features
+            )
+            tclf.fit(X, y)
+            computed = tclf._get_random_subspace(X, y)
+            expected = expected_values.pop(0)
+            self.assertListEqual(expected, list(computed))
+
     def test_initialize_random(self):
         expected = [37, 235, 908]
         tclf = Odte(random_state=self._random_state)
@@ -51,6 +73,13 @@ class Odte_test(unittest.TestCase):
             self.assertGreaterEqual(value, 101)
             self.assertLessEqual(value, 1000)
 
+    def test_bogus_max_features(self):
+        values = ["duck", -0.1, 0.0]
+        for max_features in values:
+            with self.assertRaises(ValueError):
+                tclf = Odte(max_features=max_features)
+                tclf.fit(*load_dataset(self._random_state))
+
     def test_bogus_n_estimator(self):
         values = [0, -1, 2]
         for n_estimators in values:
@@ -79,9 +108,7 @@ class Odte_test(unittest.TestCase):
         X, y = load_dataset(self._random_state)
         expected = y
         tclf = Odte(
-            random_state=self._random_state,
-            max_features=None,
-            max_samples=0.1,
+            random_state=self._random_state, max_features=1.0, max_samples=0.1,
         )
         tclf.set_params(**dict(base_estimator__kernel="linear",))
         computed = tclf.fit(X, y).predict(X)
@@ -101,8 +128,8 @@ class Odte_test(unittest.TestCase):
     def test_score_splitter_max_features(self):
         X, y = load_dataset(self._random_state, n_features=12, n_samples=150)
         results = [
-            0.9866666666666667,
-            0.9866666666666667,
+            0.6466666666666666,
+            0.6466666666666666,
             0.9866666666666667,
             0.9866666666666667,
         ]
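Putting it together, a usage sketch that mirrors the test code above; the import path and the synthetic dataset are assumptions, not taken verbatim from the repository:

from sklearn.datasets import make_classification
from odte import Odte  # assumed import path

X, y = make_classification(n_samples=150, n_features=12, random_state=0)
clf = Odte(random_state=0, max_features="sqrt", max_samples=0.1)
clf.set_params(**dict(base_estimator__kernel="linear"))  # as in the tests
clf.fit(X, y)
print(clf.score(X, y))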