mirror of
https://github.com/Doctorado-ML/Odte.git
synced 2025-07-11 08:12:06 +00:00
add max_features working
This commit is contained in:
parent
98a28cd271
commit
19543b48fa
44
odte/Odte.py
44
odte/Odte.py
@ -53,8 +53,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
def _initialize_random(self) -> np.random.mtrand.RandomState:
|
||||
if self.random_state is None:
|
||||
return np.random.mtrand._rand
|
||||
else:
|
||||
return np.random.RandomState(self.random_state)
|
||||
return np.random.RandomState(self.random_state)
|
||||
|
||||
@staticmethod
|
||||
def _initialize_sample_weight(
|
||||
@ -62,8 +61,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
) -> np.array:
|
||||
if sample_weight is None:
|
||||
return np.ones((n_samples,), dtype=np.float64)
|
||||
else:
|
||||
return sample_weight.copy()
|
||||
return sample_weight.copy()
|
||||
|
||||
def _validate_estimator(self):
|
||||
"""Check the estimator and set the base_estimator_ attribute."""
|
||||
@ -77,10 +75,9 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
# Check parameters are Ok.
|
||||
if self.n_estimators < 3:
|
||||
raise ValueError(
|
||||
f"n_estimators must be greater than 3... got (n_estimators=\
|
||||
{self.n_estimators:f})"
|
||||
f"n_estimators must be greater than 2 but got (n_estimators=\
|
||||
{self.n_estimators})"
|
||||
)
|
||||
# the rest of parameters are checked in estimator
|
||||
check_classification_targets(y)
|
||||
X, y = check_X_y(X, y)
|
||||
sample_weight = _check_sample_weight(
|
||||
@ -90,12 +87,13 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
# Initialize computed parameters
|
||||
# Build the estimator
|
||||
self.n_features_in_ = X.shape[1]
|
||||
self.n_features = X.shape[1]
|
||||
self.n_features_ = X.shape[1]
|
||||
self.max_features_ = self._initialize_max_features()
|
||||
self._validate_estimator()
|
||||
self.classes_, y = np.unique(y, return_inverse=True)
|
||||
self.n_classes_ = self.classes_.shape[0]
|
||||
self.estimators_ = []
|
||||
self.subspaces_ = []
|
||||
self._train(X, y, sample_weight)
|
||||
return self
|
||||
|
||||
@ -109,16 +107,19 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
for _ in range(self.n_estimators):
|
||||
# Build clf
|
||||
clf = clone(self.base_estimator_)
|
||||
# clf.set_params(**self.estimator_params)
|
||||
self.estimators_.append(clf)
|
||||
# bootstrap
|
||||
indices = random_box.randint(0, n_samples, boot_samples)
|
||||
# update weights with the chosen samples
|
||||
weights_update = np.bincount(indices, minlength=n_samples)
|
||||
features = self.get_subspace(X, y)
|
||||
features = self._get_random_subspace(X, y)
|
||||
self.subspaces_.append(features)
|
||||
current_weights = weights * weights_update
|
||||
# train the classifier
|
||||
clf.fit(X[indices, features], y[indices], current_weights[indices])
|
||||
bootstrap = X[indices, :]
|
||||
clf.fit(
|
||||
bootstrap[:, features], y[indices], current_weights[indices]
|
||||
)
|
||||
|
||||
def _get_bootstrap_n_samples(self, n_samples) -> int:
|
||||
if self.max_samples is None:
|
||||
@ -157,7 +158,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
elif self.max_features is None:
|
||||
max_features = self.n_features_
|
||||
elif isinstance(self.max_features, int):
|
||||
max_features = self.max_features
|
||||
max_features = abs(self.max_features)
|
||||
else: # float
|
||||
if self.max_features > 0.0:
|
||||
max_features = max(
|
||||
@ -171,7 +172,7 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
)
|
||||
return max_features
|
||||
|
||||
def _get_subspaces_set(
|
||||
def _get_random_subspace(
|
||||
self, dataset: np.array, labels: np.array
|
||||
) -> np.array:
|
||||
features = range(dataset.shape[1])
|
||||
@ -182,12 +183,6 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
else:
|
||||
return features_sets[0]
|
||||
|
||||
def get_subspace(self, dataset: np.array, labels: np.array) -> list:
|
||||
"""Return the best subspace to build a tree
|
||||
"""
|
||||
indices = self._get_subspaces_set(dataset, labels)
|
||||
return dataset[:, indices], indices
|
||||
|
||||
def predict(self, X: np.array) -> np.array:
|
||||
proba = self.predict_proba(X)
|
||||
return self.classes_.take((np.argmax(proba, axis=1)), axis=0)
|
||||
@ -196,10 +191,15 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
check_is_fitted(self, ["estimators_"])
|
||||
# Input validation
|
||||
X = check_array(X)
|
||||
for tree in self.estimators_:
|
||||
if self.n_features_ != X.shape[1]:
|
||||
raise ValueError("Number of features of the model must "
|
||||
"match the input. Model n_features is {0} and "
|
||||
"input n_features is {1}."
|
||||
"".format(self.n_features_, X.shape[1]))
|
||||
for tree, features in zip(self.estimators_, self.subspaces_):
|
||||
n_samples = X.shape[0]
|
||||
result = np.zeros((n_samples, self.n_classes_))
|
||||
predictions = tree.predict(X)
|
||||
predictions = tree.predict(X[:, features])
|
||||
for i in range(n_samples):
|
||||
result[i, predictions[i]] += 1
|
||||
return result
|
||||
@ -207,8 +207,6 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
||||
def score(
|
||||
self, X: np.array, y: np.array, sample_weight: np.array = None
|
||||
) -> float:
|
||||
# todo
|
||||
check_is_fitted(self, ["estimators_"])
|
||||
check_classification_targets(y)
|
||||
X, y = check_X_y(X, y)
|
||||
y_pred = self.predict(X).reshape(y.shape)
|
||||
|
@ -37,6 +37,28 @@ class Odte_test(unittest.TestCase):
|
||||
computed = tclf._initialize_sample_weight(value, m)
|
||||
self.assertListEqual(expected.tolist(), computed.tolist())
|
||||
|
||||
def test_initialize_max_feature(self):
|
||||
expected_values = [
|
||||
[0, 4, 10, 11],
|
||||
[0, 2, 3, 5, 14, 15],
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
||||
[0, 4, 10, 11],
|
||||
[0, 4, 10, 11],
|
||||
[0, 4, 10, 11],
|
||||
]
|
||||
X, y = load_dataset(
|
||||
random_state=self._random_state, n_features=16, n_samples=10
|
||||
)
|
||||
for max_features in [4, 0.4, 1.0, None, "auto", "sqrt", "log2"]:
|
||||
tclf = Odte(
|
||||
random_state=self._random_state, max_features=max_features
|
||||
)
|
||||
tclf.fit(X, y)
|
||||
computed = tclf._get_random_subspace(X, y)
|
||||
expected = expected_values.pop(0)
|
||||
self.assertListEqual(expected, list(computed))
|
||||
|
||||
def test_initialize_random(self):
|
||||
expected = [37, 235, 908]
|
||||
tclf = Odte(random_state=self._random_state)
|
||||
@ -51,6 +73,13 @@ class Odte_test(unittest.TestCase):
|
||||
self.assertGreaterEqual(value, 101)
|
||||
self.assertLessEqual(value, 1000)
|
||||
|
||||
def test_bogus_max_features(self):
|
||||
values = ["duck", -0.1, 0.0]
|
||||
for max_features in values:
|
||||
with self.assertRaises(ValueError):
|
||||
tclf = Odte(max_features=max_features)
|
||||
tclf.fit(*load_dataset(self._random_state))
|
||||
|
||||
def test_bogus_n_estimator(self):
|
||||
values = [0, -1, 2]
|
||||
for n_estimators in values:
|
||||
@ -79,9 +108,7 @@ class Odte_test(unittest.TestCase):
|
||||
X, y = load_dataset(self._random_state)
|
||||
expected = y
|
||||
tclf = Odte(
|
||||
random_state=self._random_state,
|
||||
max_features=None,
|
||||
max_samples=0.1,
|
||||
random_state=self._random_state, max_features=1.0, max_samples=0.1,
|
||||
)
|
||||
tclf.set_params(**dict(base_estimator__kernel="linear",))
|
||||
computed = tclf.fit(X, y).predict(X)
|
||||
@ -101,8 +128,8 @@ class Odte_test(unittest.TestCase):
|
||||
def test_score_splitter_max_features(self):
|
||||
X, y = load_dataset(self._random_state, n_features=12, n_samples=150)
|
||||
results = [
|
||||
0.9866666666666667,
|
||||
0.9866666666666667,
|
||||
0.6466666666666666,
|
||||
0.6466666666666666,
|
||||
0.9866666666666667,
|
||||
0.9866666666666667,
|
||||
]
|
||||
|
Loading…
x
Reference in New Issue
Block a user