mirror of
https://github.com/Doctorado-ML/Odte.git
synced 2025-07-11 08:12:06 +00:00
First try to fix initialization issue
This commit is contained in:
parent
aff96bb97d
commit
cd7c7f3938
73
odte/Odte.py
73
odte/Odte.py
@ -26,6 +26,35 @@ from stree import Stree # type: ignore
|
|||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
|
|
||||||
|
def _parallel_build_tree(
|
||||||
|
base_estimator_: Stree,
|
||||||
|
X: np.ndarray,
|
||||||
|
y: np.ndarray,
|
||||||
|
weights: np.ndarray,
|
||||||
|
random_box: np.random.mtrand.RandomState,
|
||||||
|
random_seed: int,
|
||||||
|
boot_samples: int,
|
||||||
|
max_features: int,
|
||||||
|
hyperparams: str,
|
||||||
|
) -> Tuple[BaseEstimator, Tuple[int, ...]]:
|
||||||
|
clf = base_estimator_
|
||||||
|
hyperparams_ = json.loads(hyperparams)
|
||||||
|
hyperparams_.update(dict(random_state=random_seed))
|
||||||
|
clf.set_params(**hyperparams_)
|
||||||
|
n_samples = X.shape[0]
|
||||||
|
# bootstrap
|
||||||
|
indices = random_box.randint(0, n_samples, boot_samples)
|
||||||
|
# update weights with the chosen samples
|
||||||
|
weights_update = np.bincount(indices, minlength=n_samples)
|
||||||
|
current_weights = weights * weights_update
|
||||||
|
# random subspace
|
||||||
|
features = Odte._get_random_subspace(X, y, max_features)
|
||||||
|
# train the classifier
|
||||||
|
bootstrap = X[indices, :]
|
||||||
|
clf.fit(bootstrap[:, features], y[indices], current_weights[indices])
|
||||||
|
return (clf, features)
|
||||||
|
|
||||||
|
|
||||||
class Odte(BaseEnsemble, ClassifierMixin):
|
class Odte(BaseEnsemble, ClassifierMixin):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -109,45 +138,18 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
|||||||
self.leaves_ = tleaves / self.n_estimators
|
self.leaves_ = tleaves / self.n_estimators
|
||||||
self.nodes_ = tnodes / self.n_estimators
|
self.nodes_ = tnodes / self.n_estimators
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _parallel_build_tree(
|
|
||||||
base_estimator_: Stree,
|
|
||||||
X: np.ndarray,
|
|
||||||
y: np.ndarray,
|
|
||||||
weights: np.ndarray,
|
|
||||||
random_box: np.random.mtrand.RandomState,
|
|
||||||
random_seed: int,
|
|
||||||
boot_samples: int,
|
|
||||||
max_features: int,
|
|
||||||
hyperparams: str,
|
|
||||||
) -> Tuple[BaseEstimator, Tuple[int, ...]]:
|
|
||||||
clf = clone(base_estimator_)
|
|
||||||
hyperparams_ = json.loads(hyperparams)
|
|
||||||
hyperparams_.update(dict(random_state=random_seed))
|
|
||||||
clf.set_params(**hyperparams_)
|
|
||||||
n_samples = X.shape[0]
|
|
||||||
# bootstrap
|
|
||||||
indices = random_box.randint(0, n_samples, boot_samples)
|
|
||||||
# update weights with the chosen samples
|
|
||||||
weights_update = np.bincount(indices, minlength=n_samples)
|
|
||||||
current_weights = weights * weights_update
|
|
||||||
# random subspace
|
|
||||||
features = Odte._get_random_subspace(X, y, max_features)
|
|
||||||
# train the classifier
|
|
||||||
bootstrap = X[indices, :]
|
|
||||||
clf.fit(bootstrap[:, features], y[indices], current_weights[indices])
|
|
||||||
return (clf, features)
|
|
||||||
|
|
||||||
def _train(
|
def _train(
|
||||||
self, X: np.ndarray, y: np.ndarray, weights: np.ndarray
|
self, X: np.ndarray, y: np.ndarray, weights: np.ndarray
|
||||||
) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
|
) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
|
||||||
random_box = self._initialize_random()
|
random_box = self._initialize_random()
|
||||||
n_samples = X.shape[0]
|
n_samples = X.shape[0]
|
||||||
boot_samples = self._get_bootstrap_n_samples(n_samples)
|
boot_samples = self._get_bootstrap_n_samples(n_samples)
|
||||||
clf = clone(self.base_estimator_)
|
estimator = []
|
||||||
|
for i in range(self.n_estimators):
|
||||||
|
estimator.append(clone(self.base_estimator_))
|
||||||
return Parallel(n_jobs=self.n_jobs, prefer="threads")( # type: ignore
|
return Parallel(n_jobs=self.n_jobs, prefer="threads")( # type: ignore
|
||||||
delayed(Odte._parallel_build_tree)(
|
delayed(_parallel_build_tree)(
|
||||||
clf,
|
estimator[i],
|
||||||
X,
|
X,
|
||||||
y,
|
y,
|
||||||
weights,
|
weights,
|
||||||
@ -157,8 +159,11 @@ class Odte(BaseEnsemble, ClassifierMixin):
|
|||||||
self.max_features_,
|
self.max_features_,
|
||||||
self.be_hyperparams,
|
self.be_hyperparams,
|
||||||
)
|
)
|
||||||
for random_seed in range(
|
for random_seed, i in zip(
|
||||||
self.random_state, self.random_state + self.n_estimators
|
range(
|
||||||
|
self.random_state, self.random_state + self.n_estimators
|
||||||
|
),
|
||||||
|
range(self.n_estimators),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = "0.3.1"
|
__version__ = "0.3.2"
|
||||||
|
@ -257,3 +257,15 @@ class Odte_test(unittest.TestCase):
|
|||||||
def test_version(self):
|
def test_version(self):
|
||||||
tclf = Odte()
|
tclf = Odte()
|
||||||
self.assertEqual(__version__, tclf.version())
|
self.assertEqual(__version__, tclf.version())
|
||||||
|
|
||||||
|
def test_parallel_score(self):
|
||||||
|
tclf_p = Odte(
|
||||||
|
n_jobs=-1, random_state=self._random_state, n_estimators=30
|
||||||
|
)
|
||||||
|
tclf_s = Odte(
|
||||||
|
n_jobs=1, random_state=self._random_state, n_estimators=30
|
||||||
|
)
|
||||||
|
X, y = load_dataset(self._random_state, n_features=56, n_samples=1500)
|
||||||
|
tclf_p.fit(X, y)
|
||||||
|
tclf_s.fit(X, y)
|
||||||
|
self.assertAlmostEqual(tclf_p.score(X, y), tclf_s.score(X, y))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user