From cd7c7f3938291991c23f90936570021eb2339452 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= <rmontanana@gmail.com>
Date: Tue, 22 Feb 2022 20:40:35 +0100
Subject: [PATCH] First try to fix initialization issue

---
 odte/Odte.py             | 73 +++++++++++++++++++++-------------------
 odte/_version.py         |  2 +-
 odte/tests/Odte_tests.py | 12 +++++++
 3 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/odte/Odte.py b/odte/Odte.py
index 036be0a..4b2809f 100644
--- a/odte/Odte.py
+++ b/odte/Odte.py
@@ -26,6 +26,35 @@ from stree import Stree  # type: ignore
 from ._version import __version__
 
 
+def _parallel_build_tree(
+    base_estimator_: Stree,
+    X: np.ndarray,
+    y: np.ndarray,
+    weights: np.ndarray,
+    random_box: np.random.mtrand.RandomState,
+    random_seed: int,
+    boot_samples: int,
+    max_features: int,
+    hyperparams: str,
+) -> Tuple[BaseEstimator, Tuple[int, ...]]:
+    clf = base_estimator_  # not cloned here: _train passes one clone per call
+    hyperparams_ = json.loads(hyperparams)
+    hyperparams_.update(dict(random_state=random_seed))
+    clf.set_params(**hyperparams_)
+    n_samples = X.shape[0]
+    # bootstrap: draw boot_samples row indices in [0, n_samples)
+    indices = random_box.randint(0, n_samples, boot_samples)
+    # scale each sample weight by how many times it was drawn
+    weights_update = np.bincount(indices, minlength=n_samples)
+    current_weights = weights * weights_update
+    # random subspace: features this estimator is trained on
+    features = Odte._get_random_subspace(X, y, max_features)
+    # train the classifier on the bootstrap rows and chosen features
+    bootstrap = X[indices, :]
+    clf.fit(bootstrap[:, features], y[indices], current_weights[indices])
+    return (clf, features)
+
+
 class Odte(BaseEnsemble, ClassifierMixin):
     def __init__(
         self,
@@ -109,45 +138,18 @@ class Odte(BaseEnsemble, ClassifierMixin):
         self.leaves_ = tleaves / self.n_estimators
         self.nodes_ = tnodes / self.n_estimators
 
-    @staticmethod
-    def _parallel_build_tree(
-        base_estimator_: Stree,
-        X: np.ndarray,
-        y: np.ndarray,
-        weights: np.ndarray,
-        random_box: np.random.mtrand.RandomState,
-        random_seed: int,
-        boot_samples: int,
-        max_features: int,
-        hyperparams: str,
-    ) -> Tuple[BaseEstimator, Tuple[int, ...]]:
-        clf = clone(base_estimator_)
-        hyperparams_ = json.loads(hyperparams)
-        hyperparams_.update(dict(random_state=random_seed))
-        clf.set_params(**hyperparams_)
-        n_samples = X.shape[0]
-        # bootstrap
-        indices = random_box.randint(0, n_samples, boot_samples)
-        # update weights with the chosen samples
-        weights_update = np.bincount(indices, minlength=n_samples)
-        current_weights = weights * weights_update
-        # random subspace
-        features = Odte._get_random_subspace(X, y, max_features)
-        # train the classifier
-        bootstrap = X[indices, :]
-        clf.fit(bootstrap[:, features], y[indices], current_weights[indices])
-        return (clf, features)
-
     def _train(
         self, X: np.ndarray, y: np.ndarray, weights: np.ndarray
     ) -> Tuple[List[BaseEstimator], List[Tuple[int, ...]]]:
         random_box = self._initialize_random()
         n_samples = X.shape[0]
         boot_samples = self._get_bootstrap_n_samples(n_samples)
-        clf = clone(self.base_estimator_)
+        estimator = []
+        for i in range(self.n_estimators):
+            estimator.append(clone(self.base_estimator_))
         return Parallel(n_jobs=self.n_jobs, prefer="threads")(  # type: ignore
-            delayed(Odte._parallel_build_tree)(
-                clf,
+            delayed(_parallel_build_tree)(
+                estimator[i],
                 X,
                 y,
                 weights,
@@ -157,8 +159,11 @@ class Odte(BaseEnsemble, ClassifierMixin):
                 self.max_features_,
                 self.be_hyperparams,
             )
-            for random_seed in range(
-                self.random_state, self.random_state + self.n_estimators
+            for random_seed, i in zip(
+                range(
+                    self.random_state, self.random_state + self.n_estimators
+                ),
+                range(self.n_estimators),
             )
         )
 
diff --git a/odte/_version.py b/odte/_version.py
index 260c070..f9aa3e1 100644
--- a/odte/_version.py
+++ b/odte/_version.py
@@ -1 +1 @@
-__version__ = "0.3.1"
+__version__ = "0.3.2"
diff --git a/odte/tests/Odte_tests.py b/odte/tests/Odte_tests.py
index f19f063..21abf07 100644
--- a/odte/tests/Odte_tests.py
+++ b/odte/tests/Odte_tests.py
@@ -257,3 +257,15 @@ class Odte_test(unittest.TestCase):
     def test_version(self):
         tclf = Odte()
         self.assertEqual(__version__, tclf.version())
+
+    def test_parallel_score(self):
+        tclf_p = Odte(
+            n_jobs=-1, random_state=self._random_state, n_estimators=30
+        )
+        tclf_s = Odte(
+            n_jobs=1, random_state=self._random_state, n_estimators=30
+        )
+        X, y = load_dataset(self._random_state, n_features=56, n_samples=1500)
+        tclf_p.fit(X, y)  # ensemble built with n_jobs=-1
+        tclf_s.fit(X, y)  # ensemble built with n_jobs=1
+        self.assertAlmostEqual(tclf_p.score(X, y), tclf_s.score(X, y))