Implement optimized predict and new predict_proba

This commit is contained in:
2022-05-31 19:12:48 +02:00
parent 65923af9b4
commit 0a78d5be67
3 changed files with 58 additions and 52 deletions

View File

@@ -135,7 +135,7 @@ class Snode:
if not self.is_leaf(): if not self.is_leaf():
return return
classes, card = np.unique(self._y, return_counts=True) classes, card = np.unique(self._y, return_counts=True)
self._proba = np.zeros((num_classes,)) self._proba = np.zeros((num_classes,), dtype=np.int64)
for c, n in zip(classes, card): for c, n in zip(classes, card):
self._proba[c] = n self._proba[c] = n
try: try:

View File

@@ -367,28 +367,66 @@ class Stree(BaseEstimator, ClassifierMixin):
) )
) )
@staticmethod def __predict_class(self, X: np.array) -> np.array:
def _reorder_results(y: np.array, indices: np.array) -> np.array: def compute_prediction(xp, indices, node):
"""Reorder an array based on the array of indices passed if xp is None:
return
if node.is_leaf():
# set a class for indices
result[indices] = node._proba
return
self.splitter_.partition(xp, node, train=False)
x_u, x_d = self.splitter_.part(xp)
i_u, i_d = self.splitter_.part(indices)
compute_prediction(x_u, i_u, node.get_up())
compute_prediction(x_d, i_d, node.get_down())
# setup prediction & make it happen
result = np.zeros((X.shape[0], self.n_classes_))
indices = np.arange(X.shape[0])
compute_prediction(X, indices, self.tree_)
return result
def check_predict(self, X) -> np.array:
    """Validate the model state and the samples before predicting.

    Parameters
    ----------
    X : dataset of samples.

    Returns
    -------
    np.array
        the dataset as returned by ``check_array``

    Raises
    ------
    ValueError
        if the number of columns of X differs from ``n_features_``
    NotFittedError
        if model is not fitted
    """
    # sklearn check: the tree must have been built by a previous fit
    check_is_fitted(self, ["tree_"])
    # Input validation
    X = check_array(X)
    if X.shape[1] == self.n_features_:
        return X
    # Column count does not match the one stored in n_features_
    raise ValueError(
        f"Expected {self.n_features_} features but got "
        f"({X.shape[1]})"
    )
def predict_proba(self, X: np.array) -> np.array:
"""Predict class probabilities of the input samples X.
The predicted class probability is the fraction of samples of the same
class in a leaf.
Parameters Parameters
---------- ----------
y : np.array X : dataset of samples.
data untidy
indices : np.array
indices used to set order
Returns Returns
------- -------
np.array proba : array of shape (n_samples, n_classes)
array y ordered The class probabilities of the input samples.
Raises
------
ValueError
if dataset with inconsistent number of features
NotFittedError
if model is not fitted
""" """
# return array of same type given in y
y_ordered = y.copy() X = self.check_predict(X)
indices = indices.astype(int) # return # of samples of each class in leaf node
for i, index in enumerate(indices): values = self.__predict_class(X)
y_ordered[index] = y[i] normalizer = values.sum(axis=1)[:, np.newaxis]
return y_ordered normalizer[normalizer == 0.0] = 1.0
return values / normalizer
def predict(self, X: np.array) -> np.array: def predict(self, X: np.array) -> np.array:
"""Predict labels for each sample in dataset passed """Predict labels for each sample in dataset passed
@@ -410,40 +448,8 @@ class Stree(BaseEstimator, ClassifierMixin):
NotFittedError NotFittedError
if model is not fitted if model is not fitted
""" """
X = self.check_predict(X)
def predict_class( return self.classes_[np.argmax(self.__predict_class(X), axis=1)]
xp: np.array, indices: np.array, node: Snode
) -> np.array:
if xp is None:
return [], []
if node.is_leaf():
# set a class for every sample in dataset
prediction = np.full((xp.shape[0], 1), node._class)
return prediction, indices
self.splitter_.partition(xp, node, train=False)
x_u, x_d = self.splitter_.part(xp)
i_u, i_d = self.splitter_.part(indices)
prx_u, prin_u = predict_class(x_u, i_u, node.get_up())
prx_d, prin_d = predict_class(x_d, i_d, node.get_down())
return np.append(prx_u, prx_d), np.append(prin_u, prin_d)
# sklearn check
check_is_fitted(self, ["tree_"])
# Input validation
X = check_array(X)
if X.shape[1] != self.n_features_:
raise ValueError(
f"Expected {self.n_features_} features but got "
f"({X.shape[1]})"
)
# setup prediction & make it happen
indices = np.arange(X.shape[0])
result = (
self._reorder_results(*predict_class(X, indices, self.tree_))
.astype(int)
.ravel()
)
return self.classes_[result]
def nodes_leaves(self) -> tuple: def nodes_leaves(self) -> tuple:
"""Compute the number of nodes and leaves in the built tree """Compute the number of nodes and leaves in the built tree

View File

@@ -695,7 +695,7 @@ class Stree_test(unittest.TestCase):
) )
expected_tail = ( expected_tail = (
' [shape=box style=filled label="class=1 impurity=0.000 ' ' [shape=box style=filled label="class=1 impurity=0.000 '
'counts=[0. 1. 0.]"];\n}\n' 'counts=[0 1 0]"];\n}\n'
) )
self.assertEqual(clf.graph(), expected_head + "}\n") self.assertEqual(clf.graph(), expected_head + "}\n")
clf.fit(X, y) clf.fit(X, y)
@@ -715,7 +715,7 @@ class Stree_test(unittest.TestCase):
) )
expected_tail = ( expected_tail = (
' [shape=box style=filled label="class=1 impurity=0.000 ' ' [shape=box style=filled label="class=1 impurity=0.000 '
'counts=[0. 1. 0.]"];\n}\n' 'counts=[0 1 0]"];\n}\n'
) )
self.assertEqual(clf.graph("Sample title"), expected_head + "}\n") self.assertEqual(clf.graph("Sample title"), expected_head + "}\n")
clf.fit(X, y) clf.fit(X, y)