mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-16 07:56:06 +00:00
Implement predict_proba with test.
Fix tree overload with dataset in nodes only needed in tests
This commit is contained in:
@@ -6,6 +6,7 @@ __version__ = "0.9"
|
||||
Node of the Stree (binary tree)
|
||||
'''
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
from sklearn.svm import LinearSVC
|
||||
|
||||
@@ -17,11 +18,12 @@ class Snode:
|
||||
self._interceptor = 0. if clf is None else clf.intercept_
|
||||
self._title = title
|
||||
self._belief = 0. # belief of the prediction in a leaf node based on samples
|
||||
self._X = X
|
||||
self._X = X if os.environ.get(
|
||||
'TESTING', 'Not Set') != 'Not Set' else None
|
||||
self._y = y
|
||||
self._down = None
|
||||
self._up = None
|
||||
self._class = None # really needed?
|
||||
self._class = None
|
||||
|
||||
def set_down(self, son):
|
||||
self._down = son
|
||||
@@ -42,6 +44,9 @@ class Snode:
|
||||
"""Compute the class of the predictor and its belief based on the subdataset of the node
|
||||
only if it is a leaf
|
||||
"""
|
||||
# Clean memory
|
||||
#self._X = None
|
||||
#self._y = None
|
||||
if not self.is_leaf():
|
||||
return
|
||||
classes, card = np.unique(self._y, return_counts=True)
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# This Python file uses the following encoding: utf-8
|
||||
'''
|
||||
__author__ = "Ricardo Montañana Gómez"
|
||||
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
|
||||
@@ -10,23 +11,37 @@ Uses LinearSVC
|
||||
import numpy as np
|
||||
import typing
|
||||
from sklearn.svm import LinearSVC
|
||||
from sklearn.base import BaseEstimator, ClassifierMixin
|
||||
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
|
||||
|
||||
from trees.Snode import Snode
|
||||
|
||||
|
||||
class Stree:
|
||||
class Stree(BaseEstimator, ClassifierMixin):
|
||||
"""
|
||||
"""
|
||||
|
||||
def __init__(self, C=1.0, max_iter: int = 1000, random_state: int = 0, use_predictions: bool = False):
|
||||
def __init__(self, C=1.0, max_iter: int=1000, random_state: int=0, use_predictions: bool=False):
|
||||
self._max_iter = max_iter
|
||||
self._C = C
|
||||
self._random_state = random_state
|
||||
self._outcomes = None
|
||||
self._tree = None
|
||||
self.__folder = 'data/'
|
||||
self.__use_predictions = use_predictions
|
||||
self.__trained = False
|
||||
self.__proba = False
|
||||
|
||||
def get_params(self, deep=True):
    """Return the hyperparameters as a name -> value dict (sklearn estimator API).

    :param deep: accepted for compatibility with sklearn; not used here
    :return: dict with the keys "C", "random_state" and "max_iter"
    """
    exposed = ("C", "random_state", "max_iter")
    # each exposed hyperparameter is stored under the same name with a
    # leading underscore
    return {name: getattr(self, "_" + name) for name in exposed}
|
||||
|
||||
def set_params(self, **parameters):
    """Set hyperparameters as specified by sklearn (needed by grid searches).

    The hyperparameters are stored in underscore-prefixed attributes
    (``_C``, ``_max_iter``, ``_random_state``), which are the ones
    ``get_params`` reports. Each known name is therefore written to its
    backing attribute; writing to a public attribute of the same name would
    leave the value read back by ``get_params`` unchanged.

    :param parameters: hyperparameter names mapped to their new values
    :return: the estimator itself, so calls can be chained
    """
    backing = {
        "C": "_C",
        "max_iter": "_max_iter",
        "random_state": "_random_state",
    }
    for parameter, value in parameters.items():
        # names without a backing attribute are still set verbatim, as before
        setattr(self, backing.get(parameter, parameter), value)
    return self
|
||||
|
||||
def _split_data(self, clf: LinearSVC, X: np.ndarray, y: np.ndarray) -> list:
|
||||
if self.__use_predictions:
|
||||
@@ -47,6 +62,8 @@ class Stree:
|
||||
return [X_up, y_up, X_down, y_down]
|
||||
|
||||
def fit(self, X: np.ndarray, y: np.ndarray, title: str = 'root') -> 'Stree':
|
||||
X, y = check_X_y(X, y)
|
||||
self.n_features_in_ = X.shape[1]
|
||||
self._tree = self.train(X, y.ravel(), title)
|
||||
self._build_predictor()
|
||||
self.__trained = True
|
||||
@@ -83,16 +100,31 @@ class Stree:
|
||||
def predict(self, X: np.array) -> np.array:
    """Predict the outcome of every sample in X by descending the fitted tree.

    At each internal node the sample is evaluated against the hyperplane
    stored in the node (``_vector`` / ``_interceptor``): a positive value
    follows the "down" branch, anything else follows the "up" branch. The
    class stored in the leaf that is reached is the prediction.

    :param X: samples to classify, validated with sklearn's ``check_array``
    :return: flat array with one class per sample; while ``predict_proba``
        is running (``__proba`` set) every sample contributes two
        consecutive values instead, the class and the belief of its leaf
    """
    def predict_class(xp: np.array, tree: Snode) -> np.array:
        # Walk down with a loop instead of recursion, so the depth of the
        # tree is not bounded by the interpreter's recursion limit.
        node = tree
        while not node.is_leaf():
            coef = node._vector[0, :].reshape(-1, xp.shape[1])
            if xp.dot(coef.T) + node._interceptor[0] > 0:
                node = node.get_down()
            else:
                node = node.get_up()
        if self.__proba:
            return [node._class, node._belief]
        return node._class

    # sklearn check
    check_is_fitted(self)
    # Input validation
    X = check_array(X)
    n_features = X.shape[1]
    # Collect the per-sample results in a list and build the array once;
    # appending to the array inside the loop copies it on every iteration.
    outcomes = [predict_class(xp.reshape(-1, n_features), self._tree)
                for xp in X]
    # A single np.append keeps the flattened layout and the dtype promotion
    # (starting from an int array) of the sample-by-sample version.
    return np.append(np.array([], dtype=int), outcomes)
|
||||
|
||||
def predict_proba(self, X: np.array) -> np.array:
    """Return the predicted class and its belief for every sample in X.

    ``predict`` is run with the ``__proba`` flag set, which makes it emit
    two values per sample (class, belief of the leaf); the flat result is
    then arranged as one row per sample.

    :param X: samples to classify (anything ``predict`` accepts)
    :return: array of shape (n_samples, 2); column 0 holds the predicted
        class and column 1 the belief of the leaf that produced it
    """
    self.__proba = True
    try:
        flat = self.predict(X)
    finally:
        # always restore the flag, otherwise a failure here would leave
        # later predict() calls returning class/belief pairs
        self.__proba = False
    # np.shape also handles list input, which predict() accepts
    return flat.reshape(np.shape(X)[0], 2)
|
||||
|
||||
def score(self, X: np.array, y: np.array, print_out=True) -> float:
|
||||
if not self.__trained:
|
||||
self.fit(X, y)
|
||||
|
Reference in New Issue
Block a user