Implement ovo strategy (#37)

* Implement ovo strategy
* Set ovo strategy as default
* Add kernel liblinear with LinearSVC classifier
* Fix weak test
Ricardo Montañana Gómez
2021-05-10 12:16:53 +02:00
committed by GitHub
parent 5cef0f4875
commit 4f04e72670
6 changed files with 252 additions and 92 deletions

View File

@@ -34,22 +34,23 @@ Can be found in
 ## Hyperparameters
 
 |     | **Hyperparameter**  | **Type/Values** | **Default** | **Meaning** |
 | --- | ------------------- | --------------- | ----------- | ----------- |
 | \* | C | \<float\> | 1.0 | Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. |
-| \* | kernel | {"linear", "poly", "rbf", "sigmoid"} | linear | Specifies the kernel type to be used in the algorithm. It must be one of linear, poly or rbf. |
+| \* | kernel | {"liblinear", "linear", "poly", "rbf", "sigmoid"} | linear | Specifies the kernel type to be used in the algorithm. It must be one of liblinear, linear, poly, rbf or sigmoid. liblinear uses the [liblinear](https://www.csie.ntu.edu.tw/~cjlin/liblinear/) library, while the rest use the [libsvm](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) library through scikit-learn |
 | \* | max_iter | \<int\> | 1e5 | Hard limit on iterations within solver, or -1 for no limit. |
 | \* | random_state | \<int\> | None | Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when probability is False.<br>Pass an int for reproducible output across multiple function calls |
 | | max_depth | \<int\> | None | Specifies the maximum depth of the tree |
 | \* | tol | \<float\> | 1e-4 | Tolerance for stopping criterion. |
 | \* | degree | \<int\> | 3 | Degree of the polynomial kernel function (poly). Ignored by all other kernels. |
 | \* | gamma | {"scale", "auto"} or \<float\> | scale | Kernel coefficient for rbf and poly.<br>if gamma='scale' (default) is passed then it uses 1 / (n_features \* X.var()) as value of gamma,<br>if auto, uses 1 / n_features. |
-| | split_criteria | {"impurity", "max_samples"} | impurity | Decides (just in case of a multi class classification) which column (class) use to split the dataset in a node\*\* |
+| | split_criteria | {"impurity", "max_samples"} | impurity | Decides (only in multiclass classification) which column (class) to use to split the dataset in a node\*\*. max_samples is incompatible with the 'ovo' multiclass_strategy |
 | | criterion | {“gini”, “entropy”} | entropy | The function to measure the quality of a split (only used if max_features != num_features). <br>Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. |
 | | min_samples_split | \<int\> | 0 | The minimum number of samples required to split an internal node. 0 (default) for any |
 | | max_features | \<int\>, \<float\> <br><br>or {“auto”, “sqrt”, “log2”} | None | The number of features to consider when looking for the split:<br>If int, then consider max_features features at each split.<br>If float, then max_features is a fraction and int(max_features \* n_features) features are considered at each split.<br>If “auto”, then max_features=sqrt(n_features).<br>If “sqrt”, then max_features=sqrt(n_features).<br>If “log2”, then max_features=log2(n_features).<br>If None, then max_features=n_features. |
-| | splitter | {"best", "random"} | random | The strategy used to choose the feature set at each node (only used if max_features < num_features). <br>Supported strategies are “best” to choose the best feature set and “random” to choose a random combination. <br>The algorithm generates 5 candidates at most to choose from if random is selected. If best is selected sklearn SelectKBest algorithm is used in every node to choose the _max_features_ best features |
+| | splitter | {"best", "random", "mutual"} | "random" | The strategy used to choose the feature set at each node (only used if max_features < num_features). Supported strategies are: **"best"**: sklearn SelectKBest algorithm is used in every node to choose the max_features best features. **"random"**: the algorithm generates 5 candidates and chooses one randomly. **"mutual"**: chooses the best features w.r.t. their mutual information with the label |
 | | normalize | \<bool\> | False | If standardization of features should be applied on each node with the samples that reach it |
+| \* | multiclass_strategy | {"ovo", "ovr"} | "ovo" | Strategy to use with multiclass datasets, **"ovo"**: one versus one. **"ovr"**: one versus rest |
 
 \* Hyperparameter used by the support vector classifier of every node
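
As a quick illustration of the new hyperparameters, here is a minimal usage sketch (the `from stree import Stree` import path and the choice of dataset are assumptions for the example, not part of this commit):

```python
from sklearn.datasets import load_wine

from stree import Stree  # assumed package-level import

X, y = load_wine(return_X_y=True)

# "ovo" is now the default multiclass strategy for the libsvm kernels
clf_ovo = Stree(kernel="rbf", multiclass_strategy="ovo", random_state=0)
# the liblinear kernel only works with "ovr"
clf_ovr = Stree(kernel="liblinear", multiclass_strategy="ovr", random_state=0)

print(clf_ovo.fit(X, y).score(X, y))
print(clf_ovr.fit(X, y).score(X, y))
```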

View File

@@ -1,21 +1,22 @@
-# Hyperparameters
+## Hyperparameters
 
 |     | **Hyperparameter**  | **Type/Values** | **Default** | **Meaning** |
 | --- | ------------------- | --------------- | ----------- | ----------- |
 | \* | C | \<float\> | 1.0 | Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. |
-| \* | kernel | {"linear", "poly", "rbf"} | linear | Specifies the kernel type to be used in the algorithm. It must be one of linear, poly or rbf. |
+| \* | kernel | {"liblinear", "linear", "poly", "rbf", "sigmoid"} | linear | Specifies the kernel type to be used in the algorithm. It must be one of liblinear, linear, poly, rbf or sigmoid. liblinear uses the [liblinear](https://www.csie.ntu.edu.tw/~cjlin/liblinear/) library, while the rest use the [libsvm](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) library through scikit-learn |
 | \* | max_iter | \<int\> | 1e5 | Hard limit on iterations within solver, or -1 for no limit. |
 | \* | random_state | \<int\> | None | Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when probability is False.<br>Pass an int for reproducible output across multiple function calls |
 | | max_depth | \<int\> | None | Specifies the maximum depth of the tree |
 | \* | tol | \<float\> | 1e-4 | Tolerance for stopping criterion. |
 | \* | degree | \<int\> | 3 | Degree of the polynomial kernel function (poly). Ignored by all other kernels. |
 | \* | gamma | {"scale", "auto"} or \<float\> | scale | Kernel coefficient for rbf and poly.<br>if gamma='scale' (default) is passed then it uses 1 / (n_features \* X.var()) as value of gamma,<br>if auto, uses 1 / n_features. |
-| | split_criteria | {"impurity", "max_samples"} | impurity | Decides (just in case of a multi class classification) which column (class) use to split the dataset in a node\*\* |
+| | split_criteria | {"impurity", "max_samples"} | impurity | Decides (only in multiclass classification) which column (class) to use to split the dataset in a node\*\*. max_samples is incompatible with the 'ovo' multiclass_strategy |
 | | criterion | {“gini”, “entropy”} | entropy | The function to measure the quality of a split (only used if max_features != num_features). <br>Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. |
 | | min_samples_split | \<int\> | 0 | The minimum number of samples required to split an internal node. 0 (default) for any |
 | | max_features | \<int\>, \<float\> <br><br>or {“auto”, “sqrt”, “log2”} | None | The number of features to consider when looking for the split:<br>If int, then consider max_features features at each split.<br>If float, then max_features is a fraction and int(max_features \* n_features) features are considered at each split.<br>If “auto”, then max_features=sqrt(n_features).<br>If “sqrt”, then max_features=sqrt(n_features).<br>If “log2”, then max_features=log2(n_features).<br>If None, then max_features=n_features. |
-| | splitter | {"best", "random"} | random | The strategy used to choose the feature set at each node (only used if max_features != num_features). <br>Supported strategies are “best” to choose the best feature set and “random” to choose a random combination. <br>The algorithm generates 5 candidates at most to choose from in both strategies. |
+| | splitter | {"best", "random", "mutual"} | "random" | The strategy used to choose the feature set at each node (only used if max_features < num_features). Supported strategies are: **"best"**: sklearn SelectKBest algorithm is used in every node to choose the max_features best features. **"random"**: the algorithm generates 5 candidates and chooses one randomly. **"mutual"**: chooses the best features w.r.t. their mutual information with the label |
 | | normalize | \<bool\> | False | If standardization of features should be applied on each node with the samples that reach it |
+| \* | multiclass_strategy | {"ovo", "ovr"} | "ovo" | Strategy to use with multiclass datasets, **"ovo"**: one versus one. **"ovr"**: one versus rest |
 
 \* Hyperparameter used by the support vector classifier of every node
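
The incompatibilities noted in the table are enforced when `fit` is called; a short sketch of the combinations that raise `ValueError` (import path assumed, as above):

```python
from sklearn.datasets import load_wine

from stree import Stree  # assumed package-level import

X, y = load_wine(return_X_y=True)

for params in (
    {"kernel": "liblinear", "multiclass_strategy": "ovo"},  # liblinear is ovr-only
    {"multiclass_strategy": "ovo", "split_criteria": "max_samples"},
    {"multiclass_strategy": "other"},  # unknown strategy
):
    try:
        Stree(**params).fit(X, y)
    except ValueError as error:
        print(params, "->", error)
```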

View File

@@ -155,6 +155,10 @@ class Siterator:
         self._stack = []
         self._push(tree)
 
+    def __iter__(self):
+        # To complete the iterator interface
+        return self
+
     def _push(self, node: Snode):
         if node is not None:
             self._stack.append(node)
@@ -373,16 +377,17 @@ class Splitter:
         tuple
             indices of the features selected
         """
-        # No feature reduction
         if dataset.shape[1] == max_features:
+            # No feature reduction applies
             return tuple(range(dataset.shape[1]))
+        # Random feature reduction
         if self._feature_select == "random":
             features_sets = self._generate_spaces(
                 dataset.shape[1], max_features
             )
             return self._select_best_set(dataset, labels, features_sets)
+        # return the KBest features
         if self._feature_select == "best":
-            # Take KBest features
             return (
                 SelectKBest(k=max_features)
                 .fit(dataset, labels)
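
To make the `Splitter` changes easier to follow, below is a simplified, standalone sketch of how the three `splitter` strategies pick a per-node feature subset. The function name and the shortcut of returning the first random candidate are illustrative assumptions; the real implementation keeps more state and scores the random candidates before choosing one.

```python
import numpy as np
from sklearn.feature_selection import SelectKBest, mutual_info_classif


def select_subspace(X, y, max_features, strategy, random_state=0):
    """Return a tuple of feature indices for one node (illustrative only)."""
    n_features = X.shape[1]
    if n_features == max_features:
        # No feature reduction applies
        return tuple(range(n_features))
    if strategy == "random":
        # Draw 5 candidate subsets; the real Splitter scores them and keeps
        # the best one, here we simply take the first candidate.
        rng = np.random.default_rng(random_state)
        candidates = [
            tuple(sorted(rng.choice(n_features, max_features, replace=False)))
            for _ in range(5)
        ]
        return candidates[0]
    if strategy == "best":
        # KBest features, as in the hunk above
        return tuple(
            SelectKBest(k=max_features).fit(X, y).get_support(indices=True)
        )
    # "mutual": features with the highest mutual information with the label
    scores = mutual_info_classif(X, y, random_state=random_state)
    return tuple(np.argsort(scores)[-max_features:])
```
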
@@ -569,6 +574,7 @@ class Stree(BaseEstimator, ClassifierMixin):
         min_samples_split: int = 0,
         max_features=None,
         splitter: str = "random",
+        multiclass_strategy: str = "ovo",
         normalize: bool = False,
     ):
         self.max_iter = max_iter
@@ -585,6 +591,7 @@ class Stree(BaseEstimator, ClassifierMixin):
         self.criterion = criterion
         self.splitter = splitter
         self.normalize = normalize
+        self.multiclass_strategy = multiclass_strategy
 
     def _more_tags(self) -> dict:
         """Required by sklearn to supply features of the classifier
@@ -629,7 +636,23 @@ class Stree(BaseEstimator, ClassifierMixin):
f"Maximum depth has to be greater than 1... got (max_depth=\ f"Maximum depth has to be greater than 1... got (max_depth=\
{self.max_depth})" {self.max_depth})"
) )
kernels = ["linear", "rbf", "poly", "sigmoid"] if self.multiclass_strategy not in ["ovr", "ovo"]:
raise ValueError(
"mutliclass_strategy has to be either ovr or ovo"
f" but got {self.multiclass_strategy}"
)
if self.multiclass_strategy == "ovo":
if self.kernel == "liblinear":
raise ValueError(
"The kernel liblinear is incompatible with ovo "
"multiclass_strategy"
)
if self.split_criteria == "max_samples":
raise ValueError(
"The multiclass_strategy 'ovo' is incompatible with "
"split_criteria 'max_samples'"
)
kernels = ["liblinear", "linear", "rbf", "poly", "sigmoid"]
if self.kernel not in kernels: if self.kernel not in kernels:
raise ValueError(f"Kernel {self.kernel} not in {kernels}") raise ValueError(f"Kernel {self.kernel} not in {kernels}")
check_classification_targets(y) check_classification_targets(y)
@@ -749,7 +772,7 @@ class Stree(BaseEstimator, ClassifierMixin):
                 C=self.C,
                 tol=self.tol,
             )
-            if self.kernel == "linear"
+            if self.kernel == "liblinear"
             else SVC(
                 kernel=self.kernel,
                 max_iter=self.max_iter,
@@ -758,6 +781,7 @@ class Stree(BaseEstimator, ClassifierMixin):
                 gamma=self.gamma,
                 degree=self.degree,
                 random_state=self.random_state,
+                decision_function_shape=self.multiclass_strategy,
             )
         )
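
A compact sketch of the selection logic these two hunks implement: "liblinear" maps to scikit-learn's LinearSVC (which is one-versus-rest by design), every other kernel maps to SVC, whose decision_function_shape follows multiclass_strategy. The helper name and the default values below are illustrative; the real code builds the estimator inline inside Stree.

```python
from sklearn.svm import SVC, LinearSVC


def build_node_clf(kernel, multiclass_strategy, C=1.0, tol=1e-4,
                   max_iter=100_000, gamma="scale", degree=3,
                   random_state=None):
    if kernel == "liblinear":
        # liblinear-backed classifier, inherently one-vs-rest
        return LinearSVC(
            max_iter=max_iter, C=C, tol=tol, random_state=random_state
        )
    # libsvm-backed classifier for linear, rbf, poly and sigmoid kernels
    return SVC(
        kernel=kernel,
        max_iter=max_iter,
        C=C,
        tol=tol,
        gamma=gamma,
        degree=degree,
        random_state=random_state,
        decision_function_shape=multiclass_strategy,  # "ovo" or "ovr"
    )
```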

View File

@@ -1,6 +1,6 @@
 from .Strees import Stree, Snode, Siterator, Splitter
 
-__version__ = "1.0"
+__version__ = "1.1"
 __author__ = "Ricardo Montañana Gómez"
 __copyright__ = "Copyright 2020-2021, Ricardo Montañana Gómez"

View File

@@ -8,7 +8,11 @@ from .utils import load_dataset
 class Snode_test(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         self._random_state = 1
-        self._clf = Stree(random_state=self._random_state)
+        self._clf = Stree(
+            random_state=self._random_state,
+            kernel="liblinear",
+            multiclass_strategy="ovr",
+        )
         self._clf.fit(*load_dataset(self._random_state))
         super().__init__(*args, **kwargs)

View File

@@ -14,7 +14,7 @@ from .utils import load_dataset
 class Stree_test(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         self._random_state = 1
-        self._kernels = ["linear", "rbf", "poly"]
+        self._kernels = ["liblinear", "linear", "rbf", "poly", "sigmoid"]
         super().__init__(*args, **kwargs)
 
     @classmethod
@@ -22,10 +22,9 @@ class Stree_test(unittest.TestCase):
os.environ["TESTING"] = "1" os.environ["TESTING"] = "1"
def test_valid_kernels(self): def test_valid_kernels(self):
valid_kernels = ["linear", "rbf", "poly", "sigmoid"]
X, y = load_dataset() X, y = load_dataset()
for kernel in valid_kernels: for kernel in self._kernels:
clf = Stree(kernel=kernel) clf = Stree(kernel=kernel, multiclass_strategy="ovr")
clf.fit(X, y) clf.fit(X, y)
self.assertIsNotNone(clf.tree_) self.assertIsNotNone(clf.tree_)
@@ -55,14 +54,19 @@ class Stree_test(unittest.TestCase):
         # i.e. The partition algorithm didn't forget any sample
         self.assertEqual(node._y.shape[0], y_down.shape[0] + y_up.shape[0])
         unique_y, count_y = np.unique(node._y, return_counts=True)
-        _, count_d = np.unique(y_down, return_counts=True)
-        _, count_u = np.unique(y_up, return_counts=True)
+        labels_d, count_d = np.unique(y_down, return_counts=True)
+        labels_u, count_u = np.unique(y_up, return_counts=True)
+        dict_d = {label: count_d[i] for i, label in enumerate(labels_d)}
+        dict_u = {label: count_u[i] for i, label in enumerate(labels_u)}
         #
         for i in unique_y:
-            number_up = count_u[i]
             try:
-                number_down = count_d[i]
-            except IndexError:
+                number_up = dict_u[i]
+            except KeyError:
+                number_up = 0
+            try:
+                number_down = dict_d[i]
+            except KeyError:
                 number_down = 0
             self.assertEqual(count_y[i], number_down + number_up)
         # Is the partition made the same as the prediction?
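
This hunk is the "weak test" fix mentioned in the commit message. A tiny numpy illustration of the pitfall (values are made up for the example): np.unique returns counts aligned with the sorted unique values, not indexed by label, so a positional lookup like `count_u[i]` reads the wrong slot, or raises, whenever a class is missing from a partition. Keying the counts by label avoids that.

```python
import numpy as np

y_up = np.array([0, 2, 2, 2])          # class 1 is absent from this partition
labels_u, count_u = np.unique(y_up, return_counts=True)
print(labels_u, count_u)                # [0 2] [1 3]

# Old style: positional lookup by label value
print(count_u[1])                       # 3, silently credited to class 1, which has 0 samples
# count_u[2] would raise IndexError instead of giving the count of class 2

# New style: label-keyed lookup, as in the fixed test
dict_u = dict(zip(labels_u, count_u))
print(dict_u.get(1, 0), dict_u.get(2, 0))  # 0 3
```
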
@@ -77,14 +81,22 @@ class Stree_test(unittest.TestCase):
"""Check if the tree is built the same way as predictions of models""" """Check if the tree is built the same way as predictions of models"""
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
for kernel in self._kernels: for kernel in self._kernels:
clf = Stree(kernel=kernel, random_state=self._random_state) clf = Stree(
kernel="sigmoid",
multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
random_state=self._random_state,
)
clf.fit(*load_dataset(self._random_state)) clf.fit(*load_dataset(self._random_state))
self._check_tree(clf.tree_) self._check_tree(clf.tree_)
def test_single_prediction(self): def test_single_prediction(self):
X, y = load_dataset(self._random_state) X, y = load_dataset(self._random_state)
for kernel in self._kernels: for kernel in self._kernels:
clf = Stree(kernel=kernel, random_state=self._random_state) clf = Stree(
kernel=kernel,
multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
random_state=self._random_state,
)
yp = clf.fit(X, y).predict((X[0, :].reshape(-1, X.shape[1]))) yp = clf.fit(X, y).predict((X[0, :].reshape(-1, X.shape[1])))
self.assertEqual(yp[0], y[0]) self.assertEqual(yp[0], y[0])
@@ -92,8 +104,12 @@ class Stree_test(unittest.TestCase):
         # First 27 elements the predictions are the same as the truth
         num = 27
         X, y = load_dataset(self._random_state)
-        for kernel in self._kernels:
-            clf = Stree(kernel=kernel, random_state=self._random_state)
+        for kernel in ["liblinear", "linear", "rbf", "poly"]:
+            clf = Stree(
+                kernel=kernel,
+                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
+                random_state=self._random_state,
+            )
             yp = clf.fit(X, y).predict(X[:num, :])
             self.assertListEqual(y[:num].tolist(), yp.tolist())
@@ -103,7 +119,11 @@ class Stree_test(unittest.TestCase):
""" """
X, y = load_dataset(self._random_state) X, y = load_dataset(self._random_state)
for kernel in self._kernels: for kernel in self._kernels:
clf = Stree(kernel=kernel, random_state=self._random_state) clf = Stree(
kernel=kernel,
multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
random_state=self._random_state,
)
clf.fit(X, y) clf.fit(X, y)
# Compute prediction line by line # Compute prediction line by line
yp_line = np.array([], dtype=int) yp_line = np.array([], dtype=int)
@@ -135,9 +155,13 @@ class Stree_test(unittest.TestCase):
         ]
         computed = []
         expected_string = ""
-        clf = Stree(kernel="linear", random_state=self._random_state)
+        clf = Stree(
+            kernel="liblinear",
+            multiclass_strategy="ovr",
+            random_state=self._random_state,
+        )
         clf.fit(*load_dataset(self._random_state))
-        for node in clf:
+        for node in iter(clf):
             computed.append(str(node))
             expected_string += str(node) + "\n"
         self.assertListEqual(expected, computed)
@@ -173,7 +197,12 @@ class Stree_test(unittest.TestCase):
     def test_check_max_depth(self):
         depths = (3, 4)
         for depth in depths:
-            tcl = Stree(random_state=self._random_state, max_depth=depth)
+            tcl = Stree(
+                kernel="liblinear",
+                multiclass_strategy="ovr",
+                random_state=self._random_state,
+                max_depth=depth,
+            )
             tcl.fit(*load_dataset(self._random_state))
             self.assertEqual(depth, tcl.depth_)
@@ -194,7 +223,7 @@ class Stree_test(unittest.TestCase):
         for kernel in self._kernels:
             clf = Stree(
                 kernel=kernel,
-                split_criteria="max_samples",
+                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
                 random_state=self._random_state,
             )
             px = [[1, 2], [5, 6], [9, 10]]
@@ -205,26 +234,36 @@ class Stree_test(unittest.TestCase):
         self.assertListEqual(py, clf.classes_.tolist())
 
     def test_muticlass_dataset(self):
+        warnings.filterwarnings("ignore", category=ConvergenceWarning)
+        warnings.filterwarnings("ignore", category=RuntimeWarning)
         datasets = {
             "Synt": load_dataset(random_state=self._random_state, n_classes=3),
             "Iris": load_wine(return_X_y=True),
         }
         outcomes = {
             "Synt": {
-                "max_samples linear": 0.9606666666666667,
-                "max_samples rbf": 0.7133333333333334,
-                "max_samples poly": 0.618,
-                "impurity linear": 0.9606666666666667,
-                "impurity rbf": 0.7133333333333334,
-                "impurity poly": 0.618,
+                "max_samples liblinear": 0.9493333333333334,
+                "max_samples linear": 0.9426666666666667,
+                "max_samples rbf": 0.9606666666666667,
+                "max_samples poly": 0.9373333333333334,
+                "max_samples sigmoid": 0.824,
+                "impurity liblinear": 0.9493333333333334,
+                "impurity linear": 0.9426666666666667,
+                "impurity rbf": 0.9606666666666667,
+                "impurity poly": 0.9373333333333334,
+                "impurity sigmoid": 0.824,
             },
             "Iris": {
+                "max_samples liblinear": 0.9550561797752809,
                 "max_samples linear": 1.0,
-                "max_samples rbf": 0.6910112359550562,
-                "max_samples poly": 0.6966292134831461,
-                "impurity linear": 1,
-                "impurity rbf": 0.6910112359550562,
-                "impurity poly": 0.6966292134831461,
+                "max_samples rbf": 0.6685393258426966,
+                "max_samples poly": 0.6853932584269663,
+                "max_samples sigmoid": 0.6404494382022472,
+                "impurity liblinear": 0.9550561797752809,
+                "impurity linear": 1.0,
+                "impurity rbf": 0.6685393258426966,
+                "impurity poly": 0.6853932584269663,
+                "impurity sigmoid": 0.6404494382022472,
             },
         }
@@ -233,18 +272,22 @@ class Stree_test(unittest.TestCase):
for criteria in ["max_samples", "impurity"]: for criteria in ["max_samples", "impurity"]:
for kernel in self._kernels: for kernel in self._kernels:
clf = Stree( clf = Stree(
C=55, max_iter=1e4,
max_iter=1e5, multiclass_strategy="ovr"
if kernel == "liblinear"
else "ovo",
kernel=kernel, kernel=kernel,
random_state=self._random_state, random_state=self._random_state,
) )
clf.fit(px, py) clf.fit(px, py)
outcome = outcomes[name][f"{criteria} {kernel}"] outcome = outcomes[name][f"{criteria} {kernel}"]
# print( # print(f'"{criteria} {kernel}": {clf.score(px, py)},')
# f"{name} {criteria} {kernel} {outcome} {clf.score(px" self.assertAlmostEqual(
# ", py)}" outcome,
# ) clf.score(px, py),
self.assertAlmostEqual(outcome, clf.score(px, py)) 5,
f"{name} - {criteria} - {kernel}",
)
def test_max_features(self): def test_max_features(self):
n_features = 16 n_features = 16
@@ -312,17 +355,19 @@ class Stree_test(unittest.TestCase):
             clf.predict(X[:, :3])
 
     # Tests of score
 
     def test_score_binary(self):
         X, y = load_dataset(self._random_state)
         accuracies = [
             0.9506666666666667,
+            0.9493333333333334,
             0.9606666666666667,
             0.9433333333333334,
+            0.9153333333333333,
         ]
         for kernel, accuracy_expected in zip(self._kernels, accuracies):
             clf = Stree(
                 random_state=self._random_state,
+                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
                 kernel=kernel,
             )
             clf.fit(X, y)
@@ -334,7 +379,12 @@ class Stree_test(unittest.TestCase):
     def test_score_max_features(self):
         X, y = load_dataset(self._random_state)
-        clf = Stree(random_state=self._random_state, max_features=2)
+        clf = Stree(
+            kernel="liblinear",
+            multiclass_strategy="ovr",
+            random_state=self._random_state,
+            max_features=2,
+        )
         clf.fit(X, y)
         self.assertAlmostEqual(0.9453333333333334, clf.score(X, y))
@@ -346,7 +396,9 @@ class Stree_test(unittest.TestCase):
     def test_multiclass_classifier_integrity(self):
         """Checks if the multiclass operation is done right"""
         X, y = load_iris(return_X_y=True)
-        clf = Stree(random_state=0)
+        clf = Stree(
+            kernel="liblinear", multiclass_strategy="ovr", random_state=0
+        )
         clf.fit(X, y)
         score = clf.score(X, y)
         # Check accuracy of the whole model
@@ -402,10 +454,10 @@ class Stree_test(unittest.TestCase):
         clf2 = Stree(
             kernel="rbf", random_state=self._random_state, normalize=True
         )
-        self.assertEqual(0.768, clf.fit(X, y).score(X, y))
-        self.assertEqual(0.814, clf2.fit(X, y).score(X, y))
+        self.assertEqual(0.966, clf.fit(X, y).score(X, y))
+        self.assertEqual(0.964, clf2.fit(X, y).score(X, y))
         X, y = load_wine(return_X_y=True)
-        self.assertEqual(0.6741573033707865, clf.fit(X, y).score(X, y))
+        self.assertEqual(0.6685393258426966, clf.fit(X, y).score(X, y))
         self.assertEqual(1.0, clf2.fit(X, y).score(X, y))
 
     def test_score_multiclass_poly(self):
@@ -423,24 +475,78 @@ class Stree_test(unittest.TestCase):
             random_state=self._random_state,
             normalize=True,
         )
-        self.assertEqual(0.786, clf.fit(X, y).score(X, y))
-        self.assertEqual(0.818, clf2.fit(X, y).score(X, y))
+        self.assertEqual(0.946, clf.fit(X, y).score(X, y))
+        self.assertEqual(0.972, clf2.fit(X, y).score(X, y))
         X, y = load_wine(return_X_y=True)
-        self.assertEqual(0.702247191011236, clf.fit(X, y).score(X, y))
-        self.assertEqual(0.6067415730337079, clf2.fit(X, y).score(X, y))
+        self.assertEqual(0.7808988764044944, clf.fit(X, y).score(X, y))
+        self.assertEqual(1.0, clf2.fit(X, y).score(X, y))
+
+    def test_score_multiclass_liblinear(self):
+        X, y = load_dataset(
+            random_state=self._random_state,
+            n_classes=3,
+            n_features=5,
+            n_samples=500,
+        )
+        clf = Stree(
+            kernel="liblinear",
+            multiclass_strategy="ovr",
+            random_state=self._random_state,
+            C=10,
+        )
+        clf2 = Stree(
+            kernel="liblinear",
+            multiclass_strategy="ovr",
+            random_state=self._random_state,
+            normalize=True,
+        )
+        self.assertEqual(0.968, clf.fit(X, y).score(X, y))
+        self.assertEqual(0.97, clf2.fit(X, y).score(X, y))
+        X, y = load_wine(return_X_y=True)
+        self.assertEqual(1.0, clf.fit(X, y).score(X, y))
+        self.assertEqual(1.0, clf2.fit(X, y).score(X, y))
+
+    def test_score_multiclass_sigmoid(self):
+        X, y = load_dataset(
+            random_state=self._random_state,
+            n_classes=3,
+            n_features=5,
+            n_samples=500,
+        )
+        clf = Stree(kernel="sigmoid", random_state=self._random_state, C=10)
+        clf2 = Stree(
+            kernel="sigmoid",
+            random_state=self._random_state,
+            normalize=True,
+            C=10,
+        )
+        self.assertEqual(0.796, clf.fit(X, y).score(X, y))
+        self.assertEqual(0.952, clf2.fit(X, y).score(X, y))
+        X, y = load_wine(return_X_y=True)
+        self.assertEqual(0.6910112359550562, clf.fit(X, y).score(X, y))
+        self.assertEqual(0.9662921348314607, clf2.fit(X, y).score(X, y))
 
     def test_score_multiclass_linear(self):
+        warnings.filterwarnings("ignore", category=ConvergenceWarning)
+        warnings.filterwarnings("ignore", category=RuntimeWarning)
         X, y = load_dataset(
             random_state=self._random_state,
             n_classes=3,
             n_features=5,
             n_samples=1500,
         )
-        clf = Stree(kernel="linear", random_state=self._random_state)
+        clf = Stree(
+            kernel="liblinear",
+            multiclass_strategy="ovr",
+            random_state=self._random_state,
+        )
         self.assertEqual(0.9533333333333334, clf.fit(X, y).score(X, y))
         # Check with context based standardization
         clf2 = Stree(
-            kernel="linear", random_state=self._random_state, normalize=True
+            kernel="liblinear",
+            multiclass_strategy="ovr",
+            random_state=self._random_state,
+            normalize=True,
         )
         self.assertEqual(0.9526666666666667, clf2.fit(X, y).score(X, y))
         X, y = load_wine(return_X_y=True)
@@ -467,7 +573,7 @@ class Stree_test(unittest.TestCase):
             ]
         )
         y = np.array([1, 1, 1, 2, 2, 2, 5, 5, 5])
-        yw = np.array([1, 1, 1, 5, 5, 5, 5, 5, 5])
+        yw = np.array([1, 1, 1, 1, 1, 1, 5, 5, 5])
         w = [1, 1, 1, 0, 0, 0, 1, 1, 1]
         model1 = Stree().fit(X, y)
         model2 = Stree().fit(X, y, w)
@@ -504,14 +610,14 @@ class Stree_test(unittest.TestCase):
         clf = Stree(random_state=self._random_state)
         clf.fit(X, y)
         nodes, leaves = clf.nodes_leaves()
-        self.assertEqual(25, nodes)
-        self.assertEqual(13, leaves)
+        self.assertEqual(31, nodes)
+        self.assertEqual(16, leaves)
         X, y = load_wine(return_X_y=True)
         clf = Stree(random_state=self._random_state)
         clf.fit(X, y)
         nodes, leaves = clf.nodes_leaves()
-        self.assertEqual(9, nodes)
-        self.assertEqual(5, leaves)
+        self.assertEqual(11, nodes)
+        self.assertEqual(6, leaves)
 
     def test_nodes_leaves_artificial(self):
         n1 = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test1")
@@ -530,3 +636,27 @@ class Stree_test(unittest.TestCase):
         nodes, leaves = clf.nodes_leaves()
         self.assertEqual(6, nodes)
         self.assertEqual(2, leaves)
+
+    def test_bogus_multiclass_strategy(self):
+        clf = Stree(multiclass_strategy="other")
+        X, y = load_wine(return_X_y=True)
+        with self.assertRaises(ValueError):
+            clf.fit(X, y)
+
+    def test_multiclass_strategy(self):
+        X, y = load_wine(return_X_y=True)
+        clf_o = Stree(multiclass_strategy="ovo")
+        clf_r = Stree(multiclass_strategy="ovr")
+        score_o = clf_o.fit(X, y).score(X, y)
+        score_r = clf_r.fit(X, y).score(X, y)
+        self.assertEqual(1.0, score_o)
+        self.assertEqual(0.9269662921348315, score_r)
+
+    def test_incompatible_hyperparameters(self):
+        X, y = load_wine(return_X_y=True)
+        clf = Stree(kernel="liblinear", multiclass_strategy="ovo")
+        with self.assertRaises(ValueError):
+            clf.fit(X, y)
+        clf = Stree(multiclass_strategy="ovo", split_criteria="max_samples")
+        with self.assertRaises(ValueError):
+            clf.fit(X, y)