From 5a36c5d29b76a3e3fe19d4d131b8dd82b46ac7ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?=
Date: Mon, 10 May 2021 09:10:39 +0200
Subject: [PATCH] Implement ovo strategy

Add kernel liblinear with LinearSVC classifier
Set ovo strategy as default
---
 README.md                      |  33 +++---
 docs/source/hyperparameters.md |  35 +++---
 stree/Strees.py                |  28 ++++-
 stree/__init__.py              |   2 +-
 stree/tests/Snode_test.py      |   6 +-
 stree/tests/Stree_test.py      | 203 +++++++++++++++++++++++++++------
 6 files changed, 230 insertions(+), 77 deletions(-)

diff --git a/README.md b/README.md
index f8c4139..b8ac6e9 100644
--- a/README.md
+++ b/README.md
@@ -34,22 +34,23 @@ Can be found in

 ## Hyperparameters

-| | **Hyperparameter** | **Type/Values** | **Default** | **Meaning** |
-| --- | ------------------ | ------------------------------------------------------ | ----------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| \* | C | \<float\> | 1.0 | Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. |
-| \* | kernel | {"linear", "poly", "rbf", "sigmoid"} | linear | Specifies the kernel type to be used in the algorithm. It must be one of ‘linear’, ‘poly’ or ‘rbf’. |
-| \* | max_iter | \<int\> | 1e5 | Hard limit on iterations within solver, or -1 for no limit. |
-| \* | random_state | \<int\> | None | Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when probability is False.

Pass an int for reproducible output across multiple function calls |
-| | max_depth | \<int\> | None | Specifies the maximum depth of the tree |
-| \* | tol | \<float\> | 1e-4 | Tolerance for stopping criterion. |
-| \* | degree | \<int\> | 3 | Degree of the polynomial kernel function (‘poly’). Ignored by all other kernels. |
-| \* | gamma | {"scale", "auto"} or \<float\> | scale | Kernel coefficient for ‘rbf’ and ‘poly’.

if gamma='scale' (default) is passed then it uses 1 / (n_features \* X.var()) as value of gamma,
if ‘auto’, uses 1 / n_features. | -| | split_criteria | {"impurity", "max_samples"} | impurity | Decides (just in case of a multi class classification) which column (class) use to split the dataset in a node\*\* | -| | criterion | {“gini”, “entropy”} | entropy | The function to measure the quality of a split (only used if max_features != num_features).
Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. |
-| | min_samples_split | \<int\> | 0 | The minimum number of samples required to split an internal node. 0 (default) for any |
-| | max_features | \<int\>, \<float\>


or {“auto”, “sqrt”, “log2”} | None | The number of features to consider when looking for the split:
If int, then consider max_features features at each split.
If float, then max_features is a fraction and int(max_features \* n_features) features are considered at each split.
If “auto”, then max_features=sqrt(n_features).
If “sqrt”, then max_features=sqrt(n_features).
If “log2”, then max_features=log2(n_features).
If None, then max_features=n_features. | -| | splitter | {"best", "random"} | random | The strategy used to choose the feature set at each node (only used if max_features < num_features).
Supported strategies are “best” to choose the best feature set and “random” to choose a random combination.
The algorithm generates 5 candidates at most to choose from if random is selected. If best is selected sklearn SelectKBest algorithm is used in every node to choose the _max_features_ best features |
-| | normalize | \<bool\> | False | If standardization of features should be applied on each node with the samples that reach it |
+| | **Hyperparameter** | **Type/Values** | **Default** | **Meaning** |
+| --- | ------------------- | ------------------------------------------------------ | ----------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| \* | C | \<float\> | 1.0 | Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. |
+| \* | kernel | {"liblinear", "linear", "poly", "rbf", "sigmoid"} | linear | Specifies the kernel type to be used in the algorithm. It must be one of ‘liblinear’, ‘linear’, ‘poly’, ‘rbf’ or ‘sigmoid’. ‘liblinear’ uses the [liblinear](https://www.csie.ntu.edu.tw/~cjlin/liblinear/) library, while the rest use the [libsvm](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) library through scikit-learn |
+| \* | max_iter | \<int\> | 1e5 | Hard limit on iterations within solver, or -1 for no limit. |
+| \* | random_state | \<int\> | None | Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when probability is False.

Pass an int for reproducible output across multiple function calls |
+| | max_depth | \<int\> | None | Specifies the maximum depth of the tree |
+| \* | tol | \<float\> | 1e-4 | Tolerance for stopping criterion. |
+| \* | degree | \<int\> | 3 | Degree of the polynomial kernel function (‘poly’). Ignored by all other kernels. |
+| \* | gamma | {"scale", "auto"} or \<float\> | scale | Kernel coefficient for ‘rbf’ and ‘poly’.

if gamma='scale' (default) is passed then it uses 1 / (n_features \* X.var()) as value of gamma,
if ‘auto’, uses 1 / n_features. |
+| | split_criteria | {"impurity", "max_samples"} | impurity | Decides (only in multiclass classification) which column (class) to use to split the dataset in a node\*\*. max_samples is incompatible with the 'ovo' multiclass_strategy |
+| | criterion | {“gini”, “entropy”} | entropy | The function to measure the quality of a split (only used if max_features != num_features).

Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. |
+| | min_samples_split | \<int\> | 0 | The minimum number of samples required to split an internal node. 0 (default) for any |
+| | max_features | \<int\>, \<float\>


or {“auto”, “sqrt”, “log2”} | None | The number of features to consider when looking for the split:
If int, then consider max_features features at each split.
If float, then max_features is a fraction and int(max_features \* n_features) features are considered at each split.
If “auto”, then max_features=sqrt(n_features).
If “sqrt”, then max_features=sqrt(n_features).
If “log2”, then max_features=log2(n_features).
If None, then max_features=n_features. |
+| | splitter | {"best", "random", "mutual"} | "random" | The strategy used to choose the feature set at each node (only used if max_features < num_features). Supported strategies are: **“best”**: sklearn SelectKBest algorithm is used in every node to choose the max_features best features. **“random”**: The algorithm generates 5 candidates and chooses one randomly. **"mutual"**: Chooses the best features w.r.t. their mutual information with the label |
+| | normalize | \<bool\> | False | If standardization of features should be applied on each node with the samples that reach it |
+| \* | multiclass_strategy | {"ovo", "ovr"} | "ovo" | Strategy to use with multiclass datasets. **"ovo"**: one versus one; **"ovr"**: one versus rest |

\* Hyperparameter used by the support vector classifier of every node

diff --git a/docs/source/hyperparameters.md b/docs/source/hyperparameters.md
index a4fa6f7..6ffbc3d 100644
--- a/docs/source/hyperparameters.md
+++ b/docs/source/hyperparameters.md
@@ -1,21 +1,22 @@
-# Hyperparameters
+## Hyperparameters

-| | **Hyperparameter** | **Type/Values** | **Default** | **Meaning** |
-| --- | ------------------ | ------------------------------------------------------ | ----------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| \* | C | \<float\> | 1.0 | Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. |
-| \* | kernel | {"linear", "poly", "rbf"} | linear | Specifies the kernel type to be used in the algorithm. It must be one of ‘linear’, ‘poly’ or ‘rbf’. |
-| \* | max_iter | \<int\> | 1e5 | Hard limit on iterations within solver, or -1 for no limit. |
-| \* | random_state | \<int\> | None | Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when probability is False.

Pass an int for reproducible output across multiple function calls |
-| | max_depth | \<int\> | None | Specifies the maximum depth of the tree |
-| \* | tol | \<float\> | 1e-4 | Tolerance for stopping criterion. |
-| \* | degree | \<int\> | 3 | Degree of the polynomial kernel function (‘poly’). Ignored by all other kernels. |
-| \* | gamma | {"scale", "auto"} or \<float\> | scale | Kernel coefficient for ‘rbf’ and ‘poly’.

if gamma='scale' (default) is passed then it uses 1 / (n_features \* X.var()) as value of gamma,
if ‘auto’, uses 1 / n_features. | -| | split_criteria | {"impurity", "max_samples"} | impurity | Decides (just in case of a multi class classification) which column (class) use to split the dataset in a node\*\* | -| | criterion | {“gini”, “entropy”} | entropy | The function to measure the quality of a split (only used if max_features != num_features).
Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. |
-| | min_samples_split | \<int\> | 0 | The minimum number of samples required to split an internal node. 0 (default) for any |
-| | max_features | \<int\>, \<float\>


or {“auto”, “sqrt”, “log2”} | None | The number of features to consider when looking for the split:
If int, then consider max_features features at each split.
If float, then max_features is a fraction and int(max_features \* n_features) features are considered at each split.
If “auto”, then max_features=sqrt(n_features).
If “sqrt”, then max_features=sqrt(n_features).
If “log2”, then max_features=log2(n_features).
If None, then max_features=n_features. | -| | splitter | {"best", "random"} | random | The strategy used to choose the feature set at each node (only used if max_features != num_features).
Supported strategies are “best” to choose the best feature set and “random” to choose a random combination.
The algorithm generates 5 candidates at most to choose from in both strategies. |
-| | normalize | \<bool\> | False | If standardization of features should be applied on each node with the samples that reach it |
+| | **Hyperparameter** | **Type/Values** | **Default** | **Meaning** |
+| --- | ------------------- | ------------------------------------------------------ | ----------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| \* | C | \<float\> | 1.0 | Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. |
+| \* | kernel | {"liblinear", "linear", "poly", "rbf", "sigmoid"} | linear | Specifies the kernel type to be used in the algorithm. It must be one of ‘liblinear’, ‘linear’, ‘poly’, ‘rbf’ or ‘sigmoid’. ‘liblinear’ uses the [liblinear](https://www.csie.ntu.edu.tw/~cjlin/liblinear/) library, while the rest use the [libsvm](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) library through scikit-learn |
+| \* | max_iter | \<int\> | 1e5 | Hard limit on iterations within solver, or -1 for no limit. |
+| \* | random_state | \<int\> | None | Controls the pseudo random number generation for shuffling the data for probability estimates. Ignored when probability is False.

Pass an int for reproducible output across multiple function calls |
+| | max_depth | \<int\> | None | Specifies the maximum depth of the tree |
+| \* | tol | \<float\> | 1e-4 | Tolerance for stopping criterion. |
+| \* | degree | \<int\> | 3 | Degree of the polynomial kernel function (‘poly’). Ignored by all other kernels. |
+| \* | gamma | {"scale", "auto"} or \<float\> | scale | Kernel coefficient for ‘rbf’ and ‘poly’.

if gamma='scale' (default) is passed then it uses 1 / (n_features \* X.var()) as value of gamma,
if ‘auto’, uses 1 / n_features. |
+| | split_criteria | {"impurity", "max_samples"} | impurity | Decides (only in multiclass classification) which column (class) to use to split the dataset in a node\*\*. max_samples is incompatible with the 'ovo' multiclass_strategy |
+| | criterion | {“gini”, “entropy”} | entropy | The function to measure the quality of a split (only used if max_features != num_features).

Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. |
+| | min_samples_split | \<int\> | 0 | The minimum number of samples required to split an internal node. 0 (default) for any |
+| | max_features | \<int\>, \<float\>


or {“auto”, “sqrt”, “log2”} | None | The number of features to consider when looking for the split:
If int, then consider max_features features at each split.
If float, then max_features is a fraction and int(max_features \* n_features) features are considered at each split.
If “auto”, then max_features=sqrt(n_features).
If “sqrt”, then max_features=sqrt(n_features).
If “log2”, then max_features=log2(n_features).
If None, then max_features=n_features. |
+| | splitter | {"best", "random", "mutual"} | "random" | The strategy used to choose the feature set at each node (only used if max_features < num_features). Supported strategies are: **“best”**: sklearn SelectKBest algorithm is used in every node to choose the max_features best features. **“random”**: The algorithm generates 5 candidates and chooses one randomly. **"mutual"**: Chooses the best features w.r.t. their mutual information with the label |
+| | normalize | \<bool\> | False | If standardization of features should be applied on each node with the samples that reach it |
+| \* | multiclass_strategy | {"ovo", "ovr"} | "ovo" | Strategy to use with multiclass datasets. **"ovo"**: one versus one; **"ovr"**: one versus rest |

\* Hyperparameter used by the support vector classifier of every node

diff --git a/stree/Strees.py b/stree/Strees.py
index 15c794e..32c9ff2 100644
--- a/stree/Strees.py
+++ b/stree/Strees.py
@@ -373,16 +373,17 @@ class Splitter:
         tuple
             indices of the features selected
         """
+        # No feature reduction
         if dataset.shape[1] == max_features:
-            # No feature reduction applies
             return tuple(range(dataset.shape[1]))
+        # Random feature reduction
         if self._feature_select == "random":
             features_sets = self._generate_spaces(
                 dataset.shape[1], max_features
             )
             return self._select_best_set(dataset, labels, features_sets)
+        # Return the KBest features
        if self._feature_select == "best":
-            # Take KBest features
             return (
                 SelectKBest(k=max_features)
                 .fit(dataset, labels)
@@ -569,6 +570,7 @@ class Stree(BaseEstimator, ClassifierMixin):
         min_samples_split: int = 0,
         max_features=None,
         splitter: str = "random",
+        multiclass_strategy: str = "ovo",
         normalize: bool = False,
     ):
         self.max_iter = max_iter
@@ -585,6 +587,7 @@
         self.criterion = criterion
         self.splitter = splitter
         self.normalize = normalize
+        self.multiclass_strategy = multiclass_strategy

     def _more_tags(self) -> dict:
         """Required by sklearn to supply features of the classifier
@@ -629,7 +632,23 @@ class Stree(BaseEstimator, ClassifierMixin):
                 f"Maximum depth has to be greater than 1... 

got (max_depth=\
{self.max_depth})"
            )
-        kernels = ["linear", "rbf", "poly", "sigmoid"]
+        if self.multiclass_strategy not in ["ovr", "ovo"]:
+            raise ValueError(
+                "multiclass_strategy has to be either ovr or ovo"
+                f" but got {self.multiclass_strategy}"
+            )
+        if self.multiclass_strategy == "ovo":
+            if self.kernel == "liblinear":
+                raise ValueError(
+                    "The kernel liblinear is incompatible with ovo "
+                    "multiclass_strategy"
+                )
+            if self.split_criteria == "max_samples":
+                raise ValueError(
+                    "The multiclass_strategy 'ovo' is incompatible with "
+                    "split_criteria 'max_samples'"
+                )
+        kernels = ["liblinear", "linear", "rbf", "poly", "sigmoid"]
        if self.kernel not in kernels:
            raise ValueError(f"Kernel {self.kernel} not in {kernels}")
        check_classification_targets(y)
@@ -749,7 +768,7 @@
                C=self.C,
                tol=self.tol,
            )
-            if self.kernel == "linear"
+            if self.kernel == "liblinear"
            else SVC(
                kernel=self.kernel,
                max_iter=self.max_iter,
@@ -758,6 +777,7 @@
                gamma=self.gamma,
                degree=self.degree,
                random_state=self.random_state,
+                decision_function_shape=self.multiclass_strategy,
            )
        )

diff --git a/stree/__init__.py b/stree/__init__.py
index eddafae..3450f0b 100644
--- a/stree/__init__.py
+++ b/stree/__init__.py
@@ -1,6 +1,6 @@
 from .Strees import Stree, Snode, Siterator, Splitter

-__version__ = "1.0"
+__version__ = "1.1"

 __author__ = "Ricardo Montañana Gómez"
 __copyright__ = "Copyright 2020-2021, Ricardo Montañana Gómez"

diff --git a/stree/tests/Snode_test.py b/stree/tests/Snode_test.py
index d60cbfc..51bcca5 100644
--- a/stree/tests/Snode_test.py
+++ b/stree/tests/Snode_test.py
@@ -8,7 +8,11 @@ from .utils import load_dataset
 class Snode_test(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         self._random_state = 1
-        self._clf = Stree(random_state=self._random_state)
+        self._clf = Stree(
+            random_state=self._random_state,
+            kernel="liblinear",
+            multiclass_strategy="ovr",
+        )
         self._clf.fit(*load_dataset(self._random_state))
         super().__init__(*args, **kwargs)

diff --git a/stree/tests/Stree_test.py b/stree/tests/Stree_test.py
index d05b322..5581b66 100644
--- a/stree/tests/Stree_test.py
+++ b/stree/tests/Stree_test.py
@@ -14,7 +14,7 @@ from .utils import load_dataset
 class Stree_test(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         self._random_state = 1
-        self._kernels = ["linear", "rbf", "poly"]
+        self._kernels = ["liblinear", "linear", "rbf", "poly", "sigmoid"]
         super().__init__(*args, **kwargs)

     @classmethod
@@ -22,10 +22,9 @@
         os.environ["TESTING"] = "1"

     def test_valid_kernels(self):
-        valid_kernels = ["linear", "rbf", "poly", "sigmoid"]
         X, y = load_dataset()
-        for kernel in valid_kernels:
-            clf = Stree(kernel=kernel)
+        for kernel in self._kernels:
+            clf = Stree(kernel=kernel, multiclass_strategy="ovr")
             clf.fit(X, y)
             self.assertIsNotNone(clf.tree_)

@@ -55,14 +54,19 @@
         # i.e. 

The partition algorithm didn't forget any sample
         self.assertEqual(node._y.shape[0], y_down.shape[0] + y_up.shape[0])
         unique_y, count_y = np.unique(node._y, return_counts=True)
-        _, count_d = np.unique(y_down, return_counts=True)
-        _, count_u = np.unique(y_up, return_counts=True)
+        labels_d, count_d = np.unique(y_down, return_counts=True)
+        labels_u, count_u = np.unique(y_up, return_counts=True)
+        dict_d = {label: count_d[i] for i, label in enumerate(labels_d)}
+        dict_u = {label: count_u[i] for i, label in enumerate(labels_u)}
         #
         for i in unique_y:
-            number_up = count_u[i]
             try:
-                number_down = count_d[i]
-            except IndexError:
+                number_up = dict_u[i]
+            except KeyError:
+                number_up = 0
+            try:
+                number_down = dict_d[i]
+            except KeyError:
                 number_down = 0
             self.assertEqual(count_y[i], number_down + number_up)
         # Is the partition made the same as the prediction?
@@ -77,14 +81,22 @@
         """Check if the tree is built the same way as predictions of models"""
         warnings.filterwarnings("ignore")
         for kernel in self._kernels:
-            clf = Stree(kernel=kernel, random_state=self._random_state)
+            clf = Stree(
+                kernel=kernel,
+                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
+                random_state=self._random_state,
+            )
             clf.fit(*load_dataset(self._random_state))
             self._check_tree(clf.tree_)

     def test_single_prediction(self):
         X, y = load_dataset(self._random_state)
         for kernel in self._kernels:
-            clf = Stree(kernel=kernel, random_state=self._random_state)
+            clf = Stree(
+                kernel=kernel,
+                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
+                random_state=self._random_state,
+            )
             yp = clf.fit(X, y).predict((X[0, :].reshape(-1, X.shape[1])))
             self.assertEqual(yp[0], y[0])

@@ -92,8 +104,12 @@
         # First 27 elements the predictions are the same as the truth
         num = 27
         X, y = load_dataset(self._random_state)
-        for kernel in self._kernels:
-            clf = Stree(kernel=kernel, random_state=self._random_state)
+        for kernel in ["liblinear", "linear", "rbf", "poly"]:
+            clf = Stree(
+                kernel=kernel,
+                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
+                random_state=self._random_state,
+            )
             yp = clf.fit(X, y).predict(X[:num, :])
             self.assertListEqual(y[:num].tolist(), yp.tolist())

@@ -103,7 +119,11 @@
         """
         X, y = load_dataset(self._random_state)
         for kernel in self._kernels:
-            clf = Stree(kernel=kernel, random_state=self._random_state)
+            clf = Stree(
+                kernel=kernel,
+                multiclass_strategy="ovr" if kernel == "liblinear" else "ovo",
+                random_state=self._random_state,
+            )
             clf.fit(X, y)
             # Compute prediction line by line
             yp_line = np.array([], dtype=int)
@@ -135,7 +155,11 @@
         ]
         computed = []
         expected_string = ""
-        clf = Stree(kernel="linear", random_state=self._random_state)
+        clf = Stree(
+            kernel="liblinear",
+            multiclass_strategy="ovr",
+            random_state=self._random_state,
+        )
         clf.fit(*load_dataset(self._random_state))
         for node in clf:
             computed.append(str(node))
@@ -173,7 +197,12 @@
     def test_check_max_depth(self):
         depths = (3, 4)
         for depth in depths:
-            tcl = Stree(random_state=self._random_state, max_depth=depth)
+            tcl = Stree(
+                kernel="liblinear",
+                multiclass_strategy="ovr",
+                random_state=self._random_state,
+                max_depth=depth,
+            )
             tcl.fit(*load_dataset(self._random_state))
             self.assertEqual(depth, tcl.depth_)

@@ -194,7 +223,7 @@
         for kernel in self._kernels:
             clf = Stree(
                 kernel=kernel,
-                

split_criteria="max_samples", + multiclass_strategy="ovr" if kernel == "liblinear" else "ovo", random_state=self._random_state, ) px = [[1, 2], [5, 6], [9, 10]] @@ -205,26 +234,36 @@ class Stree_test(unittest.TestCase): self.assertListEqual(py, clf.classes_.tolist()) def test_muticlass_dataset(self): + warnings.filterwarnings("ignore", category=ConvergenceWarning) + warnings.filterwarnings("ignore", category=RuntimeWarning) datasets = { "Synt": load_dataset(random_state=self._random_state, n_classes=3), "Iris": load_wine(return_X_y=True), } outcomes = { "Synt": { - "max_samples linear": 0.9606666666666667, + "max_samples liblinear": 0.9606666666666667, + "max_samples linear": 0.786, "max_samples rbf": 0.7133333333333334, "max_samples poly": 0.618, - "impurity linear": 0.9606666666666667, + "max_samples sigmoid": 0.8826666666666667, + "impurity liblinear": 0.9606666666666667, + "impurity linear": 0.786, "impurity rbf": 0.7133333333333334, "impurity poly": 0.618, + "impurity sigmoid": 0.8826666666666667, }, "Iris": { + "max_samples liblinear": 1.0, "max_samples linear": 1.0, "max_samples rbf": 0.6910112359550562, "max_samples poly": 0.6966292134831461, + "max_samples sigmoid": 0.6573033707865169, + "impurity liblinear": 1, "impurity linear": 1, "impurity rbf": 0.6910112359550562, "impurity poly": 0.6966292134831461, + "impurity sigmoid": 0.6573033707865169, }, } @@ -235,14 +274,15 @@ class Stree_test(unittest.TestCase): clf = Stree( C=55, max_iter=1e5, + multiclass_strategy="ovr", kernel=kernel, random_state=self._random_state, ) clf.fit(px, py) outcome = outcomes[name][f"{criteria} {kernel}"] # print( - # f"{name} {criteria} {kernel} {outcome} {clf.score(px" - # ", py)}" + # f"{name} {criteria} {kernel} {outcome} " + # f"{clf.score(px, py)}" # ) self.assertAlmostEqual(outcome, clf.score(px, py)) @@ -312,17 +352,19 @@ class Stree_test(unittest.TestCase): clf.predict(X[:, :3]) # Tests of score - def test_score_binary(self): X, y = load_dataset(self._random_state) accuracies = [ 0.9506666666666667, + 0.9493333333333334, 0.9606666666666667, 0.9433333333333334, + 0.9153333333333333, ] for kernel, accuracy_expected in zip(self._kernels, accuracies): clf = Stree( random_state=self._random_state, + multiclass_strategy="ovr" if kernel == "liblinear" else "ovo", kernel=kernel, ) clf.fit(X, y) @@ -334,7 +376,12 @@ class Stree_test(unittest.TestCase): def test_score_max_features(self): X, y = load_dataset(self._random_state) - clf = Stree(random_state=self._random_state, max_features=2) + clf = Stree( + kernel="liblinear", + multiclass_strategy="ovr", + random_state=self._random_state, + max_features=2, + ) clf.fit(X, y) self.assertAlmostEqual(0.9453333333333334, clf.score(X, y)) @@ -346,7 +393,9 @@ class Stree_test(unittest.TestCase): def test_multiclass_classifier_integrity(self): """Checks if the multiclass operation is done right""" X, y = load_iris(return_X_y=True) - clf = Stree(random_state=0) + clf = Stree( + kernel="liblinear", multiclass_strategy="ovr", random_state=0 + ) clf.fit(X, y) score = clf.score(X, y) # Check accuracy of the whole model @@ -402,10 +451,10 @@ class Stree_test(unittest.TestCase): clf2 = Stree( kernel="rbf", random_state=self._random_state, normalize=True ) - self.assertEqual(0.768, clf.fit(X, y).score(X, y)) - self.assertEqual(0.814, clf2.fit(X, y).score(X, y)) + self.assertEqual(0.966, clf.fit(X, y).score(X, y)) + self.assertEqual(0.964, clf2.fit(X, y).score(X, y)) X, y = load_wine(return_X_y=True) - self.assertEqual(0.6741573033707865, clf.fit(X, y).score(X, y)) + 
self.assertEqual(0.6685393258426966, clf.fit(X, y).score(X, y)) self.assertEqual(1.0, clf2.fit(X, y).score(X, y)) def test_score_multiclass_poly(self): @@ -423,24 +472,78 @@ class Stree_test(unittest.TestCase): random_state=self._random_state, normalize=True, ) - self.assertEqual(0.786, clf.fit(X, y).score(X, y)) - self.assertEqual(0.818, clf2.fit(X, y).score(X, y)) + self.assertEqual(0.946, clf.fit(X, y).score(X, y)) + self.assertEqual(0.972, clf2.fit(X, y).score(X, y)) X, y = load_wine(return_X_y=True) - self.assertEqual(0.702247191011236, clf.fit(X, y).score(X, y)) - self.assertEqual(0.6067415730337079, clf2.fit(X, y).score(X, y)) + self.assertEqual(0.7808988764044944, clf.fit(X, y).score(X, y)) + self.assertEqual(1.0, clf2.fit(X, y).score(X, y)) + + def test_score_multiclass_liblinear(self): + X, y = load_dataset( + random_state=self._random_state, + n_classes=3, + n_features=5, + n_samples=500, + ) + clf = Stree( + kernel="liblinear", + multiclass_strategy="ovr", + random_state=self._random_state, + C=10, + ) + clf2 = Stree( + kernel="liblinear", + multiclass_strategy="ovr", + random_state=self._random_state, + normalize=True, + ) + self.assertEqual(0.968, clf.fit(X, y).score(X, y)) + self.assertEqual(0.97, clf2.fit(X, y).score(X, y)) + X, y = load_wine(return_X_y=True) + self.assertEqual(1.0, clf.fit(X, y).score(X, y)) + self.assertEqual(1.0, clf2.fit(X, y).score(X, y)) + + def test_score_multiclass_sigmoid(self): + X, y = load_dataset( + random_state=self._random_state, + n_classes=3, + n_features=5, + n_samples=500, + ) + clf = Stree(kernel="sigmoid", random_state=self._random_state, C=10) + clf2 = Stree( + kernel="sigmoid", + random_state=self._random_state, + normalize=True, + C=10, + ) + self.assertEqual(0.796, clf.fit(X, y).score(X, y)) + self.assertEqual(0.952, clf2.fit(X, y).score(X, y)) + X, y = load_wine(return_X_y=True) + self.assertEqual(0.6910112359550562, clf.fit(X, y).score(X, y)) + self.assertEqual(0.9662921348314607, clf2.fit(X, y).score(X, y)) def test_score_multiclass_linear(self): + warnings.filterwarnings("ignore", category=ConvergenceWarning) + warnings.filterwarnings("ignore", category=RuntimeWarning) X, y = load_dataset( random_state=self._random_state, n_classes=3, n_features=5, n_samples=1500, ) - clf = Stree(kernel="linear", random_state=self._random_state) + clf = Stree( + kernel="liblinear", + multiclass_strategy="ovr", + random_state=self._random_state, + ) self.assertEqual(0.9533333333333334, clf.fit(X, y).score(X, y)) # Check with context based standardization clf2 = Stree( - kernel="linear", random_state=self._random_state, normalize=True + kernel="liblinear", + multiclass_strategy="ovr", + random_state=self._random_state, + normalize=True, ) self.assertEqual(0.9526666666666667, clf2.fit(X, y).score(X, y)) X, y = load_wine(return_X_y=True) @@ -467,7 +570,7 @@ class Stree_test(unittest.TestCase): ] ) y = np.array([1, 1, 1, 2, 2, 2, 5, 5, 5]) - yw = np.array([1, 1, 1, 5, 5, 5, 5, 5, 5]) + yw = np.array([1, 1, 1, 1, 1, 1, 5, 5, 5]) w = [1, 1, 1, 0, 0, 0, 1, 1, 1] model1 = Stree().fit(X, y) model2 = Stree().fit(X, y, w) @@ -504,14 +607,14 @@ class Stree_test(unittest.TestCase): clf = Stree(random_state=self._random_state) clf.fit(X, y) nodes, leaves = clf.nodes_leaves() - self.assertEqual(25, nodes) - self.assertEqual(13, leaves) + self.assertEqual(31, nodes) + self.assertEqual(16, leaves) X, y = load_wine(return_X_y=True) clf = Stree(random_state=self._random_state) clf.fit(X, y) nodes, leaves = clf.nodes_leaves() - self.assertEqual(9, nodes) - 
self.assertEqual(5, leaves) + self.assertEqual(11, nodes) + self.assertEqual(6, leaves) def test_nodes_leaves_artificial(self): n1 = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test1") @@ -530,3 +633,27 @@ class Stree_test(unittest.TestCase): nodes, leaves = clf.nodes_leaves() self.assertEqual(6, nodes) self.assertEqual(2, leaves) + + def test_bogus_multiclass_strategy(self): + clf = Stree(multiclass_strategy="other") + X, y = load_wine(return_X_y=True) + with self.assertRaises(ValueError): + clf.fit(X, y) + + def test_multiclass_strategy(self): + X, y = load_wine(return_X_y=True) + clf_o = Stree(multiclass_strategy="ovo") + clf_r = Stree(multiclass_strategy="ovr") + score_o = clf_o.fit(X, y).score(X, y) + score_r = clf_r.fit(X, y).score(X, y) + self.assertEqual(1.0, score_o) + self.assertEqual(0.9269662921348315, score_r) + + def test_incompatible_hyperparameters(self): + X, y = load_wine(return_X_y=True) + clf = Stree(kernel="liblinear", multiclass_strategy="ovo") + with self.assertRaises(ValueError): + clf.fit(X, y) + clf = Stree(multiclass_strategy="ovo", split_criteria="max_samples") + with self.assertRaises(ValueError): + clf.fit(X, y)
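
A quick usage sketch of what this patch introduces (illustrative only, not part of the patch itself; it mirrors the combinations exercised in `test_multiclass_strategy` and `test_incompatible_hyperparameters` above — the dataset and `random_state` value are arbitrary choices):

```python
from sklearn.datasets import load_wine

from stree import Stree

X, y = load_wine(return_X_y=True)

# New default: one-vs-one multiclass strategy with the libsvm-based kernels
ovo = Stree(kernel="linear", multiclass_strategy="ovo", random_state=0)
print("ovo score:", ovo.fit(X, y).score(X, y))

# The new liblinear kernel (scikit-learn's LinearSVC) works one-vs-rest
ovr = Stree(kernel="liblinear", multiclass_strategy="ovr", random_state=0)
print("ovr score:", ovr.fit(X, y).score(X, y))

# Incompatible combinations are rejected at fit time with a ValueError,
# e.g. kernel="liblinear" with "ovo", or split_criteria="max_samples"
# together with the "ovo" strategy
try:
    Stree(kernel="liblinear", multiclass_strategy="ovo").fit(X, y)
except ValueError as error:
    print(error)
```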