Show sample_weight use in test2 notebook

Update revision to RC4
Lint Stree grapher
This commit is contained in:
2020-05-30 23:59:40 +02:00
parent 5e5fea9c6a
commit b4816b2995
3 changed files with 55 additions and 58 deletions

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -42,7 +42,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -88,24 +88,40 @@
"Xtrain = data[0]\n",
"Xtest = data[1]\n",
"ytrain = data[2]\n",
"ytest = data[3]"
"ytest = data[3]\n",
"# Set weights inverse to its count class in dataset\n",
"weights = np.ones(Xtrain.shape[0],) * 1.00244\n",
"weights[ytrain==1] = 1.99755 "
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 21,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Accuracy of Train without weights 0.996415770609319\nAccuracy of Train with weights 0.994026284348865\nAccuracy of Tests without weights 0.9665738161559888\nAccuracy of Tests with weights 0.9721448467966574\n"
}
],
"source": [
"C = 23\n",
"print(\"Accuracy of Train without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtrain, ytrain))\n",
"print(\"Accuracy of Train with weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtrain, ytrain))\n",
"print(\"Accuracy of Tests without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtest, ytest))\n",
"print(\"Accuracy of Tests with weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtest, ytest))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"outputPrepend"
]
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9737\nClassifier's accuracy (test) : 0.9805\nroot\nroot - Down, <cgaf> - Leaf class=1 belief= 0.945736 counts=(array([0, 1]), array([ 7, 122]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.978784 counts=(array([0, 1]), array([692, 15]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9809\nClassifier's accuracy (test) : 0.9805\nroot\nroot - Down, <cgaf> - Leaf class=1 belief= 0.983871 counts=(array([0, 1]), array([ 2, 122]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.980170 counts=(array([0, 1]), array([692, 14]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9904\nClassifier's accuracy (test) : 0.9777\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([122]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([8]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.988669 counts=(array([0, 1]), array([698, 8]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's 
accuracy (train): 0.9916\nClassifier's accuracy (test) : 0.9721\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([125]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([5]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.990071 counts=(array([0, 1]), array([698, 7]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9940\nClassifier's accuracy (test) : 0.9749\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([4]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.992867 counts=(array([0, 1]), array([696, 5]))\n\n**************************************************\n0.2412 secs\n"
}
],
"outputs": [],
"source": [
"t = time.time()\n",
"for C in (.001, .01, 1, 5, 17):\n",
@@ -121,15 +137,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "[[0.97223657 0.02776343]\n [0.96965421 0.03034579]\n [0.96918057 0.03081943]\n [0.94009975 0.05990025]]\n"
}
],
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.preprocessing import StandardScaler\n",
@@ -144,15 +154,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([4]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.992867 counts=(array([0, 1]), array([696, 5]))\n"
}
],
"outputs": [],
"source": [
"#check iterator\n",
"for i in list(clf):\n",
@@ -161,15 +165,9 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([4]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.992867 counts=(array([0, 1]), array([696, 5]))\n"
}
],
"outputs": [],
"source": [
"#check iterator again\n",
"for i in clf:\n",
@@ -178,7 +176,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -189,15 +187,9 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x12a2f1200>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x12a2e7320>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x12a2e7200>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x12a2d7b00>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x12a2d7c20>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x12a2d7d40>, 'Stree')\n7 functools.partial(<function check_sample_weights_shape at 0x12a2d7e60>, 'Stree')\n8 functools.partial(<function check_sample_weights_invariance at 0x12a2d7f80>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12a2ec320>, 'Stree')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x12a2ec320>, 'Stree', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x12a2e2170>, 'Stree')\n12 functools.partial(<function check_dtype_object at 0x12a2e20e0>, 'Stree')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x12a2e7440>, 'Stree')\n14 functools.partial(<function check_pipeline_consistency at 0x12a2e70e0>, 'Stree')\n15 functools.partial(<function check_estimators_nan_inf at 0x12a2e7560>, 'Stree')\n16 functools.partial(<function check_estimators_overwrite_params at 0x12a2f10e0>, 'Stree')\n17 functools.partial(<function check_estimator_sparse_data at 0x12a2d79e0>, 'Stree')\n18 functools.partial(<function check_estimators_pickle at 0x12a2e77a0>, 'Stree')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x12a2f1440>, 'Stree')\n20 functools.partial(<function check_classifiers_one_label at 0x12a2e7e60>, 'Stree')\n21 functools.partial(<function check_classifiers_classes at 0x12a2ec8c0>, 'Stree')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x12a2e78c0>, 'Stree')\n23 functools.partial(<function check_classifiers_train at 
0x12a2e7f80>, 'Stree')\n24 functools.partial(<function check_classifiers_train at 0x12a2e7f80>, 'Stree', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x12a2e7f80>, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x12a2f1ef0>, 'Stree')\n27 functools.partial(<function check_supervised_y_no_nan at 0x12a2d8b00>, 'Stree')\n28 functools.partial(<function check_supervised_y_2d at 0x12a2ec560>, 'Stree')\n29 functools.partial(<function check_estimators_unfitted at 0x12a2ec440>, 'Stree')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x12a2f1a70>, 'Stree')\n31 functools.partial(<function check_decision_proba_consistency at 0x12a2f6050>, 'Stree')\n32 functools.partial(<function check_fit2d_predict1d at 0x12a2e2680>, 'Stree')\n33 functools.partial(<function check_methods_subset_invariance at 0x12a2e2830>, 'Stree')\n34 functools.partial(<function check_fit2d_1sample at 0x12a2e2950>, 'Stree')\n35 functools.partial(<function check_fit2d_1feature at 0x12a2e2a70>, 'Stree')\n36 functools.partial(<function check_fit1d at 0x12a2e2b90>, 'Stree')\n37 functools.partial(<function check_get_params_invariance at 0x12a2f1cb0>, 'Stree')\n38 functools.partial(<function check_set_params at 0x12a2f1dd0>, 'Stree')\n39 functools.partial(<function check_dict_unchanged at 0x12a2e2290>, 'Stree')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x12a2e2560>, 'Stree')\n41 functools.partial(<function check_fit_idempotent at 0x12a2f6200>, 'Stree')\n42 functools.partial(<function check_n_features_in at 0x12a2f6290>, 'Stree')\n43 functools.partial(<function check_requires_y_none at 0x12a2f6320>, 'Stree')\n"
}
],
"outputs": [],
"source": [
"# Make checks one by one\n",
"c = 0\n",

View File

@@ -1,6 +1,6 @@
import setuptools
__version__ = "0.9rc3"
__version__ = "0.9rc4"
__author__ = "Ricardo Montañana Gómez"

View File

@@ -15,6 +15,7 @@ from mpl_toolkits.mplot3d import Axes3D
from .Strees import Stree, Snode, Siterator
class Snode_graph(Snode):
def __init__(self, node: Stree):
@@ -45,7 +46,8 @@ class Snode_graph(Snode):
ax.set_ylim(self._ylimits)
ax.set_zlim(self._zlimits)
def save_hyperplane(self, save_folder: str = './', save_prefix: str = '', save_seq: int = 1):
def save_hyperplane(self, save_folder: str = './', save_prefix: str = '',
save_seq: int = 1):
_, fig = self.plot_hyperplane()
name = f"{save_folder}{save_prefix}STnode{save_seq}.png"
fig.savefig(name, bbox_inches='tight')
@@ -53,9 +55,8 @@ class Snode_graph(Snode):
def _get_cmap(self):
cmap = 'jet'
if self._is_pure():
if self._class == 1:
cmap = 'jet_r'
if self._is_pure() and self._class == 1:
cmap = 'jet_r'
return cmap
def _graph_title(self):
@@ -66,16 +67,20 @@ class Snode_graph(Snode):
fig = plt.figure(figsize=self._plot_size)
ax = fig.add_subplot(1, 1, 1, projection='3d')
if not self._is_pure():
# Can't plot hyperplane of leaves with one label because it hasn't classiffier
# Can't plot the hyperplane of leaves with one label because they have
# no classifier
# get the splitting hyperplane
def hyperplane(x, y): return (-self._interceptor - self._vector[0][0] * x
- self._vector[0][1] * y) / self._vector[0][2]
def hyperplane(x, y): return (-self._interceptor
- self._vector[0][0] * x
- self._vector[0][1] * y) \
/ self._vector[0][2]
tmpx = np.linspace(self._X[:, 0].min(), self._X[:, 0].max())
tmpy = np.linspace(self._X[:, 1].min(), self._X[:, 1].max())
xx, yy = np.meshgrid(tmpx, tmpy)
ax.plot_surface(xx, yy, hyperplane(xx, yy), alpha=.5, antialiased=True,
rstride=1, cstride=1, cmap='seismic')
ax.plot_surface(xx, yy, hyperplane(xx, yy), alpha=.5,
antialiased=True, rstride=1, cstride=1,
cmap='seismic')
self._set_graphics_axis(ax)
if plot_distribution:
self.plot_distribution(ax)
@@ -97,6 +102,7 @@ class Snode_graph(Snode):
ax.set_zlabel('X2')
plt.show()
class Stree_grapher(Stree):
"""Build 3d graphs of any dataset, if it's more than 3 features PCA shall
work its magic
@@ -114,7 +120,7 @@ class Stree_grapher(Stree):
def __del__(self):
try:
os.environ.pop('TESTING')
except:
except KeyError:
pass
plt.close('all')
@@ -181,4 +187,3 @@ class Stree_grapher(Stree):
def __iter__(self):
return Siterator(self._tree_gr)