diff --git a/notebooks/test2.ipynb b/notebooks/test2.ipynb index 7c94d2a..0c2c71b 100644 --- a/notebooks/test2.ipynb +++ b/notebooks/test2.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -88,24 +88,40 @@ "Xtrain = data[0]\n", "Xtest = data[1]\n", "ytrain = data[2]\n", - "ytest = data[3]" + "ytest = data[3]\n", + "# Set weights inverse to its count class in dataset\n", + "weights = np.ones(Xtrain.shape[0],) * 1.00244\n", + "weights[ytrain==1] = 1.99755 " ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": "Accuracy of Train without weights 0.996415770609319\nAccuracy of Train with weights 0.994026284348865\nAccuracy of Tests without weights 0.9665738161559888\nAccuracy of Tests with weights 0.9721448467966574\n" + } + ], + "source": [ + "C = 23\n", + "print(\"Accuracy of Train without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtrain, ytrain))\n", + "print(\"Accuracy of Train with weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtrain, ytrain))\n", + "print(\"Accuracy of Tests without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtest, ytest))\n", + "print(\"Accuracy of Tests with weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtest, ytest))" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "tags": [ "outputPrepend" ] }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9737\nClassifier's accuracy (test) : 0.9805\nroot\nroot - Down, - Leaf class=1 belief= 0.945736 counts=(array([0, 1]), array([ 7, 122]))\nroot - Up\nroot - Up - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up, - Leaf class=0 belief= 0.978784 counts=(array([0, 1]), array([692, 15]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9809\nClassifier's accuracy (test) : 0.9805\nroot\nroot - Down, - Leaf class=1 belief= 0.983871 counts=(array([0, 1]), array([ 2, 122]))\nroot - Up\nroot - Up - Down, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Down - Up, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up, - Leaf class=0 belief= 0.980170 counts=(array([0, 1]), array([692, 14]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9904\nClassifier's accuracy (test) : 0.9777\nroot\nroot - Down\nroot - Down - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([122]))\nroot - Down - Up, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up\nroot - Up - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([8]))\nroot - Up - Up, - Leaf class=0 belief= 0.988669 counts=(array([0, 1]), array([698, 8]))\n\n**************************************************\n************** C=5 ****************************\nClassifier's accuracy (train): 0.9916\nClassifier's accuracy (test) : 0.9721\nroot\nroot - Down\nroot - Down - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([125]))\nroot - Down - Up, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up\nroot - Up - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([5]))\nroot - Up - Up\nroot - Up - Up - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, - Leaf class=0 belief= 0.990071 counts=(array([0, 1]), array([698, 7]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9940\nClassifier's accuracy (test) : 0.9749\nroot\nroot - Down\nroot - Down - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([4]))\nroot - Up - Down - Up, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, - Leaf class=0 belief= 0.992867 counts=(array([0, 1]), array([696, 5]))\n\n**************************************************\n0.2412 secs\n" - } - ], + "outputs": [], "source": [ "t = time.time()\n", "for C in (.001, .01, 1, 5, 17):\n", @@ -121,15 +137,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "[[0.97223657 0.02776343]\n [0.96965421 0.03034579]\n [0.96918057 0.03081943]\n [0.94009975 0.05990025]]\n" - } - ], + "outputs": [], "source": [ "import numpy as np\n", "from sklearn.preprocessing import StandardScaler\n", @@ -144,15 +154,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "root\nroot - Down\nroot - Down - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([4]))\nroot - Up - Down - Up, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, - Leaf class=0 belief= 0.992867 counts=(array([0, 1]), array([696, 5]))\n" - } - ], + "outputs": [], "source": [ "#check iterator\n", "for i in list(clf):\n", @@ -161,15 +165,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "root\nroot - Down\nroot - Down - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([4]))\nroot - Up - Down - Up, - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, - Leaf class=0 belief= 0.992867 counts=(array([0, 1]), array([696, 5]))\n" - } - ], + "outputs": [], "source": [ "#check iterator again\n", "for i in clf:\n", @@ -178,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -189,15 +187,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": "1 functools.partial(, 'Stree')\n2 functools.partial(, 'Stree')\n3 functools.partial(, 'Stree')\n4 functools.partial(, 'Stree')\n5 functools.partial(, 'Stree')\n6 functools.partial(, 'Stree')\n7 functools.partial(, 'Stree')\n8 functools.partial(, 'Stree')\n9 functools.partial(, 'Stree')\n10 functools.partial(, 'Stree', readonly_memmap=True)\n11 functools.partial(, 'Stree')\n12 functools.partial(, 'Stree')\n13 functools.partial(, 'Stree')\n14 functools.partial(, 'Stree')\n15 functools.partial(, 'Stree')\n16 functools.partial(, 'Stree')\n17 functools.partial(, 'Stree')\n18 functools.partial(, 'Stree')\n19 functools.partial(, 'Stree')\n20 functools.partial(, 'Stree')\n21 functools.partial(, 'Stree')\n22 functools.partial(, 'Stree')\n23 functools.partial(, 'Stree')\n24 functools.partial(, 'Stree', readonly_memmap=True)\n25 functools.partial(, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(, 'Stree')\n27 functools.partial(, 'Stree')\n28 functools.partial(, 'Stree')\n29 functools.partial(, 'Stree')\n30 functools.partial(, 'Stree')\n31 functools.partial(, 'Stree')\n32 functools.partial(, 'Stree')\n33 functools.partial(, 'Stree')\n34 functools.partial(, 'Stree')\n35 functools.partial(, 'Stree')\n36 functools.partial(, 'Stree')\n37 functools.partial(, 'Stree')\n38 functools.partial(, 'Stree')\n39 functools.partial(, 'Stree')\n40 functools.partial(, 'Stree')\n41 functools.partial(, 'Stree')\n42 functools.partial(, 'Stree')\n43 functools.partial(, 'Stree')\n" - } - ], + "outputs": [], "source": [ "# Make checks one by one\n", "c = 0\n", diff --git a/setup.py b/setup.py index 95c0906..14acc0c 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ import setuptools -__version__ = "0.9rc3" +__version__ = "0.9rc4" __author__ = "Ricardo Montañana Gómez" diff --git a/stree/Strees_grapher.py b/stree/Strees_grapher.py index 6d0e46a..c6f7a46 100644 --- a/stree/Strees_grapher.py +++ b/stree/Strees_grapher.py @@ -15,6 +15,7 @@ from mpl_toolkits.mplot3d import Axes3D from .Strees import Stree, Snode, Siterator + class Snode_graph(Snode): def __init__(self, node: Stree): @@ -45,7 +46,8 @@ class Snode_graph(Snode): ax.set_ylim(self._ylimits) ax.set_zlim(self._zlimits) - def save_hyperplane(self, save_folder: str = './', save_prefix: str = '', save_seq: int = 1): + def save_hyperplane(self, save_folder: str = './', save_prefix: str = '', + save_seq: int = 1): _, fig = self.plot_hyperplane() name = f"{save_folder}{save_prefix}STnode{save_seq}.png" fig.savefig(name, bbox_inches='tight') @@ -53,9 +55,8 @@ class Snode_graph(Snode): def _get_cmap(self): cmap = 'jet' - if self._is_pure(): - if self._class == 1: - cmap = 'jet_r' + if self._is_pure() and self._class == 1: + cmap = 'jet_r' return cmap def _graph_title(self): @@ -66,16 +67,20 @@ class Snode_graph(Snode): fig = plt.figure(figsize=self._plot_size) ax = fig.add_subplot(1, 1, 1, projection='3d') if not self._is_pure(): - # Can't plot hyperplane of leaves with one label because it hasn't classiffier + # Can't plot hyperplane of leaves with one label because it hasn't + # classiffier # get the splitting hyperplane - def hyperplane(x, y): return (-self._interceptor - self._vector[0][0] * x - - self._vector[0][1] * y) / self._vector[0][2] + def hyperplane(x, y): return (-self._interceptor + - self._vector[0][0] * x + - self._vector[0][1] * y) \ + / self._vector[0][2] tmpx = np.linspace(self._X[:, 0].min(), self._X[:, 0].max()) tmpy = np.linspace(self._X[:, 1].min(), self._X[:, 1].max()) xx, yy = np.meshgrid(tmpx, tmpy) - ax.plot_surface(xx, yy, hyperplane(xx, yy), alpha=.5, antialiased=True, - rstride=1, cstride=1, cmap='seismic') + ax.plot_surface(xx, yy, hyperplane(xx, yy), alpha=.5, + antialiased=True, rstride=1, cstride=1, + cmap='seismic') self._set_graphics_axis(ax) if plot_distribution: self.plot_distribution(ax) @@ -97,6 +102,7 @@ class Snode_graph(Snode): ax.set_zlabel('X2') plt.show() + class Stree_grapher(Stree): """Build 3d graphs of any dataset, if it's more than 3 features PCA shall make its magic @@ -114,7 +120,7 @@ class Stree_grapher(Stree): def __del__(self): try: os.environ.pop('TESTING') - except: + except KeyError: pass plt.close('all') @@ -181,4 +187,3 @@ class Stree_grapher(Stree): def __iter__(self): return Siterator(self._tree_gr) -