Show sample_weight use in test2 notebook

Update revision to RC4; lint Stree grapher
2025-08-16 07:56:06 +00:00 · 2020-05-30 23:59:40 +02:00
parent 5e5fea9c6a
commit b4816b2995
3 changed files with 55 additions and 58 deletions
--- a/notebooks/test2.ipynb
+++ b/notebooks/test2.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -14,7 +14,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -30,7 +30,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -42,7 +42,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
@@ -88,24 +88,40 @@
    "Xtrain = data[0]\n",
    "Xtest = data[1]\n",
    "ytrain = data[2]\n",
-    "ytest = data[3]"
+    "ytest = data[3]\n",
+    "# Weight each sample inversely to the count of its class in the dataset\n",
+    "weights = np.ones(Xtrain.shape[0],) * 1.00244\n",
+    "weights[ytrain==1] = 1.99755 "
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": "Accuracy of Train without weights 0.996415770609319\nAccuracy of Train with    weights 0.994026284348865\nAccuracy of Tests without weights 0.9665738161559888\nAccuracy of Tests with    weights 0.9721448467966574\n"
+    }
+   ],
+   "source": [
+    "C = 23\n",
+    "print(\"Accuracy of Train without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtrain, ytrain))\n",
+    "print(\"Accuracy of Train with    weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtrain, ytrain))\n",
+    "print(\"Accuracy of Tests without weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain).score(Xtest, ytest))\n",
+    "print(\"Accuracy of Tests with    weights\", Stree(C=C, random_state=1).fit(Xtrain, ytrain, sample_weight=weights).score(Xtest, ytest))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
   "metadata": {
    "tags": [
     "outputPrepend"
    ]
   },
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "************** C=0.001 ****************************\nClassifier's accuracy (train): 0.9737\nClassifier's accuracy (test) : 0.9805\nroot\nroot - Down, <cgaf> - Leaf class=1 belief= 0.945736 counts=(array([0, 1]), array([  7, 122]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.978784 counts=(array([0, 1]), array([692,  15]))\n\n**************************************************\n************** C=0.01 ****************************\nClassifier's accuracy (train): 0.9809\nClassifier's accuracy (test) : 0.9805\nroot\nroot - Down, <cgaf> - Leaf class=1 belief= 0.983871 counts=(array([0, 1]), array([  2, 122]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up\nroot - Up - Up - Up - Down\nroot - Up - Up - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([2]))\nroot - Up - Up - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.980170 counts=(array([0, 1]), array([692,  14]))\n\n**************************************************\n************** C=1 ****************************\nClassifier's accuracy (train): 0.9904\nClassifier's accuracy (test) : 0.9777\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([122]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([8]))\nroot - Up - Up, <cgaf> - Leaf class=0 belief= 0.988669 counts=(array([0, 1]), array([698,   8]))\n\n**************************************************\n************** C=5 
****************************\nClassifier's accuracy (train): 0.9916\nClassifier's accuracy (test) : 0.9721\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([125]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up\nroot - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([5]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.990071 counts=(array([0, 1]), array([698,   7]))\n\n**************************************************\n************** C=17 ****************************\nClassifier's accuracy (train): 0.9940\nClassifier's accuracy (test) : 0.9749\nroot\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([4]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.992867 counts=(array([0, 1]), array([696,   5]))\n\n**************************************************\n0.2412 secs\n"
-    }
-   ],
+   "outputs": [],
   "source": [
    "t = time.time()\n",
    "for C in (.001, .01, 1, 5, 17):\n",
@@ -121,15 +137,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "[[0.97223657 0.02776343]\n [0.96965421 0.03034579]\n [0.96918057 0.03081943]\n [0.94009975 0.05990025]]\n"
-    }
-   ],
+   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.preprocessing import StandardScaler\n",
@@ -144,15 +154,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([4]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.992867 counts=(array([0, 1]), array([696,   5]))\n"
-    }
-   ],
+   "outputs": [],
   "source": [
    "#check iterator\n",
    "for i in list(clf):\n",
@@ -161,15 +165,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "root\nroot - Down\nroot - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([128]))\nroot - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([2]))\nroot - Up\nroot - Up - Down\nroot - Up - Down - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([4]))\nroot - Up - Down - Up, <pure> - Leaf class=0 belief= 1.000000 counts=(array([0]), array([1]))\nroot - Up - Up\nroot - Up - Up - Down, <pure> - Leaf class=1 belief= 1.000000 counts=(array([1]), array([1]))\nroot - Up - Up - Up, <cgaf> - Leaf class=0 belief= 0.992867 counts=(array([0, 1]), array([696,   5]))\n"
-    }
-   ],
+   "outputs": [],
   "source": [
    "#check iterator again\n",
    "for i in clf:\n",
@@ -178,7 +176,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -189,15 +187,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": "1 functools.partial(<function check_no_attributes_set_in_init at 0x12a2f1200>, 'Stree')\n2 functools.partial(<function check_estimators_dtypes at 0x12a2e7320>, 'Stree')\n3 functools.partial(<function check_fit_score_takes_y at 0x12a2e7200>, 'Stree')\n4 functools.partial(<function check_sample_weights_pandas_series at 0x12a2d7b00>, 'Stree')\n5 functools.partial(<function check_sample_weights_not_an_array at 0x12a2d7c20>, 'Stree')\n6 functools.partial(<function check_sample_weights_list at 0x12a2d7d40>, 'Stree')\n7 functools.partial(<function check_sample_weights_shape at 0x12a2d7e60>, 'Stree')\n8 functools.partial(<function check_sample_weights_invariance at 0x12a2d7f80>, 'Stree')\n9 functools.partial(<function check_estimators_fit_returns_self at 0x12a2ec320>, 'Stree')\n10 functools.partial(<function check_estimators_fit_returns_self at 0x12a2ec320>, 'Stree', readonly_memmap=True)\n11 functools.partial(<function check_complex_data at 0x12a2e2170>, 'Stree')\n12 functools.partial(<function check_dtype_object at 0x12a2e20e0>, 'Stree')\n13 functools.partial(<function check_estimators_empty_data_messages at 0x12a2e7440>, 'Stree')\n14 functools.partial(<function check_pipeline_consistency at 0x12a2e70e0>, 'Stree')\n15 functools.partial(<function check_estimators_nan_inf at 0x12a2e7560>, 'Stree')\n16 functools.partial(<function check_estimators_overwrite_params at 0x12a2f10e0>, 'Stree')\n17 functools.partial(<function check_estimator_sparse_data at 0x12a2d79e0>, 'Stree')\n18 functools.partial(<function check_estimators_pickle at 0x12a2e77a0>, 'Stree')\n19 functools.partial(<function check_classifier_data_not_an_array at 0x12a2f1440>, 'Stree')\n20 functools.partial(<function check_classifiers_one_label at 0x12a2e7e60>, 'Stree')\n21 functools.partial(<function check_classifiers_classes at 0x12a2ec8c0>, 'Stree')\n22 functools.partial(<function check_estimators_partial_fit_n_features at 0x12a2e78c0>, 'Stree')\n23 functools.partial(<function 
check_classifiers_train at 0x12a2e7f80>, 'Stree')\n24 functools.partial(<function check_classifiers_train at 0x12a2e7f80>, 'Stree', readonly_memmap=True)\n25 functools.partial(<function check_classifiers_train at 0x12a2e7f80>, 'Stree', readonly_memmap=True, X_dtype='float32')\n26 functools.partial(<function check_classifiers_regression_target at 0x12a2f1ef0>, 'Stree')\n27 functools.partial(<function check_supervised_y_no_nan at 0x12a2d8b00>, 'Stree')\n28 functools.partial(<function check_supervised_y_2d at 0x12a2ec560>, 'Stree')\n29 functools.partial(<function check_estimators_unfitted at 0x12a2ec440>, 'Stree')\n30 functools.partial(<function check_non_transformer_estimators_n_iter at 0x12a2f1a70>, 'Stree')\n31 functools.partial(<function check_decision_proba_consistency at 0x12a2f6050>, 'Stree')\n32 functools.partial(<function check_fit2d_predict1d at 0x12a2e2680>, 'Stree')\n33 functools.partial(<function check_methods_subset_invariance at 0x12a2e2830>, 'Stree')\n34 functools.partial(<function check_fit2d_1sample at 0x12a2e2950>, 'Stree')\n35 functools.partial(<function check_fit2d_1feature at 0x12a2e2a70>, 'Stree')\n36 functools.partial(<function check_fit1d at 0x12a2e2b90>, 'Stree')\n37 functools.partial(<function check_get_params_invariance at 0x12a2f1cb0>, 'Stree')\n38 functools.partial(<function check_set_params at 0x12a2f1dd0>, 'Stree')\n39 functools.partial(<function check_dict_unchanged at 0x12a2e2290>, 'Stree')\n40 functools.partial(<function check_dont_overwrite_parameters at 0x12a2e2560>, 'Stree')\n41 functools.partial(<function check_fit_idempotent at 0x12a2f6200>, 'Stree')\n42 functools.partial(<function check_n_features_in at 0x12a2f6290>, 'Stree')\n43 functools.partial(<function check_requires_y_none at 0x12a2f6320>, 'Stree')\n"
-    }
-   ],
+   "outputs": [],
   "source": [
    "# Make checks one by one\n",
    "c = 0\n",