Add big test and feature selection notebook

2021-04-26 01:08:57 +02:00
parent 84795b4c43
commit b061d40355
4 changed files with 650 additions and 8 deletions

433
FeatureSelection.ipynb Normal file

@@ -0,0 +1,433 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 59,
"id": "intimate-convergence",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import load_iris\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.feature_selection import SelectKBest, mutual_info_classif, SelectFromModel\n",
"from experimentation import Datasets\n",
"from sklearn.model_selection import cross_val_score, KFold\n",
"import numpy as np\n",
"from stree import Stree\n",
"import warnings\n",
"import time\n",
"random_seeds = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]\n",
"#warnings.filterwarnings(\"ignore\", message=\"The least populated class in y has only 2 members, which is less than n_splits\")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "specified-western",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(531, 100)\n"
]
}
],
"source": [
"random_state = 1\n",
"dt = Datasets(normalize=False, standardize=False, set_of_files=\"tanveer\")\n",
"Xt, yt = dt.load(\"low-res-spect\")\n",
"print(Xt.shape)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "governing-botswana",
"metadata": {},
"outputs": [],
"source": [
"def evaluate(X, y, kernel=\"linear\", C=.05):\n",
" results = []\n",
" for seed in random_seeds:\n",
" print(f\"{seed}, \", end=\"\", flush=True)\n",
" kfold = KFold(shuffle=True, random_state=random_state, n_splits=5)\n",
" clf = Stree(random_state=random_state, C=C, kernel=kernel)\n",
" results.append(cross_val_score(clf, X, y, cv=kfold))\n",
" print(\"end: \", end=\"\", flush=True)\n",
" return np.mean(results)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "warming-corner",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"low-res-spect : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886951 in 22.44 seconds\n",
"KBest-10 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.544225 in 2.89 seconds\n",
"KBest-50 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.862511 in 16.75 seconds\n",
"KBest-75 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900123 in 20.00 seconds\n",
"KBest-76 : 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.898237 in 20.59 seconds\n"
]
}
],
"source": [
"experiment = {\n",
" \"low-res-spect\": \"all\",\n",
" \"KBest-10\": 10,\n",
" \"KBest-50\": 50,\n",
" \"KBest-75\": 75,\n",
" \"KBest-76\": 76\n",
"}\n",
"for label, num_features in experiment.items():\n",
" now = time.time()\n",
" X = Xt if num_features == \"all\" else SelectKBest(k=num_features).fit_transform(Xt, yt)\n",
" print(f\"{label:15s}: \", end=\"\", flush=True)\n",
" accuracy = evaluate(X, yt)\n",
" spent = time.time() - now\n",
" print(f\"{accuracy:.6f} in {spent:5.2f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "confidential-opportunity",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KBest-60: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900141 in 16.53 seconds\n",
"KBest-61: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.898272 in 17.12 seconds\n",
"KBest-62: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.902028 in 17.24 seconds\n",
"KBest-63: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.898254 in 17.25 seconds\n",
"KBest-64: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.901993 in 17.96 seconds\n",
"KBest-65: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.896367 in 17.54 seconds\n",
"KBest-66: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890707 in 17.63 seconds\n",
"KBest-67: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.883160 in 18.05 seconds\n",
"KBest-68: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.887004 in 18.01 seconds\n",
"KBest-69: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890760 in 18.71 seconds\n",
"KBest-70: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890707 in 18.11 seconds\n",
"KBest-71: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 19.67 seconds\n",
"KBest-72: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 19.16 seconds\n",
"KBest-73: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.896350 in 19.47 seconds\n",
"KBest-74: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894481 in 19.93 seconds\n",
"KBest-75: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900123 in 19.95 seconds\n"
]
}
],
"source": [
"for num_features in range(60, 76):\n",
" now = time.time()\n",
" X = SelectKBest(k=num_features).fit_transform(Xt, yt)\n",
" print(f\"KBest-{num_features:2d}: \", end=\"\", flush=True)\n",
" accuracy = evaluate(X, yt)\n",
" spent = time.time() - now\n",
" print(f\"{accuracy:.6f} in {spent:5.2f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "recent-treasury",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KBest-30: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.849356 in 11.43 seconds\n",
"KBest-31: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855017 in 11.54 seconds\n",
"KBest-32: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 11.90 seconds\n",
"KBest-33: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 11.98 seconds\n",
"KBest-34: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 12.07 seconds\n",
"KBest-35: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855052 in 12.56 seconds\n",
"KBest-36: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.860695 in 12.59 seconds\n",
"KBest-37: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.864468 in 13.06 seconds\n",
"KBest-38: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.860695 in 13.55 seconds\n",
"KBest-39: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.855034 in 13.75 seconds\n",
"KBest-40: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.864415 in 13.58 seconds\n",
"KBest-41: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.858790 in 14.13 seconds\n",
"KBest-42: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.862546 in 14.87 seconds\n",
"KBest-43: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.856886 in 15.23 seconds\n",
"KBest-44: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.853095 in 15.66 seconds\n"
]
}
],
"source": [
"for num_features in range(30, 45):\n",
" now = time.time()\n",
" X = SelectKBest(k=num_features).fit_transform(Xt, yt)\n",
" print(f\"KBest-{num_features:2d}: \", end=\"\", flush=True)\n",
" accuracy = evaluate(X, yt)\n",
" spent = time.time() - now\n",
" print(f\"{accuracy:.6f} in {spent:5.2f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "indirect-poker",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KBest-linear-84: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 21.51 seconds\n",
"KBest-linear-85: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 21.58 seconds\n",
"KBest-linear-86: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 22.06 seconds\n",
"KBest-linear-87: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.888803 in 21.44 seconds\n",
"KBest-linear-88: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 21.84 seconds\n",
"KBest-linear-89: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886934 in 22.40 seconds\n",
"KBest-linear-90: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 22.31 seconds\n",
"KBest-linear-91: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 22.49 seconds\n",
"KBest-linear-92: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890689 in 22.48 seconds\n",
"KBest-linear-93: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894463 in 23.19 seconds\n",
"KBest-linear-94: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.900141 in 23.67 seconds\n"
]
}
],
"source": [
"kernel=\"linear\"\n",
"for num_features in range(84, 95):\n",
" now = time.time()\n",
" X = SelectKBest(k=num_features).fit_transform(Xt, yt)\n",
" print(f\"KBest-{kernel}-{num_features:2d}: \", end=\"\", flush=True)\n",
" accuracy = evaluate(X, yt, kernel=kernel, C=.05)\n",
" spent = time.time() - now\n",
" print(f\"{accuracy:.6f} in {spent:5.2f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "moving-hamburg",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mui-linear-50: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894481 in 12.37 seconds\n",
"mui-linear-75: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886934 in 19.47 seconds\n",
"mui-linear-85: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.892576 in 21.31 seconds\n",
"mui-linear-90: 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.894463 in 23.06 seconds\n"
]
}
],
"source": [
"kernel=\"linear\"\n",
"mui = mutual_info_classif(Xt, yt, random_state=random_state)\n",
"for num_features in [50, 75, 85, 90]:\n",
" now = time.time()\n",
" ind = np.argpartition(mui, -num_features)[-num_features:]\n",
" print(f\"mui-{kernel}-{num_features:2d}: \", end=\"\", flush=True)\n",
" accuracy = evaluate(Xt[:, ind], yt, kernel=kernel, C=.05)\n",
" spent = time.time() - now\n",
" print(f\"{accuracy:.6f} in {spent:5.2f} seconds\")\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "closed-policy",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"From model-linear-C=0.01-X.shape=(531, 16): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886898 in 1.91 seconds\n",
"From model-linear-C=0.10-X.shape=(531, 62): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.883213 in 10.30 seconds\n",
"From model-linear-C=1.00-X.shape=(531, 93): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.890725 in 20.57 seconds\n",
"From model-linear-C=5.00-X.shape=(531, 100): 57, 31, 1714, 17, 23, 79, 83, 97, 7, 1, end: 0.886951 in 22.83 seconds\n"
]
}
],
"source": [
"# From model\n",
"warnings.filterwarnings(\"ignore\",message=\"Liblinear failed to converge\")\n",
"for C in [0.01, 0.1, 1, 5]:\n",
" now = time.time()\n",
" lsvc = LinearSVC(C=C, penalty=\"l1\", dual=False, random_state=random_state).fit(Xt, yt)\n",
" model = SelectFromModel(lsvc, prefit=True)\n",
" X = model.transform(Xt)\n",
" print(f\"From model-{kernel}-C={C:4.2f}-X.shape={X.shape}: \", end=\"\", flush=True)\n",
" accuracy = evaluate(X, yt, kernel=kernel, C=.05)\n",
" spent = time.time() - now\n",
" print(f\"{accuracy:.6f} in {spent:5.2f} seconds\")"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "chubby-patio",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(12,)"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lsvc = LinearSVC(C=.01, penalty=\"l1\", dual=False, random_state=random_state).fit(Xt, yt)\n",
"res = np.linalg.norm(lsvc.coef_, axis=0, ord=1)\n",
"res[res>1e-5].shape"
]
},
{
"cell_type": "code",
"execution_count": 115,
"id": "refined-display",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(531, 48)\n"
]
},
{
"data": {
"text/plain": [
"(100,)"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"warnings.filterwarnings(\"ignore\",message=\"lbfgs failed to converge\")\n",
"logi = LogisticRegression(C=1, dual=False, random_state=random_state).fit(Xt, yt)\n",
"model = SelectFromModel(logi, prefit=True)\n",
"X = model.transform(Xt)\n",
"print(X.shape)\n",
"#res = np.mean(logi.coef_, axis=1)\n",
"#res = np.linalg.norm(logi.coef_, axis=0, ord=1)\n",
"res = (logi.coef_**2).sum(axis=0)\n",
"res[res>1e-5].shape"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "hydraulic-labor",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0, 1, 2, 3, 4, 5, 6]"
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(range(7))"
]
},
{
"cell_type": "code",
"execution_count": 127,
"id": "settled-rolling",
"metadata": {},
"outputs": [],
"source": [
"select = SelectKBest(k=5).fit(Xt, yt).get_support(indices=True)"
]
},
{
"cell_type": "code",
"execution_count": 128,
"id": "instructional-democracy",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([90, 92, 93, 94, 95])"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"select"
]
},
{
"cell_type": "code",
"execution_count": 126,
"id": "detailed-optimum",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([90, 92, 93, 94, 95])"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"select.get_support(indices=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "recent-paraguay",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
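
The notebook above relies on the experimentation.Datasets loader and the Stree classifier, which are not part of this diff. The following is a minimal, self-contained sketch of the same three selection strategies (SelectKBest, a manual top-k cut on mutual_info_classif, and SelectFromModel over an L1-penalised LinearSVC); scikit-learn's breast-cancer data and a plain LinearSVC stand in for low-res-spect and Stree:

import warnings
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectKBest, SelectFromModel, mutual_info_classif
from sklearn.model_selection import KFold, cross_val_score
from sklearn.svm import LinearSVC

warnings.filterwarnings("ignore", message="Liblinear failed to converge")
X, y = load_breast_cancer(return_X_y=True)  # stand-in for the low-res-spect dataset
seeds = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]

def evaluate(X, y):
    # mean accuracy of 5-fold cross-validation repeated with every seed
    scores = []
    for seed in seeds:
        kfold = KFold(shuffle=True, random_state=seed, n_splits=5)
        clf = LinearSVC(C=0.05, dual=False, random_state=seed)  # stand-in for Stree
        scores.append(cross_val_score(clf, X, y, cv=kfold))
    return np.mean(scores)

# 1) univariate selection: keep the k best features (ANOVA F-score by default)
X_kbest = SelectKBest(k=10).fit_transform(X, y)

# 2) mutual information: rank all features, keep the top k by hand
mui = mutual_info_classif(X, y, random_state=1)
X_mui = X[:, np.argpartition(mui, -10)[-10:]]

# 3) model-based selection: features with non-zero L1-penalised SVC weights
lsvc = LinearSVC(C=0.01, penalty="l1", dual=False, random_state=1).fit(X, y)
X_l1 = SelectFromModel(lsvc, prefit=True).transform(X)

for label, Xs in [("all", X), ("KBest-10", X_kbest), ("mui-10", X_mui), ("l1-model", X_l1)]:
    print(f"{label:10s} {Xs.shape[1]:3d} features: {evaluate(Xs, y):.6f}")
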

130
big_test.py Normal file

@@ -0,0 +1,130 @@
import time
import warnings
import numpy as np
from stree import Stree
from sklearn.model_selection import KFold, cross_validate
from experimentation.Sets import Datasets
from experimentation.Utils import TextColor
kernels = ["linear", "sigmoid", "poly", "rbf"]
results = [
("balance-scale", 0.91072, 0.8456, 0.55824, 0.76864),
("balloons", 0.653333, 0.696667, 0.595, 0.581667),
("breast-cancer-wisc-diag", 0.968898, 0.94798, 0.920394, 0.972762),
("breast-cancer-wisc-prog", 0.802051, 0.762679, 0.755103, 0.773295),
("breast-cancer-wisc", 0.966661, 0.964666, 0.955221, 0.967809),
("breast-cancer", 0.734211, 0.71043, 0.736624, 0.731754),
("cardiotocography-10clases", 0.791487, 0.403616, 0.373194, 0.420877),
("cardiotocography-3clases", 0.900613, 0.798167, 0.84365, 0.81289),
("conn-bench-sonar-mines-rocks", 0.755528, 0.752439, 0.781243, 0.832091),
("cylinder-bands", 0.715049, 0.690042, 0.697238, 0.747613),
("dermatology", 0.966087, 0.531725, 0.576912, 0.381129),
("echocardiogram", 0.808832, 0.844501, 0.79245, 0.825299),
("fertility", 0.866, 0.88, 0.852, 0.88),
("haberman-survival", 0.735637, 0.733718, 0.728477, 0.731713),
("heart-hungarian", 0.817674, 0.807832, 0.811198, 0.823448),
("hepatitis", 0.796129, 0.781935, 0.806452, 0.825161),
("ilpd-indian-liver", 0.723498, 0.70739, 0.707907, 0.709788),
("ionosphere", 0.866056, 0.85528, 0.77293, 0.940744),
("iris", 0.965333, 0.832667, 0.952667, 0.952667),
("led-display", 0.703, 0.4156, 0.2601, 0.3011),
("libras", 0.747778, 0.165278, 0.108333, 0.177222),
("low-res-spect", 0.853102, 0.522254, 0.529979, 0.527917),
("lymphography", 0.773793, 0.547057, 0.547057, 0.547057),
("mammographic", 0.81915, 0.796662, 0.817173, 0.826747),
("molec-biol-promoter", 0.764416, 0.781039, 0.696017, 0.827143),
("musk-1", 0.843463, 0.732531, 0.900004, 0.895811),
("oocytes_merluccius_nucleus_4d", 0.810657, 0.702055, 0.714768, 0.770059),
("oocytes_merluccius_states_2f", 0.915365, 0.74883, 0.710081, 0.718894),
("oocytes_trisopterus_nucleus_2f", 0.800986, 0.674258, 0.690322, 0.799127),
("oocytes_trisopterus_states_5b", 0.916655, 0.602868, 0.637082, 0.588284),
("parkinsons", 0.882051, 0.839487, 0.864615, 0.874359),
("pima", 0.766651, 0.745009, 0.741266, 0.756369),
("pittsburg-bridges-MATERIAL", 0.791255, 0.854372, 0.830693, 0.846797),
("pittsburg-bridges-REL-L", 0.632238, 0.472, 0.509429, 0.484476),
("pittsburg-bridges-SPAN", 0.630234, 0.578129, 0.588596, 0.593275),
("pittsburg-bridges-T-OR-D", 0.861619, 0.85881, 0.867762, 0.86481),
("planning", 0.70455, 0.712207, 0.690751, 0.713258),
("post-operative", 0.675556, 0.711111, 0.711111, 0.711111),
("seeds", 0.949048, 0.890952, 0.9, 0.933333),
("statlog-australian-credit", 0.667246, 0.668261, 0.664638, 0.672319),
("statlog-german-credit", 0.7625, 0.7363, 0.7344, 0.758),
("statlog-heart", 0.822963, 0.838148, 0.830741, 0.827037),
("statlog-image", 0.952641, 0.383896, 0.420346, 0.379134),
("statlog-vehicle", 0.793028, 0.445035, 0.415464, 0.57556),
("synthetic-control", 0.938833, 0.511333, 0.439667, 0.5675),
("tic-tac-toe", 0.983296, 0.752095, 0.984028, 0.986324),
("vertebral-column-2clases", 0.852903, 0.812581, 0.730323, 0.845161),
("wine", 0.97581, 0.571635, 0.562397, 0.926175),
("zoo", 0.947619, 0.590667, 0.664095, 0.523524),
]
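# 5-fold cross-validation repeated with every random seed; returns the
# per-fold test scores and fit times for the given dataset and kernel.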
def process_dataset(dataset, kernel):
X, y = dt.load(dataset)
scores = []
times = []
for random_state in random_seeds:
kfold = KFold(shuffle=True, random_state=random_state, n_splits=5)
clf = Stree(kernel=kernel, random_state=random_state)
res = cross_validate(clf, X, y, cv=kfold, return_estimator=True)
scores.append(res["test_score"])
times.append(res["fit_time"])
return scores, times
random_seeds = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
dt = Datasets(normalize=False, standardize=False, set_of_files="tanveer")
start = time.time()
print(
TextColor.MAGENTA
+ "Testing all datasets accuracies with default hyperparameters and all "
"kernels"
)
print(f"5 Fold Cross Validation with 10 random seeds {random_seeds}\n")
header_cols = [
"Dataset",
"Linear",
"Sigmoid",
"Poly",
"RBF",
]
header_lengths = [30, 7, 7, 7, 7, 7]
line_col = ""
mistakes = correct = 0
check_mark = "\N{heavy check mark}"
cross_mark = "\N{heavy ballot x}"
for field, underscore in zip(header_cols, header_lengths):
print(f"{field:{underscore}s} ", end="", flush=True)
line_col += "=" * underscore + " "
print(f"\n{line_col}")
color = ""
warnings.filterwarnings("ignore", message="Solver terminated early")
for name, linear, sigmoid, poly, rbf in results:
color = TextColor.LINE1 if color == TextColor.LINE2 else TextColor.LINE2
results_dataset = dict(linear=linear, sigmoid=sigmoid, poly=poly, rbf=rbf)
X, y = dt.load(name)
print(
color + f"{name:30s} ",
end="",
)
for kernel in kernels:
scores, times = process_dataset(name, kernel)
if round(np.mean(scores), 6) != results_dataset[kernel]:
mistakes += 1
item = cross_mark
item_color = TextColor.FAIL
else:
correct += 1
item = check_mark
item_color = TextColor.SUCCESS
item = item.center(7)
print(item_color + f"{item} ", end="", flush=True)
print("")
print(TextColor.SUCCESS + f"Correct results : {correct:3d}")
print(TextColor.FAIL + f"Mistaken results: {mistakes:3d}")
stop = time.time()
hours, rem = divmod(stop - start, 3600)
minutes, seconds = divmod(rem, 60)
print(color + f"Time: {int(hours):2d}h {int(minutes):2d}m {int(seconds):2d}s")


@@ -51,6 +51,9 @@ def parse_arguments():
type=int,
required=True,
)
ap.add_argument(
"-p", "--parameters", type=str, required=False, default="{}"
)
args = ap.parse_args()
return (
args.set_of_files,
@@ -58,6 +61,7 @@ def parse_arguments():
args.dataset,
args.sql,
bool(args.normalize),
args.parameters,
)
@@ -72,7 +76,7 @@ def get_classifier(model, random_state, hyperparameters):
return clf
def process_dataset(dataset, verbose, model):
def process_dataset(dataset, verbose, model, params):
X, y = dt.load(dataset)
scores = []
times = []
@@ -87,7 +91,7 @@ def process_dataset(dataset, verbose, model):
print(f"X.shape: {X.shape}")
print(f"{X[:4]}")
print(f"Random seeds: {random_seeds}")
hyperparameters = json.loads("{}")
hyperparameters = json.loads(params)
if model == "stree":
# Get the optimized parameters
record = dbh.find_best(dataset, model, "gridsearch")
@@ -176,7 +180,7 @@ def store_string(
random_seeds = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
standardize = False
(set_of_files, model, dataset, sql, normalize) = parse_arguments()
(set_of_files, model, dataset, sql, normalize, parameters) = parse_arguments()
dbh = MySQL()
if sql:
sql_output = open(f"{model}.sql", "w")
@@ -216,7 +220,7 @@ if dataset == "all":
end="",
)
scores, times, hyperparameters, nodes, leaves, depth = process_dataset(
dataset[0], verbose=False, model=model
dataset[0], verbose=False, model=model, params=parameters
)
complexity = dict(
nodes=float(np.mean(nodes)),
@@ -237,7 +241,7 @@ if dataset == "all":
print(command, file=sql_output)
else:
scores, times, hyperparameters, nodes, leaves, depth = process_dataset(
dataset, verbose=True, model=model
dataset, verbose=True, model=model, params=parameters
)
record = dbh.find_best(dataset, model, "crossval")
accuracy = np.mean(scores)
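
The hunks above add a -p/--parameters option: its JSON string reaches process_dataset() as params and is turned into the hyperparameter dict via json.loads(params). A stripped-down sketch of that flow (the flag name comes from the diff, everything else is illustrative):

import argparse
import json

ap = argparse.ArgumentParser()
ap.add_argument("-p", "--parameters", type=str, required=False, default="{}")
# simulate: script.py -p '{"kernel": "rbf", "C": 7}'
args = ap.parse_args(["-p", '{"kernel": "rbf", "C": 7}'])
hyperparameters = json.loads(args.parameters)
print(hyperparameters)  # -> {'kernel': 'rbf', 'C': 7}
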


@@ -2,14 +2,14 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from stree import Stree\n",
"from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier\n",
"from sklearn.model_selection import GridSearchCV\n",
"from experimentation.Sets import Dataset\n",
"from experimentation.Sets import Datasets\n",
"from sklearn.svm import SVC"
]
},
@@ -698,6 +698,81 @@
"res"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"balance-scale ..: 0.91680 0.91680 Ok\n",
"balloons ..: 0.87500 0.87500 Ok\n",
"breast-cancer-wisc-diag ..: 0.99297 0.99297 Ok\n",
"breast-cancer-wisc-prog ..: 0.91919 0.91919 Ok\n",
"breast-cancer-wisc ..: 0.98140 0.98140 Ok\n",
"breast-cancer ..: 0.82168 0.82168 Ok\n",
"cardiotocography-10clases ..: 0.92897 0.92897 Ok\n",
"cardiotocography-3clases ..: 0.94685 0.94685 Ok\n",
"conn-bench-sonar-mines-rocks ..: 1.00000 1.00000 Ok\n",
"cylinder-bands ..: 0.86914 0.86914 Ok\n",
"dermatology ..: 1.00000 1.00000 Ok\n",
"echocardiogram ..: 0.87023 0.87023 Ok\n",
"fertility ..: 0.88000 0.88000 Ok\n",
"haberman-survival ..: 0.81699 0.81699 Ok\n",
"heart-hungarian ..: 0.88095 0.88095 Ok\n",
"hepatitis ..: 1.00000 1.00000 Ok\n",
"ilpd-indian-liver ..: 0.75986 0.75986 Ok\n",
"ionosphere ..: 0.98575 0.98575 Ok\n",
"iris ..: 0.98000 0.98000 Ok\n",
"led-display ..: 0.74700 0.74700 Ok\n",
"libras ..: 1.00000 1.00000 Ok\n",
"low-res-spect ..: 1.00000 1.00000 Ok\n",
"lymphography ..: 1.00000 1.00000 Ok\n",
"mammographic ..: 0.82622 0.82622 Ok\n",
"molec-biol-promoter ..: 1.00000 1.00000 Ok\n",
"musk-1 ..: 1.00000 1.00000 Ok\n",
"oocytes_merluccius_nucleus_4d ..: 0.84736 0.84736 Ok\n",
"oocytes_merluccius_states_2f ..: 0.95401 0.95401 Ok\n",
"oocytes_trisopterus_nucleus_2f ..: 0.87500 0.87500 Ok\n",
"oocytes_trisopterus_states_5b ..: 0.93202 0.93202 Ok\n",
"parkinsons ..: 0.94359 0.94359 Ok\n",
"pima ..: 0.78776 0.78776 Ok\n",
"pittsburg-bridges-MATERIAL ..: 0.95283 0.95283 Ok\n",
"pittsburg-bridges-REL-L ..: 0.80583 0.80583 Ok\n",
"pittsburg-bridges-SPAN ..: 0.82609 0.82609 Ok\n",
"pittsburg-bridges-T-OR-D ..: 0.91176 0.91176 Ok\n",
"planning ..: 0.72527 0.72527 Ok\n",
"post-operative ..: 0.72222 0.72222 Ok\n",
"seeds ..: 0.98095 0.98095 Ok\n",
"statlog-australian-credit ..: 0.67971 0.67971 Ok\n",
"statlog-german-credit ..: 0.82500 0.82500 Ok\n",
"statlog-heart ..: 0.88519 0.88519 Ok\n",
"statlog-image ..: 0.96277 0.96277 Ok\n",
"statlog-vehicle ..: 0.83570 0.83570 Ok\n",
"synthetic-control ..: 1.00000 1.00000 Ok\n",
"tic-tac-toe ..: 0.98330 0.98330 Ok\n",
"vertebral-column-2clases ..: 0.87097 0.87097 Ok\n",
"wine ..: 1.00000 1.00000 Ok\n",
"zoo ..: 1.00000 1.00000 Ok\n"
]
}
],
"source": [
"clf = Stree(random_state=1)\n",
"dt = Datasets(normalize=False, standardize=False, set_of_files=\"tanveer\")\n",
"for item in dt:\n",
" name = item[0]\n",
" X, y = dt.load(name)\n",
" clf.fit(X, y)\n",
" accuracy = clf.score(X, y)\n",
" accuracy2 = clf.score2(X, y)\n",
" status = \"Error\" if accuracy != accuracy2 else \"Ok\"\n",
" print(f\"{name:30s} ..: {accuracy:.5f} {accuracy2:.5f} {status}\")\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -722,7 +797,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
"version": "3.9.2"
}
},
"nbformat": 4,