From a2561072a5c3d533d7ef0b9de23da0879d178bd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Monta=C3=B1ana?= Date: Mon, 14 Nov 2022 19:25:38 +0100 Subject: [PATCH] Remove ipnb checkpoints --- .ipynb_checkpoints/test-checkpoint.ipynb | 635 ----------------------- 1 file changed, 635 deletions(-) delete mode 100644 .ipynb_checkpoints/test-checkpoint.ipynb diff --git a/.ipynb_checkpoints/test-checkpoint.ipynb b/.ipynb_checkpoints/test-checkpoint.ipynb deleted file mode 100644 index e92fe3c..0000000 --- a/.ipynb_checkpoints/test-checkpoint.ipynb +++ /dev/null @@ -1,635 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "afc3548e-91c2-4443-bd96-457a57a202cc", - "metadata": {}, - "outputs": [], - "source": [ - "from mdlp import MDLP\n", - "import pandas as pd\n", - "from benchmark import Datasets\n", - "from bayesclass import TAN" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "8ff3f4d6-e681-4252-ac4d-dc5bd14dcede", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RINaMgAlSi'K'CaBaFeType
01.5179312.793.501.1273.030.648.770.00.000
11.5164312.163.521.3572.890.578.530.00.001
21.5179313.213.481.4172.640.598.430.00.000
31.5129914.401.741.5474.550.007.590.00.002
41.5339312.300.001.0070.160.1216.190.00.243
\n", - "
" - ], - "text/plain": [ - " RI Na Mg Al Si 'K' Ca Ba Fe Type\n", - "0 1.51793 12.79 3.50 1.12 73.03 0.64 8.77 0.0 0.00 0\n", - "1 1.51643 12.16 3.52 1.35 72.89 0.57 8.53 0.0 0.00 1\n", - "2 1.51793 13.21 3.48 1.41 72.64 0.59 8.43 0.0 0.00 0\n", - "3 1.51299 14.40 1.74 1.54 74.55 0.00 7.59 0.0 0.00 2\n", - "4 1.53393 12.30 0.00 1.00 70.16 0.12 16.19 0.0 0.24 3" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Get data as a dataset\n", - "dt = Datasets()\n", - "data = dt.load(\"glass\", dataframe=True)\n", - "features = dt.dataset.features\n", - "class_name = dt.dataset.class_name\n", - "factorization, class_factors = pd.factorize(data[class_name])\n", - "data[class_name] = factorization\n", - "data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "7c9e1eae-6a66-4930-a125-f9f3def45574", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RINaMgAlSi'K'CaBaFeType
030.014.016.018.038.032.014.00.00.00
117.03.018.021.034.024.010.00.00.01
230.024.015.022.022.027.06.00.00.00
33.051.06.023.047.00.03.00.00.02
462.04.00.013.00.08.030.00.05.03
.................................
20913.033.011.019.023.027.04.00.00.01
21011.019.018.029.023.033.03.00.00.03
21114.041.018.020.034.014.03.00.05.03
21220.08.08.023.042.033.011.00.00.03
21343.046.06.023.023.00.015.00.00.02
\n", - "

214 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " RI Na Mg Al Si 'K' Ca Ba Fe Type\n", - "0 30.0 14.0 16.0 18.0 38.0 32.0 14.0 0.0 0.0 0\n", - "1 17.0 3.0 18.0 21.0 34.0 24.0 10.0 0.0 0.0 1\n", - "2 30.0 24.0 15.0 22.0 22.0 27.0 6.0 0.0 0.0 0\n", - "3 3.0 51.0 6.0 23.0 47.0 0.0 3.0 0.0 0.0 2\n", - "4 62.0 4.0 0.0 13.0 0.0 8.0 30.0 0.0 5.0 3\n", - ".. ... ... ... ... ... ... ... ... ... ...\n", - "209 13.0 33.0 11.0 19.0 23.0 27.0 4.0 0.0 0.0 1\n", - "210 11.0 19.0 18.0 29.0 23.0 33.0 3.0 0.0 0.0 3\n", - "211 14.0 41.0 18.0 20.0 34.0 14.0 3.0 0.0 5.0 3\n", - "212 20.0 8.0 8.0 23.0 42.0 33.0 11.0 0.0 0.0 3\n", - "213 43.0 46.0 6.0 23.0 23.0 0.0 15.0 0.0 0.0 2\n", - "\n", - "[214 rows x 10 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Fayyad Irani\n", - "discretiz = MDLP()\n", - "Xdisc = discretiz.fit_transform(\n", - " data[features].to_numpy(), data[class_name].to_numpy()\n", - ")\n", - "features_discretized = pd.DataFrame(Xdisc, columns=features)\n", - "dataset_discretized = features_discretized.copy()\n", - "dataset_discretized[class_name] = data[class_name]\n", - "X = dataset_discretized[features]\n", - "y = dataset_discretized[class_name]\n", - "dataset_discretized" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "2840a103-99fb-466f-ae75-45e11c1b9c5a", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import cross_validate, StratifiedKFold, KFold, cross_val_score\n", - "import numpy as np\n", - "n_folds = 5\n", - "score_name = \"accuracy\"\n", - "random_state=17\n", - "def validate_classifier(model, X, y, stratified, fit_params):\n", - " stratified_class = StratifiedKFold if stratified else KFold\n", - " kfold = stratified_class(shuffle=True, random_state=random_state, n_splits=n_folds)\n", - " #return cross_validate(model, X, y, cv=kfold, return_estimator=True, scoring=score_name)\n", - " return cross_val_score(model, X, y, fit_params=fit_params)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "6a1aad95-370f-4854-ae9a-32205aff5d39", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b620372c05294afc853885da0848e389", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/43 [00:00 9\u001b[0m model\u001b[38;5;241m.\u001b[39mplot(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msimple_init=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msimple_init\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m head=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhead\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m score=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnp\u001b[38;5;241m.\u001b[39mmean(score[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtest_score\u001b[39m\u001b[38;5;124m'\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mIndexError\u001b[0m: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices" - ] - } - ], - "source": [ - "import warnings\n", - "from stree import Stree\n", - "warnings.filterwarnings('ignore')\n", - "for simple_init in [False, True]:\n", - " model = TAN(simple_init=simple_init)\n", - " for head in range(4):\n", - " #model.fit(X, y, head=head, features=features, class_name=class_name)\n", - " score = validate_classifier(model, X, y, stratified=False, fit_params=dict(head=head, features=features, class_name=class_name))\n", - " #model.plot(f\"simple_init={simple_init} head={head} score={np.mean(score['test_score'])}\")\n", - " model.plot(f\"simple_init={simple_init} head={head} score={np.mean(score)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c389ff1e-76d9-4c5b-9860-ea6d4752fac7", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9c58629f-000b-4d8c-8896-efd032f1090c", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" - }, - "vscode": { - "interpreter": { - "hash": "a5f800306069c11c1b9a793f47dfeb8c7d63d06a771fda00cf3476e3d4088a52" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}