From e8b04f41e47df923a7d9cf7b1db527db1bc6bfb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Sun, 29 Nov 2020 23:21:46 +0100 Subject: [PATCH] Add server jobs control scripts --- core.txt | 22 ++++ datasets.txt | 75 +++++++++----- experimentation/.myconfig.dist | 5 + hardCore.txt | 6 ++ kite_tutorial.ipynb | 182 --------------------------------- scripts/.gitignore | 3 + scripts/experiment.template | 16 +++ scripts/genall.sh | 9 ++ scripts/genjobs.sh | 32 ++++++ scripts/interactive.sh | 2 + scripts/launchsome.sh | 40 ++++++++ scripts/notebook.sh | 4 + scripts/rmscripts.sh | 4 + test_arff.py | 48 --------- 14 files changed, 194 insertions(+), 254 deletions(-) create mode 100644 core.txt create mode 100644 experimentation/.myconfig.dist create mode 100644 hardCore.txt delete mode 100644 kite_tutorial.ipynb create mode 100644 scripts/.gitignore create mode 100644 scripts/experiment.template create mode 100755 scripts/genall.sh create mode 100755 scripts/genjobs.sh create mode 100755 scripts/interactive.sh create mode 100755 scripts/launchsome.sh create mode 100755 scripts/notebook.sh create mode 100755 scripts/rmscripts.sh delete mode 100644 test_arff.py diff --git a/core.txt b/core.txt new file mode 100644 index 0000000..4894912 --- /dev/null +++ b/core.txt @@ -0,0 +1,22 @@ +breast-cancer-wisc-diag +balance-scale +breast-cancer-wisc-prog +cardiotocography-10clases +cardiotocography-3clases +cylinder-bands +dermatology +ilpd-indian-liver +ionosphere +led-display +mammographic +oocytes_merluccius_nucleus_4d +oocytes_merluccius_states_2f +oocytes_trisopterus_nucleus_2f +oocytes_trisopterus_states_5b +pima +statlog-australian-credit +statlog-german-credit +statlog-image +statlog-vehicle +tic-tac-toe +zoo \ No newline at end of file diff --git a/datasets.txt b/datasets.txt index 8be5fc4..54dcfb9 100644 --- a/datasets.txt +++ b/datasets.txt @@ -1,24 +1,51 @@ -(#) Dataset Samples Feat. #Cl. y typ X type f_type -=== ========= ======== ===== ==== ===== ======= ====== -#01 breast 683 9 2 int16 int16 csv -#02 cardiotoc 2,126 41 3 int16 int16 csv -#03 cod-rna 331,152 8 2 int16 float16 sparse -#04 connect4 67,557 126 3 int16 int16 sparse -#05 covtype 581,012 54 7 int16 int16 npz -#06 diabetes 768 8 2 int16 float16 csv -#07 dna 3,186 180 3 int16 float16 csv -#08 fourclass 862 2 2 int16 int16 sparse -#09 glass 214 9 6 int16 float16 csv -#10 heart 270 13 2 int16 float16 csv -#11 ijcnn1 141,691 22 2 int16 float16 sparse -#12 iris 150 4 3 int16 float16 csv -#13 letter 20,000 16 26 int16 int16 npz -#14 mnist 70,000 784 10 int16 int16 npy -#15 pendigits 10,992 16 10 int16 int16 npy -#16 protein 24,387 357 3 int16 float16 sparse -#17 satimage 6,435 36 6 int16 int16 npy -#18 segment 2,310 19 7 int16 float16 sparse -#19 shuttle 58,000 9 7 int16 int16 npy -#20 usps 9,298 256 10 int16 float16 npz -#21 vehicle 846 18 4 int16 float16 sparse -#22 wine 178 13 3 int16 float16 csv \ No newline at end of file +(#) Dataset Samples Feat. #Cl. y typ X type f_type +=== ============================= ======== ===== ==== ===== ======= ====== +#01 balance-scale 625 4 3 int64 float64 Rdat +#02 balloons 16 4 2 int64 float64 Rdat +#03 breast-cancer-wisc-diag 569 30 2 int64 float64 Rdat +#04 breast-cancer-wisc-prog 198 33 2 int64 float64 Rdat +#05 breast-cancer-wisc 699 9 2 int64 float64 Rdat +#06 breast-cancer 286 9 2 int64 float64 Rdat +#07 cardiotocography-10clases 2,126 21 10 int64 float64 Rdat +#08 cardiotocography-3clases 2,126 21 3 int64 float64 Rdat +#09 conn-bench-sonar-mines-rocks 208 60 2 int64 float64 Rdat +#10 cylinder-bands 512 35 2 int64 float64 Rdat +#11 dermatology 366 34 6 int64 float64 Rdat +#12 echocardiogram 131 10 2 int64 float64 Rdat +#13 fertility 100 9 2 int64 float64 Rdat +#14 haberman-survival 306 3 2 int64 float64 Rdat +#15 heart-hungarian 294 12 2 int64 float64 Rdat +#16 hepatitis 155 19 2 int64 float64 Rdat +#17 ilpd-indian-liver 583 9 2 int64 float64 Rdat +#18 ionosphere 351 33 2 int64 float64 Rdat +#19 iris 150 4 3 int64 float64 Rdat +#20 led-display 1,000 7 10 int64 float64 Rdat +#21 libras 360 90 15 int64 float64 Rdat +#22 low-res-spect 531 100 9 int64 float64 Rdat +#23 lymphography 148 18 4 int64 float64 Rdat +#24 mammographic 961 5 2 int64 float64 Rdat +#25 molec-biol-promoter 106 57 2 int64 float64 Rdat +#26 musk-1 476 166 2 int64 float64 Rdat +#27 oocytes_merluccius_nucleus_4d 1,022 41 2 int64 float64 Rdat +#28 oocytes_merluccius_states_2f 1,022 25 3 int64 float64 Rdat +#29 oocytes_trisopterus_nucleus_2f 912 25 2 int64 float64 Rdat +#30 oocytes_trisopterus_states_5b 912 32 3 int64 float64 Rdat +#31 parkinsons 195 22 2 int64 float64 Rdat +#32 pima 768 8 2 int64 float64 Rdat +#33 pittsburg-bridges-MATERIAL 106 7 3 int64 float64 Rdat +#34 pittsburg-bridges-REL-L 103 7 3 int64 float64 Rdat +#35 pittsburg-bridges-SPAN 92 7 3 int64 float64 Rdat +#36 pittsburg-bridges-T-OR-D 102 7 2 int64 float64 Rdat +#37 planning 182 12 2 int64 float64 Rdat +#38 post-operative 90 8 3 int64 float64 Rdat +#39 seeds 210 7 3 int64 float64 Rdat +#40 statlog-australian-credit 690 14 2 int64 float64 Rdat +#41 statlog-german-credit 1,000 24 2 int64 float64 Rdat +#42 statlog-heart 270 13 2 int64 float64 Rdat +#43 statlog-image 2,310 18 7 int64 float64 Rdat +#44 statlog-vehicle 846 18 4 int64 float64 Rdat +#45 synthetic-control 600 60 6 int64 float64 Rdat +#46 tic-tac-toe 958 9 2 int64 float64 Rdat +#47 vertebral-column-2clases 310 6 2 int64 float64 Rdat +#48 wine 178 13 3 int64 float64 Rdat +#49 zoo 101 16 7 int64 float64 Rdat \ No newline at end of file diff --git a/experimentation/.myconfig.dist b/experimentation/.myconfig.dist new file mode 100644 index 0000000..6246c89 --- /dev/null +++ b/experimentation/.myconfig.dist @@ -0,0 +1,5 @@ +host= +port=3306 +user=stree +password= +database=stree_experiments \ No newline at end of file diff --git a/hardCore.txt b/hardCore.txt new file mode 100644 index 0000000..68cc89d --- /dev/null +++ b/hardCore.txt @@ -0,0 +1,6 @@ +molec-biol-promoter +musk-1 +conn-bench-sonar-mines-rocks +libras +low-res-spect +synthetic-control \ No newline at end of file diff --git a/kite_tutorial.ipynb b/kite_tutorial.ipynb deleted file mode 100644 index ddc3764..0000000 --- a/kite_tutorial.ipynb +++ /dev/null @@ -1,182 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Logo](https://kite.com/kite-public/kite-plus-jlab-scaled.png)\n", - "\n", - "### Welcome to Kite's JupyterLab extension tutorial\n", - "\n", - "Kite gives you **ML-powered autocompletions** and **rich documentation** inside JupyterLab. This guide will teach you everything you need to know about Kite in 5 minutes or less.\n", - "\n", - "> 💡 _**Tip:** You can open this file at any time with the command `Kite: Open Tutorial` in JupyterLab's command palette._\n", - "\n", - "#### Before we start...\n", - "\n", - "Make sure that the Kite icon at the bottom of the window reads `Kite: ready`.\n", - "\n", - "![Kite icon](https://kite.com/kite-public/kite-status.png)\n", - "\n", - "* If it says `Kite: not running`, please start the Kite Engine first.\n", - "* If it says `Kite: not installed`, please [download and install Kite](https://kite.com/download) first." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Part 1: Autocompletions\n", - "\n", - "**Step 1a**
\n", - "Run the code cell below with all the necessary imports 👇" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Run me!\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Step 1b**
\n", - "Let's try typing out some code to plot a sine graph. As you type, Kite will automatically show you completions for what you're going to type next.\n", - "\n", - "![Autocompletions](https://www.kite.com/kite-public/kite-jlab-autocompletions.gif)\n", - "\n", - "> 💡 _**Tip:** You can turn completions docs on and off in JupyterLab's command palette with the command `Kite: Toggle Docs Panel`._\n", - "\n", - "> 💡 _**Tip:** The starred completions ★ are from Kite Pro. You can [start your free Kite Pro trial](https://www.kite.com/pro/trial/) anytime. Afterwards, if you choose not to upgrade, you can still use Kite 100% for free._\n", - "\n", - "Try typing out the code yourself to see Kite's autocompletions in action.
\n", - "\n", - "```python\n", - "x = np.linspace(-np.pi, np.pi, 50)\n", - "y = np.sin(x)\n", - "plt.plot(x, y)\n", - "```\n", - "\n", - "Type this code in the cell below 👇" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Put code in me\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Part 2: Manual completions\n", - "\n", - "You can still use JupyterLab's builtin kernel completions. These are particularly useful when you need to access a `DataFrame`'s column names.\n", - "\n", - "**Step 2a**
\n", - "First, run the code cell below to get some sample data to store in a `DataFrame` 👇" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Run me!\n", - "url = 'https://kite.com/kite-public/iris.csv'\n", - "df = pd.read_csv(url)\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Step 2b**
\n", - "Let's plot a scatter graph of sepal length vs. sepal width. When you are accessing a `DataFrame`'s columns, you'll still need to hit `tab` to request completions from the kernel.\n", - "\n", - "![Manual completions](https://www.kite.com/kite-public/kite-jlab-manual-completions.gif)\n", - "\n", - "Try requesting kernel completions yourself.\n", - "\n", - "```python\n", - "plt.scatter(df['sepal_length'], df['sepal_width'])\n", - "```\n", - "\n", - "Type this code in the cell below, making sure to hit `tab` when you are filling in the column names 👇" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Put code in me\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Part 3: Copilot Documentation\n", - "\n", - "If you've enabled \"docs following cursor\" in the Copilot, the Copilot will automatically update with the documentation of the identifier underneath your cursor.\n", - "\n", - "![Autosearch](https://www.kite.com/kite-public/kite-jlab-autosearch.gif)\n", - "\n", - "**Step 3a**
\n", - "Try it yourself! Just click around in the code cells of this notebook and see the Copilot update automatically." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### The End\n", - "\n", - "Now you know everything you need to know about Kite's JupyterLab plugin. Kite is under active development and we expect to ship improvements and more features in the near future.\n", - "\n", - "In the meantime, if you experience bugs or have feature requests, feel free to open an issue in our [public GitHub repo](https://github.com/kiteco/issue-tracker).\n", - "\n", - "Happy coding!" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.2" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/scripts/.gitignore b/scripts/.gitignore new file mode 100644 index 0000000..f68c005 --- /dev/null +++ b/scripts/.gitignore @@ -0,0 +1,3 @@ +gridsearch/* +gridbest/* +cross/* \ No newline at end of file diff --git a/scripts/experiment.template b/scripts/experiment.template new file mode 100644 index 0000000..a8a4311 --- /dev/null +++ b/scripts/experiment.template @@ -0,0 +1,16 @@ +#!/bin/bash +### Nombre de trabajo +#PBS -N --- +### Tiempo máximo de ejecución del trabajo +#PBS -l walltime=96:00:00 +### Seleccion de cola de trabajos +#PBS -q workq +### mezcla errores con la salida principal +#PBS -j oe +### Recursos +#PBS -l select=2:ncpus=2:mem=16Gb +### Esportar variables de entorno +#PBS -V +### Ejecutable con sus parametros +cd +python experiment.py -H galgo -e -m -d -S tanveer -k -n 1 \ No newline at end of file diff --git a/scripts/genall.sh b/scripts/genall.sh new file mode 100755 index 0000000..a687db8 --- /dev/null +++ b/scripts/genall.sh @@ -0,0 +1,9 @@ +#!/bin/bash +for i in gridsearch gridbest cross; do + echo "*** Building $i experiments" + for j in stree odte bagging adaBoost; do + for k in linear poly rbf; do + ./genjobs.sh $i $j $k + done + done +done \ No newline at end of file diff --git a/scripts/genjobs.sh b/scripts/genjobs.sh new file mode 100755 index 0000000..25782ea --- /dev/null +++ b/scripts/genjobs.sh @@ -0,0 +1,32 @@ +#!/bin/bash +if [ "$1" = "" -o "$2" = "" -o "$3" = "" ] ; then + echo "Hay que seleccionar:" + echo " - el tipo de experimento {gridsearch, gridbest, cross}" + echo " - el modelo {stree, adaBoost, bagging, odte}" + echo " - el kernel {linear, poly, rbf, any}" + exit 1 +fi +if [[ ! "gridsearchgridbestcross" == *$1* ]] ; then + echo "Hay que seleccionar el tipo de experimento {gridsearch, gridbest, cross}" + exit 1 +fi +if [[ ! "streeadaBoostbaggingodte" == *$2* ]] ; then + echo "Hay que seleccionar el modelo {stree, adaBoost, bagging, odte}" + exit 1 +fi +if [[ ! "linearpolyrbfany" == *$3* ]] ; then + echo "Hay que seleccionar el kernel {linear, poly, rbf, any}" + exit 1 +fi +script_path="$(pwd)/.." +cp experiment.template experiment_$1.sh +perl -i -pe"s//$2/g" experiment_$1.sh +perl -i -pe"s~~$script_path~g" experiment_$1.sh +perl -i -pe"s//$1/g" experiment_$1.sh +mkdir -p $1/$2/$3 +cat ../datasets.txt|cut -d " " -f 2|tail -49|while read a; do + cp experiment_$1.sh $1/$2/$3/experiment_$a.sh + perl -i -pe"s//$a/g" $1/$2/$3/experiment_$a.sh + perl -i -pe"s//$3/g" $1/$2/$3/experiment_$a.sh +done +rm experiment_$1.sh \ No newline at end of file diff --git a/scripts/interactive.sh b/scripts/interactive.sh new file mode 100755 index 0000000..568fc25 --- /dev/null +++ b/scripts/interactive.sh @@ -0,0 +1,2 @@ +#!/bin/bash +qsub -I -l select=2:ncpus=8:mem=16Gb \ No newline at end of file diff --git a/scripts/launchsome.sh b/scripts/launchsome.sh new file mode 100755 index 0000000..9bf893b --- /dev/null +++ b/scripts/launchsome.sh @@ -0,0 +1,40 @@ +#!/bin/bash +if [ "$1" = "" -o "$2" = "" -o "$3" = "" -o "$4" = "" ] ; then + echo "Hay que seleccionar:" + echo " - el tipo de experimento {gridsearch, gridbest, cross}" + echo " - el modelo {stree, adaBoost, bagging, odte}" + echo " - el kernel {linear, poly, rbf, any}" + echo " - el archivo con nombres de datasets" + echo "opcionalmente al final: dry-run" + exit 1 +fi +if [[ ! "gridsearchgridbestcross" == *$1* ]] ; then + echo "Hay que seleccionar el tipo de experimento {gridsearch, gridbest, cross}" + exit 1 +fi +if [[ ! "streeadaBoostbaggingodte" == *$2* ]] ; then + echo "Hay que seleccionar el modelo {stree, adaBoost, bagging, odte}" + exit 1 +fi +if [[ ! "linearpolyrbfany" == *$3* ]] ; then + echo "Hay que seleccionar el kernel {linear, poly, rbf, any}" + exit 1 +fi +script_path="$(pwd)" +cd $1/$2/$3 +counter=0 +lines="$(cat $script_path/$4|cut -d " " -f 2|tail -49)" +for a in $lines; do + echo "launch experiment_$a.sh" + if [ "$5" = "dry-run" ] ; then + echo "not launched" + else + qsub experiment_$a.sh + fi + let counter++ +done +if [ "$5" = "dry-run" ] ; then + echo "Not launched $counter jobs" +else + echo "Launched $counter jobs" +fi \ No newline at end of file diff --git a/scripts/notebook.sh b/scripts/notebook.sh new file mode 100755 index 0000000..65c658d --- /dev/null +++ b/scripts/notebook.sh @@ -0,0 +1,4 @@ +#!/bin/bash +NOTEBOOKPORT=1234 +ssh -N -f -R $NOTEBOOKPORT:localhost:$NOTEBOOKPORT Ricardo.Montanana@galgo.uclm.es +jupyter lab --port=$NOTEBOOKPORT --no-browser \ No newline at end of file diff --git a/scripts/rmscripts.sh b/scripts/rmscripts.sh new file mode 100755 index 0000000..76ae1d3 --- /dev/null +++ b/scripts/rmscripts.sh @@ -0,0 +1,4 @@ +#!/bin/bash +for folder in gridsearch gridbest cross; do + find $folder -type f -exec rm {} \; +done \ No newline at end of file diff --git a/test_arff.py b/test_arff.py deleted file mode 100644 index eab873c..0000000 --- a/test_arff.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -import time -import numpy as np -import pandas as pd -from scipy.io import arff -from stree import Stree -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import train_test_split - -folder = ( - "/Volumes/Datos/OneDrive - Universidad de Castilla-La Mancha/" - "Doctorado2019/Compartida/FuentesDescargados/data-4/" -) - -name = "yeast" -random_state = 1 - -file_name = os.path.join(folder, name, f"{name}.arff") -data, meta = arff.loadarff(file_name) -df = pd.DataFrame(data) -y = df["clase"].to_numpy().astype(np.int16) -df.drop(columns="clase", inplace=True) -X = df.to_numpy().astype(np.float16) -print(f"Xshape {X.shape} Xtype {X.dtype}") -print(f"yshape {y.shape} ytype {y.dtype}") - -X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.3, random_state=random_state -) - -clf = Stree( - random_state=random_state, - C=1e5, - max_iter=1e5, - kernel="poly", - degree=5, - gamma=0.8, -) -now = time.time() -scores = cross_val_score(clf, X, y, cv=5) -print(f"Accuracy for {name}: {scores.mean():.2f} (+/- {scores.std() * 2:.2f})") -print(f"Took : {time.time() - now:.2f} seconds") -print(f"Score one tree all samples .: {clf.fit(X, y).score(X, y):.4f}") -print( - f"Score one tree train/test .: " - f"{clf.fit(X_train, y_train).score(X_test, y_test):.4f}" -) -print("*" * 80)