Add server jobs control scripts

This commit is contained in:
2020-11-29 23:21:46 +01:00
parent 2decec05fb
commit e8b04f41e4
14 changed files with 194 additions and 254 deletions

22
core.txt Normal file
View File

@@ -0,0 +1,22 @@
breast-cancer-wisc-diag
balance-scale
breast-cancer-wisc-prog
cardiotocography-10clases
cardiotocography-3clases
cylinder-bands
dermatology
ilpd-indian-liver
ionosphere
led-display
mammographic
oocytes_merluccius_nucleus_4d
oocytes_merluccius_states_2f
oocytes_trisopterus_nucleus_2f
oocytes_trisopterus_states_5b
pima
statlog-australian-credit
statlog-german-credit
statlog-image
statlog-vehicle
tic-tac-toe
zoo

View File

@@ -1,24 +1,51 @@
(#) Dataset Samples Feat. #Cl. y typ X type f_type
=== ========= ======== ===== ==== ===== ======= ======
#01 breast 683 9 2 int16 int16 csv
#02 cardiotoc 2,126 41 3 int16 int16 csv
#03 cod-rna 331,152 8 2 int16 float16 sparse
#04 connect4 67,557 126 3 int16 int16 sparse
#05 covtype 581,012 54 7 int16 int16 npz
#06 diabetes 768 8 2 int16 float16 csv
#07 dna 3,186 180 3 int16 float16 csv
#08 fourclass 862 2 2 int16 int16 sparse
#09 glass 214 9 6 int16 float16 csv
#10 heart 270 13 2 int16 float16 csv
#11 ijcnn1 141,691 22 2 int16 float16 sparse
#12 iris 150 4 3 int16 float16 csv
#13 letter 20,000 16 26 int16 int16 npz
#14 mnist 70,000 784 10 int16 int16 npy
#15 pendigits 10,992 16 10 int16 int16 npy
#16 protein 24,387 357 3 int16 float16 sparse
#17 satimage 6,435 36 6 int16 int16 npy
#18 segment 2,310 19 7 int16 float16 sparse
#19 shuttle 58,000 9 7 int16 int16 npy
#20 usps 9,298 256 10 int16 float16 npz
#21 vehicle 846 18 4 int16 float16 sparse
#22 wine 178 13 3 int16 float16 csv
(#) Dataset Samples Feat. #Cl. y typ X type f_type
=== ============================= ======== ===== ==== ===== ======= ======
#01 balance-scale 625 4 3 int64 float64 Rdat
#02 balloons 16 4 2 int64 float64 Rdat
#03 breast-cancer-wisc-diag 569 30 2 int64 float64 Rdat
#04 breast-cancer-wisc-prog 198 33 2 int64 float64 Rdat
#05 breast-cancer-wisc 699 9 2 int64 float64 Rdat
#06 breast-cancer 286 9 2 int64 float64 Rdat
#07 cardiotocography-10clases 2,126 21 10 int64 float64 Rdat
#08 cardiotocography-3clases 2,126 21 3 int64 float64 Rdat
#09 conn-bench-sonar-mines-rocks 208 60 2 int64 float64 Rdat
#10 cylinder-bands 512 35 2 int64 float64 Rdat
#11 dermatology 366 34 6 int64 float64 Rdat
#12 echocardiogram 131 10 2 int64 float64 Rdat
#13 fertility 100 9 2 int64 float64 Rdat
#14 haberman-survival 306 3 2 int64 float64 Rdat
#15 heart-hungarian 294 12 2 int64 float64 Rdat
#16 hepatitis 155 19 2 int64 float64 Rdat
#17 ilpd-indian-liver 583 9 2 int64 float64 Rdat
#18 ionosphere 351 33 2 int64 float64 Rdat
#19 iris 150 4 3 int64 float64 Rdat
#20 led-display 1,000 7 10 int64 float64 Rdat
#21 libras 360 90 15 int64 float64 Rdat
#22 low-res-spect 531 100 9 int64 float64 Rdat
#23 lymphography 148 18 4 int64 float64 Rdat
#24 mammographic 961 5 2 int64 float64 Rdat
#25 molec-biol-promoter 106 57 2 int64 float64 Rdat
#26 musk-1 476 166 2 int64 float64 Rdat
#27 oocytes_merluccius_nucleus_4d 1,022 41 2 int64 float64 Rdat
#28 oocytes_merluccius_states_2f 1,022 25 3 int64 float64 Rdat
#29 oocytes_trisopterus_nucleus_2f 912 25 2 int64 float64 Rdat
#30 oocytes_trisopterus_states_5b 912 32 3 int64 float64 Rdat
#31 parkinsons 195 22 2 int64 float64 Rdat
#32 pima 768 8 2 int64 float64 Rdat
#33 pittsburg-bridges-MATERIAL 106 7 3 int64 float64 Rdat
#34 pittsburg-bridges-REL-L 103 7 3 int64 float64 Rdat
#35 pittsburg-bridges-SPAN 92 7 3 int64 float64 Rdat
#36 pittsburg-bridges-T-OR-D 102 7 2 int64 float64 Rdat
#37 planning 182 12 2 int64 float64 Rdat
#38 post-operative 90 8 3 int64 float64 Rdat
#39 seeds 210 7 3 int64 float64 Rdat
#40 statlog-australian-credit 690 14 2 int64 float64 Rdat
#41 statlog-german-credit 1,000 24 2 int64 float64 Rdat
#42 statlog-heart 270 13 2 int64 float64 Rdat
#43 statlog-image 2,310 18 7 int64 float64 Rdat
#44 statlog-vehicle 846 18 4 int64 float64 Rdat
#45 synthetic-control 600 60 6 int64 float64 Rdat
#46 tic-tac-toe 958 9 2 int64 float64 Rdat
#47 vertebral-column-2clases 310 6 2 int64 float64 Rdat
#48 wine 178 13 3 int64 float64 Rdat
#49 zoo 101 16 7 int64 float64 Rdat

View File

@@ -0,0 +1,5 @@
host=<server>
port=3306
user=stree
password=<password>
database=stree_experiments

6
hardCore.txt Normal file
View File

@@ -0,0 +1,6 @@
molec-biol-promoter
musk-1
conn-bench-sonar-mines-rocks
libras
low-res-spect
synthetic-control

View File

@@ -1,182 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Logo](https://kite.com/kite-public/kite-plus-jlab-scaled.png)\n",
"\n",
"### Welcome to Kite's JupyterLab extension tutorial\n",
"\n",
"Kite gives you **ML-powered autocompletions** and **rich documentation** inside JupyterLab. This guide will teach you everything you need to know about Kite in 5 minutes or less.\n",
"\n",
"> 💡 _**Tip:** You can open this file at any time with the command `Kite: Open Tutorial` in JupyterLab's command palette._\n",
"\n",
"#### Before we start...\n",
"\n",
"Make sure that the Kite icon at the bottom of the window reads `Kite: ready`.\n",
"\n",
"![Kite icon](https://kite.com/kite-public/kite-status.png)\n",
"\n",
"* If it says `Kite: not running`, please start the Kite Engine first.\n",
"* If it says `Kite: not installed`, please [download and install Kite](https://kite.com/download) first."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Part 1: Autocompletions\n",
"\n",
"**Step 1a**<br/>\n",
"Run the code cell below with all the necessary imports 👇"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Run me!\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Step 1b**<br/>\n",
"Let's try typing out some code to plot a sine graph. As you type, Kite will automatically show you completions for what you're going to type next.\n",
"\n",
"![Autocompletions](https://www.kite.com/kite-public/kite-jlab-autocompletions.gif)\n",
"\n",
"> 💡 _**Tip:** You can turn completions docs on and off in JupyterLab's command palette with the command `Kite: Toggle Docs Panel`._\n",
"\n",
"> 💡 _**Tip:** The starred completions ★ are from Kite Pro. You can [start your free Kite Pro trial](https://www.kite.com/pro/trial/) anytime. Afterwards, if you choose not to upgrade, you can still use Kite 100% for free._\n",
"\n",
"Try typing out the code yourself to see Kite's autocompletions in action.<br/>\n",
"\n",
"```python\n",
"x = np.linspace(-np.pi, np.pi, 50)\n",
"y = np.sin(x)\n",
"plt.plot(x, y)\n",
"```\n",
"\n",
"Type this code in the cell below 👇"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Put code in me\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Part 2: Manual completions\n",
"\n",
"You can still use JupyterLab's builtin kernel completions. These are particularly useful when you need to access a `DataFrame`'s column names.\n",
"\n",
"**Step 2a**<br/>\n",
"First, run the code cell below to get some sample data to store in a `DataFrame` 👇"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Run me!\n",
"url = 'https://kite.com/kite-public/iris.csv'\n",
"df = pd.read_csv(url)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Step 2b**<br/>\n",
"Let's plot a scatter graph of sepal length vs. sepal width. When you are accessing a `DataFrame`'s columns, you'll still need to hit `tab` to request completions from the kernel.\n",
"\n",
"![Manual completions](https://www.kite.com/kite-public/kite-jlab-manual-completions.gif)\n",
"\n",
"Try requesting kernel completions yourself.\n",
"\n",
"```python\n",
"plt.scatter(df['sepal_length'], df['sepal_width'])\n",
"```\n",
"\n",
"Type this code in the cell below, making sure to hit `tab` when you are filling in the column names 👇"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Put code in me\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Part 3: Copilot Documentation\n",
"\n",
"If you've enabled \"docs following cursor\" in the Copilot, the Copilot will automatically update with the documentation of the identifier underneath your cursor.\n",
"\n",
"![Autosearch](https://www.kite.com/kite-public/kite-jlab-autosearch.gif)\n",
"\n",
"**Step 3a**<br/>\n",
"Try it yourself! Just click around in the code cells of this notebook and see the Copilot update automatically."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### The End\n",
"\n",
"Now you know everything you need to know about Kite's JupyterLab plugin. Kite is under active development and we expect to ship improvements and more features in the near future.\n",
"\n",
"In the meantime, if you experience bugs or have feature requests, feel free to open an issue in our [public GitHub repo](https://github.com/kiteco/issue-tracker).\n",
"\n",
"Happy coding!"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

3
scripts/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
gridsearch/*
gridbest/*
cross/*

View File

@@ -0,0 +1,16 @@
#!/bin/bash
# PBS job script template. The <experiment>, <data>, <model>, <kernel> and
# <folder> placeholders are replaced textually by genjobs.sh when the
# per-dataset job scripts are generated.
### Job name
#PBS -N <experiment>-<data>-<model>-<kernel>
### Maximum run time of the job
#PBS -l walltime=96:00:00
### Job queue selection
#PBS -q workq
### Merge errors into the main output
#PBS -j oe
### Resources
#PBS -l select=2:ncpus=2:mem=16Gb
### Export environment variables
#PBS -V
### Executable with its parameters
cd <folder>
python experiment.py -H galgo -e <experiment> -m <model> -d <data> -S tanveer -k <kernel> -n 1

9
scripts/genall.sh Executable file
View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Generate the job scripts of every experiment/model/kernel combination by
# calling genjobs.sh once per combination (run from the scripts directory).
experiments=(gridsearch gridbest cross)
models=(stree odte bagging adaBoost)
kernels=(linear poly rbf)

for experiment in "${experiments[@]}"; do
    echo "*** Building $experiment experiments"
    for model in "${models[@]}"; do
        for kernel in "${kernels[@]}"; do
            ./genjobs.sh "$experiment" "$model" "$kernel"
        done
    done
done

32
scripts/genjobs.sh Executable file
View File

@@ -0,0 +1,32 @@
#!/bin/bash
# Build the PBS job scripts of one experiment/model/kernel combination.
#
# Usage: genjobs.sh <experiment> <model> <kernel>
#   experiment: gridsearch | gridbest | cross
#   model:      stree | adaBoost | bagging | odte
#   kernel:     linear | poly | rbf | any
#
# For every dataset row of ../datasets.txt a copy of experiment.template is
# written to <experiment>/<model>/<kernel>/experiment_<dataset>.sh with the
# <experiment>, <model>, <kernel>, <data> and <folder> placeholders filled in.
# Must be run from the directory that contains experiment.template.
# Exits with status 1 on missing/invalid arguments or if the template cannot
# be copied.

if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
    echo "Hay que seleccionar:"
    echo " - el tipo de experimento {gridsearch, gridbest, cross}"
    echo " - el modelo {stree, adaBoost, bagging, odte}"
    echo " - el kernel {linear, poly, rbf, any}"
    exit 1
fi

# Exact matches only: a substring test against the concatenated names would
# also accept fragments such as "grid" or glob characters such as "*".
case "$1" in
    gridsearch|gridbest|cross) ;;
    *)
        echo "Hay que seleccionar el tipo de experimento {gridsearch, gridbest, cross}"
        exit 1
        ;;
esac
case "$2" in
    stree|adaBoost|bagging|odte) ;;
    *)
        echo "Hay que seleccionar el modelo {stree, adaBoost, bagging, odte}"
        exit 1
        ;;
esac
case "$3" in
    linear|poly|rbf|any) ;;
    *)
        echo "Hay que seleccionar el kernel {linear, poly, rbf, any}"
        exit 1
        ;;
esac

script_path="$(pwd)/.."
base_script="experiment_$1.sh"
target_dir="$1/$2/$3"

# Intermediate copy of the template with the dataset-independent placeholders
# resolved; stop here if the template is not available.
cp experiment.template "$base_script" || exit 1
perl -i -pe"s/<model>/$2/g" "$base_script"
# "~" is used as delimiter because the folder path contains slashes.
perl -i -pe"s~<folder>~$script_path~g" "$base_script"
perl -i -pe"s/<experiment>/$1/g" "$base_script"

mkdir -p "$target_dir"
# Data rows of datasets.txt look like "#01 name ..."; selecting them by that
# prefix skips the header lines without hard-coding the number of datasets.
grep -E '^#[0-9]+ ' ../datasets.txt | cut -d " " -f 2 | while read -r a; do
    cp "$base_script" "$target_dir/experiment_$a.sh"
    perl -i -pe"s/<data>/$a/g" "$target_dir/experiment_$a.sh"
    perl -i -pe"s/<kernel>/$3/g" "$target_dir/experiment_$a.sh"
done
rm "$base_script"

2
scripts/interactive.sh Executable file
View File

@@ -0,0 +1,2 @@
#!/bin/bash
# Request an interactive job (qsub -I) with the resource selection below.
resources="select=2:ncpus=8:mem=16Gb"
qsub -I -l "$resources"

40
scripts/launchsome.sh Executable file
View File

@@ -0,0 +1,40 @@
#!/bin/bash
# Submit with qsub the generated job scripts of a list of datasets.
#
# Usage: launchsome.sh <experiment> <model> <kernel> <datasets file> [dry-run]
#   experiment:    gridsearch | gridbest | cross
#   model:         stree | adaBoost | bagging | odte
#   kernel:        linear | poly | rbf | any
#   datasets file: path, relative to the current directory, of a file with
#                  dataset names
#   dry-run:       optional; only list the scripts, do not submit them
#
# Dataset names are the second space-separated field of each line of the
# datasets file (a line without spaces is used whole); only the last 49 lines
# are considered. The scripts are expected in <experiment>/<model>/<kernel>/.
# Exits with status 1 on missing/invalid arguments or if that directory does
# not exist.

if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ]; then
    echo "Hay que seleccionar:"
    echo " - el tipo de experimento {gridsearch, gridbest, cross}"
    echo " - el modelo {stree, adaBoost, bagging, odte}"
    echo " - el kernel {linear, poly, rbf, any}"
    echo " - el archivo con nombres de datasets"
    echo "opcionalmente al final: dry-run"
    exit 1
fi

# Exact matches only: a substring test against the concatenated names would
# also accept fragments such as "grid" or glob characters such as "*".
case "$1" in
    gridsearch|gridbest|cross) ;;
    *)
        echo "Hay que seleccionar el tipo de experimento {gridsearch, gridbest, cross}"
        exit 1
        ;;
esac
case "$2" in
    stree|adaBoost|bagging|odte) ;;
    *)
        echo "Hay que seleccionar el modelo {stree, adaBoost, bagging, odte}"
        exit 1
        ;;
esac
case "$3" in
    linear|poly|rbf|any) ;;
    *)
        echo "Hay que seleccionar el kernel {linear, poly, rbf, any}"
        exit 1
        ;;
esac

script_path="$(pwd)"
# Without this check a missing directory would leave the loop below running
# qsub from the wrong place and still reporting the jobs as launched.
cd "$1/$2/$3" || exit 1

counter=0
lines="$(cut -d " " -f 2 "$script_path/$4" | tail -n 49)"
# $lines is deliberately unquoted: one iteration per dataset name.
for a in $lines; do
    echo "launch experiment_$a.sh"
    if [ "$5" = "dry-run" ]; then
        echo "not launched"
    else
        qsub "experiment_$a.sh"
    fi
    counter=$((counter + 1))
done

if [ "$5" = "dry-run" ]; then
    echo "Not launched $counter jobs"
else
    echo "Launched $counter jobs"
fi

4
scripts/notebook.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Open a background ssh reverse tunnel for the notebook port and then start
# JupyterLab on that same port without opening a browser.
port=1234
remote="Ricardo.Montanana@galgo.uclm.es"
tunnel="${port}:localhost:${port}"

ssh -N -f -R "$tunnel" "$remote"
jupyter lab --port="$port" --no-browser

4
scripts/rmscripts.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Remove every regular file below the generated experiment folders, leaving
# the directory tree itself in place.
find gridsearch gridbest cross -type f -exec rm {} \;

View File

@@ -1,48 +0,0 @@
import os
import time
import numpy as np
import pandas as pd
from scipy.io import arff
from stree import Stree
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
# Quick check of a single Stree classifier on one ARFF dataset: 5-fold
# cross-validation, a fit/score on all samples and a train/test split score.
random_state = 1
name = "yeast"
folder = (
    "/Volumes/Datos/OneDrive - Universidad de Castilla-La Mancha/"
    "Doctorado2019/Compartida/FuentesDescargados/data-4/"
)
file_name = os.path.join(folder, name, f"{name}.arff")

# Load the dataset; the "clase" column holds the labels and the remaining
# columns are used as features.
data, meta = arff.loadarff(file_name)
df = pd.DataFrame(data)
y = df.pop("clase").to_numpy().astype(np.int16)
X = df.to_numpy().astype(np.float16)
print(f"Xshape {X.shape} Xtype {X.dtype}")
print(f"yshape {y.shape} ytype {y.dtype}")

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=random_state
)

# Classifier hyperparameters gathered in one place.
stree_params = dict(
    random_state=random_state,
    C=1e5,
    max_iter=1e5,
    kernel="poly",
    degree=5,
    gamma=0.8,
)
clf = Stree(**stree_params)

# Time only the cross-validation run.
now = time.time()
scores = cross_val_score(clf, X, y, cv=5)
print(f"Accuracy for {name}: {scores.mean():.2f} (+/- {scores.std() * 2:.2f})")
print(f"Took : {time.time() - now:.2f} seconds")

print(f"Score one tree all samples .: {clf.fit(X, y).score(X, y):.4f}")
print(
    f"Score one tree train/test .: "
    f"{clf.fit(X_train, y_train).score(X_test, y_test):.4f}"
)
print("*" * 80)