mirror of
https://github.com/Doctorado-ML/Stree_datasets.git
synced 2025-08-15 07:26:02 +00:00
Add server jobs control scripts
This commit is contained in:
22
core.txt
Normal file
22
core.txt
Normal file
@@ -0,0 +1,22 @@
|
||||
breast-cancer-wisc-diag
|
||||
balance-scale
|
||||
breast-cancer-wisc-prog
|
||||
cardiotocography-10clases
|
||||
cardiotocography-3clases
|
||||
cylinder-bands
|
||||
dermatology
|
||||
ilpd-indian-liver
|
||||
ionosphere
|
||||
led-display
|
||||
mammographic
|
||||
oocytes_merluccius_nucleus_4d
|
||||
oocytes_merluccius_states_2f
|
||||
oocytes_trisopterus_nucleus_2f
|
||||
oocytes_trisopterus_states_5b
|
||||
pima
|
||||
statlog-australian-credit
|
||||
statlog-german-credit
|
||||
statlog-image
|
||||
statlog-vehicle
|
||||
tic-tac-toe
|
||||
zoo
|
75
datasets.txt
75
datasets.txt
@@ -1,24 +1,51 @@
|
||||
(#) Dataset Samples Feat. #Cl. y typ X type f_type
|
||||
=== ========= ======== ===== ==== ===== ======= ======
|
||||
#01 breast 683 9 2 int16 int16 csv
|
||||
#02 cardiotoc 2,126 41 3 int16 int16 csv
|
||||
#03 cod-rna 331,152 8 2 int16 float16 sparse
|
||||
#04 connect4 67,557 126 3 int16 int16 sparse
|
||||
#05 covtype 581,012 54 7 int16 int16 npz
|
||||
#06 diabetes 768 8 2 int16 float16 csv
|
||||
#07 dna 3,186 180 3 int16 float16 csv
|
||||
#08 fourclass 862 2 2 int16 int16 sparse
|
||||
#09 glass 214 9 6 int16 float16 csv
|
||||
#10 heart 270 13 2 int16 float16 csv
|
||||
#11 ijcnn1 141,691 22 2 int16 float16 sparse
|
||||
#12 iris 150 4 3 int16 float16 csv
|
||||
#13 letter 20,000 16 26 int16 int16 npz
|
||||
#14 mnist 70,000 784 10 int16 int16 npy
|
||||
#15 pendigits 10,992 16 10 int16 int16 npy
|
||||
#16 protein 24,387 357 3 int16 float16 sparse
|
||||
#17 satimage 6,435 36 6 int16 int16 npy
|
||||
#18 segment 2,310 19 7 int16 float16 sparse
|
||||
#19 shuttle 58,000 9 7 int16 int16 npy
|
||||
#20 usps 9,298 256 10 int16 float16 npz
|
||||
#21 vehicle 846 18 4 int16 float16 sparse
|
||||
#22 wine 178 13 3 int16 float16 csv
|
||||
(#) Dataset Samples Feat. #Cl. y typ X type f_type
|
||||
=== ============================= ======== ===== ==== ===== ======= ======
|
||||
#01 balance-scale 625 4 3 int64 float64 Rdat
|
||||
#02 balloons 16 4 2 int64 float64 Rdat
|
||||
#03 breast-cancer-wisc-diag 569 30 2 int64 float64 Rdat
|
||||
#04 breast-cancer-wisc-prog 198 33 2 int64 float64 Rdat
|
||||
#05 breast-cancer-wisc 699 9 2 int64 float64 Rdat
|
||||
#06 breast-cancer 286 9 2 int64 float64 Rdat
|
||||
#07 cardiotocography-10clases 2,126 21 10 int64 float64 Rdat
|
||||
#08 cardiotocography-3clases 2,126 21 3 int64 float64 Rdat
|
||||
#09 conn-bench-sonar-mines-rocks 208 60 2 int64 float64 Rdat
|
||||
#10 cylinder-bands 512 35 2 int64 float64 Rdat
|
||||
#11 dermatology 366 34 6 int64 float64 Rdat
|
||||
#12 echocardiogram 131 10 2 int64 float64 Rdat
|
||||
#13 fertility 100 9 2 int64 float64 Rdat
|
||||
#14 haberman-survival 306 3 2 int64 float64 Rdat
|
||||
#15 heart-hungarian 294 12 2 int64 float64 Rdat
|
||||
#16 hepatitis 155 19 2 int64 float64 Rdat
|
||||
#17 ilpd-indian-liver 583 9 2 int64 float64 Rdat
|
||||
#18 ionosphere 351 33 2 int64 float64 Rdat
|
||||
#19 iris 150 4 3 int64 float64 Rdat
|
||||
#20 led-display 1,000 7 10 int64 float64 Rdat
|
||||
#21 libras 360 90 15 int64 float64 Rdat
|
||||
#22 low-res-spect 531 100 9 int64 float64 Rdat
|
||||
#23 lymphography 148 18 4 int64 float64 Rdat
|
||||
#24 mammographic 961 5 2 int64 float64 Rdat
|
||||
#25 molec-biol-promoter 106 57 2 int64 float64 Rdat
|
||||
#26 musk-1 476 166 2 int64 float64 Rdat
|
||||
#27 oocytes_merluccius_nucleus_4d 1,022 41 2 int64 float64 Rdat
|
||||
#28 oocytes_merluccius_states_2f 1,022 25 3 int64 float64 Rdat
|
||||
#29 oocytes_trisopterus_nucleus_2f 912 25 2 int64 float64 Rdat
|
||||
#30 oocytes_trisopterus_states_5b 912 32 3 int64 float64 Rdat
|
||||
#31 parkinsons 195 22 2 int64 float64 Rdat
|
||||
#32 pima 768 8 2 int64 float64 Rdat
|
||||
#33 pittsburg-bridges-MATERIAL 106 7 3 int64 float64 Rdat
|
||||
#34 pittsburg-bridges-REL-L 103 7 3 int64 float64 Rdat
|
||||
#35 pittsburg-bridges-SPAN 92 7 3 int64 float64 Rdat
|
||||
#36 pittsburg-bridges-T-OR-D 102 7 2 int64 float64 Rdat
|
||||
#37 planning 182 12 2 int64 float64 Rdat
|
||||
#38 post-operative 90 8 3 int64 float64 Rdat
|
||||
#39 seeds 210 7 3 int64 float64 Rdat
|
||||
#40 statlog-australian-credit 690 14 2 int64 float64 Rdat
|
||||
#41 statlog-german-credit 1,000 24 2 int64 float64 Rdat
|
||||
#42 statlog-heart 270 13 2 int64 float64 Rdat
|
||||
#43 statlog-image 2,310 18 7 int64 float64 Rdat
|
||||
#44 statlog-vehicle 846 18 4 int64 float64 Rdat
|
||||
#45 synthetic-control 600 60 6 int64 float64 Rdat
|
||||
#46 tic-tac-toe 958 9 2 int64 float64 Rdat
|
||||
#47 vertebral-column-2clases 310 6 2 int64 float64 Rdat
|
||||
#48 wine 178 13 3 int64 float64 Rdat
|
||||
#49 zoo 101 16 7 int64 float64 Rdat
|
5
experimentation/.myconfig.dist
Normal file
5
experimentation/.myconfig.dist
Normal file
@@ -0,0 +1,5 @@
|
||||
host=<server>
|
||||
port=3306
|
||||
user=stree
|
||||
password=<password>
|
||||
database=stree_experiments
|
6
hardCore.txt
Normal file
6
hardCore.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
molec-biol-promoter
|
||||
musk-1
|
||||
conn-bench-sonar-mines-rocks
|
||||
libras
|
||||
low-res-spect
|
||||
synthetic-control
|
@@ -1,182 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"### Welcome to Kite's JupyterLab extension tutorial\n",
|
||||
"\n",
|
||||
"Kite gives you **ML-powered autocompletions** and **rich documentation** inside JupyterLab. This guide will teach you everything you need to know about Kite in 5 minutes or less.\n",
|
||||
"\n",
|
||||
"> 💡 _**Tip:** You can open this file at any time with the command `Kite: Open Tutorial` in JupyterLab's command palette._\n",
|
||||
"\n",
|
||||
"#### Before we start...\n",
|
||||
"\n",
|
||||
"Make sure that the Kite icon at the bottom of the window reads `Kite: ready`.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"* If it says `Kite: not running`, please start the Kite Engine first.\n",
|
||||
"* If it says `Kite: not installed`, please [download and install Kite](https://kite.com/download) first."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Part 1: Autocompletions\n",
|
||||
"\n",
|
||||
"**Step 1a**<br/>\n",
|
||||
"Run the code cell below with all the necessary imports 👇"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Run me!\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Step 1b**<br/>\n",
|
||||
"Let's try typing out some code to plot a sine graph. As you type, Kite will automatically show you completions for what you're going to type next.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"> 💡 _**Tip:** You can turn completions docs on and off in JupyterLab's command palette with the command `Kite: Toggle Docs Panel`._\n",
|
||||
"\n",
|
||||
"> 💡 _**Tip:** The starred completions ★ are from Kite Pro. You can [start your free Kite Pro trial](https://www.kite.com/pro/trial/) anytime. Afterwards, if you choose not to upgrade, you can still use Kite 100% for free._\n",
|
||||
"\n",
|
||||
"Try typing out the code yourself to see Kite's autocompletions in action.<br/>\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"x = np.linspace(-np.pi, np.pi, 50)\n",
|
||||
"y = np.sin(x)\n",
|
||||
"plt.plot(x, y)\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Type this code in the cell below 👇"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Put code in me\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Part 2: Manual completions\n",
|
||||
"\n",
|
||||
"You can still use JupyterLab's builtin kernel completions. These are particularly useful when you need to access a `DataFrame`'s column names.\n",
|
||||
"\n",
|
||||
"**Step 2a**<br/>\n",
|
||||
"First, run the code cell below to get some sample data to store in a `DataFrame` 👇"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Run me!\n",
|
||||
"url = 'https://kite.com/kite-public/iris.csv'\n",
|
||||
"df = pd.read_csv(url)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Step 2b**<br/>\n",
|
||||
"Let's plot a scatter graph of sepal length vs. sepal width. When you are accessing a `DataFrame`'s columns, you'll still need to hit `tab` to request completions from the kernel.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Try requesting kernel completions yourself.\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"plt.scatter(df['sepal_length'], df['sepal_width'])\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Type this code in the cell below, making sure to hit `tab` when you are filling in the column names 👇"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Put code in me\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Part 3: Copilot Documentation\n",
|
||||
"\n",
|
||||
"If you've enabled \"docs following cursor\" in the Copilot, the Copilot will automatically update with the documentation of the identifier underneath your cursor.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"**Step 3a**<br/>\n",
|
||||
"Try it yourself! Just click around in the code cells of this notebook and see the Copilot update automatically."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### The End\n",
|
||||
"\n",
|
||||
"Now you know everything you need to know about Kite's JupyterLab plugin. Kite is under active development and we expect to ship improvements and more features in the near future.\n",
|
||||
"\n",
|
||||
"In the meantime, if you experience bugs or have feature requests, feel free to open an issue in our [public GitHub repo](https://github.com/kiteco/issue-tracker).\n",
|
||||
"\n",
|
||||
"Happy coding!"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
3
scripts/.gitignore
vendored
Normal file
3
scripts/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
gridsearch/*
|
||||
gridbest/*
|
||||
cross/*
|
16
scripts/experiment.template
Normal file
16
scripts/experiment.template
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
### Nombre de trabajo
|
||||
#PBS -N <experiment>-<data>-<model>-<kernel>
|
||||
### Tiempo máximo de ejecución del trabajo
|
||||
#PBS -l walltime=96:00:00
|
||||
### Selección de cola de trabajos
|
||||
#PBS -q workq
|
||||
### mezcla errores con la salida principal
|
||||
#PBS -j oe
|
||||
### Recursos
|
||||
#PBS -l select=2:ncpus=2:mem=16Gb
|
||||
### Exportar variables de entorno
|
||||
#PBS -V
|
||||
### Ejecutable con sus parámetros
|
||||
cd <folder>
|
||||
python experiment.py -H galgo -e <experiment> -m <model> -d <data> -S tanveer -k <kernel> -n 1
|
9
scripts/genall.sh
Executable file
9
scripts/genall.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash
# Build the job scripts for every experiment type / model / kernel
# combination by delegating each combination to ./genjobs.sh.
experiments=(gridsearch gridbest cross)
models=(stree odte bagging adaBoost)
kernels=(linear poly rbf)

for experiment in "${experiments[@]}"; do
    echo "*** Building $experiment experiments"
    for model in "${models[@]}"; do
        for kernel in "${kernels[@]}"; do
            ./genjobs.sh "$experiment" "$model" "$kernel"
        done
    done
done
|
32
scripts/genjobs.sh
Executable file
32
scripts/genjobs.sh
Executable file
@@ -0,0 +1,32 @@
|
||||
#!/bin/bash
# Generate one PBS job script per dataset for an experiment/model/kernel.
#
# Usage: ./genjobs.sh <experiment> <model> <kernel>
#   experiment: gridsearch | gridbest | cross
#   model:      stree | adaBoost | bagging | odte
#   kernel:     linear | poly | rbf | any
#
# Reads experiment.template from the current directory and ../datasets.txt,
# and writes <experiment>/<model>/<kernel>/experiment_<dataset>.sh.
# Exits with status 1 on missing or invalid arguments.

if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
    echo "Hay que seleccionar:"
    echo " - el tipo de experimento {gridsearch, gridbest, cross}"
    echo " - el modelo {stree, adaBoost, bagging, odte}"
    echo " - el kernel {linear, poly, rbf, any}"
    exit 1
fi

# Validate with exact matches. A substring test against the concatenated
# option names would also accept fragments such as "grid" or "search".
case "$1" in
    gridsearch|gridbest|cross) ;;
    *)
        echo "Hay que seleccionar el tipo de experimento {gridsearch, gridbest, cross}"
        exit 1
        ;;
esac
case "$2" in
    stree|adaBoost|bagging|odte) ;;
    *)
        echo "Hay que seleccionar el modelo {stree, adaBoost, bagging, odte}"
        exit 1
        ;;
esac
case "$3" in
    linear|poly|rbf|any) ;;
    *)
        echo "Hay que seleccionar el kernel {linear, poly, rbf, any}"
        exit 1
        ;;
esac

experiment="$1"
model="$2"
kernel="$3"
script_path="$(pwd)/.."
base_script="experiment_$experiment.sh"
target_dir="$experiment/$model/$kernel"

# Fill in the placeholders shared by every dataset in a temporary copy of
# the template; stop if the template cannot be copied.
cp experiment.template "$base_script" || exit 1
perl -i -pe"s/<model>/$model/g" "$base_script"
# "~" is the delimiter here because the folder path contains "/".
perl -i -pe"s~<folder>~$script_path~g" "$base_script"
perl -i -pe"s/<experiment>/$experiment/g" "$base_script"
mkdir -p "$target_dir"

# The dataset name is the second space-separated field of each of the
# last 49 lines of ../datasets.txt.
cut -d " " -f 2 ../datasets.txt | tail -49 | while read -r a; do
    # Skip blank entries so no "experiment_.sh" file is produced.
    [ -n "$a" ] || continue
    cp "$base_script" "$target_dir/experiment_$a.sh"
    perl -i -pe"s/<data>/$a/g" "$target_dir/experiment_$a.sh"
    perl -i -pe"s/<kernel>/$kernel/g" "$target_dir/experiment_$a.sh"
done
rm "$base_script"
|
2
scripts/interactive.sh
Executable file
2
scripts/interactive.sh
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/bash
# Submit an interactive job (qsub -I) with the resource request below.
resources="select=2:ncpus=8:mem=16Gb"
qsub -I -l "$resources"
|
40
scripts/launchsome.sh
Executable file
40
scripts/launchsome.sh
Executable file
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash
# Submit (qsub) the generated job scripts for a list of datasets.
#
# Usage: ./launchsome.sh <experiment> <model> <kernel> <datasets-file> [dry-run]
#   experiment:    gridsearch | gridbest | cross
#   model:         stree | adaBoost | bagging | odte
#   kernel:        linear | poly | rbf | any
#   datasets-file: file, relative to the current directory, with the dataset
#                  names
#   dry-run:       optional; only print what would be launched
#
# Exits with status 1 on missing or invalid arguments, or when the datasets
# file or the <experiment>/<model>/<kernel> directory does not exist.

if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ] || [ -z "$4" ]; then
    echo "Hay que seleccionar:"
    echo " - el tipo de experimento {gridsearch, gridbest, cross}"
    echo " - el modelo {stree, adaBoost, bagging, odte}"
    echo " - el kernel {linear, poly, rbf, any}"
    echo " - el archivo con nombres de datasets"
    echo "opcionalmente al final: dry-run"
    exit 1
fi

# Validate with exact matches. A substring test against the concatenated
# option names would also accept fragments such as "grid" or "search".
case "$1" in
    gridsearch|gridbest|cross) ;;
    *)
        echo "Hay que seleccionar el tipo de experimento {gridsearch, gridbest, cross}"
        exit 1
        ;;
esac
case "$2" in
    stree|adaBoost|bagging|odte) ;;
    *)
        echo "Hay que seleccionar el modelo {stree, adaBoost, bagging, odte}"
        exit 1
        ;;
esac
case "$3" in
    linear|poly|rbf|any) ;;
    *)
        echo "Hay que seleccionar el kernel {linear, poly, rbf, any}"
        exit 1
        ;;
esac

script_path="$(pwd)"
datasets_file="$script_path/$4"
jobs_dir="$1/$2/$3"

if [ ! -f "$datasets_file" ]; then
    echo "No existe el archivo de datasets: $datasets_file"
    exit 1
fi

# Stop if the job directory is missing; otherwise qsub would run from the
# wrong directory.
if ! cd "$jobs_dir"; then
    echo "No existe el directorio de trabajos: $jobs_dir"
    exit 1
fi

counter=0
# The dataset name is the second space-separated field of each of the last
# 49 lines. Lines without a space pass through cut unchanged, so a plain
# list of names also works.
lines="$(cut -d " " -f 2 "$datasets_file" | tail -49)"
# Intentionally unquoted: rely on word splitting to iterate over the names.
for a in $lines; do
    echo "launch experiment_$a.sh"
    if [ "$5" = "dry-run" ]; then
        echo "not launched"
    else
        qsub "experiment_$a.sh"
    fi
    counter=$((counter + 1))
done

if [ "$5" = "dry-run" ]; then
    echo "Not launched $counter jobs"
else
    echo "Launched $counter jobs"
fi
|
4
scripts/notebook.sh
Executable file
4
scripts/notebook.sh
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
# Open a background reverse SSH tunnel for the notebook port, then start
# JupyterLab on that same port without opening a browser.
port=1234
tunnel="$port:localhost:$port"
ssh -N -f -R "$tunnel" Ricardo.Montanana@galgo.uclm.es
jupyter lab --port="$port" --no-browser
|
4
scripts/rmscripts.sh
Executable file
4
scripts/rmscripts.sh
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
# Delete every regular file under the generated experiment folders,
# leaving the directory tree itself in place.
targets=(gridsearch gridbest cross)
for target in "${targets[@]}"; do
    find "$target" -type f -exec rm {} \;
done
|
48
test_arff.py
48
test_arff.py
@@ -1,48 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy.io import arff
|
||||
from stree import Stree
|
||||
from sklearn.model_selection import cross_val_score
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
# Ad-hoc script: load one ARFF dataset, cross-validate an Stree classifier
# on it and print accuracy, timing and two single-fit scores.

# Root folder holding one sub-folder per dataset.
folder = (
    "/Volumes/Datos/OneDrive - Universidad de Castilla-La Mancha/"
    "Doctorado2019/Compartida/FuentesDescargados/data-4/"
)

name, random_state = "yeast", 1

# The dataset is expected at <folder>/<name>/<name>.arff
file_name = os.path.join(folder, name, f"{name}.arff")
data, meta = arff.loadarff(file_name)
df = pd.DataFrame(data)
# pop() returns the label column and removes it from df in one step, so
# what remains in df is the feature matrix.
y = df.pop("clase").to_numpy().astype(np.int16)
X = df.to_numpy().astype(np.float16)
print(f"Xshape {X.shape} Xtype {X.dtype}")
print(f"yshape {y.shape} ytype {y.dtype}")

# Hold-out split (70/30) used for the final train/test score.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=random_state
)

stree_params = {
    "random_state": random_state,
    "C": 1e5,
    "max_iter": 1e5,
    "kernel": "poly",
    "degree": 5,
    "gamma": 0.8,
}
clf = Stree(**stree_params)

# Time only the 5-fold cross validation.
now = time.time()
scores = cross_val_score(clf, X, y, cv=5)
print(f"Accuracy for {name}: {scores.mean():.2f} (+/- {scores.std() * 2:.2f})")
print(f"Took : {time.time() - now:.2f} seconds")

# Fit on all samples and score on those same samples.
full_fit = clf.fit(X, y)
print(f"Score one tree all samples .: {full_fit.score(X, y):.4f}")

# Refit on the training split and score on the held-out split.
split_fit = clf.fit(X_train, y_train)
print(
    f"Score one tree train/test .: "
    f"{split_fit.score(X_test, y_test):.4f}"
)
print("*" * 80)
|
Reference in New Issue
Block a user