diff --git a/gen_csv.py b/gen_csv.py new file mode 100644 index 0000000..368616c --- /dev/null +++ b/gen_csv.py @@ -0,0 +1,11 @@ +import pandas as pd +from experimentation.Sets import Datasets + +dt = Datasets(normalize=False, standardize=False, set_of_files="tanveer") +for data in dt: + name = data[0] + X, y = dt.load(name) + z = pd.DataFrame(X) + z[X.shape[1]] = y + print(name, z.shape) + z.to_csv(f"test/{name}.csv", header=False, index=False) diff --git a/oc1datasets.py b/oc1datasets.py index a638b84..1832998 100755 --- a/oc1datasets.py +++ b/oc1datasets.py @@ -2,7 +2,7 @@ import os import pandas as pd from experimentation.Sets import Datasets -dt = Datasets(normalize=True, set_of_files="tanveer") +dt = Datasets(normalize=False, set_of_files="tanveer") print("Generating: ", end="") for data in dt: name = data[0] diff --git a/scripts/experiment.pbs b/scripts/experiment.pbs index 675a567..72acece 100644 --- a/scripts/experiment.pbs +++ b/scripts/experiment.pbs @@ -17,4 +17,4 @@ ### Ejecutable con sus parametros cd -python experiment.py -H galgo -e -m -d -S tanveer -k -n 1 -t 12 \ No newline at end of file +python experiment.py -H galgo -e -m -d -S tanveer -k -t 12 \ No newline at end of file diff --git a/scripts/experiment.slurm b/scripts/experiment.slurm index db48341..49baea4 100644 --- a/scripts/experiment.slurm +++ b/scripts/experiment.slurm @@ -9,4 +9,4 @@ # LOAD MODULES, INSERT CODE, AND RUN YOUR PROGRAMS HERE cd -python experiment.py -H galgo -e -m -d -S tanveer -k -n 1 -t 4 \ No newline at end of file +python experiment.py -H galgo -e -m -d -S tanveer -k -t 4 \ No newline at end of file diff --git a/stats_stree.py b/stats_stree.py index 43ef876..215e016 100644 --- a/stats_stree.py +++ b/stats_stree.py @@ -25,7 +25,7 @@ def compute_depth(node, depth): ) -dt = Datasets(True, False, "tanveer") +dt = Datasets(False, False, "tanveer") for dataset in dt: dataset_name = dataset[0] X, y = dt.load(dataset_name) diff --git a/test.ipynb b/test.ipynb index f71ebcd..84dc5db 100644 --- a/test.ipynb +++ b/test.ipynb @@ -797,7 +797,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.9.5" } }, "nbformat": 4, diff --git a/test_validation.ipynb b/test_validation.ipynb index 33be434..a34999f 100644 --- a/test_validation.ipynb +++ b/test_validation.ipynb @@ -39,7 +39,7 @@ "metadata": {}, "outputs": [], "source": [ - "datasets = Datasets(normalize=True, standardize=False, set_of_files=\"tanveer\")\n", + "datasets = Datasets(normalize=False, standardize=False, set_of_files=\"tanveer\")\n", "X, y = datasets.load(dataset_name)" ] }, @@ -821,6 +821,60 @@ "generate_subspaces(200, 10)" ] }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "dd = pd.read_csv(\"data/csv/balloons.csv\", header=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "data = dd.values[:,:-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 1., 1.],\n", + " [1., 0., 1., 0.],\n", + " [1., 0., 0., 1.],\n", + " [1., 0., 0., 0.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 0.],\n", + " [1., 1., 0., 1.],\n", + " [1., 1., 0., 0.],\n", + " [0., 0., 1., 1.],\n", + " [0., 0., 1., 0.],\n", + " [0., 0., 0., 1.],\n", + " [0., 0., 0., 0.],\n", + " [0., 1., 1., 1.],\n", + " [0., 1., 1., 0.],\n", + " [0., 1., 0., 1.],\n", + " [0., 1., 0., 0.]])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, { "cell_type": "code", "execution_count": null, @@ -845,9 +899,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.2" + "version": "3.9.5" } }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +}