mirror of
https://github.com/Doctorado-ML/benchmark.git
synced 2025-08-17 16:35:54 +00:00
Compare commits
16 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
c77feff54b | ||
1e83db7956
|
|||
8cf823e843
|
|||
97718e6e82
|
|||
|
5532beb88a | ||
db61911ca6
|
|||
b24a508d1c
|
|||
29c4b4ceef
|
|||
2362f66c7a
|
|||
8001c7f2eb
|
|||
47bf6eeda6
|
|||
34b3bd94de
|
|||
7875e2e6ac
|
|||
34b25756ea
|
|||
e15ab3dcab
|
|||
12024df4d8
|
@@ -4,3 +4,4 @@ n_folds=5
|
||||
model=ODTE
|
||||
stratified=0
|
||||
source_data=Tanveer
|
||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||
|
14
.github/workflows/build.yml
vendored
14
.github/workflows/build.yml
vendored
@@ -11,19 +11,15 @@ jobs:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Get project version
|
||||
id: step_one
|
||||
run: |
|
||||
version=$(git describe --tags --abbrev=0)
|
||||
echo "project_version=$version" >> $GITHUB_ENV
|
||||
- run: echo "project_version=$(git describe --tags --abbrev=0)" >> $GITHUB_ENV
|
||||
- uses: sonarsource/sonarqube-scan-action@master
|
||||
env:
|
||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
||||
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
|
||||
with:
|
||||
args: >
|
||||
-Dsonar.projectVersion=${{ env.project_version }}
|
||||
-Dsonar.python.version=3.10
|
||||
with:
|
||||
args: >
|
||||
-Dsonar.projectVersion=${{ env.project_version }}
|
||||
-Dsonar.python.version=3.10
|
||||
# If you wish to fail your job when the Quality Gate is red, uncomment the
|
||||
# following lines. This would typically be used to fail a deployment.
|
||||
- uses: sonarsource/sonarqube-quality-gate-action@master
|
||||
|
2
.github/workflows/main.yml
vendored
2
.github/workflows/main.yml
vendored
@@ -46,7 +46,7 @@ jobs:
|
||||
- name: Lint
|
||||
run: |
|
||||
black --check --diff benchmark
|
||||
flake8 --count benchmark
|
||||
flake8 --count benchmark --ignore=E203,W503
|
||||
- name: Tests
|
||||
run: |
|
||||
coverage run -m unittest -v benchmark.tests
|
||||
|
526
Untitled.ipynb
Normal file
526
Untitled.ipynb
Normal file
@@ -0,0 +1,526 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "0e48f7d2-7481-4eca-9c38-56d21c203093",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"DEBUG:weka.core.jvm:Adding bundled jars\n",
|
||||
"DEBUG:weka.core.jvm:Classpath=['/Users/rmontanana/miniconda3/envs/pyweka/lib/python3.10/site-packages/javabridge/jars/rhino-1.7R4.jar', '/Users/rmontanana/miniconda3/envs/pyweka/lib/python3.10/site-packages/javabridge/jars/runnablequeue.jar', '/Users/rmontanana/miniconda3/envs/pyweka/lib/python3.10/site-packages/javabridge/jars/cpython.jar', '/Users/rmontanana/miniconda3/envs/pyweka/lib/python3.10/site-packages/weka/lib/python-weka-wrapper.jar', '/Users/rmontanana/miniconda3/envs/pyweka/lib/python3.10/site-packages/weka/lib/weka.jar']\n",
|
||||
"DEBUG:weka.core.jvm:MaxHeapSize=default\n",
|
||||
"DEBUG:weka.core.jvm:Package support disabled\n",
|
||||
"WARNING: An illegal reflective access operation has occurred\n",
|
||||
"WARNING: Illegal reflective access by weka.core.WekaPackageClassLoaderManager (file:/Users/rmontanana/miniconda3/envs/pyweka/lib/python3.10/site-packages/weka/lib/weka.jar) to method java.lang.ClassLoader.defineClass(java.lang.String,byte[],int,int,java.security.ProtectionDomain)\n",
|
||||
"WARNING: Please consider reporting this to the maintainers of weka.core.WekaPackageClassLoaderManager\n",
|
||||
"WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations\n",
|
||||
"WARNING: All illegal access operations will be denied in a future release\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import weka.core.jvm as jvm\n",
|
||||
"jvm.start()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "2ac4e479-3818-4562-a967-bb303d8dd573",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from weka.core.converters import Loader\n",
|
||||
"data_dir = \"/Users/rmontanana/Code/discretizbench/datasets/\"\n",
|
||||
"loader = Loader(classname=\"weka.core.converters.ArffLoader\")\n",
|
||||
"data = loader.load_file(data_dir + \"iris.arff\")\n",
|
||||
"data.class_is_last()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "ceb9f912-db42-4cbc-808f-48b5a9d89d44",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"@relation iris\n",
|
||||
"\n",
|
||||
"@attribute sepallength numeric\n",
|
||||
"@attribute sepalwidth numeric\n",
|
||||
"@attribute petallength numeric\n",
|
||||
"@attribute petalwidth numeric\n",
|
||||
"@attribute class {Iris-setosa,Iris-versicolor,Iris-virginica}\n",
|
||||
"\n",
|
||||
"@data\n",
|
||||
"5.1,3.5,1.4,0.2,Iris-setosa\n",
|
||||
"4.9,3,1.4,0.2,Iris-setosa\n",
|
||||
"4.7,3.2,1.3,0.2,Iris-setosa\n",
|
||||
"4.6,3.1,1.5,0.2,Iris-setosa\n",
|
||||
"5,3.6,1.4,0.2,Iris-setosa\n",
|
||||
"5.4,3.9,1.7,0.4,Iris-setosa\n",
|
||||
"4.6,3.4,1.4,0.3,Iris-setosa\n",
|
||||
"5,3.4,1.5,0.2,Iris-setosa\n",
|
||||
"4.4,2.9,1.4,0.2,Iris-setosa\n",
|
||||
"4.9,3.1,1.5,0.1,Iris-setosa\n",
|
||||
"5.4,3.7,1.5,0.2,Iris-setosa\n",
|
||||
"4.8,3.4,1.6,0.2,Iris-setosa\n",
|
||||
"4.8,3,1.4,0.1,Iris-setosa\n",
|
||||
"4.3,3,1.1,0.1,Iris-setosa\n",
|
||||
"5.8,4,1.2,0.2,Iris-setosa\n",
|
||||
"5.7,4.4,1.5,0.4,Iris-setosa\n",
|
||||
"5.4,3.9,1.3,0.4,Iris-setosa\n",
|
||||
"5.1,3.5,1.4,0.3,Iris-setosa\n",
|
||||
"5.7,3.8,1.7,0.3,Iris-setosa\n",
|
||||
"5.1,3.8,1.5,0.3,Iris-setosa\n",
|
||||
"5.4,3.4,1.7,0.2,Iris-setosa\n",
|
||||
"5.1,3.7,1.5,0.4,Iris-setosa\n",
|
||||
"4.6,3.6,1,0.2,Iris-setosa\n",
|
||||
"5.1,3.3,1.7,0.5,Iris-setosa\n",
|
||||
"4.8,3.4,1.9,0.2,Iris-setosa\n",
|
||||
"5,3,1.6,0.2,Iris-setosa\n",
|
||||
"5,3.4,1.6,0.4,Iris-setosa\n",
|
||||
"5.2,3.5,1.5,0.2,Iris-setosa\n",
|
||||
"5.2,3.4,1.4,0.2,Iris-setosa\n",
|
||||
"4.7,3.2,1.6,0.2,Iris-setosa\n",
|
||||
"4.8,3.1,1.6,0.2,Iris-setosa\n",
|
||||
"5.4,3.4,1.5,0.4,Iris-setosa\n",
|
||||
"5.2,4.1,1.5,0.1,Iris-setosa\n",
|
||||
"5.5,4.2,1.4,0.2,Iris-setosa\n",
|
||||
"4.9,3.1,1.5,0.1,Iris-setosa\n",
|
||||
"5,3.2,1.2,0.2,Iris-setosa\n",
|
||||
"5.5,3.5,1.3,0.2,Iris-setosa\n",
|
||||
"4.9,3.1,1.5,0.1,Iris-setosa\n",
|
||||
"4.4,3,1.3,0.2,Iris-setosa\n",
|
||||
"5.1,3.4,1.5,0.2,Iris-setosa\n",
|
||||
"5,3.5,1.3,0.3,Iris-setosa\n",
|
||||
"4.5,2.3,1.3,0.3,Iris-setosa\n",
|
||||
"4.4,3.2,1.3,0.2,Iris-setosa\n",
|
||||
"5,3.5,1.6,0.6,Iris-setosa\n",
|
||||
"5.1,3.8,1.9,0.4,Iris-setosa\n",
|
||||
"4.8,3,1.4,0.3,Iris-setosa\n",
|
||||
"5.1,3.8,1.6,0.2,Iris-setosa\n",
|
||||
"4.6,3.2,1.4,0.2,Iris-setosa\n",
|
||||
"5.3,3.7,1.5,0.2,Iris-setosa\n",
|
||||
"5,3.3,1.4,0.2,Iris-setosa\n",
|
||||
"7,3.2,4.7,1.4,Iris-versicolor\n",
|
||||
"6.4,3.2,4.5,1.5,Iris-versicolor\n",
|
||||
"6.9,3.1,4.9,1.5,Iris-versicolor\n",
|
||||
"5.5,2.3,4,1.3,Iris-versicolor\n",
|
||||
"6.5,2.8,4.6,1.5,Iris-versicolor\n",
|
||||
"5.7,2.8,4.5,1.3,Iris-versicolor\n",
|
||||
"6.3,3.3,4.7,1.6,Iris-versicolor\n",
|
||||
"4.9,2.4,3.3,1,Iris-versicolor\n",
|
||||
"6.6,2.9,4.6,1.3,Iris-versicolor\n",
|
||||
"5.2,2.7,3.9,1.4,Iris-versicolor\n",
|
||||
"5,2,3.5,1,Iris-versicolor\n",
|
||||
"5.9,3,4.2,1.5,Iris-versicolor\n",
|
||||
"6,2.2,4,1,Iris-versicolor\n",
|
||||
"6.1,2.9,4.7,1.4,Iris-versicolor\n",
|
||||
"5.6,2.9,3.6,1.3,Iris-versicolor\n",
|
||||
"6.7,3.1,4.4,1.4,Iris-versicolor\n",
|
||||
"5.6,3,4.5,1.5,Iris-versicolor\n",
|
||||
"5.8,2.7,4.1,1,Iris-versicolor\n",
|
||||
"6.2,2.2,4.5,1.5,Iris-versicolor\n",
|
||||
"5.6,2.5,3.9,1.1,Iris-versicolor\n",
|
||||
"5.9,3.2,4.8,1.8,Iris-versicolor\n",
|
||||
"6.1,2.8,4,1.3,Iris-versicolor\n",
|
||||
"6.3,2.5,4.9,1.5,Iris-versicolor\n",
|
||||
"6.1,2.8,4.7,1.2,Iris-versicolor\n",
|
||||
"6.4,2.9,4.3,1.3,Iris-versicolor\n",
|
||||
"6.6,3,4.4,1.4,Iris-versicolor\n",
|
||||
"6.8,2.8,4.8,1.4,Iris-versicolor\n",
|
||||
"6.7,3,5,1.7,Iris-versicolor\n",
|
||||
"6,2.9,4.5,1.5,Iris-versicolor\n",
|
||||
"5.7,2.6,3.5,1,Iris-versicolor\n",
|
||||
"5.5,2.4,3.8,1.1,Iris-versicolor\n",
|
||||
"5.5,2.4,3.7,1,Iris-versicolor\n",
|
||||
"5.8,2.7,3.9,1.2,Iris-versicolor\n",
|
||||
"6,2.7,5.1,1.6,Iris-versicolor\n",
|
||||
"5.4,3,4.5,1.5,Iris-versicolor\n",
|
||||
"6,3.4,4.5,1.6,Iris-versicolor\n",
|
||||
"6.7,3.1,4.7,1.5,Iris-versicolor\n",
|
||||
"6.3,2.3,4.4,1.3,Iris-versicolor\n",
|
||||
"5.6,3,4.1,1.3,Iris-versicolor\n",
|
||||
"5.5,2.5,4,1.3,Iris-versicolor\n",
|
||||
"5.5,2.6,4.4,1.2,Iris-versicolor\n",
|
||||
"6.1,3,4.6,1.4,Iris-versicolor\n",
|
||||
"5.8,2.6,4,1.2,Iris-versicolor\n",
|
||||
"5,2.3,3.3,1,Iris-versicolor\n",
|
||||
"5.6,2.7,4.2,1.3,Iris-versicolor\n",
|
||||
"5.7,3,4.2,1.2,Iris-versicolor\n",
|
||||
"5.7,2.9,4.2,1.3,Iris-versicolor\n",
|
||||
"6.2,2.9,4.3,1.3,Iris-versicolor\n",
|
||||
"5.1,2.5,3,1.1,Iris-versicolor\n",
|
||||
"5.7,2.8,4.1,1.3,Iris-versicolor\n",
|
||||
"6.3,3.3,6,2.5,Iris-virginica\n",
|
||||
"5.8,2.7,5.1,1.9,Iris-virginica\n",
|
||||
"7.1,3,5.9,2.1,Iris-virginica\n",
|
||||
"6.3,2.9,5.6,1.8,Iris-virginica\n",
|
||||
"6.5,3,5.8,2.2,Iris-virginica\n",
|
||||
"7.6,3,6.6,2.1,Iris-virginica\n",
|
||||
"4.9,2.5,4.5,1.7,Iris-virginica\n",
|
||||
"7.3,2.9,6.3,1.8,Iris-virginica\n",
|
||||
"6.7,2.5,5.8,1.8,Iris-virginica\n",
|
||||
"7.2,3.6,6.1,2.5,Iris-virginica\n",
|
||||
"6.5,3.2,5.1,2,Iris-virginica\n",
|
||||
"6.4,2.7,5.3,1.9,Iris-virginica\n",
|
||||
"6.8,3,5.5,2.1,Iris-virginica\n",
|
||||
"5.7,2.5,5,2,Iris-virginica\n",
|
||||
"5.8,2.8,5.1,2.4,Iris-virginica\n",
|
||||
"6.4,3.2,5.3,2.3,Iris-virginica\n",
|
||||
"6.5,3,5.5,1.8,Iris-virginica\n",
|
||||
"7.7,3.8,6.7,2.2,Iris-virginica\n",
|
||||
"7.7,2.6,6.9,2.3,Iris-virginica\n",
|
||||
"6,2.2,5,1.5,Iris-virginica\n",
|
||||
"6.9,3.2,5.7,2.3,Iris-virginica\n",
|
||||
"5.6,2.8,4.9,2,Iris-virginica\n",
|
||||
"7.7,2.8,6.7,2,Iris-virginica\n",
|
||||
"6.3,2.7,4.9,1.8,Iris-virginica\n",
|
||||
"6.7,3.3,5.7,2.1,Iris-virginica\n",
|
||||
"7.2,3.2,6,1.8,Iris-virginica\n",
|
||||
"6.2,2.8,4.8,1.8,Iris-virginica\n",
|
||||
"6.1,3,4.9,1.8,Iris-virginica\n",
|
||||
"6.4,2.8,5.6,2.1,Iris-virginica\n",
|
||||
"7.2,3,5.8,1.6,Iris-virginica\n",
|
||||
"7.4,2.8,6.1,1.9,Iris-virginica\n",
|
||||
"7.9,3.8,6.4,2,Iris-virginica\n",
|
||||
"6.4,2.8,5.6,2.2,Iris-virginica\n",
|
||||
"6.3,2.8,5.1,1.5,Iris-virginica\n",
|
||||
"6.1,2.6,5.6,1.4,Iris-virginica\n",
|
||||
"7.7,3,6.1,2.3,Iris-virginica\n",
|
||||
"6.3,3.4,5.6,2.4,Iris-virginica\n",
|
||||
"6.4,3.1,5.5,1.8,Iris-virginica\n",
|
||||
"6,3,4.8,1.8,Iris-virginica\n",
|
||||
"6.9,3.1,5.4,2.1,Iris-virginica\n",
|
||||
"6.7,3.1,5.6,2.4,Iris-virginica\n",
|
||||
"6.9,3.1,5.1,2.3,Iris-virginica\n",
|
||||
"5.8,2.7,5.1,1.9,Iris-virginica\n",
|
||||
"6.8,3.2,5.9,2.3,Iris-virginica\n",
|
||||
"6.7,3.3,5.7,2.5,Iris-virginica\n",
|
||||
"6.7,3,5.2,2.3,Iris-virginica\n",
|
||||
"6.3,2.5,5,1.9,Iris-virginica\n",
|
||||
"6.5,3,5.2,2,Iris-virginica\n",
|
||||
"6.2,3.4,5.4,2.3,Iris-virginica\n",
|
||||
"5.9,3,5.1,1.8,Iris-virginica\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "ded59d25-c34c-4fb8-a35f-1162f1218414",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from weka.classifiers import Classifier\n",
|
||||
"cls = Classifier(classname=\"weka.classifiers.trees.J48\", options=[\"-C\", \"0.3\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "4c82f2ae-4071-4571-9a19-433b98463143",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['-C', '0.3', '-M', '2']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(cls.options)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "4c5c7893-ebbe-407d-872c-fd0bf41f8dc8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"weka.classifiers.trees.J48 -C 0.3 -M 2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(cls.to_commandline())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "7b73c18d-e0b0-469d-8a60-03bae8e01128",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"2: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"3: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"4: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"5: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"6: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"7: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"8: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"9: label index=0.0, class distribution=[0.96326708 0.02223308 0.01449983]\n",
|
||||
"10: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"11: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"12: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"13: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"14: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"15: label index=0.0, class distribution=[0.9382677 0.03162683 0.03010547]\n",
|
||||
"16: label index=0.0, class distribution=[0.9382677 0.03162683 0.03010547]\n",
|
||||
"17: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"18: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"19: label index=0.0, class distribution=[0.9382677 0.03162683 0.03010547]\n",
|
||||
"20: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"21: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"22: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"23: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"24: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"25: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"26: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"27: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"28: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"29: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"30: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"31: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"32: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"33: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"34: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"35: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"36: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"37: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"38: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"39: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"40: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"41: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"42: label index=0.0, class distribution=[0.96326708 0.02223308 0.01449983]\n",
|
||||
"43: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"44: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"45: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"46: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"47: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"48: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"49: label index=0.0, class distribution=[0.99688403 0.00188598 0.00122999]\n",
|
||||
"50: label index=0.0, class distribution=[0.99487322 0.00310305 0.00202373]\n",
|
||||
"51: label index=1.0, class distribution=[0.00545355 0.97466198 0.01988447]\n",
|
||||
"52: label index=1.0, class distribution=[0.00545355 0.97466198 0.01988447]\n",
|
||||
"53: label index=1.0, class distribution=[0.010867 0.52425197 0.46488102]\n",
|
||||
"54: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"55: label index=1.0, class distribution=[0.00228744 0.97269152 0.02502104]\n",
|
||||
"56: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"57: label index=1.0, class distribution=[0.00545355 0.97466198 0.01988447]\n",
|
||||
"58: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"59: label index=1.0, class distribution=[0.00228744 0.97269152 0.02502104]\n",
|
||||
"60: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"61: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"62: label index=1.0, class distribution=[0.00732671 0.98195521 0.01071808]\n",
|
||||
"63: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"64: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"65: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"66: label index=1.0, class distribution=[0.00545355 0.97466198 0.01988447]\n",
|
||||
"67: label index=1.0, class distribution=[0.00732671 0.98195521 0.01071808]\n",
|
||||
"68: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"69: label index=1.0, class distribution=[0.00228744 0.97269152 0.02502104]\n",
|
||||
"70: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"71: label index=2.0, class distribution=[0.00920087 0.06127297 0.92952615]\n",
|
||||
"72: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"73: label index=2.0, class distribution=[0.00409632 0.47019227 0.5257114 ]\n",
|
||||
"74: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"75: label index=1.0, class distribution=[0.00228744 0.97269152 0.02502104]\n",
|
||||
"76: label index=1.0, class distribution=[0.00545355 0.97466198 0.01988447]\n",
|
||||
"77: label index=2.0, class distribution=[0.00409632 0.47019227 0.5257114 ]\n",
|
||||
"78: label index=1.0, class distribution=[0.010867 0.52425197 0.46488102]\n",
|
||||
"79: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"80: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"81: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"82: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"83: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"84: label index=1.0, class distribution=[0.02353491 0.65433551 0.32212958]\n",
|
||||
"85: label index=1.0, class distribution=[0.01727259 0.943168 0.03955941]\n",
|
||||
"86: label index=1.0, class distribution=[0.06513736 0.90310001 0.03176263]\n",
|
||||
"87: label index=1.0, class distribution=[0.00545355 0.97466198 0.01988447]\n",
|
||||
"88: label index=1.0, class distribution=[0.00228744 0.97269152 0.02502104]\n",
|
||||
"89: label index=1.0, class distribution=[0.00732671 0.98195521 0.01071808]\n",
|
||||
"90: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"91: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"92: label index=1.0, class distribution=[0.00732671 0.98195521 0.01071808]\n",
|
||||
"93: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"94: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"95: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"96: label index=1.0, class distribution=[0.00732671 0.98195521 0.01071808]\n",
|
||||
"97: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"98: label index=1.0, class distribution=[0.00228744 0.97269152 0.02502104]\n",
|
||||
"99: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"100: label index=1.0, class distribution=[0.00308382 0.98338244 0.01353374]\n",
|
||||
"101: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"102: label index=2.0, class distribution=[0.01274667 0.02829538 0.95895795]\n",
|
||||
"103: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"104: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"105: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"106: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"107: label index=1.0, class distribution=[0.00725727 0.94287877 0.04986396]\n",
|
||||
"108: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"109: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"110: label index=2.0, class distribution=[0.00431289 0.0395258 0.95616131]\n",
|
||||
"111: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"112: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"113: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"114: label index=2.0, class distribution=[0.01274667 0.02829538 0.95895795]\n",
|
||||
"115: label index=2.0, class distribution=[0.01274667 0.02829538 0.95895795]\n",
|
||||
"116: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"117: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"118: label index=2.0, class distribution=[0.00431289 0.0395258 0.95616131]\n",
|
||||
"119: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"120: label index=1.0, class distribution=[0.02353491 0.65433551 0.32212958]\n",
|
||||
"121: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"122: label index=2.0, class distribution=[0.01274667 0.02829538 0.95895795]\n",
|
||||
"123: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"124: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"125: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"126: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"127: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"128: label index=2.0, class distribution=[0.00920087 0.06127297 0.92952615]\n",
|
||||
"129: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"130: label index=1.0, class distribution=[0.010867 0.52425197 0.46488102]\n",
|
||||
"131: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"132: label index=2.0, class distribution=[0.00431289 0.0395258 0.95616131]\n",
|
||||
"133: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"134: label index=2.0, class distribution=[0.00409632 0.47019227 0.5257114 ]\n",
|
||||
"135: label index=1.0, class distribution=[0.02353491 0.65433551 0.32212958]\n",
|
||||
"136: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"137: label index=2.0, class distribution=[0.00431289 0.0395258 0.95616131]\n",
|
||||
"138: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"139: label index=2.0, class distribution=[0.00920087 0.06127297 0.92952615]\n",
|
||||
"140: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"141: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"142: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"143: label index=2.0, class distribution=[0.01274667 0.02829538 0.95895795]\n",
|
||||
"144: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"145: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"146: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"147: label index=2.0, class distribution=[0.00139749 0.01280739 0.98579512]\n",
|
||||
"148: label index=2.0, class distribution=[0.00102485 0.02817698 0.97079816]\n",
|
||||
"149: label index=2.0, class distribution=[0.00431289 0.0395258 0.95616131]\n",
|
||||
"150: label index=2.0, class distribution=[0.00920087 0.06127297 0.92952615]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from weka.classifiers import Classifier\n",
|
||||
"cls = Classifier(classname=\"weka.classifiers.bayes.BayesNet\", options=[\"-Q\", \"weka.classifiers.bayes.net.search.local.TAN\"])\n",
|
||||
"cls.build_classifier(data)\n",
|
||||
"\n",
|
||||
"for index, inst in enumerate(data):\n",
|
||||
" pred = cls.classify_instance(inst)\n",
|
||||
" dist = cls.distribution_for_instance(inst)\n",
|
||||
" print(str(index+1) + \": label index=\" + str(pred) + \", class distribution=\" + str(dist))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "0b74f00a-15b3-4177-bb8c-e02ed1a3fd38",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Bayes Network Classifier\n",
|
||||
"Using ADTree\n",
|
||||
"#attributes=5 #classindex=4\n",
|
||||
"Network structure (nodes followed by parents)\n",
|
||||
"sepallength(3): class \n",
|
||||
"sepalwidth(3): class petalwidth \n",
|
||||
"petallength(3): class sepallength \n",
|
||||
"petalwidth(3): class petallength \n",
|
||||
"class(3): \n",
|
||||
"LogScore Bayes: -484.0749140715054\n",
|
||||
"LogScore BDeu: -653.8524681760015\n",
|
||||
"LogScore MDL: -654.6252712234647\n",
|
||||
"LogScore ENTROPY: -499.2955771064808\n",
|
||||
"LogScore AIC: -561.2955771064808\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "OSError",
|
||||
"evalue": "[Errno 63] File name too long: '<?xml version=\"1.0\"?>\\n<!-- DTD for the XMLBIF 0.3 format -->\\n<!DOCTYPE BIF [\\n\\t<!ELEMENT BIF ( NETWORK )*>\\n\\t <!ATTLIST BIF VERSION CDATA #REQUIRED>\\n\\t<!ELEMENT NETWORK ( NAME, ( PROPERTY | VARIABLE | DEFINITION )* )>\\n\\t<!ELEMENT NAME (#PCDATA)>\\n\\t<!ELEMENT VARIABLE ( NAME, ( OUTCOME | PROPERTY )* ) >\\n\\t <!ATTLIST VARIABLE TYPE (nature|decision|utility) \"nature\">\\n\\t<!ELEMENT OUTCOME (#PCDATA)>\\n\\t<!ELEMENT DEFINITION ( FOR | GIVEN | TABLE | PROPERTY )* >\\n\\t<!ELEMENT FOR (#PCDATA)>\\n\\t<!ELEMENT GIVEN (#PCDATA)>\\n\\t<!ELEMENT TABLE (#PCDATA)>\\n\\t<!ELEMENT PROPERTY (#PCDATA)>\\n]>\\n\\n\\n<BIF VERSION=\"0.3\">\\n<NETWORK>\\n<NAME>iris-weka.filters.supervised.attribute.Discretize-Rfirst-last-precision6-weka.filters.unsupervised.attribute.ReplaceMissingValues</NAME>\\n<VARIABLE TYPE=\"nature\">\\n<NAME>sepallength</NAME>\\n<OUTCOME>'\\\\'(-inf-5.55]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(5.55-6.15]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(6.15-inf)\\\\''</OUTCOME>\\n</VARIABLE>\\n<VARIABLE TYPE=\"nature\">\\n<NAME>sepalwidth</NAME>\\n<OUTCOME>'\\\\'(-inf-2.95]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(2.95-3.35]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(3.35-inf)\\\\''</OUTCOME>\\n</VARIABLE>\\n<VARIABLE TYPE=\"nature\">\\n<NAME>petallength</NAME>\\n<OUTCOME>'\\\\'(-inf-2.45]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(2.45-4.75]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(4.75-inf)\\\\''</OUTCOME>\\n</VARIABLE>\\n<VARIABLE TYPE=\"nature\">\\n<NAME>petalwidth</NAME>\\n<OUTCOME>'\\\\'(-inf-0.8]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(0.8-1.75]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(1.75-inf)\\\\''</OUTCOME>\\n</VARIABLE>\\n<VARIABLE TYPE=\"nature\">\\n<NAME>class</NAME>\\n<OUTCOME>Iris-setosa</OUTCOME>\\n<OUTCOME>Iris-versicolor</OUTCOME>\\n<OUTCOME>Iris-virginica</OUTCOME>\\n</VARIABLE>\\n<DEFINITION>\\n<FOR>sepallength</FOR>\\n<GIVEN>class</GIVEN>\\n<TABLE>\\n0.9223300970873787 0.06796116504854369 0.009708737864077669 \\n0.22330097087378642 
0.4563106796116505 0.32038834951456313 \\n0.02912621359223301 0.20388349514563106 0.7669902912621359 \\n</TABLE>\\n</DEFINITION>\\n<DEFINITION>\\n<FOR>sepalwidth</FOR>\\n<GIVEN>class</GIVEN>\\n<GIVEN>petalwidth</GIVEN>\\n<TABLE>\\n0.04854368932038835 0.3592233009708738 0.5922330097087378 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.6831683168316832 0.2871287128712871 0.0297029702970297 \\n0.2 0.6 0.2 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.6923076923076923 0.23076923076923078 0.07692307692307693 \\n0.3763440860215054 0.5053763440860215 0.11827956989247312 \\n</TABLE>\\n</DEFINITION>\\n<DEFINITION>\\n<FOR>petallength</FOR>\\n<GIVEN>class</GIVEN>\\n<GIVEN>sepallength</GIVEN>\\n<TABLE>\\n0.979381443298969 0.010309278350515464 0.010309278350515464 \\n0.7777777777777778 0.1111111111111111 0.1111111111111111 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.04 0.92 0.04 \\n0.02040816326530612 0.8775510204081632 0.10204081632653061 \\n0.02857142857142857 0.7142857142857143 0.2571428571428571 \\n0.2 0.6 0.2 \\n0.043478260869565216 0.043478260869565216 0.9130434782608695 \\n0.012345679012345678 0.012345679012345678 0.9753086419753086 \\n</TABLE>\\n</DEFINITION>\\n<DEFINITION>\\n<FOR>petalwidth</FOR>\\n<GIVEN>class</GIVEN>\\n<GIVEN>petallength</GIVEN>\\n<TABLE>\\n0.9805825242718447 0.009708737864077669 0.009708737864077669 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.01098901098901099 0.978021978021978 0.01098901098901099 \\n0.06666666666666667 0.7333333333333333 0.2 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.2 0.6 0.2 \\n0.009900990099009901 0.0891089108910891 0.900990099009901 
\\n</TABLE>\\n</DEFINITION>\\n<DEFINITION>\\n<FOR>class</FOR>\\n<TABLE>\\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n</TABLE>\\n</DEFINITION>\\n</NETWORK>\\n</BIF>\\n'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn [13], line 9\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mweka\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mplot\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgraph\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgraph\u001b[39;00m \u001b[38;5;66;03m# NB: pygraphviz and PIL are required\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[43mgraph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot_dot_graph\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgraph\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/miniconda3/envs/pyweka/lib/python3.10/site-packages/weka/plot/graph.py:49\u001b[0m, in \u001b[0;36mplot_dot_graph\u001b[0;34m(graph, filename)\u001b[0m\n\u001b[1;32m 46\u001b[0m logger\u001b[38;5;241m.\u001b[39merror(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPIL is not installed, cannot display graph plot!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m---> 49\u001b[0m agraph \u001b[38;5;241m=\u001b[39m \u001b[43mAGraph\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgraph\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 50\u001b[0m agraph\u001b[38;5;241m.\u001b[39mlayout(prog\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdot\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m filename \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
||||
"File \u001b[0;32m~/miniconda3/envs/pyweka/lib/python3.10/site-packages/pygraphviz/agraph.py:157\u001b[0m, in \u001b[0;36mAGraph.__init__\u001b[0;34m(self, thing, filename, data, string, handle, name, strict, directed, **attr)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_owns_handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m filename \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 156\u001b[0m \u001b[38;5;66;03m# load new graph from file (creates self.handle)\u001b[39;00m\n\u001b[0;32m--> 157\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m string \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 159\u001b[0m \u001b[38;5;66;03m# load new graph from string (creates self.handle)\u001b[39;00m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;66;03m# get the charset from the string to properly encode it for\u001b[39;00m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;66;03m# writing to the temporary file in from_string()\u001b[39;00m\n\u001b[1;32m 162\u001b[0m match \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39msearch(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcharset\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124ms*=\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124ms*\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m([^\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m]+)\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m, string)\n",
|
||||
"File \u001b[0;32m~/miniconda3/envs/pyweka/lib/python3.10/site-packages/pygraphviz/agraph.py:1243\u001b[0m, in \u001b[0;36mAGraph.read\u001b[0;34m(self, path)\u001b[0m\n\u001b[1;32m 1233\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread\u001b[39m(\u001b[38;5;28mself\u001b[39m, path):\n\u001b[1;32m 1234\u001b[0m \u001b[38;5;124;03m\"\"\"Read graph from dot format file on path.\u001b[39;00m\n\u001b[1;32m 1235\u001b[0m \n\u001b[1;32m 1236\u001b[0m \u001b[38;5;124;03m path can be a file name or file handle\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1241\u001b[0m \n\u001b[1;32m 1242\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1243\u001b[0m fh \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_fh\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1244\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1245\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_handle()\n",
|
||||
"File \u001b[0;32m~/miniconda3/envs/pyweka/lib/python3.10/site-packages/pygraphviz/agraph.py:1791\u001b[0m, in \u001b[0;36mAGraph._get_fh\u001b[0;34m(self, path, mode)\u001b[0m\n\u001b[1;32m 1789\u001b[0m fh \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpopen(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbzcat \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m path) \u001b[38;5;66;03m# probably not portable\u001b[39;00m\n\u001b[1;32m 1790\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1791\u001b[0m fh \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1792\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(path, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwrite\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m 1793\u001b[0m \u001b[38;5;66;03m# Note, mode of file handle is unchanged.\u001b[39;00m\n\u001b[1;32m 1794\u001b[0m fh \u001b[38;5;241m=\u001b[39m path\n",
|
||||
"\u001b[0;31mOSError\u001b[0m: [Errno 63] File name too long: '<?xml version=\"1.0\"?>\\n<!-- DTD for the XMLBIF 0.3 format -->\\n<!DOCTYPE BIF [\\n\\t<!ELEMENT BIF ( NETWORK )*>\\n\\t <!ATTLIST BIF VERSION CDATA #REQUIRED>\\n\\t<!ELEMENT NETWORK ( NAME, ( PROPERTY | VARIABLE | DEFINITION )* )>\\n\\t<!ELEMENT NAME (#PCDATA)>\\n\\t<!ELEMENT VARIABLE ( NAME, ( OUTCOME | PROPERTY )* ) >\\n\\t <!ATTLIST VARIABLE TYPE (nature|decision|utility) \"nature\">\\n\\t<!ELEMENT OUTCOME (#PCDATA)>\\n\\t<!ELEMENT DEFINITION ( FOR | GIVEN | TABLE | PROPERTY )* >\\n\\t<!ELEMENT FOR (#PCDATA)>\\n\\t<!ELEMENT GIVEN (#PCDATA)>\\n\\t<!ELEMENT TABLE (#PCDATA)>\\n\\t<!ELEMENT PROPERTY (#PCDATA)>\\n]>\\n\\n\\n<BIF VERSION=\"0.3\">\\n<NETWORK>\\n<NAME>iris-weka.filters.supervised.attribute.Discretize-Rfirst-last-precision6-weka.filters.unsupervised.attribute.ReplaceMissingValues</NAME>\\n<VARIABLE TYPE=\"nature\">\\n<NAME>sepallength</NAME>\\n<OUTCOME>'\\\\'(-inf-5.55]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(5.55-6.15]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(6.15-inf)\\\\''</OUTCOME>\\n</VARIABLE>\\n<VARIABLE TYPE=\"nature\">\\n<NAME>sepalwidth</NAME>\\n<OUTCOME>'\\\\'(-inf-2.95]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(2.95-3.35]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(3.35-inf)\\\\''</OUTCOME>\\n</VARIABLE>\\n<VARIABLE TYPE=\"nature\">\\n<NAME>petallength</NAME>\\n<OUTCOME>'\\\\'(-inf-2.45]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(2.45-4.75]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(4.75-inf)\\\\''</OUTCOME>\\n</VARIABLE>\\n<VARIABLE TYPE=\"nature\">\\n<NAME>petalwidth</NAME>\\n<OUTCOME>'\\\\'(-inf-0.8]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(0.8-1.75]\\\\''</OUTCOME>\\n<OUTCOME>'\\\\'(1.75-inf)\\\\''</OUTCOME>\\n</VARIABLE>\\n<VARIABLE TYPE=\"nature\">\\n<NAME>class</NAME>\\n<OUTCOME>Iris-setosa</OUTCOME>\\n<OUTCOME>Iris-versicolor</OUTCOME>\\n<OUTCOME>Iris-virginica</OUTCOME>\\n</VARIABLE>\\n<DEFINITION>\\n<FOR>sepallength</FOR>\\n<GIVEN>class</GIVEN>\\n<TABLE>\\n0.9223300970873787 0.06796116504854369 0.009708737864077669 
\\n0.22330097087378642 0.4563106796116505 0.32038834951456313 \\n0.02912621359223301 0.20388349514563106 0.7669902912621359 \\n</TABLE>\\n</DEFINITION>\\n<DEFINITION>\\n<FOR>sepalwidth</FOR>\\n<GIVEN>class</GIVEN>\\n<GIVEN>petalwidth</GIVEN>\\n<TABLE>\\n0.04854368932038835 0.3592233009708738 0.5922330097087378 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.6831683168316832 0.2871287128712871 0.0297029702970297 \\n0.2 0.6 0.2 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.6923076923076923 0.23076923076923078 0.07692307692307693 \\n0.3763440860215054 0.5053763440860215 0.11827956989247312 \\n</TABLE>\\n</DEFINITION>\\n<DEFINITION>\\n<FOR>petallength</FOR>\\n<GIVEN>class</GIVEN>\\n<GIVEN>sepallength</GIVEN>\\n<TABLE>\\n0.979381443298969 0.010309278350515464 0.010309278350515464 \\n0.7777777777777778 0.1111111111111111 0.1111111111111111 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.04 0.92 0.04 \\n0.02040816326530612 0.8775510204081632 0.10204081632653061 \\n0.02857142857142857 0.7142857142857143 0.2571428571428571 \\n0.2 0.6 0.2 \\n0.043478260869565216 0.043478260869565216 0.9130434782608695 \\n0.012345679012345678 0.012345679012345678 0.9753086419753086 \\n</TABLE>\\n</DEFINITION>\\n<DEFINITION>\\n<FOR>petalwidth</FOR>\\n<GIVEN>class</GIVEN>\\n<GIVEN>petallength</GIVEN>\\n<TABLE>\\n0.9805825242718447 0.009708737864077669 0.009708737864077669 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.01098901098901099 0.978021978021978 0.01098901098901099 \\n0.06666666666666667 0.7333333333333333 0.2 \\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n0.2 0.6 0.2 \\n0.009900990099009901 0.0891089108910891 0.900990099009901 
\\n</TABLE>\\n</DEFINITION>\\n<DEFINITION>\\n<FOR>class</FOR>\\n<TABLE>\\n0.3333333333333333 0.3333333333333333 0.3333333333333333 \\n</TABLE>\\n</DEFINITION>\\n</NETWORK>\\n</BIF>\\n'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from weka.classifiers import Classifier\n",
|
||||
"\n",
|
||||
"cls = Classifier(classname=\"weka.classifiers.bayes.BayesNet\", options=[\"-Q\", \"weka.classifiers.bayes.net.search.local.TAN\"])\n",
|
||||
"cls.build_classifier(data)\n",
|
||||
"\n",
|
||||
"print(cls)\n",
|
||||
"\n",
|
||||
"import weka.plot.graph as graph # NB: pygraphviz and PIL are required\n",
|
||||
"graph.plot_dot_graph(cls.graph)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3f59f200-4f23-4add-86ae-6df1494ede82",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@@ -1,6 +1,6 @@
|
||||
import sys
|
||||
import argparse
|
||||
from .Experiments import Models
|
||||
from .Models import Models
|
||||
from .Utils import Files, NO_ENV
|
||||
|
||||
ALL_METRICS = (
|
||||
|
126
benchmark/Datasets.py
Normal file
126
benchmark/Datasets.py
Normal file
@@ -0,0 +1,126 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
from scipy.io import arff
|
||||
from .Utils import Files
|
||||
from .Arguments import EnvData
|
||||
|
||||
|
||||
class Diterator:
    """Single-pass iterator that yields the items of a list in order.

    The iterator works on its own shallow copy of ``data``, so consuming it
    never modifies the list that was passed in.

    Parameters
    ----------
    data : list
        Items to iterate over (anything exposing ``copy()`` and list
        semantics for ``pop(0)``).
    """

    def __init__(self, data):
        # Copy first: __next__ pops from this list as it advances.
        self._stack = data.copy()

    def __iter__(self):
        # The iterator protocol requires iterators to return themselves from
        # __iter__; without it ``for x in Diterator(...)`` and
        # ``list(Diterator(...))`` fail with TypeError.
        return self

    def __next__(self):
        """Return the next pending item or raise ``StopIteration``."""
        if len(self._stack) == 0:
            raise StopIteration()
        return self._stack.pop(0)
|
||||
|
||||
|
||||
class DatasetsArff:
    """Reader for datasets stored as ``<name>.arff`` files."""

    @staticmethod
    def dataset_names(name):
        """Return the file name that holds the dataset called ``name``."""
        return f"{name}.arff"

    @staticmethod
    def folder():
        """Return the folder where the ARFF files are looked up."""
        return "datasets"

    def load(self, name, class_name):
        """Read dataset ``name`` and split it into features and labels.

        Rows containing missing values are discarded. The column called
        ``class_name`` is removed from the features and converted to integer
        codes with ``pd.factorize``.

        Returns
        -------
        tuple
            ``(X, y)``: the feature matrix as a numpy array and the array of
            integer class codes.
        """
        path = os.path.join(self.folder(), self.dataset_names(name))
        # loadarff returns a (records, metadata) pair; only records are used.
        records, _meta = arff.loadarff(path)
        frame = pd.DataFrame(records).dropna()
        features = frame.drop(columns=class_name).to_numpy()
        labels = pd.factorize(frame[class_name])[0]
        return features, labels
|
||||
|
||||
|
||||
class DatasetsTanveer:
    """Reader for tab-separated datasets stored as ``<name>_R.dat`` files."""

    @staticmethod
    def dataset_names(name):
        """Return the file name that holds the dataset called ``name``."""
        return f"{name}_R.dat"

    @staticmethod
    def folder():
        """Return the folder where the ``.dat`` files are looked up."""
        return "data"

    def load(self, name, _):
        """Read dataset ``name`` and split it into features and labels.

        The second positional argument is ignored; it only keeps the
        signature aligned with the other readers. The label column is always
        ``clase`` and the first column of the file is used as the index.

        Returns
        -------
        tuple
            ``(X, y)`` as numpy arrays.
        """
        path = os.path.join(self.folder(), self.dataset_names(name))
        frame = pd.read_csv(path, sep="\t", index_col=0)
        features = frame.drop(columns="clase").to_numpy()
        labels = frame["clase"].to_numpy()
        return features, labels
|
||||
|
||||
|
||||
class DatasetsSurcov:
    """Reader for comma-separated datasets stored as ``<name>.csv`` files."""

    @staticmethod
    def dataset_names(name):
        """Return the file name that holds the dataset called ``name``."""
        return f"{name}.csv"

    @staticmethod
    def folder():
        """Return the folder where the CSV files are looked up."""
        return "datasets"

    def load(self, name, _):
        """Read dataset ``name`` and split it into features and labels.

        The second positional argument is ignored. Rows with any missing
        value are removed, the remaining column labels are stored in
        ``self.columns`` and the ``class`` column is used as the label.

        Returns
        -------
        tuple
            ``(X, y)`` as numpy arrays.
        """
        path = os.path.join(self.folder(), self.dataset_names(name))
        frame = pd.read_csv(path, index_col=0)
        frame.dropna(axis=0, how="any", inplace=True)
        # Kept on the instance so callers can inspect the column labels
        # (label column included) after loading.
        self.columns = frame.columns
        target = "class"
        features = frame.drop(columns=[target]).to_numpy()
        labels = frame[target].to_numpy()
        return features, labels
|
||||
|
||||
|
||||
class Datasets:
    """Catalogue of the datasets available for the configured data source.

    The reader class is chosen from the ``source_data`` entry returned by
    ``EnvData.load()`` (``Datasets<source_data>`` defined in this module).
    The dataset names, and optionally the name of each dataset's class
    column, are read from the index file located in the reader's folder.

    Parameters
    ----------
    dataset_name : str, optional
        When given, the catalogue is restricted to that single dataset.

    Raises
    ------
    ValueError
        If ``dataset_name`` is not listed in the index file.
    """

    def __init__(self, dataset_name=None):
        # Local import: the module-level imports of this file do not
        # include sys.
        import sys

        env_data = EnvData.load()
        # Resolve the reader in *this* module. ``__import__(__name__)``
        # returns the top-level package for a dotted module name, so it only
        # worked while the package re-exported every reader class.
        reader_class = getattr(
            sys.modules[__name__],
            f"Datasets{env_data['source_data']}",
        )
        self.dataset = reader_class()
        self.class_names = []
        self.load_names()
        if dataset_name is not None:
            try:
                position = self.data_sets.index(dataset_name)
            except ValueError:
                raise ValueError(f"Unknown dataset: {dataset_name}")
            self.class_names = [self.class_names[position]]
            self.data_sets = [dataset_name]

    def load_names(self):
        """Fill ``data_sets`` and ``class_names`` from the index file.

        Each line of the index is either ``name`` or ``name,class_column``.
        Lines without a class column use ``"class"``. An empty index file
        yields two empty lists instead of failing.
        """
        file_name = os.path.join(self.dataset.folder(), Files.index)
        default_class = "class"
        with open(file_name) as f:
            lines = f.read().splitlines()
        names = []
        class_names = []
        for line in lines:
            # partition never raises: a line without a comma gives an empty
            # class part, which falls back to the default.
            name, _, class_name = line.partition(",")
            names.append(name)
            class_names.append(class_name or default_class)
        self.data_sets = names
        self.class_names = class_names

    def load(self, name):
        """Return ``(X, y)`` for dataset ``name`` using the source reader.

        Raises
        ------
        ValueError
            If ``name`` is not in the catalogue, its file is missing, or the
            reader itself raises ``ValueError``.
        """
        try:
            class_name = self.class_names[self.data_sets.index(name)]
            return self.dataset.load(name, class_name)
        except (ValueError, FileNotFoundError):
            raise ValueError(f"Unknown dataset: {name}")

    def __iter__(self) -> Diterator:
        """Iterate over the dataset names held in ``data_sets``."""
        return Diterator(self.data_sets)
|
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import random
|
||||
import warnings
|
||||
@@ -6,7 +7,6 @@ import time
|
||||
from datetime import datetime
|
||||
from tqdm import tqdm
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import (
|
||||
StratifiedKFold,
|
||||
KFold,
|
||||
@@ -14,91 +14,15 @@ from sklearn.model_selection import (
|
||||
cross_validate,
|
||||
)
|
||||
from .Utils import Folders, Files, NO_RESULTS
|
||||
from .Datasets import Datasets
|
||||
from .Models import Models
|
||||
from .Arguments import EnvData
|
||||
|
||||
|
||||
class Randomized:
|
||||
seeds = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||
|
||||
|
||||
class Diterator:
|
||||
def __init__(self, data):
|
||||
self._stack = data.copy()
|
||||
|
||||
def __next__(self):
|
||||
if len(self._stack) == 0:
|
||||
raise StopIteration()
|
||||
return self._stack.pop(0)
|
||||
|
||||
|
||||
class DatasetsTanveer:
|
||||
@staticmethod
|
||||
def dataset_names(name):
|
||||
return f"{name}_R.dat"
|
||||
|
||||
@staticmethod
|
||||
def folder():
|
||||
return "data"
|
||||
|
||||
def load(self, name):
|
||||
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
||||
data = pd.read_csv(
|
||||
file_name,
|
||||
sep="\t",
|
||||
index_col=0,
|
||||
)
|
||||
X = data.drop("clase", axis=1).to_numpy()
|
||||
y = data["clase"].to_numpy()
|
||||
return X, y
|
||||
|
||||
|
||||
class DatasetsSurcov:
|
||||
@staticmethod
|
||||
def dataset_names(name):
|
||||
return f"{name}.csv"
|
||||
|
||||
@staticmethod
|
||||
def folder():
|
||||
return "datasets"
|
||||
|
||||
def load(self, name):
|
||||
file_name = os.path.join(self.folder(), self.dataset_names(name))
|
||||
data = pd.read_csv(
|
||||
file_name,
|
||||
index_col=0,
|
||||
)
|
||||
data.dropna(axis=0, how="any", inplace=True)
|
||||
self.columns = data.columns
|
||||
col_list = ["class"]
|
||||
X = data.drop(col_list, axis=1).to_numpy()
|
||||
y = data["class"].to_numpy()
|
||||
return X, y
|
||||
|
||||
|
||||
class Datasets:
|
||||
def __init__(self, dataset_name=None):
|
||||
envData = EnvData.load()
|
||||
class_name = getattr(
|
||||
__import__(__name__),
|
||||
f"Datasets{envData['source_data']}",
|
||||
)
|
||||
self.dataset = class_name()
|
||||
if dataset_name is None:
|
||||
file_name = os.path.join(self.dataset.folder(), Files.index)
|
||||
with open(file_name) as f:
|
||||
self.data_sets = f.read().splitlines()
|
||||
else:
|
||||
self.data_sets = [dataset_name]
|
||||
|
||||
def load(self, name):
|
||||
try:
|
||||
return self.dataset.load(name)
|
||||
except FileNotFoundError:
|
||||
raise ValueError(f"Unknown dataset: {name}")
|
||||
|
||||
def __iter__(self) -> Diterator:
|
||||
return Diterator(self.data_sets)
|
||||
def seeds():
|
||||
return json.loads(EnvData.load()["seeds"])
|
||||
|
||||
|
||||
class BestResults:
|
||||
@@ -234,7 +158,7 @@ class Experiment:
|
||||
self.platform = platform
|
||||
self.progress_bar = progress_bar
|
||||
self.folds = folds
|
||||
self.random_seeds = Randomized.seeds
|
||||
self.random_seeds = Randomized.seeds()
|
||||
self.results = []
|
||||
self.duration = 0
|
||||
self._init_experiment()
|
||||
@@ -242,6 +166,10 @@ class Experiment:
|
||||
def get_output_file(self):
|
||||
return self.output_file
|
||||
|
||||
@staticmethod
|
||||
def get_python_version():
|
||||
return "{}.{}".format(sys.version_info.major, sys.version_info.minor)
|
||||
|
||||
def _build_classifier(self, random_state, hyperparameters):
|
||||
self.model = Models.get_model(self.model_name, random_state)
|
||||
clf = self.model
|
||||
@@ -273,7 +201,7 @@ class Experiment:
|
||||
shuffle=True, random_state=random_state, n_splits=self.folds
|
||||
)
|
||||
clf = self._build_classifier(random_state, hyperparameters)
|
||||
self.version = clf.version() if hasattr(clf, "version") else "-"
|
||||
self.version = Models.get_version(self.model_name, clf)
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings("ignore")
|
||||
res = cross_validate(
|
||||
@@ -323,6 +251,8 @@ class Experiment:
|
||||
output["duration"] = self.duration
|
||||
output["seeds"] = self.random_seeds
|
||||
output["platform"] = self.platform
|
||||
output["language_version"] = self.get_python_version()
|
||||
output["language"] = "Python"
|
||||
output["results"] = self.results
|
||||
with open(self.output_file, "w") as f:
|
||||
json.dump(output, f)
|
||||
@@ -381,7 +311,7 @@ class GridSearch:
|
||||
self.progress_bar = progress_bar
|
||||
self.folds = folds
|
||||
self.platform = platform
|
||||
self.random_seeds = Randomized.seeds
|
||||
self.random_seeds = Randomized.seeds()
|
||||
self.grid_file = os.path.join(
|
||||
Folders.results, Files.grid_input(score_name, model_name)
|
||||
)
|
||||
|
@@ -11,6 +11,8 @@ from stree import Stree
|
||||
from wodt import Wodt
|
||||
from odte import Odte
|
||||
from xgboost import XGBClassifier
|
||||
import sklearn
|
||||
import xgboost
|
||||
|
||||
|
||||
class Models:
|
||||
@@ -89,3 +91,15 @@ class Models:
|
||||
nodes, leaves = result.nodes_leaves()
|
||||
depth = result.depth_ if hasattr(result, "depth_") else 0
|
||||
return nodes, leaves, depth
|
||||
|
||||
@staticmethod
|
||||
def get_version(name, clf):
|
||||
if hasattr(clf, "version"):
|
||||
return clf.version()
|
||||
if name in ["Cart", "ExtraTree", "RandomForest", "GBC", "SVC"]:
|
||||
return sklearn.__version__
|
||||
elif name.startswith("Bagging") or name.startswith("AdaBoost"):
|
||||
return sklearn.__version__
|
||||
elif name == "XGBoost":
|
||||
return xgboost.__version__
|
||||
return "Error"
|
||||
|
@@ -7,17 +7,39 @@ import shutil
|
||||
import subprocess
|
||||
import xlsxwriter
|
||||
import numpy as np
|
||||
from .Experiments import Datasets, BestResults
|
||||
from .Experiments import BestResults
|
||||
from .Datasets import Datasets
|
||||
from .Arguments import EnvData, ALL_METRICS
|
||||
from .Utils import (
|
||||
Folders,
|
||||
Files,
|
||||
Symbols,
|
||||
BEST_ACCURACY_STREE,
|
||||
TextColor,
|
||||
NO_RESULTS,
|
||||
)
|
||||
|
||||
|
||||
class BestResultsEver:
    """Table of reference ``[name, value]`` pairs per data source and score.

    Every metric in ``ALL_METRICS`` starts with the neutral pair
    ``["self", 1.0]`` (stored under the metric name both as-is and with
    ``-`` replaced by ``_``); accuracy for the Tanveer and Arff sources is
    then overridden with fixed values.
    """

    def __init__(self):
        self.data = {}
        for source in ["Tanveer", "Surcov", "Arff"]:
            defaults = {}
            for metric in ALL_METRICS:
                # A fresh list per key so entries never share state.
                defaults[metric.replace("-", "_")] = ["self", 1.0]
                defaults[metric] = ["self", 1.0]
            self.data[source] = defaults
        self.data["Tanveer"]["accuracy"] = [
            "STree_default (liblinear-ovr)",
            40.282203,
        ]
        self.data["Arff"]["accuracy"] = [
            "STree_default (linear-ovo)",
            21.9765,
        ]

    def get_name_value(self, key, score):
        """Return the ``[name, value]`` pair for source ``key`` and ``score``.

        Raises ``KeyError`` if either the source or the score is unknown.
        """
        by_score = self.data[key]
        return by_score[score]
|
||||
|
||||
|
||||
class BaseReport(abc.ABC):
|
||||
def __init__(self, file_name, best_file=False):
|
||||
self.file_name = file_name
|
||||
@@ -29,7 +51,20 @@ class BaseReport(abc.ABC):
|
||||
with open(self.file_name) as f:
|
||||
self.data = json.load(f)
|
||||
self.best_acc_file = best_file
|
||||
self.lines = self.data if best_file else self.data["results"]
|
||||
if best_file:
|
||||
self.lines = self.data
|
||||
else:
|
||||
self.lines = self.data["results"]
|
||||
self.score_name = self.data["score_name"]
|
||||
self.__compute_best_results_ever()
|
||||
|
||||
def __compute_best_results_ever(self):
|
||||
args = EnvData.load()
|
||||
key = args["source_data"]
|
||||
best = BestResultsEver()
|
||||
self.best_score_name, self.best_score_value = best.get_name_value(
|
||||
key, self.score_name
|
||||
)
|
||||
|
||||
def _get_accuracy(self, item):
|
||||
return self.data[item][0] if self.best_acc_file else item["score"]
|
||||
@@ -68,6 +103,12 @@ class BaseReport(abc.ABC):
|
||||
}
|
||||
return meaning[status]
|
||||
|
||||
def _get_best_accuracy(self):
|
||||
return self.best_score_value
|
||||
|
||||
def _get_message_best_accuracy(self):
|
||||
return f"{self.score_name} compared to {self.best_score_name} .:"
|
||||
|
||||
@abc.abstractmethod
|
||||
def header(self) -> None:
|
||||
pass
|
||||
@@ -82,10 +123,10 @@ class BaseReport(abc.ABC):
|
||||
|
||||
|
||||
class Report(BaseReport):
|
||||
header_lengths = [30, 5, 5, 3, 7, 7, 7, 15, 16, 15]
|
||||
header_lengths = [30, 6, 5, 3, 7, 7, 7, 15, 16, 15]
|
||||
header_cols = [
|
||||
"Dataset",
|
||||
"Samp",
|
||||
"Sampl.",
|
||||
"Feat.",
|
||||
"Cls",
|
||||
"Nodes",
|
||||
@@ -155,7 +196,8 @@ class Report(BaseReport):
|
||||
self._compare_totals = {}
|
||||
self.header_line("*")
|
||||
self.header_line(
|
||||
f" Report {self.data['model']} ver. {self.data['version']}"
|
||||
f" {self.data['model']} ver. {self.data['version']}"
|
||||
f" {self.data['language']} ver. {self.data['language_version']}"
|
||||
f" with {self.data['folds']} Folds "
|
||||
f"cross validation and {len(self.data['seeds'])} random seeds. "
|
||||
f"{self.data['date']} {self.data['time']}"
|
||||
@@ -187,8 +229,8 @@ class Report(BaseReport):
|
||||
f" {key} {self._status_meaning(key)} .....: {value:2d}"
|
||||
)
|
||||
self.header_line(
|
||||
f" Accuracy compared to stree_default (liblinear-ovr) .: "
|
||||
f"{accuracy/BEST_ACCURACY_STREE:7.4f}"
|
||||
f" {self._get_message_best_accuracy()} "
|
||||
f"{accuracy/self._get_best_accuracy():7.4f}"
|
||||
)
|
||||
self.header_line("*")
|
||||
|
||||
@@ -208,12 +250,12 @@ class ReportBest(BaseReport):
|
||||
if best
|
||||
else Files.grid_output(score, model)
|
||||
)
|
||||
file_name = os.path.join(Folders.results, name)
|
||||
self.best = best
|
||||
self.grid = grid
|
||||
file_name = os.path.join(Folders.results, name)
|
||||
super().__init__(file_name, best_file=True)
|
||||
self.score_name = score
|
||||
self.model = model
|
||||
super().__init__(file_name, best_file=True)
|
||||
|
||||
def header_line(self, text: str) -> None:
|
||||
length = sum(self.header_lengths) + len(self.header_lengths) - 3
|
||||
@@ -253,8 +295,8 @@ class ReportBest(BaseReport):
|
||||
def footer(self, accuracy):
|
||||
self.header_line("*")
|
||||
self.header_line(
|
||||
f" Scores compared to stree_default accuracy (liblinear-ovr) .: "
|
||||
f"{accuracy/BEST_ACCURACY_STREE:7.4f}"
|
||||
f" {self._get_message_best_accuracy()} "
|
||||
f"{accuracy/self._get_best_accuracy():7.4f}"
|
||||
)
|
||||
self.header_line("*")
|
||||
|
||||
@@ -306,7 +348,8 @@ class Excel(BaseReport):
|
||||
|
||||
def get_title(self):
|
||||
return (
|
||||
f" Report {self.data['model']} ver. {self.data['version']}"
|
||||
f" {self.data['model']} ver. {self.data['version']}"
|
||||
f" {self.data['language']} ver. {self.data['language_version']}"
|
||||
f" with {self.data['folds']} Folds "
|
||||
f"cross validation and {len(self.data['seeds'])} random seeds. "
|
||||
f"{self.data['date']} {self.data['time']}"
|
||||
@@ -508,8 +551,8 @@ class Excel(BaseReport):
|
||||
self.sheet.write(self.row, 3, self._status_meaning(key), bold)
|
||||
self.row += 1
|
||||
message = (
|
||||
f"** Accuracy compared to stree_default (liblinear-ovr) .: "
|
||||
f"{accuracy/BEST_ACCURACY_STREE:7.4f}"
|
||||
f"** {self._get_message_best_accuracy()} "
|
||||
f"{accuracy/self._get_best_accuracy():7.4f}"
|
||||
)
|
||||
bold = self.book.add_format({"bold": True, "font_size": 14})
|
||||
# set width of the hyperparams column with the maximum width
|
||||
@@ -534,8 +577,8 @@ class ReportDatasets:
|
||||
data_sets = Datasets()
|
||||
color_line = TextColor.LINE1
|
||||
print(color_line, end="")
|
||||
print(f"{'Dataset':30s} Samp. Feat. Cls Balance")
|
||||
print("=" * 30 + " ===== ===== === " + "=" * 40)
|
||||
print(f"{'Dataset':30s} Sampl. Feat. Cls Balance")
|
||||
print("=" * 30 + " ===== ====== === " + "=" * 40)
|
||||
for dataset in data_sets:
|
||||
X, y = data_sets.load(dataset)
|
||||
color_line = (
|
||||
@@ -551,7 +594,7 @@ class ReportDatasets:
|
||||
sep = "/ "
|
||||
print(color_line, end="")
|
||||
print(
|
||||
f"{dataset:30s} {X.shape[0]:5,d} {X.shape[1]:5,d} "
|
||||
f"{dataset:30s} {X.shape[0]:6,d} {X.shape[1]:5,d} "
|
||||
f"{len(np.unique(y)):3d} {comp:40s}"
|
||||
)
|
||||
|
||||
@@ -633,6 +676,13 @@ class Benchmark:
|
||||
self._report = {}
|
||||
self._datasets = set()
|
||||
self.visualize = visualize
|
||||
self.__compute_best_results_ever()
|
||||
|
||||
def __compute_best_results_ever(self):
|
||||
args = EnvData.load()
|
||||
key = args["source_data"]
|
||||
best = BestResultsEver()
|
||||
_, self.best_score_value = best.get_name_value(key, self._score)
|
||||
|
||||
def get_result_file_name(self):
|
||||
return os.path.join(Folders.exreport, Files.exreport(self._score))
|
||||
@@ -970,7 +1020,7 @@ class Benchmark:
|
||||
sheet.write_formula(
|
||||
row,
|
||||
col + 1,
|
||||
f"=sum({range_metric})/{BEST_ACCURACY_STREE}",
|
||||
f"=sum({range_metric})/{self.best_score_value}",
|
||||
decimal_total,
|
||||
)
|
||||
range_rank = (
|
||||
@@ -1062,7 +1112,7 @@ class StubReport(BaseReport):
|
||||
|
||||
def footer(self, accuracy: float) -> None:
|
||||
self.accuracy = accuracy
|
||||
self.score = accuracy / BEST_ACCURACY_STREE
|
||||
self.score = accuracy / self._get_best_accuracy()
|
||||
|
||||
|
||||
class Summary:
|
||||
|
@@ -1,7 +1,8 @@
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
BEST_ACCURACY_STREE = 40.282203
|
||||
PYTHON_VERSION = "{}.{}".format(sys.version_info.major, sys.version_info.minor)
|
||||
NO_RESULTS = "** No results found **"
|
||||
NO_ENV = "File .env not found"
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
from .Experiments import Experiment, Datasets, DatasetsSurcov, DatasetsTanveer
|
||||
from .Datasets import Datasets, DatasetsSurcov, DatasetsTanveer, DatasetsArff
|
||||
from .Experiments import Experiment
|
||||
from .Results import Report, Summary
|
||||
|
||||
__author__ = "Ricardo Montañana Gómez"
|
||||
|
@@ -1 +1 @@
|
||||
__version__ = "0.1.1"
|
||||
__version__ = "0.2.0"
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
from benchmark.Results import ReportBest
|
||||
from benchmark.Experiments import Datasets, BestResults
|
||||
from benchmark.Experiments import BestResults
|
||||
from benchmark.Datasets import Datasets
|
||||
from benchmark.Arguments import Arguments
|
||||
|
||||
"""Build a json file with the best results of a model and its hyperparameters
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
from benchmark.Experiments import GridSearch, Datasets
|
||||
from benchmark.Experiments import GridSearch
|
||||
from benchmark.Datasets import Datasets
|
||||
from benchmark.Arguments import Arguments
|
||||
|
||||
"""Do experiment and build result file, optionally print report with results
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
import os
|
||||
from benchmark.Experiments import Experiment, Datasets
|
||||
from benchmark.Experiments import Experiment
|
||||
from benchmark.Datasets import Datasets
|
||||
from benchmark.Results import Report
|
||||
from benchmark.Arguments import Arguments
|
||||
|
||||
|
@@ -3,7 +3,7 @@ import os
|
||||
import json
|
||||
from stree import Stree
|
||||
from graphviz import Source
|
||||
from benchmark.Experiments import Datasets
|
||||
from benchmark.Datasets import Datasets
|
||||
from benchmark.Utils import Files, Folders
|
||||
from benchmark.Arguments import Arguments
|
||||
|
||||
|
@@ -5,3 +5,4 @@ model=ODTE
|
||||
stratified=0
|
||||
# Source of data: Tanveer/Surcov/Arff
|
||||
source_data=Tanveer
|
||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||
|
7
benchmark/tests/.env.arff
Normal file
7
benchmark/tests/.env.arff
Normal file
@@ -0,0 +1,7 @@
|
||||
score=accuracy
|
||||
platform=MacBookpro16
|
||||
n_folds=5
|
||||
model=ODTE
|
||||
stratified=0
|
||||
source_data=Arff
|
||||
seeds=[271, 314, 171]
|
@@ -5,3 +5,4 @@ model=ODTE
|
||||
stratified=0
|
||||
# Source of data: Tanveer/Surcov/Arff
|
||||
source_data=Tanveer
|
||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||
|
@@ -5,3 +5,4 @@ model=ODTE
|
||||
stratified=0
|
||||
# Source of data: Tanveer/Surcov/Arff
|
||||
source_data=Surcov
|
||||
seeds=[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
from .TestBase import TestBase
|
||||
from ..Experiments import BestResults, Datasets
|
||||
from ..Experiments import BestResults
|
||||
from ..Datasets import Datasets
|
||||
|
||||
|
||||
class BestResultTest(TestBase):
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import shutil
|
||||
from .TestBase import TestBase
|
||||
from ..Experiments import Randomized, Datasets
|
||||
from ..Experiments import Randomized
|
||||
from ..Datasets import Datasets
|
||||
|
||||
|
||||
class DatasetTest(TestBase):
|
||||
@@ -22,12 +23,18 @@ class DatasetTest(TestBase):
|
||||
|
||||
def test_Randomized(self):
|
||||
expected = [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]
|
||||
self.assertSequenceEqual(Randomized.seeds, expected)
|
||||
self.assertSequenceEqual(Randomized.seeds(), expected)
|
||||
|
||||
def test_Randomized_3_seeds(self):
|
||||
self.set_env(".env.arff")
|
||||
expected = [271, 314, 171]
|
||||
self.assertSequenceEqual(Randomized.seeds(), expected)
|
||||
|
||||
def test_Datasets_iterator(self):
|
||||
test = {
|
||||
".env.dist": ["balance-scale", "balloons"],
|
||||
".env.surcov": ["iris", "wine"],
|
||||
".env.arff": ["iris", "wine"],
|
||||
}
|
||||
for key, value in test.items():
|
||||
self.set_env(key)
|
||||
@@ -51,6 +58,11 @@ class DatasetTest(TestBase):
|
||||
self.assertSequenceEqual(X.shape, (625, 4))
|
||||
self.assertSequenceEqual(y.shape, (625,))
|
||||
|
||||
def test_create_with_unknown_dataset(self):
|
||||
with self.assertRaises(ValueError) as msg:
|
||||
Datasets("unknown")
|
||||
self.assertEqual(str(msg.exception), "Unknown dataset: unknown")
|
||||
|
||||
def test_load_unknown_dataset(self):
|
||||
dt = Datasets()
|
||||
with self.assertRaises(ValueError) as msg:
|
||||
@@ -61,6 +73,7 @@ class DatasetTest(TestBase):
|
||||
test = {
|
||||
".env.dist": "balloons",
|
||||
".env.surcov": "wine",
|
||||
".env.arff": "iris",
|
||||
}
|
||||
for key, value in test.items():
|
||||
self.set_env(key)
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import json
|
||||
from .TestBase import TestBase
|
||||
from ..Experiments import Experiment, Datasets
|
||||
from ..Experiments import Experiment
|
||||
from ..Datasets import Datasets
|
||||
|
||||
|
||||
class ExperimentTest(TestBase):
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import json
|
||||
from .TestBase import TestBase
|
||||
from ..Experiments import GridSearch, Datasets
|
||||
from ..Experiments import GridSearch
|
||||
from ..Datasets import Datasets
|
||||
|
||||
|
||||
class GridSearchTest(TestBase):
|
||||
@@ -77,7 +78,9 @@ class GridSearchTest(TestBase):
|
||||
"v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s",
|
||||
],
|
||||
}
|
||||
self.assertSequenceEqual(data, expected)
|
||||
for key, value in expected.items():
|
||||
self.assertEqual(data[key][0], value[0])
|
||||
self.assertDictEqual(data[key][1], value[1])
|
||||
|
||||
def test_duration_message(self):
|
||||
expected = ["47.234s", "5.421m", "1.177h"]
|
||||
|
@@ -15,6 +15,8 @@ from odte import Odte
|
||||
from xgboost import XGBClassifier
|
||||
from .TestBase import TestBase
|
||||
from ..Models import Models
|
||||
import xgboost
|
||||
import sklearn
|
||||
|
||||
|
||||
class ModelTest(TestBase):
|
||||
@@ -33,6 +35,38 @@ class ModelTest(TestBase):
|
||||
for key, value in test.items():
|
||||
self.assertIsInstance(Models.get_model(key), value)
|
||||
|
||||
def test_Models_version(self):
    """Check Models.get_version for every supported model name.

    For STree, Wodt and ODTE the classifier instance gets a stub
    ``version`` callable. For XGBoost and the remaining models the
    ``__version__`` attribute of the xgboost / sklearn module is replaced
    with a fake value. In every case Models.get_version must return the
    expected string.

    The module attributes are patched with ``patch.object`` so the real
    versions are restored after each check. Assigning them directly would
    leak the fake versions into every test that runs later in the same
    process.
    """
    from unittest.mock import patch

    def ver_stree():
        return "1.2.3"

    def ver_wodt():
        return "h.j.k"

    def ver_odte():
        return "4.5.6"

    # model name -> [stub version callable or None, expected version]
    test = {
        "STree": [ver_stree, "1.2.3"],
        "Wodt": [ver_wodt, "h.j.k"],
        "ODTE": [ver_odte, "4.5.6"],
        "RandomForest": [None, "7.8.9"],
        "BaggingStree": [None, "x.y.z"],
        "AdaBoostStree": [None, "w.x.z"],
        "XGBoost": [None, "10.11.12"],
    }
    for key, value in test.items():
        # subTest reports every failing model instead of stopping at the
        # first one.
        with self.subTest(model=key):
            clf = Models.get_model(key)
            if key in ["STree", "Wodt", "ODTE"]:
                # Only this throwaway instance is modified.
                clf.version = value[0]
                self.assertEqual(Models.get_version(key, clf), value[1])
                continue
            module = xgboost if key == "XGBoost" else sklearn
            # Temporary override; the original value returns on exit.
            with patch.object(module, "__version__", value[1]):
                self.assertEqual(Models.get_version(key, clf), value[1])
|
||||
|
||||
def test_bogus_Model_Version(self):
    """An unrecognised model name must be reported as version "Error"."""
    reported = Models.get_version("unknown", None)
    self.assertEqual(reported, "Error")
|
||||
|
||||
def test_BaggingStree(self):
|
||||
clf = Models.get_model("BaggingStree")
|
||||
self.assertIsInstance(clf, BaggingClassifier)
|
||||
|
@@ -75,7 +75,17 @@ class ReportTest(TestBase):
|
||||
report = ReportBest("accuracy", "STree", best=False, grid=True)
|
||||
with patch(self.output, new=StringIO()) as stdout:
|
||||
report.report()
|
||||
self.check_output_file(stdout, "report_grid")
|
||||
file_name = "report_grid.test"
|
||||
with open(os.path.join(self.test_files, file_name)) as f:
|
||||
expected = f.read().splitlines()
|
||||
output_text = stdout.getvalue().splitlines()
|
||||
# Compare replacing STree version
|
||||
for line, index in zip(expected, range(len(expected))):
|
||||
if "1.2.4" in line:
|
||||
# replace STree version
|
||||
line = self.replace_STree_version(line, output_text, index)
|
||||
|
||||
self.assertEqual(line, output_text[index])
|
||||
|
||||
def test_report_best_both(self):
|
||||
report = ReportBest("accuracy", "STree", best=True, grid=True)
|
||||
|
@@ -14,6 +14,7 @@ class TestBase(unittest.TestCase):
|
||||
os.chdir(os.path.dirname(os.path.abspath(__file__)))
|
||||
self.test_files = "test_files"
|
||||
self.output = "sys.stdout"
|
||||
self.ext = ".test"
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def remove_files(self, files, folder):
|
||||
@@ -31,7 +32,7 @@ class TestBase(unittest.TestCase):
|
||||
print(f'{row};{col};"{value}"', file=f)
|
||||
|
||||
def check_excel_sheet(self, sheet, file_name):
|
||||
file_name += ".test"
|
||||
file_name += self.ext
|
||||
with open(os.path.join(self.test_files, file_name), "r") as f:
|
||||
expected = csv.reader(f, delimiter=";")
|
||||
for row, col, value in expected:
|
||||
@@ -45,15 +46,20 @@ class TestBase(unittest.TestCase):
|
||||
self.assertEqual(sheet.cell(int(row), int(col)).value, value)
|
||||
|
||||
def check_output_file(self, output, file_name):
|
||||
file_name += ".test"
|
||||
file_name += self.ext
|
||||
with open(os.path.join(self.test_files, file_name)) as f:
|
||||
expected = f.read()
|
||||
self.assertEqual(output.getvalue(), expected)
|
||||
|
||||
@staticmethod
|
||||
def replace_STree_version(line, output, index):
|
||||
idx = line.find("1.2.4")
|
||||
return line.replace("1.2.4", output[index][idx : idx + 5])
|
||||
|
||||
def check_file_file(self, computed_file, expected_file):
|
||||
with open(computed_file) as f:
|
||||
computed = f.read()
|
||||
expected_file += ".test"
|
||||
expected_file += self.ext
|
||||
with open(os.path.join(self.test_files, expected_file)) as f:
|
||||
expected = f.read()
|
||||
self.assertEqual(computed, expected)
|
||||
|
@@ -178,6 +178,7 @@ class UtilTest(TestBase):
|
||||
"model": "ODTE",
|
||||
"stratified": "0",
|
||||
"source_data": "Tanveer",
|
||||
"seeds": "[57, 31, 1714, 17, 23, 79, 83, 97, 7, 1]",
|
||||
}
|
||||
computed = EnvData().load()
|
||||
self.assertDictEqual(computed, expected)
|
||||
|
@@ -1,2 +1,2 @@
|
||||
iris
|
||||
wine
|
||||
iris,class
|
||||
wine,class
|
||||
|
305
benchmark/tests/datasets/hayes-roth.arff
Executable file
305
benchmark/tests/datasets/hayes-roth.arff
Executable file
@@ -0,0 +1,305 @@
|
||||
% 1. Title: Hayes-Roth & Hayes-Roth (1977) Database
|
||||
%
|
||||
% 2. Source Information:
|
||||
% (a) Creators: Barbara and Frederick Hayes-Roth
|
||||
% (b) Donor: David W. Aha (aha@ics.uci.edu) (714) 856-8779
|
||||
% (c) Date: March, 1989
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% 1. Hayes-Roth, B., & Hayes-Roth, F. (1977). Concept learning and the
|
||||
% recognition and classification of exemplars. Journal of Verbal Learning
|
||||
% and Verbal Behavior, 16, 321-338.
|
||||
% -- Results:
|
||||
% -- Human subjects classification and recognition performance:
|
||||
% 1. decreases with distance from the prototype,
|
||||
% 2. is better on unseen prototypes than old instances, and
|
||||
% 3. improves with presentation frequency during learning.
|
||||
% 2. Anderson, J.R., & Kline, P.J. (1979). A learning system and its
|
||||
% psychological implications. In Proceedings of the Sixth International
|
||||
% Joint Conference on Artificial Intelligence (pp. 16-21). Tokyo, Japan:
|
||||
% Morgan Kaufmann.
|
||||
% -- Partitioned the results into 4 classes:
|
||||
% 1. prototypes
|
||||
% 2. near-prototypes with high presentation frequency during learning
|
||||
% 3. near-prototypes with low presentation frequency during learning
|
||||
% 4. instances that are far from prototypes
|
||||
% -- Described evidence that ACT's classification confidence and
|
||||
% recognition behaviors closely simulated human subjects' behaviors.
|
||||
% 3. Aha, D.W. (1989). Incremental learning of independent, overlapping, and
|
||||
% graded concept descriptions with an instance-based process framework.
|
||||
% Manuscript submitted for publication.
|
||||
% -- Used same partition as Anderson & Kline
|
||||
% -- Described evidence that Bloom's classification confidence behavior
|
||||
% is similar to the human subjects' behavior. Bloom fitted the data
|
||||
% more closely than did ACT.
|
||||
%
|
||||
% 4. Relevant Information:
|
||||
% This database contains 5 numeric-valued attributes. Only a subset of
|
||||
% 3 are used during testing (the latter 3). Furthermore, only 2 of the
|
||||
% 3 concepts are "used" during testing (i.e., those with the prototypes
|
||||
% 000 and 111). I've mapped all values to their zero-indexing equivalents.
|
||||
%
|
||||
% Some instances could be placed in either category 0 or 1. I've followed
|
||||
% the authors' suggestion, placing them in each category with equal
|
||||
% probability.
|
||||
%
|
||||
% I've replaced the actual values of the attributes (i.e., hobby has values
|
||||
% chess, sports and stamps) with numeric values. I think this is how
|
||||
% the authors did this when testing the categorization models described
|
||||
% in the paper. I find this unfair. While the subjects were able to bring
|
||||
% background knowledge to bear on the attribute values and their
|
||||
% relationships, the algorithms were provided with no such knowledge. I'm
|
||||
% uncertain whether the 2 distractor attributes (name and hobby) are
|
||||
% presented to the authors' algorithms during testing. However, it is clear
|
||||
% that only the age, educational status, and marital status attributes are
|
||||
% given during the human subjects' transfer tests.
|
||||
%
|
||||
% 5. Number of Instances: 132 training instances, 28 test instances
|
||||
%
|
||||
% 6. Number of Attributes: 5 plus the class membership attribute. 3 concepts.
|
||||
%
|
||||
% 7. Attribute Information:
|
||||
% -- 1. name: distinct for each instance and represented numerically
|
||||
% -- 2. hobby: nominal values ranging between 1 and 3
|
||||
% -- 3. age: nominal values ranging between 1 and 4
|
||||
% -- 4. educational level: nominal values ranging between 1 and 4
|
||||
% -- 5. marital status: nominal values ranging between 1 and 4
|
||||
% -- 6. class: nominal value between 1 and 3
|
||||
%
|
||||
% 9. Missing Attribute Values: none
|
||||
%
|
||||
% 10. Class Distribution: see below
|
||||
%
|
||||
% 11. Detailed description of the experiment:
|
||||
% 1. 3 categories (1, 2, and neither -- which I call 3)
|
||||
% -- some of the instances could be classified in either class 1 or 2, and
|
||||
% they have been evenly distributed between the two classes
|
||||
% 2. 5 Attributes
|
||||
% -- A. name (a randomly-generated number between 1 and 132)
|
||||
% -- B. hobby (a randomly-generated number between 1 and 3)
|
||||
% -- C. age (a number between 1 and 4)
|
||||
% -- D. education level (a number between 1 and 4)
|
||||
% -- E. marital status (a number between 1 and 4)
|
||||
% 3. Classification:
|
||||
% -- only attributes C-E are diagnostic; values for A and B are ignored
|
||||
% -- Class Neither: if a 4 occurs for any attribute C-E
|
||||
% -- Class 1: Otherwise, if (# of 1's)>(# of 2's) for attributes C-E
|
||||
% -- Class 2: Otherwise, if (# of 2's)>(# of 1's) for attributes C-E
|
||||
% -- Either 1 or 2: Otherwise, if (# of 2's)=(# of 1's) for attributes C-E
|
||||
% 4. Prototypes:
|
||||
% -- Class 1: 111
|
||||
% -- Class 2: 222
|
||||
% -- Class Either: 333
|
||||
% -- Class Neither: 444
|
||||
% 5. Number of training instances: 132
|
||||
% -- Each instance presented 0, 1, or 10 times
|
||||
% -- None of the prototypes seen during training
|
||||
% -- 3 instances from each of categories 1, 2, and either are repeated
|
||||
% 10 times each
|
||||
% -- 3 additional instances from the Either category are shown during
|
||||
% learning
|
||||
% 6. Number of test instances: 28
|
||||
% -- All 9 class 1
|
||||
% -- All 9 class 2
|
||||
% -- All 6 class Either
|
||||
% -- All 4 prototypes
|
||||
% --------------------
|
||||
% -- 28 total
|
||||
%
|
||||
% Observations of interest:
|
||||
% 1. Relative classification confidence of
|
||||
% -- prototypes for classes 1 and 2 (2 instances)
|
||||
% (Anderson calls these Class 1 instances)
|
||||
% -- instances of class 1 with frequency 10 during training and
|
||||
% instances of class 2 with frequency 10 during training that
|
||||
% are 1 value away from their respective prototypes (6 instances)
|
||||
% (Anderson calls these Class 2 instances)
|
||||
% -- instances of class 1 with frequency 1 during training and
|
||||
% instances of class 2 with frequency 1 during training that
|
||||
% are 1 value away from their respective prototypes (6 instances)
|
||||
% (Anderson calls these Class 3 instances)
|
||||
% -- instances of class 1 with frequency 1 during training and
|
||||
% instances of class 2 with frequency 1 during training that
|
||||
% are 2 values away from their respective prototypes (6 instances)
|
||||
% (Anderson calls these Class 4 instances)
|
||||
% 2. Relative classification recognition of them also
|
||||
%
|
||||
% Some Expected results:
|
||||
% Both frequency and distance from prototype will affect the classification
|
||||
% accuracy of instances. Greater the frequency, higher the classification
|
||||
% confidence. Closer to prototype, higher the classification confidence.
|
||||
%
|
||||
% Information about the dataset
|
||||
% CLASSTYPE: nominal
|
||||
% CLASSINDEX: last
|
||||
%
|
||||
|
||||
@relation hayes-roth
|
||||
|
||||
@attribute hobby INTEGER
|
||||
@attribute age INTEGER
|
||||
@attribute educational_level INTEGER
|
||||
@attribute marital_status INTEGER
|
||||
@attribute class {1,2,3,4}
|
||||
|
||||
@data
|
||||
2,1,1,2,1
|
||||
2,1,3,2,2
|
||||
3,1,4,1,3
|
||||
2,4,2,2,3
|
||||
1,1,3,4,3
|
||||
1,1,3,2,2
|
||||
3,1,3,2,2
|
||||
3,4,2,4,3
|
||||
2,2,1,1,1
|
||||
3,2,1,1,1
|
||||
1,2,1,1,1
|
||||
2,2,3,4,3
|
||||
1,1,2,1,1
|
||||
2,1,2,2,2
|
||||
2,4,1,4,3
|
||||
1,1,3,3,1
|
||||
3,2,1,2,2
|
||||
1,2,1,1,1
|
||||
3,3,2,1,1
|
||||
3,1,3,2,1
|
||||
1,2,2,1,2
|
||||
3,2,1,3,1
|
||||
2,1,2,1,1
|
||||
3,2,1,3,1
|
||||
2,3,2,1,1
|
||||
3,2,2,1,2
|
||||
3,2,1,3,2
|
||||
2,1,2,2,2
|
||||
1,1,3,2,1
|
||||
3,2,1,1,1
|
||||
1,4,1,1,3
|
||||
2,2,1,3,1
|
||||
1,2,1,3,2
|
||||
1,1,1,2,1
|
||||
2,4,3,1,3
|
||||
3,1,2,2,2
|
||||
1,1,2,2,2
|
||||
3,2,2,1,2
|
||||
1,2,1,2,2
|
||||
3,4,3,2,3
|
||||
2,2,2,1,2
|
||||
2,2,1,2,2
|
||||
3,2,1,3,2
|
||||
3,2,1,1,1
|
||||
3,1,2,1,1
|
||||
1,2,1,3,2
|
||||
2,1,1,2,1
|
||||
1,1,1,2,1
|
||||
1,2,2,3,2
|
||||
3,3,1,1,1
|
||||
3,3,3,1,1
|
||||
3,2,1,2,2
|
||||
3,2,1,2,2
|
||||
3,1,2,1,1
|
||||
1,1,1,2,1
|
||||
2,1,3,2,1
|
||||
2,2,2,1,2
|
||||
2,1,2,1,1
|
||||
2,2,1,3,1
|
||||
2,1,2,2,2
|
||||
1,2,4,2,3
|
||||
2,2,1,2,2
|
||||
1,1,2,4,3
|
||||
1,3,2,1,1
|
||||
2,4,4,2,3
|
||||
2,3,2,1,1
|
||||
3,1,2,2,2
|
||||
1,1,2,2,2
|
||||
1,3,2,4,3
|
||||
1,1,2,2,2
|
||||
3,1,4,2,3
|
||||
2,1,3,2,2
|
||||
1,1,3,2,2
|
||||
3,1,3,2,1
|
||||
1,2,4,4,3
|
||||
1,4,2,1,3
|
||||
2,1,2,1,1
|
||||
3,4,1,2,3
|
||||
2,2,1,1,1
|
||||
1,1,2,1,1
|
||||
2,2,4,3,3
|
||||
3,1,2,2,2
|
||||
1,1,3,2,1
|
||||
1,2,1,3,1
|
||||
1,4,4,1,3
|
||||
3,3,3,2,2
|
||||
2,2,1,3,2
|
||||
3,3,2,1,2
|
||||
1,1,1,3,1
|
||||
2,2,1,2,2
|
||||
2,2,2,1,2
|
||||
2,3,2,3,2
|
||||
1,3,2,1,2
|
||||
2,2,1,2,2
|
||||
1,1,1,2,1
|
||||
3,2,2,1,2
|
||||
3,2,1,1,1
|
||||
1,1,2,1,1
|
||||
3,1,4,4,3
|
||||
3,3,2,1,2
|
||||
2,3,2,1,2
|
||||
2,1,3,1,1
|
||||
1,2,1,2,2
|
||||
3,1,1,2,1
|
||||
2,2,4,1,3
|
||||
1,2,2,1,2
|
||||
2,3,2,1,2
|
||||
2,2,1,4,3
|
||||
1,4,2,3,3
|
||||
2,2,1,1,1
|
||||
1,2,1,1,1
|
||||
2,2,3,2,2
|
||||
1,3,2,1,1
|
||||
3,1,2,1,1
|
||||
3,1,1,2,1
|
||||
3,3,1,4,3
|
||||
2,3,4,1,3
|
||||
1,2,3,3,2
|
||||
3,3,2,2,2
|
||||
3,3,4,2,3
|
||||
1,2,2,1,2
|
||||
2,1,1,4,3
|
||||
3,1,2,2,2
|
||||
3,2,2,4,3
|
||||
2,3,1,3,1
|
||||
2,1,1,2,1
|
||||
3,4,1,3,3
|
||||
1,1,4,3,3
|
||||
2,1,2,1,1
|
||||
1,2,1,2,2
|
||||
1,2,2,1,2
|
||||
3,1,1,2,1
|
||||
1,1,1,2,1
|
||||
1,1,2,1,1
|
||||
1,2,1,1,1
|
||||
1,1,1,3,1
|
||||
1,1,3,1,1
|
||||
1,3,1,1,1
|
||||
1,1,3,3,1
|
||||
1,3,1,3,1
|
||||
1,3,3,1,1
|
||||
1,2,2,1,2
|
||||
1,2,1,2,2
|
||||
1,1,2,2,2
|
||||
1,2,2,3,2
|
||||
1,2,3,2,2
|
||||
1,3,2,2,2
|
||||
1,2,3,3,2
|
||||
1,3,2,3,2
|
||||
1,3,3,2,2
|
||||
1,1,3,2,1
|
||||
1,3,2,1,2
|
||||
1,2,1,3,1
|
||||
1,2,3,1,2
|
||||
1,1,2,3,1
|
||||
1,3,1,2,2
|
||||
1,1,1,1,1
|
||||
1,2,2,2,2
|
||||
1,3,3,3,1
|
||||
1,4,4,4,3
|
225
benchmark/tests/datasets/iris.arff
Executable file
225
benchmark/tests/datasets/iris.arff
Executable file
@@ -0,0 +1,225 @@
|
||||
% 1. Title: Iris Plants Database
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Creator: R.A. Fisher
|
||||
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
|
||||
% (c) Date: July, 1988
|
||||
%
|
||||
% 3. Past Usage:
|
||||
% - Publications: too many to mention!!! Here are a few.
|
||||
% 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
|
||||
% Annals of Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
|
||||
% to Mathematical Statistics" (John Wiley, NY, 1950).
|
||||
% 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
|
||||
% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
|
||||
% 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
|
||||
% Structure and Classification Rule for Recognition in Partially Exposed
|
||||
% Environments". IEEE Transactions on Pattern Analysis and Machine
|
||||
% Intelligence, Vol. PAMI-2, No. 1, 67-71.
|
||||
% -- Results:
|
||||
% -- very low misclassification rates (0% for the setosa class)
|
||||
% 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE
|
||||
% Transactions on Information Theory, May 1972, 431-433.
|
||||
% -- Results:
|
||||
% -- very low misclassification rates again
|
||||
% 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II
|
||||
% conceptual clustering system finds 3 classes in the data.
|
||||
%
|
||||
% 4. Relevant Information:
|
||||
% --- This is perhaps the best known database to be found in the pattern
|
||||
% recognition literature. Fisher's paper is a classic in the field
|
||||
% and is referenced frequently to this day. (See Duda & Hart, for
|
||||
% example.) The data set contains 3 classes of 50 instances each,
|
||||
% where each class refers to a type of iris plant. One class is
|
||||
% linearly separable from the other 2; the latter are NOT linearly
|
||||
% separable from each other.
|
||||
% --- Predicted attribute: class of iris plant.
|
||||
% --- This is an exceedingly simple domain.
|
||||
%
|
||||
% 5. Number of Instances: 150 (50 in each of three classes)
|
||||
%
|
||||
% 6. Number of Attributes: 4 numeric, predictive attributes and the class
|
||||
%
|
||||
% 7. Attribute Information:
|
||||
% 1. sepal length in cm
|
||||
% 2. sepal width in cm
|
||||
% 3. petal length in cm
|
||||
% 4. petal width in cm
|
||||
% 5. class:
|
||||
% -- Iris Setosa
|
||||
% -- Iris Versicolour
|
||||
% -- Iris Virginica
|
||||
%
|
||||
% 8. Missing Attribute Values: None
|
||||
%
|
||||
% Summary Statistics:
|
||||
% Min Max Mean SD Class Correlation
|
||||
% sepal length: 4.3 7.9 5.84 0.83 0.7826
|
||||
% sepal width: 2.0 4.4 3.05 0.43 -0.4194
|
||||
% petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)
|
||||
% petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)
|
||||
%
|
||||
% 9. Class Distribution: 33.3% for each of 3 classes.
|
||||
|
||||
@RELATION iris
|
||||
|
||||
@ATTRIBUTE sepallength REAL
|
||||
@ATTRIBUTE sepalwidth REAL
|
||||
@ATTRIBUTE petallength REAL
|
||||
@ATTRIBUTE petalwidth REAL
|
||||
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
|
||||
|
||||
@DATA
|
||||
5.1,3.5,1.4,0.2,Iris-setosa
|
||||
4.9,3.0,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.3,0.2,Iris-setosa
|
||||
4.6,3.1,1.5,0.2,Iris-setosa
|
||||
5.0,3.6,1.4,0.2,Iris-setosa
|
||||
5.4,3.9,1.7,0.4,Iris-setosa
|
||||
4.6,3.4,1.4,0.3,Iris-setosa
|
||||
5.0,3.4,1.5,0.2,Iris-setosa
|
||||
4.4,2.9,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.4,3.7,1.5,0.2,Iris-setosa
|
||||
4.8,3.4,1.6,0.2,Iris-setosa
|
||||
4.8,3.0,1.4,0.1,Iris-setosa
|
||||
4.3,3.0,1.1,0.1,Iris-setosa
|
||||
5.8,4.0,1.2,0.2,Iris-setosa
|
||||
5.7,4.4,1.5,0.4,Iris-setosa
|
||||
5.4,3.9,1.3,0.4,Iris-setosa
|
||||
5.1,3.5,1.4,0.3,Iris-setosa
|
||||
5.7,3.8,1.7,0.3,Iris-setosa
|
||||
5.1,3.8,1.5,0.3,Iris-setosa
|
||||
5.4,3.4,1.7,0.2,Iris-setosa
|
||||
5.1,3.7,1.5,0.4,Iris-setosa
|
||||
4.6,3.6,1.0,0.2,Iris-setosa
|
||||
5.1,3.3,1.7,0.5,Iris-setosa
|
||||
4.8,3.4,1.9,0.2,Iris-setosa
|
||||
5.0,3.0,1.6,0.2,Iris-setosa
|
||||
5.0,3.4,1.6,0.4,Iris-setosa
|
||||
5.2,3.5,1.5,0.2,Iris-setosa
|
||||
5.2,3.4,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.6,0.2,Iris-setosa
|
||||
4.8,3.1,1.6,0.2,Iris-setosa
|
||||
5.4,3.4,1.5,0.4,Iris-setosa
|
||||
5.2,4.1,1.5,0.1,Iris-setosa
|
||||
5.5,4.2,1.4,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
5.0,3.2,1.2,0.2,Iris-setosa
|
||||
5.5,3.5,1.3,0.2,Iris-setosa
|
||||
4.9,3.1,1.5,0.1,Iris-setosa
|
||||
4.4,3.0,1.3,0.2,Iris-setosa
|
||||
5.1,3.4,1.5,0.2,Iris-setosa
|
||||
5.0,3.5,1.3,0.3,Iris-setosa
|
||||
4.5,2.3,1.3,0.3,Iris-setosa
|
||||
4.4,3.2,1.3,0.2,Iris-setosa
|
||||
5.0,3.5,1.6,0.6,Iris-setosa
|
||||
5.1,3.8,1.9,0.4,Iris-setosa
|
||||
4.8,3.0,1.4,0.3,Iris-setosa
|
||||
5.1,3.8,1.6,0.2,Iris-setosa
|
||||
4.6,3.2,1.4,0.2,Iris-setosa
|
||||
5.3,3.7,1.5,0.2,Iris-setosa
|
||||
5.0,3.3,1.4,0.2,Iris-setosa
|
||||
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||
5.5,2.3,4.0,1.3,Iris-versicolor
|
||||
6.5,2.8,4.6,1.5,Iris-versicolor
|
||||
5.7,2.8,4.5,1.3,Iris-versicolor
|
||||
6.3,3.3,4.7,1.6,Iris-versicolor
|
||||
4.9,2.4,3.3,1.0,Iris-versicolor
|
||||
6.6,2.9,4.6,1.3,Iris-versicolor
|
||||
5.2,2.7,3.9,1.4,Iris-versicolor
|
||||
5.0,2.0,3.5,1.0,Iris-versicolor
|
||||
5.9,3.0,4.2,1.5,Iris-versicolor
|
||||
6.0,2.2,4.0,1.0,Iris-versicolor
|
||||
6.1,2.9,4.7,1.4,Iris-versicolor
|
||||
5.6,2.9,3.6,1.3,Iris-versicolor
|
||||
6.7,3.1,4.4,1.4,Iris-versicolor
|
||||
5.6,3.0,4.5,1.5,Iris-versicolor
|
||||
5.8,2.7,4.1,1.0,Iris-versicolor
|
||||
6.2,2.2,4.5,1.5,Iris-versicolor
|
||||
5.6,2.5,3.9,1.1,Iris-versicolor
|
||||
5.9,3.2,4.8,1.8,Iris-versicolor
|
||||
6.1,2.8,4.0,1.3,Iris-versicolor
|
||||
6.3,2.5,4.9,1.5,Iris-versicolor
|
||||
6.1,2.8,4.7,1.2,Iris-versicolor
|
||||
6.4,2.9,4.3,1.3,Iris-versicolor
|
||||
6.6,3.0,4.4,1.4,Iris-versicolor
|
||||
6.8,2.8,4.8,1.4,Iris-versicolor
|
||||
6.7,3.0,5.0,1.7,Iris-versicolor
|
||||
6.0,2.9,4.5,1.5,Iris-versicolor
|
||||
5.7,2.6,3.5,1.0,Iris-versicolor
|
||||
5.5,2.4,3.8,1.1,Iris-versicolor
|
||||
5.5,2.4,3.7,1.0,Iris-versicolor
|
||||
5.8,2.7,3.9,1.2,Iris-versicolor
|
||||
6.0,2.7,5.1,1.6,Iris-versicolor
|
||||
5.4,3.0,4.5,1.5,Iris-versicolor
|
||||
6.0,3.4,4.5,1.6,Iris-versicolor
|
||||
6.7,3.1,4.7,1.5,Iris-versicolor
|
||||
6.3,2.3,4.4,1.3,Iris-versicolor
|
||||
5.6,3.0,4.1,1.3,Iris-versicolor
|
||||
5.5,2.5,4.0,1.3,Iris-versicolor
|
||||
5.5,2.6,4.4,1.2,Iris-versicolor
|
||||
6.1,3.0,4.6,1.4,Iris-versicolor
|
||||
5.8,2.6,4.0,1.2,Iris-versicolor
|
||||
5.0,2.3,3.3,1.0,Iris-versicolor
|
||||
5.6,2.7,4.2,1.3,Iris-versicolor
|
||||
5.7,3.0,4.2,1.2,Iris-versicolor
|
||||
5.7,2.9,4.2,1.3,Iris-versicolor
|
||||
6.2,2.9,4.3,1.3,Iris-versicolor
|
||||
5.1,2.5,3.0,1.1,Iris-versicolor
|
||||
5.7,2.8,4.1,1.3,Iris-versicolor
|
||||
6.3,3.3,6.0,2.5,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
7.1,3.0,5.9,2.1,Iris-virginica
|
||||
6.3,2.9,5.6,1.8,Iris-virginica
|
||||
6.5,3.0,5.8,2.2,Iris-virginica
|
||||
7.6,3.0,6.6,2.1,Iris-virginica
|
||||
4.9,2.5,4.5,1.7,Iris-virginica
|
||||
7.3,2.9,6.3,1.8,Iris-virginica
|
||||
6.7,2.5,5.8,1.8,Iris-virginica
|
||||
7.2,3.6,6.1,2.5,Iris-virginica
|
||||
6.5,3.2,5.1,2.0,Iris-virginica
|
||||
6.4,2.7,5.3,1.9,Iris-virginica
|
||||
6.8,3.0,5.5,2.1,Iris-virginica
|
||||
5.7,2.5,5.0,2.0,Iris-virginica
|
||||
5.8,2.8,5.1,2.4,Iris-virginica
|
||||
6.4,3.2,5.3,2.3,Iris-virginica
|
||||
6.5,3.0,5.5,1.8,Iris-virginica
|
||||
7.7,3.8,6.7,2.2,Iris-virginica
|
||||
7.7,2.6,6.9,2.3,Iris-virginica
|
||||
6.0,2.2,5.0,1.5,Iris-virginica
|
||||
6.9,3.2,5.7,2.3,Iris-virginica
|
||||
5.6,2.8,4.9,2.0,Iris-virginica
|
||||
7.7,2.8,6.7,2.0,Iris-virginica
|
||||
6.3,2.7,4.9,1.8,Iris-virginica
|
||||
6.7,3.3,5.7,2.1,Iris-virginica
|
||||
7.2,3.2,6.0,1.8,Iris-virginica
|
||||
6.2,2.8,4.8,1.8,Iris-virginica
|
||||
6.1,3.0,4.9,1.8,Iris-virginica
|
||||
6.4,2.8,5.6,2.1,Iris-virginica
|
||||
7.2,3.0,5.8,1.6,Iris-virginica
|
||||
7.4,2.8,6.1,1.9,Iris-virginica
|
||||
7.9,3.8,6.4,2.0,Iris-virginica
|
||||
6.4,2.8,5.6,2.2,Iris-virginica
|
||||
6.3,2.8,5.1,1.5,Iris-virginica
|
||||
6.1,2.6,5.6,1.4,Iris-virginica
|
||||
7.7,3.0,6.1,2.3,Iris-virginica
|
||||
6.3,3.4,5.6,2.4,Iris-virginica
|
||||
6.4,3.1,5.5,1.8,Iris-virginica
|
||||
6.0,3.0,4.8,1.8,Iris-virginica
|
||||
6.9,3.1,5.4,2.1,Iris-virginica
|
||||
6.7,3.1,5.6,2.4,Iris-virginica
|
||||
6.9,3.1,5.1,2.3,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
6.8,3.2,5.9,2.3,Iris-virginica
|
||||
6.7,3.3,5.7,2.5,Iris-virginica
|
||||
6.7,3.0,5.2,2.3,Iris-virginica
|
||||
6.3,2.5,5.0,1.9,Iris-virginica
|
||||
6.5,3.0,5.2,2.0,Iris-virginica
|
||||
6.2,3.4,5.4,2.3,Iris-virginica
|
||||
5.9,3.0,5.1,1.8,Iris-virginica
|
||||
%
|
||||
%
|
||||
%
|
302
benchmark/tests/datasets/wine.arff
Executable file
302
benchmark/tests/datasets/wine.arff
Executable file
@@ -0,0 +1,302 @@
|
||||
% 1. Title of Database: Wine recognition data
|
||||
% Updated Sept 21, 1998 by C.Blake : Added attribute information
|
||||
%
|
||||
% 2. Sources:
|
||||
% (a) Forina, M. et al, PARVUS - An Extendible Package for Data
|
||||
% Exploration, Classification and Correlation. Institute of Pharmaceutical
|
||||
% and Food Analysis and Technologies, Via Brigata Salerno,
|
||||
% 16147 Genoa, Italy.
|
||||
%
|
||||
% (b) Stefan Aeberhard, email: stefan@coral.cs.jcu.edu.au
|
||||
% (c) July 1991
|
||||
% 3. Past Usage:
|
||||
%
|
||||
% (1)
|
||||
% S. Aeberhard, D. Coomans and O. de Vel,
|
||||
% Comparison of Classifiers in High Dimensional Settings,
|
||||
% Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of
|
||||
% Mathematics and Statistics, James Cook University of North Queensland.
|
||||
% (Also submitted to Technometrics).
|
||||
%
|
||||
% The data was used with many others for comparing various
|
||||
% classifiers. The classes are separable, though only RDA
|
||||
% has achieved 100% correct classification.
|
||||
% (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data))
|
||||
% (All results using the leave-one-out technique)
|
||||
%
|
||||
% In a classification context, this is a well posed problem
|
||||
% with "well behaved" class structures. A good data set
|
||||
% for first testing of a new classifier, but not very
|
||||
% challenging.
|
||||
%
|
||||
% (2)
|
||||
% S. Aeberhard, D. Coomans and O. de Vel,
|
||||
% "THE CLASSIFICATION PERFORMANCE OF RDA"
|
||||
% Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of
|
||||
% Mathematics and Statistics, James Cook University of North Queensland.
|
||||
% (Also submitted to Journal of Chemometrics).
|
||||
%
|
||||
% Here, the data was used to illustrate the superior performance of
|
||||
% the use of a new appreciation function with RDA.
|
||||
%
|
||||
% 4. Relevant Information:
|
||||
%
|
||||
% -- These data are the results of a chemical analysis of
|
||||
% wines grown in the same region in Italy but derived from three
|
||||
% different cultivars.
|
||||
% The analysis determined the quantities of 13 constituents
|
||||
% found in each of the three types of wines.
|
||||
%
|
||||
% -- I think that the initial data set had around 30 variables, but
|
||||
% for some reason I only have the 13 dimensional version.
|
||||
% I had a list of what the 30 or so variables were, but a.)
|
||||
% I lost it, and b.), I would not know which 13 variables
|
||||
% are included in the set.
|
||||
%
|
||||
% -- The attributes are (donated by Riccardo Leardi,
|
||||
% riclea@anchem.unige.it )
|
||||
% 1) Alcohol
|
||||
% 2) Malic acid
|
||||
% 3) Ash
|
||||
% 4) Alcalinity of ash
|
||||
% 5) Magnesium
|
||||
% 6) Total phenols
|
||||
% 7) Flavanoids
|
||||
% 8) Nonflavanoid phenols
|
||||
% 9) Proanthocyanins
|
||||
% 10)Color intensity
|
||||
% 11)Hue
|
||||
% 12)OD280/OD315 of diluted wines
|
||||
% 13)Proline
|
||||
%
|
||||
% 5. Number of Instances
|
||||
%
|
||||
% class 1 59
|
||||
% class 2 71
|
||||
% class 3 48
|
||||
%
|
||||
% 6. Number of Attributes
|
||||
%
|
||||
% 13
|
||||
%
|
||||
% 7. For Each Attribute:
|
||||
%
|
||||
% All attributes are continuous
|
||||
%
|
||||
% No statistics available, but suggest to standardise
|
||||
% variables for certain uses (e.g. for use with classifiers
|
||||
% which are NOT scale invariant)
|
||||
%
|
||||
% NOTE: 1st attribute is class identifier (1-3)
|
||||
%
|
||||
% 8. Missing Attribute Values:
|
||||
%
|
||||
% None
|
||||
%
|
||||
% 9. Class Distribution: number of instances per class
|
||||
%
|
||||
% class 1 59
|
||||
% class 2 71
|
||||
% class 3 48
|
||||
%
|
||||
% Information about the dataset
|
||||
% CLASSTYPE: nominal
|
||||
% CLASSINDEX: first
|
||||
%
|
||||
|
||||
@relation wine
|
||||
|
||||
@attribute class {1,2,3}
|
||||
@attribute Alcohol REAL
|
||||
@attribute Malic_acid REAL
|
||||
@attribute Ash REAL
|
||||
@attribute Alcalinity_of_ash REAL
|
||||
@attribute Magnesium INTEGER
|
||||
@attribute Total_phenols REAL
|
||||
@attribute Flavanoids REAL
|
||||
@attribute Nonflavanoid_phenols REAL
|
||||
@attribute Proanthocyanins REAL
|
||||
@attribute Color_intensity REAL
|
||||
@attribute Hue REAL
|
||||
@attribute OD280/OD315_of_diluted_wines REAL
|
||||
@attribute Proline INTEGER
|
||||
|
||||
@data
|
||||
1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
|
||||
1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
|
||||
1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
|
||||
1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
|
||||
1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
|
||||
1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
|
||||
1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
|
||||
1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
|
||||
1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
|
||||
1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045
|
||||
1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510
|
||||
1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280
|
||||
1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320
|
||||
1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150
|
||||
1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547
|
||||
1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310
|
||||
1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280
|
||||
1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130
|
||||
1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680
|
||||
1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845
|
||||
1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780
|
||||
1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770
|
||||
1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035
|
||||
1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015
|
||||
1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845
|
||||
1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830
|
||||
1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195
|
||||
1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285
|
||||
1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915
|
||||
1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035
|
||||
1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285
|
||||
1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515
|
||||
1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990
|
||||
1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235
|
||||
1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095
|
||||
1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920
|
||||
1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880
|
||||
1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105
|
||||
1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020
|
||||
1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760
|
||||
1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795
|
||||
1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035
|
||||
1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095
|
||||
1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680
|
||||
1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885
|
||||
1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080
|
||||
1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065
|
||||
1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985
|
||||
1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060
|
||||
1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260
|
||||
1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150
|
||||
1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265
|
||||
1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190
|
||||
1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375
|
||||
1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060
|
||||
1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120
|
||||
1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970
|
||||
1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270
|
||||
1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285
|
||||
2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520
|
||||
2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680
|
||||
2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450
|
||||
2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630
|
||||
2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420
|
||||
2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355
|
||||
2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678
|
||||
2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502
|
||||
2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510
|
||||
2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750
|
||||
2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718
|
||||
2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870
|
||||
2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410
|
||||
2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472
|
||||
2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985
|
||||
2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886
|
||||
2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428
|
||||
2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392
|
||||
2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500
|
||||
2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750
|
||||
2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463
|
||||
2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278
|
||||
2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714
|
||||
2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630
|
||||
2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515
|
||||
2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520
|
||||
2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450
|
||||
2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495
|
||||
2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562
|
||||
2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680
|
||||
2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625
|
||||
2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480
|
||||
2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450
|
||||
2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495
|
||||
2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290
|
||||
2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345
|
||||
2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937
|
||||
2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625
|
||||
2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428
|
||||
2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660
|
||||
2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406
|
||||
2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710
|
||||
2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562
|
||||
2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438
|
||||
2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415
|
||||
2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672
|
||||
2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315
|
||||
2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510
|
||||
2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488
|
||||
2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312
|
||||
2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680
|
||||
2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562
|
||||
2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325
|
||||
2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607
|
||||
2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434
|
||||
2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385
|
||||
2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407
|
||||
2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495
|
||||
2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345
|
||||
2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372
|
||||
2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564
|
||||
2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625
|
||||
2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465
|
||||
2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365
|
||||
2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380
|
||||
2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380
|
||||
2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378
|
||||
2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352
|
||||
2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466
|
||||
2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342
|
||||
2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580
|
||||
3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630
|
||||
3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530
|
||||
3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560
|
||||
3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600
|
||||
3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650
|
||||
3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695
|
||||
3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720
|
||||
3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515
|
||||
3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580
|
||||
3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590
|
||||
3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600
|
||||
3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780
|
||||
3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520
|
||||
3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550
|
||||
3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855
|
||||
3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830
|
||||
3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415
|
||||
3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625
|
||||
3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650
|
||||
3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550
|
||||
3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500
|
||||
3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480
|
||||
3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425
|
||||
3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675
|
||||
3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640
|
||||
3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725
|
||||
3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
|
||||
3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
|
||||
3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
|
||||
3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
|
||||
3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
|
||||
3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
|
||||
3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
|
||||
3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
|
||||
3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
|
||||
3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
|
||||
3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
|
||||
3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
|
||||
3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
|
||||
3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
|
||||
3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
|
||||
3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
|
||||
3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
|
||||
3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
|
||||
3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
|
||||
3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
|
||||
3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
|
||||
3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
|
@@ -6,7 +6,7 @@
|
||||
"kernel": "liblinear",
|
||||
"multiclass_strategy": "ovr"
|
||||
},
|
||||
"v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s"
|
||||
"v. 1.3.0, Computed on Test on 2022-02-22 at 12:00:00 took 1s"
|
||||
],
|
||||
"balloons": [
|
||||
0.625,
|
||||
@@ -15,6 +15,6 @@
|
||||
"kernel": "linear",
|
||||
"multiclass_strategy": "ovr"
|
||||
},
|
||||
"v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s"
|
||||
"v. 1.3.0, Computed on Test on 2022-02-22 at 12:00:00 took 1s"
|
||||
]
|
||||
}
|
@@ -3,6 +3,8 @@
|
||||
"title": "Gridsearched hyperparams v022.1b random_init",
|
||||
"model": "ODTE",
|
||||
"version": "0.3.2",
|
||||
"language_version": "3.11x",
|
||||
"language": "Python",
|
||||
"stratified": false,
|
||||
"folds": 5,
|
||||
"date": "2022-04-20",
|
||||
|
@@ -3,6 +3,8 @@
|
||||
"title": "Test default paramters with RandomForest",
|
||||
"model": "RandomForest",
|
||||
"version": "-",
|
||||
"language_version": "3.11x",
|
||||
"language": "Python",
|
||||
"stratified": false,
|
||||
"folds": 5,
|
||||
"date": "2022-01-14",
|
||||
|
@@ -3,6 +3,8 @@
|
||||
"model": "STree",
|
||||
"stratified": false,
|
||||
"folds": 5,
|
||||
"language_version": "3.11x",
|
||||
"language": "Python",
|
||||
"date": "2021-09-30",
|
||||
"time": "11:42:07",
|
||||
"duration": 624.2505249977112,
|
||||
|
@@ -1,6 +1,8 @@
|
||||
{
|
||||
"score_name": "accuracy",
|
||||
"model": "STree",
|
||||
"language": "Python",
|
||||
"language_version": "3.11x",
|
||||
"stratified": false,
|
||||
"folds": 5,
|
||||
"date": "2021-10-27",
|
||||
|
@@ -1,6 +1,8 @@
|
||||
{
|
||||
"score_name": "accuracy",
|
||||
"model": "STree",
|
||||
"language_version": "3.11x",
|
||||
"language": "Python",
|
||||
"stratified": false,
|
||||
"folds": 5,
|
||||
"date": "2021-11-01",
|
||||
|
@@ -55,7 +55,16 @@ class BeReportTest(TestBase):
|
||||
"be_report", ["-s", "accuracy", "-m", "STree", "-g", "1"]
|
||||
)
|
||||
self.assertEqual(stderr.getvalue(), "")
|
||||
self.check_output_file(stdout, "report_grid")
|
||||
file_name = "report_grid.test"
|
||||
with open(os.path.join(self.test_files, file_name)) as f:
|
||||
expected = f.read().splitlines()
|
||||
output_text = stdout.getvalue().splitlines()
|
||||
# Compare replacing STree version
|
||||
for line, index in zip(expected, range(len(expected))):
|
||||
if "1.2.4" in line:
|
||||
# replace STree version
|
||||
line = self.replace_STree_version(line, output_text, index)
|
||||
self.assertEqual(line, output_text[index])
|
||||
|
||||
def test_be_report_best_both(self):
|
||||
stdout, stderr = self.execute_script(
|
||||
|
@@ -7,5 +7,5 @@ Dataset Score File/Message
|
||||
balance-scale 0.963520 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json {'base_estimator__C': 57, 'base_estimator__gamma': 0.1, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}
|
||||
balloons 0.785000 results_accuracy_ODTE_Galgo_2022-04-20_10:52:20_0.json {'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}
|
||||
******************************************************************************************************************************************************************
|
||||
* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0434 *
|
||||
* accuracy compared to STree_default (liblinear-ovr) .: 0.0434 *
|
||||
******************************************************************************************************************************************************************
|
||||
|
@@ -1,16 +1,16 @@
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:15:25 *
|
||||
[94m* test *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 0.80 seconds, 0.00 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m***********************************************************************************************************************
|
||||
[94m************************************************************************************************************************
|
||||
[94m* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:15:25 *
|
||||
[94m* test *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 0.80 seconds, 0.00 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m************************************************************************************************************************
|
||||
|
||||
Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ===== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mbalance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'}
|
||||
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Accuracy compared to stree_default (liblinear-ovr) .: 0.0422 *
|
||||
[94m***********************************************************************************************************************
|
||||
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mbalance-scale 625 4 3 23.32 12.16 6.44 0.840160±0.0304 0.013745±0.0019 {'splitter': 'best', 'max_features': 'auto'}
|
||||
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000388±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
[94m************************************************************************************************************************
|
||||
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0422 *
|
||||
[94m************************************************************************************************************************
|
||||
Results in results/results_accuracy_STree_iMac27_2022-05-09_00:15:25_0.json
|
||||
|
@@ -1,16 +1,16 @@
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-08 20:14:43 *
|
||||
[94m* test *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 0.48 seconds, 0.00 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m***********************************************************************************************************************
|
||||
[94m************************************************************************************************************************
|
||||
[94m* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 20:14:43 *
|
||||
[94m* test *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 0.48 seconds, 0.00 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m************************************************************************************************************************
|
||||
|
||||
Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ===== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mbalance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {}
|
||||
[94mballoons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {}
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Accuracy compared to stree_default (liblinear-ovr) .: 0.0390 *
|
||||
[94m***********************************************************************************************************************
|
||||
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mbalance-scale 625 4 3 17.36 9.18 6.18 0.908480±0.0247 0.007388±0.0013 {}
|
||||
[94mballoons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000664±0.0002 {}
|
||||
[94m************************************************************************************************************************
|
||||
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0390 *
|
||||
[94m************************************************************************************************************************
|
||||
Results in results/results_accuracy_STree_iMac27_2022-05-08_20:14:43_0.json
|
||||
|
@@ -1,15 +1,15 @@
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-08 19:38:28 *
|
||||
[94m* test *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 0.06 seconds, 0.00 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m***********************************************************************************************************************
|
||||
[94m************************************************************************************************************************
|
||||
[94m* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-08 19:38:28 *
|
||||
[94m* test *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 0.06 seconds, 0.00 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m************************************************************************************************************************
|
||||
|
||||
Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ===== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mballoons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {}
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Accuracy compared to stree_default (liblinear-ovr) .: 0.0165 *
|
||||
[94m***********************************************************************************************************************
|
||||
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mballoons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000671±0.0001 {}
|
||||
[94m************************************************************************************************************************
|
||||
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0165 *
|
||||
[94m************************************************************************************************************************
|
||||
Partial result file removed: results/results_accuracy_STree_iMac27_2022-05-08_19:38:28_0.json
|
||||
|
@@ -1,16 +1,16 @@
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Report STree ver. 1.2.4 with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:21:06 *
|
||||
[94m* test *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 0.89 seconds, 0.00 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m***********************************************************************************************************************
|
||||
[94m************************************************************************************************************************
|
||||
[94m* STree ver. 1.2.4 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-05-09 00:21:06 *
|
||||
[94m* test *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 0.89 seconds, 0.00 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m************************************************************************************************************************
|
||||
|
||||
Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ===== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mbalance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'}
|
||||
[94mballoons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'}
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Accuracy compared to stree_default (liblinear-ovr) .: 0.0391 *
|
||||
[94m***********************************************************************************************************************
|
||||
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mbalance-scale 625 4 3 26.12 13.56 7.94 0.910720±0.0249 0.015852±0.0027 {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'}
|
||||
[94mballoons 16 4 2 4.64 2.82 2.66 0.663333±0.3009 0.000640±0.0001 {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'}
|
||||
[94m************************************************************************************************************************
|
||||
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0391 *
|
||||
[94m************************************************************************************************************************
|
||||
Results in results/results_accuracy_STree_iMac27_2022-05-09_00:21:06_0.json
|
||||
|
@@ -1,4 +1,4 @@
|
||||
1;1;" Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07"
|
||||
1;1;" STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07"
|
||||
2;1;" With gridsearched hyperparameters"
|
||||
3;1;" Score is accuracy"
|
||||
3;2;" Execution time"
|
||||
@@ -45,4 +45,4 @@
|
||||
8;10;"0.0008541679382324218"
|
||||
8;11;"3.629469326417878e-05"
|
||||
8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
|
||||
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454"
|
||||
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454"
|
@@ -1,4 +1,4 @@
|
||||
1;1;" Report ODTE ver. 0.3.2 with 5 Folds cross validation and 10 random seeds. 2022-04-20 10:52:20"
|
||||
1;1;" ODTE ver. 0.3.2 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-04-20 10:52:20"
|
||||
2;1;" Gridsearched hyperparams v022.1b random_init"
|
||||
3;1;" Score is accuracy"
|
||||
3;2;" Execution time"
|
||||
@@ -45,4 +45,4 @@
|
||||
8;10;"0.1156062078475952"
|
||||
8;11;"0.0127842418285999"
|
||||
8;12;"{'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}"
|
||||
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0434"
|
||||
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0434"
|
@@ -1,4 +1,4 @@
|
||||
1;1;" Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-10-27 09:40:40"
|
||||
1;1;" STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-10-27 09:40:40"
|
||||
2;1;" default A"
|
||||
3;1;" Score is accuracy"
|
||||
3;2;" Execution time"
|
||||
@@ -43,4 +43,4 @@
|
||||
8;10;"0.02120100021362305"
|
||||
8;11;"0.003526023309468471"
|
||||
8;12;"{'splitter': 'best', 'max_features': 'auto'}"
|
||||
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0416"
|
||||
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0416"
|
@@ -1,4 +1,4 @@
|
||||
1;1;" Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07"
|
||||
1;1;" STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07"
|
||||
2;1;" With gridsearched hyperparameters"
|
||||
3;1;" Score is accuracy"
|
||||
3;2;" Execution time"
|
||||
@@ -49,4 +49,4 @@
|
||||
11;2;"✔"
|
||||
11;3;1
|
||||
11;4;"Equal to best"
|
||||
13;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454"
|
||||
13;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454"
|
@@ -1,4 +1,4 @@
|
||||
1;1;" Report ODTE ver. 0.3.2 with 5 Folds cross validation and 10 random seeds. 2022-04-20 10:52:20"
|
||||
1;1;" ODTE ver. 0.3.2 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-04-20 10:52:20"
|
||||
2;1;" Gridsearched hyperparams v022.1b random_init"
|
||||
3;1;" Score is accuracy"
|
||||
3;2;" Execution time"
|
||||
@@ -45,4 +45,4 @@
|
||||
8;10;"0.1156062078475952"
|
||||
8;11;"0.0127842418285999"
|
||||
8;12;"{'base_estimator__C': 5, 'base_estimator__gamma': 0.14, 'base_estimator__kernel': 'rbf', 'base_estimator__multiclass_strategy': 'ovr', 'n_estimators': 100, 'n_jobs': -1}"
|
||||
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0434"
|
||||
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0434"
|
||||
|
@@ -1,4 +1,4 @@
|
||||
1;1;" Report RandomForest ver. - with 5 Folds cross validation and 10 random seeds. 2022-01-14 12:39:30"
|
||||
1;1;" RandomForest ver. - Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2022-01-14 12:39:30"
|
||||
2;1;" Test default paramters with RandomForest"
|
||||
3;1;" Score is accuracy"
|
||||
3;2;" Execution time"
|
||||
@@ -45,4 +45,4 @@
|
||||
8;10;"0.07016648769378662"
|
||||
8;11;"0.002460508923990468"
|
||||
8;12;"{}"
|
||||
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0363"
|
||||
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0363"
|
||||
|
@@ -1,4 +1,4 @@
|
||||
1;1;" Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07"
|
||||
1;1;" STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07"
|
||||
2;1;" With gridsearched hyperparameters"
|
||||
3;1;" Score is accuracy"
|
||||
3;2;" Execution time"
|
||||
@@ -45,4 +45,4 @@
|
||||
8;10;"0.0008541679382324218"
|
||||
8;11;"3.629469326417878e-05"
|
||||
8;12;"{'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}"
|
||||
10;1;"** Accuracy compared to stree_default (liblinear-ovr) .: 0.0454"
|
||||
10;1;"** accuracy compared to STree_default (liblinear-ovr) .: 0.0454"
|
||||
|
@@ -1,15 +1,15 @@
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
|
||||
[94m* With gridsearched hyperparameters *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m***********************************************************************************************************************
|
||||
[94m************************************************************************************************************************
|
||||
[94m* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
|
||||
[94m* With gridsearched hyperparameters *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m************************************************************************************************************************
|
||||
|
||||
Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ===== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mbalance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 *
|
||||
[94m***********************************************************************************************************************
|
||||
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mbalance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
[94m************************************************************************************************************************
|
||||
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
|
||||
[94m************************************************************************************************************************
|
||||
|
@@ -7,5 +7,5 @@ Dataset Score File/Message
|
||||
balance-scale 0.980000 results_accuracy_STree_iMac27_2021-10-27_09:40:40_0.json {'splitter': 'best', 'max_features': 'auto'}
|
||||
balloons 0.860000 results_accuracy_STree_iMac27_2021-09-30_11:42:07_0.json {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
******************************************************************************************************************************************************************
|
||||
* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0457 *
|
||||
* accuracy compared to STree_default (liblinear-ovr) .: 0.0457 *
|
||||
******************************************************************************************************************************************************************
|
||||
|
@@ -1,16 +1,16 @@
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* Report STree ver. 1.2.3 with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
|
||||
[94m* With gridsearched hyperparameters *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m***********************************************************************************************************************
|
||||
[94m************************************************************************************************************************
|
||||
[94m* STree ver. 1.2.3 Python ver. 3.11x with 5 Folds cross validation and 10 random seeds. 2021-09-30 11:42:07 *
|
||||
[94m* With gridsearched hyperparameters *
|
||||
[94m* Random seeds: [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] Stratified: False *
|
||||
[94m* Execution took 624.25 seconds, 0.17 hours, on iMac27 *
|
||||
[94m* Score is accuracy *
|
||||
[94m************************************************************************************************************************
|
||||
|
||||
Dataset Samp Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ===== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mbalance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
[94m***********************************************************************************************************************
|
||||
[94m* ✔ Equal to best .....: 1 *
|
||||
[94m* Accuracy compared to stree_default (liblinear-ovr) .: 0.0454 *
|
||||
[94m***********************************************************************************************************************
|
||||
Dataset Sampl. Feat. Cls Nodes Leaves Depth Score Time Hyperparameters
|
||||
============================== ====== ===== === ======= ======= ======= =============== ================ ===============
|
||||
[96mbalance-scale 625 4 3 7.00 4.00 3.00 0.970560±0.0150 0.014049±0.0020 {'C': 10000.0, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
[94mballoons 16 4 2 3.00 2.00 2.00 0.860000±0.2850✔ 0.000854±0.0000 {'C': 7, 'gamma': 0.1, 'kernel': 'rbf', 'max_iter': 10000.0, 'multiclass_strategy': 'ovr'}
|
||||
[94m************************************************************************************************************************
|
||||
[94m* ✔ Equal to best .....: 1 *
|
||||
[94m* accuracy compared to STree_default (liblinear-ovr) .: 0.0454 *
|
||||
[94m************************************************************************************************************************
|
||||
|
@@ -1,4 +1,4 @@
|
||||
[94mDataset Samp. Feat. Cls Balance
|
||||
============================== ===== ===== === ========================================
|
||||
[96mbalance-scale 625 4 3 7.84%/ 46.08%/ 46.08%
|
||||
[94mballoons 16 4 2 56.25%/ 43.75%
|
||||
[94mDataset Sampl. Feat. Cls Balance
|
||||
============================== ===== ====== === ========================================
|
||||
[96mbalance-scale 625 4 3 7.84%/ 46.08%/ 46.08%
|
||||
[94mballoons 16 4 2 56.25%/ 43.75%
|
||||
|
@@ -7,5 +7,5 @@ Dataset Score File/Message
|
||||
balance-scale 0.919995 v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s {'C': 1.0, 'kernel': 'liblinear', 'multiclass_strategy': 'ovr'}
|
||||
balloons 0.625000 v. 1.2.4, Computed on Test on 2022-02-22 at 12:00:00 took 1s {'C': 1.0, 'kernel': 'linear', 'multiclass_strategy': 'ovr'}
|
||||
******************************************************************************************************************************************************************
|
||||
* Scores compared to stree_default accuracy (liblinear-ovr) .: 0.0384 *
|
||||
* accuracy compared to STree_default (liblinear-ovr) .: 0.0384 *
|
||||
******************************************************************************************************************************************************************
|
||||
|
@@ -1,5 +1,6 @@
|
||||
pandas
|
||||
scikit-learn
|
||||
scipy
|
||||
odte
|
||||
mufs
|
||||
xlsxwriter
|
||||
@@ -8,3 +9,4 @@ tqdm
|
||||
xgboost
|
||||
graphviz
|
||||
Wodt @ git+ssh://git@github.com/doctorado-ml/Wodt.git#egg=Wodt
|
||||
unittest-xml-reporting
|
||||
|
Reference in New Issue
Block a user