From d4cfe77b1813c458ab53de0012680ee953b694ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Montan=CC=83ana?= Date: Wed, 10 Mar 2021 01:37:00 +0100 Subject: [PATCH] Add wodt clf Add execution results of RaF, RoF and RRoF Fix fit time in database records --- analysis_mysql.py | 14 +- data/ESWA/results_TBRRoF.sql | 52 ++++++ data/ESWA/results_TBRaF.sql | 52 ++++++ data/ESWA/results_TBRoF.sql | 52 ++++++ data/ESWA/results_base_RRoF.sql | 52 ++++++ data/ESWA/results_base_RaF.sql | 52 ++++++ data/ESWA/results_base_RoF.sql | 52 ++++++ experiment.py | 10 +- experimentation/Database.py | 8 +- experimentation/Models.py | 26 +++ setup.py | 2 +- testwodt.py | 125 ++++++++++++++ wodt/WODT.py | 289 ++++++++++++++++++++++++++++++++ wodt/__init__.py | 5 + 14 files changed, 782 insertions(+), 9 deletions(-) create mode 100644 data/ESWA/results_TBRRoF.sql create mode 100644 data/ESWA/results_TBRaF.sql create mode 100644 data/ESWA/results_TBRoF.sql create mode 100644 data/ESWA/results_base_RRoF.sql create mode 100644 data/ESWA/results_base_RaF.sql create mode 100644 data/ESWA/results_base_RoF.sql create mode 100644 testwodt.py create mode 100644 wodt/WODT.py create mode 100644 wodt/__init__.py diff --git a/analysis_mysql.py b/analysis_mysql.py index c8970d5..4875312 100644 --- a/analysis_mysql.py +++ b/analysis_mysql.py @@ -4,10 +4,18 @@ from experimentation.Sets import Datasets from experimentation.Utils import TextColor from experimentation.Database import MySQL -models_tree = ["stree", "oc1", "cart"] -models_ensemble = ["odte", "adaBoost", "bagging"] +models_tree = [ + "stree", + "wodt", + "oc1", + "cart", + "baseRaF", + "baseRoF", + "baseRRoF", +] +models_ensemble = ["odte", "adaBoost", "bagging", "TBRaF", "TBRoF", "TBRRoF"] title = "Best model results" -lengths = (30, 9, 11, 11, 11) +lengths = (30, 9, 11, 11, 11, 11, 11, 11, 11) def parse_arguments() -> Tuple[str, str, str, bool, bool]: diff --git a/data/ESWA/results_TBRRoF.sql b/data/ESWA/results_TBRRoF.sql new file mode 100644 index 0000000..0bc9d18 --- /dev/null +++ b/data/ESWA/results_TBRRoF.sql @@ -0,0 +1,52 @@ +-- +-- TBRRoF +-- +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.903019,'balance-scale','TBRRoF',1,0,'{}',0.0237096,211011,0.178456); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.6125,'balloons','TBRRoF',1,0,'{}',0.249671,0.115329,0.0147107); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.975059,'breast-cancer-wisc-diag','TBRRoF',1,0,'{}',0.0144574,142675,0.0638186); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.803922,'breast-cancer-wisc-prog','TBRRoF',1,0,'{}',0.0531358,0.973572,0.0399328); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.970509,'breast-cancer-wisc','TBRRoF',1,0,'{}',0.00955804,237296,0.138799); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.720847,'breast-cancer','TBRRoF',1,0,'{}',0.0609055,158353,0.616083); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.827935,'cardiotocography-10clases','TBRRoF',1,0,'{}',0.0165564,250415,174333); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.92117,'cardiotocography-3clases','TBRRoF',1,0,'{}',0.00984313,386489,373011); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.823077,'conn-bench-sonar-mines-rocks','TBRRoF',1,0,'{}',0.0527948,113023,0.0554272); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.775781,'cylinder-bands','TBRRoF',1,0,'{}',0.0338267,360351,0.429748); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.977065,'dermatology','TBRRoF',1,0,'{}',0.0110894,183941,0.385648); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.851027,'echocardiogram','TBRRoF',1,0,'{}',0.0472795,0.354076,0.0261453); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.882,'fertility','TBRRoF',1,0,'{}',0.0587233,0.410455,0.0491118); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.731984,'haberman-survival','TBRRoF',1,0,'{}',0.0429949,0.667255,0.0682103); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.813607,'heart-hungarian','TBRRoF',1,0,'{}',0.0436205,0.680885,0.0547318); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.827054,'hepatitis','TBRRoF',1,0,'{}',0.0580787,0.658639,0.0417014); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.712495,'ilpd-indian-liver','TBRRoF',1,0,'{}',0.0405313,22513,0.111347); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.950421,'ionosphere','TBRRoF',1,0,'{}',0.0171068,125847,0.0400326); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.966701,'iris','TBRRoF',1,0,'{}',0.0272691,0.382659,0.0201222); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.6716,'led-display','TBRRoF',1,0,'{}',0.0774858,37252,0.237682); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.867222,'libras','TBRRoF',1,0,'{}',0.0437779,384402,0.114302); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.896431,'low-res-spect','TBRRoF',1,0,'{}',0.0203116,300936,0.109406); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.854054,'lymphography','TBRRoF',1,0,'{}',0.0640777,0.861888,0.0410658); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.824359,'mammographic','TBRRoF',1,0,'{}',0.0191403,237551,0.122342); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.810714,'molec-biol-promoter','TBRRoF',1,0,'{}',0.0759873,0.523289,0.0118402); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.890756,'musk-1','TBRRoF',1,0,'{}',0.0265738,202355,0.0349188); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.834648,'oocytes_merluccius_nucleus_4d','TBRRoF',1,0,'{}',0.0264323,601944,178713); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.928392,'oocytes_merluccius_states_2f','TBRRoF',1,0,'{}',0.0149997,491221,0.206203); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.833991,'oocytes_trisopterus_nucleus_2f','TBRRoF',1,0,'{}',0.0267839,46184,0.0751242); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.928947,'oocytes_trisopterus_states_5b','TBRRoF',1,0,'{}',0.0136192,42026,0.312106); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.937439,'parkinsons','TBRRoF',1,0,'{}',0.0303885,0.654357,0.0272996); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.767708,'pima','TBRRoF',1,0,'{}',0.0279917,306414,0.204124); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.860577,'pittsburg-bridges-MATERIAL','TBRRoF',1,0,'{}',0.0651088,0.388944,0.0480131); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.693214,'pittsburg-bridges-REL-L','TBRRoF',1,0,'{}',0.0862127,0.705379,0.0433156); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.686957,'pittsburg-bridges-SPAN','TBRRoF',1,0,'{}',0.0624509,0.596839,0.0344775); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.865407,'pittsburg-bridges-T-OR-D','TBRRoF',1,0,'{}',0.0511018,0.359113,0.0464296); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.725485,'planning','TBRRoF',1,0,'{}',0.0576996,10051,0.166844); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.711364,'post-operative','TBRRoF',1,0,'{}',0.0885492,0.103245,0.0142092); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.943056,'seeds','TBRRoF',1,0,'{}',0.0291748,0.524034,0.03172); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.678338,'statlog-australian-credit','TBRRoF',1,0,'{}',0.0285911,0.465842,0.0118528); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.7538,'statlog-german-credit','TBRRoF',1,0,'{}',0.0283282,588563,0.168927); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.834155,'statlog-heart','TBRRoF',1,0,'{}',0.0358614,0.718854,0.0542387); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.975414,'statlog-image','TBRRoF',1,0,'{}',0.00512352,255329,226215); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.788413,'statlog-vehicle','TBRRoF',1,0,'{}',0.0332766,620603,0.112329); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.99,'synthetic-control','TBRRoF',1,0,'{}',0.00458831,266777,0.0519119); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.985791,'tic-tac-toe','TBRRoF',1,0,'{}',0.00819396,445686,0.0959968); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.846523,'vertebral-column-2clases','TBRRoF',1,0,'{}',0.0342551,137499,0.0622581); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.979792,'wine','TBRRoF',1,0,'{}',0.0228782,0.592137,0.0261656); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','12:16:43','crossval',0.960385,'zoo','TBRRoF',1,0,'{}',0.0408897,0.573084,0.0239393); diff --git a/data/ESWA/results_TBRaF.sql b/data/ESWA/results_TBRaF.sql new file mode 100644 index 0000000..4a2b3e0 --- /dev/null +++ b/data/ESWA/results_TBRaF.sql @@ -0,0 +1,52 @@ +-- +-- TBRaF +-- +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.898218,'balance-scale','TBRaF',1,0,'{}',0.0221572,147481,0.0929036); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.5375,'balloons','TBRaF',1,0,'{}',0.203182,0.0605146,0.00610705); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.968724,'breast-cancer-wisc-diag','TBRaF',1,0,'{}',0.0158557,122699,0.0508574); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.802041,'breast-cancer-wisc-prog','TBRaF',1,0,'{}',0.0411827,0.516865,0.0182615); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.968274,'breast-cancer-wisc','TBRaF',1,0,'{}',0.0146783,108741,0.0473096); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.730021,'breast-cancer','TBRaF',1,0,'{}',0.0521928,115929,0.0450501); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.830095,'cardiotocography-10clases','TBRaF',1,0,'{}',0.0210438,282584,199304); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.91675,'cardiotocography-3clases','TBRaF',1,0,'{}',0.0118997,315465,192647); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.806731,'conn-bench-sonar-mines-rocks','TBRaF',1,0,'{}',0.0314914,0.502975,0.0217173); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.750391,'cylinder-bands','TBRaF',1,0,'{}',0.020666,187612,0.0361602); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.978128,'dermatology','TBRaF',1,0,'{}',0.0137608,141583,0.714745); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.839821,'echocardiogram','TBRaF',1,0,'{}',0.0599146,0.273034,0.0293564); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.876,'fertility','TBRaF',1,0,'{}',0.0466115,0.24055,0.0217635); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.733114,'haberman-survival','TBRaF',1,0,'{}',0.0450299,0.450113,0.0427755); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.821607,'heart-hungarian','TBRaF',1,0,'{}',0.0349804,0.826003,0.0330775); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.816431,'hepatitis','TBRaF',1,0,'{}',0.0746159,0.412975,0.0340097); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.714522,'ilpd-indian-liver','TBRaF',1,0,'{}',0.0420083,133141,0.0652264); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.936188,'ionosphere','TBRaF',1,0,'{}',0.0313079,0.658064,0.0344485); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.960949,'iris','TBRaF',1,0,'{}',0.0367069,0.209043,0.0243583); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.7122,'led-display','TBRaF',1,0,'{}',0.0144207,331435,0.0872926); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.855,'libras','TBRaF',1,0,'{}',0.0377184,252853,0.0727154); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.895253,'low-res-spect','TBRaF',1,0,'{}',0.0164526,260787,0.13843); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.837838,'lymphography','TBRaF',1,0,'{}',0.0626212,0.544017,0.038001); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.831024,'mammographic','TBRaF',1,0,'{}',0.0203123,263569,0.0958739); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.816621,'molec-biol-promoter','TBRaF',1,0,'{}',0.0896387,0.216461,0.0123003); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.881513,'musk-1','TBRaF',1,0,'{}',0.0231183,14804,0.0423494); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.837735,'oocytes_merluccius_nucleus_4d','TBRaF',1,0,'{}',0.0253044,34712,0.775161); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.925444,'oocytes_merluccius_states_2f','TBRaF',1,0,'{}',0.0188085,41035,0.189899); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.827851,'oocytes_trisopterus_nucleus_2f','TBRaF',1,0,'{}',0.0269572,297983,0.0558168); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.928509,'oocytes_trisopterus_states_5b','TBRaF',1,0,'{}',0.0149313,29922,0.151926); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.907843,'parkinsons','TBRaF',1,0,'{}',0.0501492,0.422776,0.0247379); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.764583,'pima','TBRaF',1,0,'{}',0.0238137,269342,0.0690004); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.861813,'pittsburg-bridges-MATERIAL','TBRaF',1,0,'{}',0.0622277,0.277335,0.0340988); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.700143,'pittsburg-bridges-REL-L','TBRaF',1,0,'{}',0.0617154,0.494063,0.0271107); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.708696,'pittsburg-bridges-SPAN','TBRaF',1,0,'{}',0.089327,0.440551,0.0219081); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.87237,'pittsburg-bridges-T-OR-D','TBRaF',1,0,'{}',0.069001,0.233546,0.024066); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.714468,'planning','TBRaF',1,0,'{}',0.0697879,0.711834,0.0397762); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.708333,'post-operative','TBRaF',1,0,'{}',0.0870909,0.273825,0.0350431); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.94391,'seeds','TBRaF',1,0,'{}',0.0284646,0.413964,0.0266677); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.678151,'statlog-australian-credit','TBRaF',1,0,'{}',0.0327147,0.625115,0.107057); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.7528,'statlog-german-credit','TBRaF',1,0,'{}',0.0268046,423104,0.0719579); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.83501,'statlog-heart','TBRaF',1,0,'{}',0.0402007,0.738742,0.0372339); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.968223,'statlog-image','TBRaF',1,0,'{}',0.00572038,317869,199838); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.776598,'statlog-vehicle','TBRaF',1,0,'{}',0.0357477,374977,0.0753526); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.988333,'synthetic-control','TBRaF',1,0,'{}',0.00888523,137269,0.033371); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.975981,'tic-tac-toe','TBRaF',1,0,'{}',0.0127253,30795,0.0561163); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.845611,'vertebral-column-2clases','TBRaF',1,0,'{}',0.0408165,0.732599,0.0305218); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.98083,'wine','TBRaF',1,0,'{}',0.0133092,0.282677,0.0104738); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:40:09','crossval',0.946769,'zoo','TBRaF',1,0,'{}',0.0385195,0.333227,0.0293076); diff --git a/data/ESWA/results_TBRoF.sql b/data/ESWA/results_TBRoF.sql new file mode 100644 index 0000000..7dc5315 --- /dev/null +++ b/data/ESWA/results_TBRoF.sql @@ -0,0 +1,52 @@ +-- +-- TBRoF +-- +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.782049,'balance-scale','TBRoF',1,0,'{}',0.165039,191999,0.888319); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.6,'balloons','TBRoF',1,0,'{}',0.318301,0.0945516,0.0317163); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.971164,'breast-cancer-wisc-diag','TBRoF',1,0,'{}',0.013496,101219,0.188329); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.803341,'breast-cancer-wisc-prog','TBRoF',1,0,'{}',0.056903,0.564749,0.0914801); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.961436,'breast-cancer-wisc','TBRoF',1,0,'{}',0.0155321,155881,0.30061); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.723702,'breast-cancer','TBRoF',1,0,'{}',0.0616484,0.834729,0.271552); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.783448,'cardiotocography-10clases','TBRoF',1,0,'{}',0.0161869,41973,484605); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.895115,'cardiotocography-3clases','TBRoF',1,0,'{}',0.0185059,258994,530755); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.796154,'conn-bench-sonar-mines-rocks','TBRoF',1,0,'{}',0.0538172,0.843827,0.109639); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.714844,'cylinder-bands','TBRoF',1,0,'{}',0.0389802,194509,0.464629); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.970531,'dermatology','TBRoF',1,0,'{}',0.0157392,106912,0.0901046); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.825268,'echocardiogram','TBRoF',1,0,'{}',0.0441591,0.311842,0.0811893); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.88,'fertility','TBRoF',1,0,'{}',0.0745513,0.0980062,0.0154864); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.743219,'haberman-survival','TBRoF',1,0,'{}',0.0420406,163565,0.469856); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.813014,'heart-hungarian','TBRoF',1,0,'{}',0.0466896,0.63616,0.118379); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.808344,'hepatitis','TBRoF',1,0,'{}',0.0645135,0.574301,0.21777); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.711911,'ilpd-indian-liver','TBRoF',1,0,'{}',0.038764,153988,0.322007); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.90433,'ionosphere','TBRoF',1,0,'{}',0.0303405,200361,0.505734); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.97474,'iris','TBRoF',1,0,'{}',0.0250567,0.232732,0.0198281); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.6814,'led-display','TBRoF',1,0,'{}',0.097266,551895,0.562322); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.762778,'libras','TBRoF',1,0,'{}',0.0485361,130429,21116); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.863678,'low-res-spect','TBRoF',1,0,'{}',0.0209091,800181,0.88479); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.766216,'lymphography','TBRoF',1,0,'{}',0.0583466,0.645312,0.0846287); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.81604,'mammographic','TBRoF',1,0,'{}',0.0277715,427548,0.412096); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.736538,'molec-biol-promoter','TBRoF',1,0,'{}',0.104285,0.308432,0.0405886); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.866807,'musk-1','TBRoF',1,0,'{}',0.0287601,3109,0.579935); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.823507,'oocytes_merluccius_nucleus_4d','TBRoF',1,0,'{}',0.0245473,598154,0.863981); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.920166,'oocytes_merluccius_states_2f','TBRoF',1,0,'{}',0.0165272,59664,0.832241); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.807237,'oocytes_trisopterus_nucleus_2f','TBRoF',1,0,'{}',0.0217897,372732,0.401537); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.930702,'oocytes_trisopterus_states_5b','TBRoF',1,0,'{}',0.0160742,489326,0.565728); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.878002,'parkinsons','TBRoF',1,0,'{}',0.0545301,0.619312,0.0746759); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.767708,'pima','TBRoF',1,0,'{}',0.0312226,143769,0.143854); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.792308,'pittsburg-bridges-MATERIAL','TBRoF',1,0,'{}',0.0921668,0.406356,0.0446203); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.611857,'pittsburg-bridges-REL-L','TBRoF',1,0,'{}',0.113804,0.76446,0.0680861); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.636957,'pittsburg-bridges-SPAN','TBRoF',1,0,'{}',0.119091,0.731129,0.0847695); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.863037,'pittsburg-bridges-T-OR-D','TBRoF',1,0,'{}',0.0832874,0.208244,0.0339748); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.704421,'planning','TBRoF',1,0,'{}',0.0674753,0.240795,0.145842); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.711364,'post-operative','TBRoF',1,0,'{}',0.102059,0.0934878,0.0101939); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.934188,'seeds','TBRoF',1,0,'{}',0.0909744,0.410396,0.0538859); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.678335,'statlog-australian-credit','TBRoF',1,0,'{}',0.0312903,0.682229,0.217045); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.7514,'statlog-german-credit','TBRoF',1,0,'{}',0.0223003,356689,0.494854); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.826801,'statlog-heart','TBRoF',1,0,'{}',0.0407774,0.493484,0.0934234); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.961298,'statlog-image','TBRoF',1,0,'{}',0.00773378,301575,115945); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.788538,'statlog-vehicle','TBRoF',1,0,'{}',0.0852794,479306,0.769822); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.980333,'synthetic-control','TBRoF',1,0,'{}',0.0119404,224574,0.15732); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.982247,'tic-tac-toe','TBRoF',1,0,'{}',0.0122867,16288,0.239083); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.845627,'vertebral-column-2clases','TBRoF',1,0,'{}',0.0408004,0.610367,0.0614021); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.987648,'wine','TBRoF',1,0,'{}',0.0153501,0.209809,0.0170114); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-09','00:48:19','crossval',0.952385,'zoo','TBRoF',1,0,'{}',0.0455876,0.355625,0.00956); diff --git a/data/ESWA/results_base_RRoF.sql b/data/ESWA/results_base_RRoF.sql new file mode 100644 index 0000000..6175c51 --- /dev/null +++ b/data/ESWA/results_base_RRoF.sql @@ -0,0 +1,52 @@ +-- +-- Base RRoF +-- +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.797369,'balance-scale','baseRRoF',1,0,'{}',0.058467,0.056173,0.0404587); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.625,'balloons','baseRRoF',1,0,'{}',0.190221,0.00330475,0.000990778); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.940946,'breast-cancer-wisc-diag','baseRRoF',1,0,'{}',0.0212105,0.0296062,0.00588306); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.703902,'breast-cancer-wisc-prog','baseRRoF',1,0,'{}',0.0667437,0.0191964,0.00236345); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.952484,'breast-cancer-wisc','baseRRoF',1,0,'{}',0.0154129,0.051149,0.0105423); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.719554,'breast-cancer','baseRRoF',1,0,'{}',0.0526763,0.0273944,0.0119587); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.657922,'cardiotocography-10clases','baseRRoF',1,0,'{}',0.0524926,0.502717,0.126206); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.876762,'cardiotocography-3clases','baseRRoF',1,0,'{}',0.0112342,0.627032,0.282809); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.734615,'conn-bench-sonar-mines-rocks','baseRRoF',1,0,'{}',0.0538895,0.0193592,0.00260686); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.654297,'cylinder-bands','baseRRoF',1,0,'{}',0.0499199,0.0584003,0.0126956); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.851879,'dermatology','baseRRoF',1,0,'{}',0.0774006,0.0322685,0.00427929); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.793482,'echocardiogram','baseRRoF',1,0,'{}',0.0879076,0.00767857,0.00261677); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.832,'fertility','baseRRoF',1,0,'{}',0.0717892,0.00881917,0.00140421); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.729706,'haberman-survival','baseRRoF',1,0,'{}',0.0487396,0.0127047,0.00574701); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.770639,'heart-hungarian','baseRRoF',1,0,'{}',0.053471,0.0146634,0.00465382); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.77991,'hepatitis','baseRRoF',1,0,'{}',0.0671092,0.0136452,0.00159511); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.696055,'ilpd-indian-liver','baseRRoF',1,0,'{}',0.0231133,0.0482305,0.0101052); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.878506,'ionosphere','baseRRoF',1,0,'{}',0.0325688,0.0262894,0.00293877); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.909217,'iris','baseRRoF',1,0,'{}',0.0546734,0.00826875,0.00241578); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.5002,'led-display','baseRRoF',1,0,'{}',0.16598,0.0834546,0.0215293); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.684444,'libras','baseRRoF',1,0,'{}',0.0786005,0.0762693,0.00805703); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.826734,'low-res-spect','baseRRoF',1,0,'{}',0.0437699,0.0586663,0.00444502); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.708108,'lymphography','baseRRoF',1,0,'{}',0.0565566,0.0177934,0.00199152); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.80708,'mammographic','baseRRoF',1,0,'{}',0.0259152,0.0431632,0.00868914); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.638736,'molec-biol-promoter','baseRRoF',1,0,'{}',0.108143,0.00983929,0.00140181); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.756723,'musk-1','baseRRoF',1,0,'{}',0.0425421,0.0401557,0.00192906); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.729155,'oocytes_merluccius_nucleus_4d','baseRRoF',1,0,'{}',0.026248,0.108816,0.00532225); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.880828,'oocytes_merluccius_states_2f','baseRRoF',1,0,'{}',0.0219842,0.102759,0.024178); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.712719,'oocytes_trisopterus_nucleus_2f','baseRRoF',1,0,'{}',0.0333267,0.093124,0.00443858); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.866667,'oocytes_trisopterus_states_5b','baseRRoF',1,0,'{}',0.0233995,0.0830202,0.0114035); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.854289,'parkinsons','baseRRoF',1,0,'{}',0.0629868,0.0129451,0.00232781); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.726302,'pima','baseRRoF',1,0,'{}',0.0387727,0.062338,0.0289431); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.803846,'pittsburg-bridges-MATERIAL','baseRRoF',1,0,'{}',0.0712916,0.00857422,0.00225875); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.601357,'pittsburg-bridges-REL-L','baseRRoF',1,0,'{}',0.0786889,0.0140277,0.00294606); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.584783,'pittsburg-bridges-SPAN','baseRRoF',1,0,'{}',0.0930101,0.0108635,0.00273294); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.849259,'pittsburg-bridges-T-OR-D','baseRRoF',1,0,'{}',0.0577165,0.00722582,0.00266615); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.61227,'planning','baseRRoF',1,0,'{}',0.0876011,0.0209287,0.007585); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.705303,'post-operative','baseRRoF',1,0,'{}',0.086102,0.00269023,0.00129621); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.790705,'seeds','baseRRoF',1,0,'{}',0.180289,0.0103721,0.00346276); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.678214,'statlog-australian-credit','baseRRoF',1,0,'{}',0.0315745,0.0091094,0.000591205); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.6848,'statlog-german-credit','baseRRoF',1,0,'{}',0.0202552,0.114948,0.00756476); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.76883,'statlog-heart','baseRRoF',1,0,'{}',0.0546972,0.0141939,0.0031457); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.879948,'statlog-image','baseRRoF',1,0,'{}',0.0810351,0.506874,0.202689); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.663137,'statlog-vehicle','baseRRoF',1,0,'{}',0.0347429,0.113744,0.0125771); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.885,'synthetic-control','baseRRoF',1,0,'{}',0.0266557,0.0499058,0.00479801); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.824842,'tic-tac-toe','baseRRoF',1,0,'{}',0.0352731,0.0833412,0.012665); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.803313,'vertebral-column-2clases','baseRRoF',1,0,'{}',0.0458269,0.0246671,0.00566697); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.828063,'wine','baseRRoF',1,0,'{}',0.136358,0.0100478,0.00276311); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:39:38','crossval',0.845154,'zoo','baseRRoF',1,0,'{}',0.0862973,0.0103013,0.00116166); diff --git a/data/ESWA/results_base_RaF.sql b/data/ESWA/results_base_RaF.sql new file mode 100644 index 0000000..fe2819f --- /dev/null +++ b/data/ESWA/results_base_RaF.sql @@ -0,0 +1,52 @@ +-- +-- Base RaF +-- +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.788139,'balance-scale','baseRaF',1,0,'{}',0.175399,0.0531906,0.0387208); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.625,'balloons','baseRaF',1,0,'{}',0.222131,0.00184033,0.0012073); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.953228,'breast-cancer-wisc-diag','baseRaF',1,0,'{}',0.0163919,0.0107458,0.0016314); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.710464,'breast-cancer-wisc-prog','baseRaF',1,0,'{}',0.0714578,0.00518408,0.00147653); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.934215,'breast-cancer-wisc','baseRaF',1,0,'{}',0.0266307,0.0123473,0.00230302); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.644096,'breast-cancer','baseRaF',1,0,'{}',0.0518223,0.0118358,0.00162178); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.728229,'cardiotocography-10clases','baseRaF',1,0,'{}',0.0173198,0.431309,0.0804245); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.872983,'cardiotocography-3clases','baseRaF',1,0,'{}',0.0185093,0.432781,0.0658666); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.725,'conn-bench-sonar-mines-rocks','baseRaF',1,0,'{}',0.0820887,0.00428136,0.000752719); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.642188,'cylinder-bands','baseRaF',1,0,'{}',0.0323213,0.0178885,0.00150941); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.954,'dermatology','baseRaF',1,0,'{}',0.028694,0.0113351,0.00078796); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.697098,'echocardiogram','baseRaF',1,0,'{}',0.0724735,0.00488536,0.00142448); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.79,'fertility','baseRaF',1,0,'{}',0.106128,0.00275558,0.000802812); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.714491,'haberman-survival','baseRaF',1,0,'{}',0.0527132,0.0155133,0.00412764); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.776621,'heart-hungarian','baseRaF',1,0,'{}',0.0558441,0.0131795,0.00145566); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.768549,'hepatitis','baseRaF',1,0,'{}',0.0627836,0.00425792,0.00133006); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.658306,'ilpd-indian-liver','baseRaF',1,0,'{}',0.0468364,0.0396437,0.00471406); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.848391,'ionosphere','baseRaF',1,0,'{}',0.0425361,0.00529335,0.00104925); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.953534,'iris','baseRaF',1,0,'{}',0.0236,0.00274398,0.000563343); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.6732,'led-display','baseRaF',1,0,'{}',0.0339188,0.0837362,0.0068326); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.590556,'libras','baseRaF',1,0,'{}',0.0790025,0.127775,0.0240925); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.733636,'low-res-spect','baseRaF',1,0,'{}',0.0524552,0.0410959,0.00897058); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.695946,'lymphography','baseRaF',1,0,'{}',0.0593915,0.00475246,0.000937836); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.75569,'mammographic','baseRaF',1,0,'{}',0.032263,0.0626184,0.00550619); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.639835,'molec-biol-promoter','baseRaF',1,0,'{}',0.0955089,0.0011949,0.00015005); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.77521,'musk-1','baseRaF',1,0,'{}',0.0271103,0.0202152,0.00422566); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.772783,'oocytes_merluccius_nucleus_4d','baseRaF',1,0,'{}',0.0353748,0.0480226,0.00807228); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.891969,'oocytes_merluccius_states_2f','baseRaF',1,0,'{}',0.0194857,0.0475845,0.00754526); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.74057,'oocytes_trisopterus_nucleus_2f','baseRaF',1,0,'{}',0.0287175,0.0333703,0.00293244); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.906579,'oocytes_trisopterus_states_5b','baseRaF',1,0,'{}',0.0185044,0.0299981,0.00418996); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.827941,'parkinsons','baseRaF',1,0,'{}',0.0676886,0.00474085,0.00101539); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.685677,'pima','baseRaF',1,0,'{}',0.0293304,0.0469974,0.00298769); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.768132,'pittsburg-bridges-MATERIAL','baseRaF',1,0,'{}',0.0740949,0.00492078,0.00125145); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.573786,'pittsburg-bridges-REL-L','baseRaF',1,0,'{}',0.0863999,0.0076706,0.00187105); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.571739,'pittsburg-bridges-SPAN','baseRaF',1,0,'{}',0.126386,0.00753147,0.00159705); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.803407,'pittsburg-bridges-T-OR-D','baseRaF',1,0,'{}',0.0986764,0.00238962,0.000941915); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.613144,'planning','baseRaF',1,0,'{}',0.0599842,0.00957998,0.00164111); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.49072,'post-operative','baseRaF',1,0,'{}',0.117636,0.00542032,0.00112316); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.93052,'seeds','baseRaF',1,0,'{}',0.0377479,0.0052242,0.00100866); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.678291,'statlog-australian-credit','baseRaF',1,0,'{}',0.030839,0.0113963,0.0035361); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.672,'statlog-german-credit','baseRaF',1,0,'{}',0.0368668,0.0587169,0.00545287); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.745187,'statlog-heart','baseRaF',1,0,'{}',0.0466497,0.00900943,0.00111242); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.942258,'statlog-image','baseRaF',1,0,'{}',0.0116565,0.500883,0.271411); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.768526,'statlog-vehicle','baseRaF',1,0,'{}',0.0320319,0.0358268,0.00307129); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.885667,'synthetic-control','baseRaF',1,0,'{}',0.0300701,0.0271531,0.00357264); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.964097,'tic-tac-toe','baseRaF',1,0,'{}',0.0128756,0.0292446,0.00718939); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.810858,'vertebral-column-2clases','baseRaF',1,0,'{}',0.0363136,0.0136081,0.00216191); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.952619,'wine','baseRaF',1,0,'{}',0.0321276,0.00283508,0.000572741); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:15','crossval',0.859385,'zoo','baseRaF',1,0,'{}',0.0984949,0.00476124,0.000706802); diff --git a/data/ESWA/results_base_RoF.sql b/data/ESWA/results_base_RoF.sql new file mode 100644 index 0000000..fecd4f0 --- /dev/null +++ b/data/ESWA/results_base_RoF.sql @@ -0,0 +1,52 @@ +-- +-- Base RoF +-- +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.767583,'balance-scale','baseRoF',1,0,'{}',0.203255,0.0446322,0.0545407); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.675,'balloons','baseRoF',1,0,'{}',0.257774,0.00230893,0.00107712); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.972242,'breast-cancer-wisc-diag','baseRoF',1,0,'{}',0.0131518,0.0177623,0.00299349); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.783633,'breast-cancer-wisc-prog','baseRoF',1,0,'{}',0.0607377,0.0332747,0.0116981); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.960812,'breast-cancer-wisc','baseRoF',1,0,'{}',0.010349,0.0332904,0.00682684); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.722333,'breast-cancer','baseRoF',1,0,'{}',0.0369547,0.0148639,0.00494574); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.777798,'cardiotocography-10clases','baseRoF',1,0,'{}',0.0199397,0.812814,0.137849); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.893694,'cardiotocography-3clases','baseRoF',1,0,'{}',0.0189301,0.507345,0.0816501); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.786538,'conn-bench-sonar-mines-rocks','baseRoF',1,0,'{}',0.053272,0.0319204,0.00526368); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.715234,'cylinder-bands','baseRoF',1,0,'{}',0.0316052,0.0382499,0.019126); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.976456,'dermatology','baseRoF',1,0,'{}',0.017583,0.0183773,0.00096312); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.826161,'echocardiogram','baseRoF',1,0,'{}',0.0566044,0.00666127,0.00152563); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.88,'fertility','baseRoF',1,0,'{}',0.0535085,0.0032558,0.00141667); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.732777,'haberman-survival','baseRoF',1,0,'{}',0.0461519,0.0260083,0.00642227); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.806292,'heart-hungarian','baseRoF',1,0,'{}',0.0292422,0.0118435,0.00345694); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.794833,'hepatitis','baseRoF',1,0,'{}',0.0520409,0.00962171,0.00293465); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.704268,'ilpd-indian-liver','baseRoF',1,0,'{}',0.0394383,0.0323293,0.0058233); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.912165,'ionosphere','baseRoF',1,0,'{}',0.0312381,0.0361893,0.00726605); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.977304,'iris','baseRoF',1,0,'{}',0.024722,0.00467289,0.000694593); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.7058,'led-display','baseRoF',1,0,'{}',0.0330989,0.106641,0.00429973); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.748333,'libras','baseRoF',1,0,'{}',0.0516492,0.22779,0.0346553); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.867643,'low-res-spect','baseRoF',1,0,'{}',0.0227084,0.141473,0.0178801); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.771622,'lymphography','baseRoF',1,0,'{}',0.069309,0.0110059,0.00142268); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.815809,'mammographic','baseRoF',1,0,'{}',0.0213706,0.0823246,0.00838469); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.651648,'molec-biol-promoter','baseRoF',1,0,'{}',0.0747578,0.00914322,0.00114168); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.856303,'musk-1','baseRoF',1,0,'{}',0.0337128,0.144454,0.0303389); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.825408,'oocytes_merluccius_nucleus_4d','baseRoF',1,0,'{}',0.0248569,0.110147,0.0221553); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.921528,'oocytes_merluccius_states_2f','baseRoF',1,0,'{}',0.0202779,0.117209,0.0160898); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.805702,'oocytes_trisopterus_nucleus_2f','baseRoF',1,0,'{}',0.0233865,0.0707077,0.00991037); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.932018,'oocytes_trisopterus_states_5b','baseRoF',1,0,'{}',0.0162558,0.0968036,0.0125905); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.911581,'parkinsons','baseRoF',1,0,'{}',0.0266905,0.0106399,0.00138008); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.772135,'pima','baseRoF',1,0,'{}',0.0263347,0.0273879,0.00352025); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.816758,'pittsburg-bridges-MATERIAL','baseRoF',1,0,'{}',0.06753,0.00732728,0.000908743); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.627214,'pittsburg-bridges-REL-L','baseRoF',1,0,'{}',0.0869763,0.0132576,0.00120841); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.632609,'pittsburg-bridges-SPAN','baseRoF',1,0,'{}',0.0919342,0.0137112,0.00153695); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.87437,'pittsburg-bridges-T-OR-D','baseRoF',1,0,'{}',0.0575295,0.00422873,0.000965931); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.710165,'planning','baseRoF',1,0,'{}',0.0602237,0.00391673,0.00158102); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.709091,'post-operative','baseRoF',1,0,'{}',0.0800951,0.00196808,0.0007889); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.949644,'seeds','baseRoF',1,0,'{}',0.0411416,0.00794183,0.0010996); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.678271,'statlog-australian-credit','baseRoF',1,0,'{}',0.0332535,0.0124906,0.00557268); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.7596,'statlog-german-credit','baseRoF',1,0,'{}',0.0251279,0.0651643,0.00937727); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.830554,'statlog-heart','baseRoF',1,0,'{}',0.0438866,0.00845302,0.00176189); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.961124,'statlog-image','baseRoF',1,0,'{}',0.00954905,0.565189,0.239254); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.805401,'statlog-vehicle','baseRoF',1,0,'{}',0.0266276,0.0921317,0.00659751); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.98,'synthetic-control','baseRoF',1,0,'{}',0.0110289,0.0381931,0.00304846); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.983295,'tic-tac-toe','baseRoF',1,0,'{}',0.0045061,0.0308545,0.00203696); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.841328,'vertebral-column-2clases','baseRoF',1,0,'{}',0.0406997,0.0125325,0.00288834); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.989872,'wine','baseRoF',1,0,'{}',0.0134836,0.00354218,0.000355459); +replace into results (date, time, type, accuracy, dataset, classifier, norm, stand, parameters, accuracy_std, time_spent, time_spent_std) values ('2021-03-08','18:38:52','crossval',0.931154,'zoo','baseRoF',1,0,'{}',0.044867,0.00618482,0.000234567); diff --git a/experiment.py b/experiment.py index 654f783..150b9ba 100644 --- a/experiment.py +++ b/experiment.py @@ -20,7 +20,15 @@ def parse_arguments() -> Tuple[str, str, str, str, str, bool, bool, dict]: "-m", "--model", type=str, - choices=["stree", "adaBoost", "bagging", "odte", "oc1", "cart"], + choices=[ + "stree", + "wodt", + "adaBoost", + "bagging", + "odte", + "oc1", + "cart", + ], required=False, default="stree", ) diff --git a/experimentation/Database.py b/experimentation/Database.py index eb32e38..e753aaf 100644 --- a/experimentation/Database.py +++ b/experimentation/Database.py @@ -345,8 +345,8 @@ class Outcomes(BD): float(results["test_score"].std()), ], [ - float(results["score_time"].mean()), - float(results["score_time"].std()), + float(results["fit_time"].mean()), + float(results["fit_time"].std()), ], parameters, ) @@ -441,8 +441,8 @@ class Hyperparameters(BD): float(outcomes["test_score_std"]), ] time_spent = [ - float(outcomes["score_time"]), - float(outcomes["score_time_std"]), + float(outcomes["fit_time"]), + float(outcomes["fit_time_std"]), ] self.mirror( grid_type, diff --git a/experimentation/Models.py b/experimentation/Models.py index 1956f79..f2d9569 100644 --- a/experimentation/Models.py +++ b/experimentation/Models.py @@ -11,6 +11,7 @@ from sklearn.svm import LinearSVC # type: ignore from sklearn.tree import DecisionTreeClassifier # type: ignore from odte import Odte from sklearn_oblique_tree.oblique import ObliqueTree +from wodt import TreeClassifier class ModelBase(ABC): @@ -95,6 +96,31 @@ class ModelOc1(ModelBase): self._param_grid = [self._rbf] +class ModelWodt(ModelBase): + def __init__(self, random_state: Optional[int] = None) -> None: + self._clf = TreeClassifier() + super().__init__(random_state) + self._model_name = "wodt" + self._linear = { + "random_state": [self._random_state], + } + self._rbf = {} + self._poly = {} + self._param_grid = [ + self._linear, + self._poly, + self._rbf, + ] + + def select_params(self, kernel: str) -> None: + if kernel == "linear": + self._param_grid = [self._linear] + elif kernel == "poly": + self._param_grid = [self._poly] + else: + self._param_grid = [self._rbf] + + class ModelStree(ModelBase): def __init__(self, random_state: Optional[int] = None) -> None: self._clf = Stree() diff --git a/setup.py b/setup.py index a66ebe3..9496d9d 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ import setuptools -__version__ = "0.1.0" +__version__ = "0.2.0" __author__ = "Ricardo Montañana Gómez" diff --git a/testwodt.py b/testwodt.py new file mode 100644 index 0000000..07a5342 --- /dev/null +++ b/testwodt.py @@ -0,0 +1,125 @@ +import argparse +from wodt import TreeClassifier +from sklearn.model_selection import cross_val_score +import numpy as np +import random +from experimentation.Sets import Datasets + + +def parse_arguments(): + ap = argparse.ArgumentParser() + ap.add_argument( + "-S", + "--set-of-files", + type=str, + choices=["aaai", "tanveer"], + required=False, + default="aaai", + ) + ap.add_argument( + "-d", + "--dataset", + type=str, + required=False, + help="Dataset name", + ) + ap.add_argument( + "-n", + "--normalize", + default=False, + type=bool, + required=False, + help="Normalize dataset (True/False)", + ) + ap.add_argument( + "-s", + "--standardize", + default=False, + type=bool, + required=False, + help="Standardize dataset (True/False)", + ) + ap.add_argument( + "-p", + "--paper-norm", + default=False, + type=bool, + required=False, + help="[-1, 1] normalization like on paper (True/False)", + ) + ap.add_argument( + "-r", + "--random-set", + default=0, + type=int, + required=False, + help="Set of random seeds: {0, 1}", + ) + args = ap.parse_args() + return ( + args.set_of_files, + args.dataset, + args.normalize, + args.standardize, + args.paper_norm, + args.random_set, + ) + + +def normalize_paper(data): + min_data = data.min() + return 2 * (data - min_data) / (data.max() - min_data) - 1 + + +def process_dataset(dataset, verbose): + X, y = dt.load(dataset) + if paper_norm: + X = normalize_paper(X) + scores = [] + if verbose: + print(f"* Processing dataset [{dataset}] from Set: {set_of_files}") + print(f"X.shape: {X.shape}") + print(f"{X[:4]}") + print(f"Random seeds: {random_seeds}") + print(f"[-1, 1]: {paper_norm} norm: {normalize} std: {standardize}") + for random_state in random_seeds: + random.seed(random_state) + np.random.seed(random_state) + clf = TreeClassifier(random_state=random_state) + res = cross_val_score(clf, X, y, cv=5) + scores.append(res) + if verbose: + print( + f"Random seed: {random_state:5d} Accuracy: {res.mean():6.4f}" + f"±{res.std():6.4f}" + ) + return scores + + +( + set_of_files, + dataset, + normalize, + standardize, + paper_norm, + random_set, +) = parse_arguments() +random_seeds = ( + [57, 31, 1714, 17, 23, 79, 83, 97, 7, 1] + if random_set == 0 + else [32, 24, 56, 18, 2, 94, 1256, 84, 156, 42] +) +dt = Datasets(normalize, standardize, set_of_files) +if dataset == "all": + print( + f"* Process all datasets set: {set_of_files} [-1, 1]: {paper_norm} " + f"norm: {normalize} std: {standardize}" + ) + print(f"5 Fold Cross Validation with 10 random seeds {random_seeds}") + for dataset in dt: + print(f"- {dataset[0]:20s} ", end="") + scores = process_dataset(dataset[0], verbose=False) + print(f"{np.mean(scores):6.4f}±{np.std(scores):6.4f}") +else: + scores = process_dataset(dataset, verbose=True) + print(f"* Accuracy: {np.mean(scores):6.4f}±{np.std(scores):6.4f}") diff --git a/wodt/WODT.py b/wodt/WODT.py new file mode 100644 index 0000000..fe00a50 --- /dev/null +++ b/wodt/WODT.py @@ -0,0 +1,289 @@ +######################## +"""import""" +import numpy as np +import random +from scipy.optimize import minimize +from sklearn.base import BaseEstimator, ClassifierMixin + + +"""global var""" +epsilonepsilon = 1e-220 +epsilon = 1e-50 + +"""class""" + + +class SplitQuestion(object): + """docstring for SplitQuestion""" + + def __init__(self, attrIDs=[0], paras=[0], threshold=0): + super(SplitQuestion, self).__init__() + self.attrIDs = attrIDs + self.paras = paras + self.threshold = threshold + + # we only consider continuous attributes for simplicity + def test_forOneInstance(self, x): + return np.dot(x[self.attrIDs], self.paras) <= self.threshold + + def test(self, X): + return np.dot(X[:, self.attrIDs], self.paras) <= self.threshold + + +class Node(object): + """docstring for RBNode""" + + def __init__(self, depth, split, sample_ids, X, Y, class_num): + super(Node, self).__init__() + self.sample_ids = sample_ids + self.split = split + self.depth = depth + self.X = X + self.Y = Y + self.class_num = class_num + self.is_leaf = False + # after grow_stump, set the node as an internal node + + def find_best_split(self, max_features="sqrt"): + feature_num = self.X.shape[1] + subset_feature_num = feature_num + if max_features == "sqrt": + subset_feature_num = int(np.sqrt(feature_num)) + if max_features == "all": + subset_feature_num = feature_num + if max_features == "log": + subset_feature_num = int(np.log2(feature_num)) + if isinstance(max_features, int): + subset_feature_num = max_features + if isinstance(max_features, float): + subset_feature_num = int(feature_num * max_features) + + # ### get random subset of features + # ### feature 0 is threshold + feature_ids = range(feature_num) + subset_feature_ids = random.sample(feature_ids, subset_feature_num) + self.split.attrIDs = subset_feature_ids + subset_feature_ids = np.array(subset_feature_ids) + + X = self.X + subFeatures_X = X[ + self.sample_ids[:, None], subset_feature_ids[None, :] + ] + Y = self.Y[self.sample_ids] + class_num = self.class_num + + # ############################## + # define func and func_gradient for optimization + def func(a): + paras = a[1:] + threshold = a[0] + p = sigmoid(np.dot(subFeatures_X, paras) - threshold) + w_R = p + w_L = 1 - w_R + w_R_sum = w_R.sum() + w_L_sum = w_L.sum() + w_R_eachClass = np.array( + [sum(w_R[Y == k]) for k in range(class_num)] + ) + w_L_eachClass = np.array( + [sum(w_L[Y == k]) for k in range(class_num)] + ) + fun = ( + w_L_sum * np.log2(w_L_sum + epsilonepsilon) + + w_R_sum * np.log2(w_R_sum + epsilonepsilon) + - np.sum( + w_R_eachClass * np.log2(w_R_eachClass + epsilonepsilon) + ) + - np.sum( + w_L_eachClass * np.log2(w_L_eachClass + epsilonepsilon) + ) + ) + # fun = w_L.sum() * compute_entropy(Y, w_L) + w_R.sum() + # * compute_entropy(Y, w_R) + return fun + + def func_gradient(a): + paras = a[1:] + threshold = a[0] + + p = sigmoid(np.dot(subFeatures_X, paras) - threshold) + w_R = p + w_L = 1 - w_R + w_R_eachClass = np.array( + [sum(w_R[Y == k]) for k in range(class_num)] + ) + w_L_eachClass = np.array( + [sum(w_L[Y == k]) for k in range(class_num)] + ) + la = np.log2( + w_L_eachClass[Y] * w_R.sum() + epsilonepsilon + ) - np.log2(w_R_eachClass[Y] * w_L.sum() + epsilonepsilon) + beta = la * p * (1 - p) + + jac = np.zeros(a.shape) + jac[0] = -np.sum(beta) + jac[1:] = np.dot(subFeatures_X.T, beta) + + return jac + + ################################################ + initial_a = np.random.rand(subset_feature_num + 1) - 0.5 + result = minimize( + func, + initial_a, + method="L-BFGS-B", + jac=func_gradient, + options={"maxiter": 10, "disp": False}, + ) + + ########################################## + self.split.paras = result.x[1:] + self.split.threshold = result.x[0] + + return 1 + + def grow_stump(self): + L_bool = self.split.test(self.X[self.sample_ids]) + L_sample_ids = self.sample_ids[L_bool] + R_sample_ids = self.sample_ids[~L_bool] + # if len(R_sample_ids) * len(L_sample_ids) == 0 : + # print('some branch is 0 sample') + LChild = Node( + self.depth + 1, + SplitQuestion(), + L_sample_ids, + self.X, + self.Y, + self.class_num, + ) + RChild = Node( + self.depth + 1, + SplitQuestion(), + R_sample_ids, + self.X, + self.Y, + self.class_num, + ) + + if len(L_sample_ids) == 0: + LChild.is_leaf = True + LChild.class_distribution = compute_class_distribution( + self.Y[self.sample_ids], self.class_num + ) + if len(R_sample_ids) == 0: + RChild.is_leaf = True + RChild.class_distribution = compute_class_distribution( + self.Y[self.sample_ids], self.class_num + ) + + self.LChild = LChild + self.RChild = RChild + + +class TreeClassifier(BaseEstimator, ClassifierMixin): + """docstring for TreeClassifier""" + + def __init__( + self, + max_depth=50, + min_samples_split=2, + max_features="all", + random_state=None, + ): + # super(TreeClassifier, self).__init__() + self.max_depth = max_depth + self.min_samples_split = min_samples_split + self.max_features = max_features + self.random_state = random_state + + def fit(self, X, Y): + self.X = X + self.Y = Y + self.classNum = self.Y.max() + 1 + self.sampleNum = self.X.shape[0] + if self.random_state is not None: + random.seed(self.random_state) + ########### + self.root_node = Node( + 1, + SplitQuestion(), + np.arange(self.sampleNum, dtype=np.uint32), + self.X, + self.Y, + self.classNum, + ) + self.leaf_num = 1 + self.tree_depth = self.bulid_subtree(self.root_node) + + def bulid_subtree(self, node): + if node.is_leaf: + return node.depth + + # stopping conditions + is_leaf = ( + node.depth >= self.max_depth + or len(node.sample_ids) < self.min_samples_split + or is_all_equal(self.Y[node.sample_ids]) + ) + + if is_leaf or node.find_best_split(self.max_features) < 0: + node.is_leaf = True + node.class_distribution = compute_class_distribution( + self.Y[node.sample_ids], self.classNum + ) + return node.depth + + node.grow_stump() + node.is_leaf = False + self.leaf_num += 1 + L_subtree_depth = self.bulid_subtree(node.LChild) + R_subtree_depth = self.bulid_subtree(node.RChild) + return max(L_subtree_depth, R_subtree_depth) + + def predict_forOneInstance(self, x): + present_node = self.root_node + while not (present_node.is_leaf): + if present_node.split.test_forOneInstance(x): + present_node = present_node.LChild + else: + present_node = present_node.RChild + return np.argmax(present_node.class_distribution) + + def predict(self, X): + m = X.shape[0] + Y_predicted = np.zeros((m,), dtype=int) + for i in range(m): + x = X[i] + Y_predicted[i] = self.predict_forOneInstance(x) + return Y_predicted + + def score( + self, X: np.array, y: np.array, sample_weight: np.array = None + ) -> float: + y_pred = self.predict(X) + return np.mean(y_pred == y) + + +#################### +"""function""" + + +def sigmoid(z): + # because that -z is too big will arise runtimeWarning in np.exp() + if isinstance(z, float) and (z < -500): + z = -500 + elif not (isinstance(z, float)): + z[z < -500] = (-500) * np.ones(sum(z < -500)) + + return 1 / (np.exp(-z) + 1) + + +def is_all_equal(x): + x_min, x_max = x.min(), x.max() + return x_min == x_max + + +def compute_class_distribution(Y, class_num): + sample_num = len(Y) + ratio_each_class = [sum(Y == k) / sample_num for k in range(class_num)] + return np.array(ratio_each_class) diff --git a/wodt/__init__.py b/wodt/__init__.py new file mode 100644 index 0000000..6a86cb0 --- /dev/null +++ b/wodt/__init__.py @@ -0,0 +1,5 @@ +from .WODT import TreeClassifier + +__all__ = [ + "TreeClassifier", +]