Compare commits

...

266 Commits

Author SHA1 Message Date
f3b8150e2c Add notes to Classifier & Changelog 2024-02-12 10:58:20 +01:00
03f8b8653b Add getNotes test 2024-02-09 12:06:19 +01:00
2163e95c4a add getNotes method 2024-02-09 10:57:19 +01:00
b33da34655 Add notes to Classifier & use them in BoostAODE 2024-02-08 18:01:09 +01:00
e17aee7bdb Remove argparse module 2024-01-09 18:02:17 +01:00
37c31ee4c2 Update libraries 2024-01-08 17:45:11 +01:00
80afdc06f7 Remove unneeded argparse module 2024-01-08 00:55:16 +01:00
Ricardo Montañana Gómez
666782217e Merge pull request #1 from rmontanana/library
Remove other projects' sources
2024-01-07 20:01:37 +01:00
55af0714cd Remove other projects' sources 2024-01-07 19:58:22 +01:00
6ef5ca541a Add app version to command line utils 2024-01-06 22:38:34 +01:00
4364317411 Merge pull request 'Refactor mpi grid search process using the producer consumer pattern' (#15) from producer_consumer into main
Reviewed-on: #15
(a usage sketch of the MPI grid search follows this commit list)
2024-01-04 15:24:48 +00:00
65a96851ef Check min number of nested folds 2024-01-04 11:01:59 +01:00
722da7f781 Keep only mpi b_grid compute 2024-01-04 01:21:56 +01:00
b1833a5feb Add reset color to final progress bar 2024-01-03 22:45:16 +01:00
41a0bd4ddd fix dataset name mistakes 2024-01-03 17:15:57 +01:00
9ab4fc7d76 Fix some mistakes in methods 2024-01-03 11:53:46 +01:00
beadb7465f Complete first approach 2023-12-31 12:02:13 +01:00
652e5f623f Add todo comments 2023-12-28 23:32:24 +01:00
b7fef9a99d Remove kk file 2023-12-28 23:24:59 +01:00
343269d48c Fix syntax errors 2023-12-28 23:21:50 +01:00
21c4c6df51 Fix first mistakes in structure 2023-12-25 19:33:52 +01:00
702f086706 Update miniconda instructions 2023-12-23 19:54:00 +01:00
981bc8f98b Fix install message in readme 2023-12-23 01:00:55 +01:00
e0b7b2d316 Set structure & protocol of producer-consumer 2023-12-22 12:47:13 +01:00
9b9e91e856 Merge pull request 'mpi_grid' (#14) from mpi_grid into main
Reviewed-on: #14
2023-12-18 09:05:55 +00:00
18e8e84284 Add openmpi instructions for Oracle Linux 2023-12-17 12:19:50 +01:00
7de11b0e6d Fix format of duration 2023-12-17 01:45:04 +01:00
9b8db37a4b Fix duration of task not set 2023-12-16 19:31:45 +01:00
49b26bd04b fix duration output 2023-12-16 12:53:25 +01:00
b5b5b48864 Update grid progress bar output 2023-12-15 18:09:17 +01:00
19586a3a5a Fix pesky error allocating memory in workers 2023-12-15 01:54:13 +01:00
ffe6d37436 Add messages to control trace 2023-12-14 21:06:43 +01:00
b73f4be146 First try with complete algorithm 2023-12-14 15:55:08 +01:00
dbf2f35502 First compiling version 2023-12-12 18:57:57 +01:00
db9e80a70e Create build tasks 2023-12-12 12:15:22 +01:00
40ae4ad7f9 Include mpi in CMakeLists 2023-12-11 09:06:05 +01:00
234342f2de Add mpi parameter to b_grid 2023-12-10 22:33:17 +01:00
aa0936abd1 Add --exclude parameter to b_grid to exclude datasets 2023-12-08 12:09:08 +01:00
f0d6f0cc38 Fix sample building 2023-12-04 19:12:44 +01:00
cc316bb8d3 Add colors to results of gridsearch 2023-12-04 17:34:00 +01:00
0723564e66 Fix some output in gridsearch 2023-12-03 17:55:44 +01:00
2e95e8999d Complete nested gridsearch 2023-12-03 12:37:25 +01:00
fb9b395748 Begin output nested grid 2023-12-02 13:19:12 +01:00
03e4437fea refactor gridsearch to have only one go method 2023-12-02 10:59:05 +01:00
33cd32c639 Add header to grid output and report 2023-12-01 10:30:53 +01:00
c460ef46ed Refactor gridsearch method 2023-11-30 11:01:37 +01:00
dee9c674da Refactor grid input hyperparameter file 2023-11-29 18:24:34 +01:00
e3f6dc1e0b Fix tolerance hyperp error & gridsearch 2023-11-29 12:33:50 +01:00
460d20a402 Add reports to gridsearch 2023-11-29 00:26:48 +01:00
8dbbb65a2f Add only parameter to gridsearch 2023-11-28 10:08:40 +01:00
d06bf187b2 Implement Random Forest nodes/leaves/depth 2023-11-28 00:35:38 +01:00
4addaefb47 Implement sklearn version in PyWrap 2023-11-27 22:34:34 +01:00
82964190f6 Add nodes/leaves/depth to STree & ODTE 2023-11-27 10:57:57 +01:00
4fefe9a1d2 Add grid input info to grid output 2023-11-26 16:07:32 +01:00
7c12dd25e5 Fix upper case typo 2023-11-26 10:55:32 +01:00
c713c0b1df Add continue from parameter to gridsearch 2023-11-26 10:36:09 +01:00
64069a6cb7 Adapt b_main to the new hyperparam file format 2023-11-25 16:52:25 +01:00
ba2a3f9523 Merge pull request 'gridsearch' (#13) from gridsearch into main
Reviewed-on: #13
2023-11-25 11:16:13 +00:00
f94e2d6a27 Add quiet parameter 2023-11-24 21:16:20 +01:00
2121ba9b98 Refactor input grid parameters to json file 2023-11-24 09:57:29 +01:00
8b7b59d42b Complete first step 2023-11-23 12:59:21 +01:00
bbe5302ab1 Add info to output 2023-11-22 16:38:50 +01:00
c2eb727fc7 Complete output interface of gridsearch 2023-11-22 16:30:04 +01:00
fb347ed5b9 Begin gridsearch implementation 2023-11-22 12:22:30 +01:00
b657762c0c Generate combinations sample 2023-11-22 00:18:24 +01:00
495d8a8528 Begin implementing grid combinations 2023-11-21 13:11:14 +01:00
4628e48d3c Build gridsearch structure 2023-11-20 23:32:34 +01:00
5876be4b24 Add more install instructions of Boost to README 2023-11-20 20:39:22 +01:00
dc3400197f Add todo comment to implement number of nodes 2023-11-20 01:14:13 +01:00
26d3a57782 Add info to invalid hyperparameter exception 2023-11-19 23:02:28 +01:00
4f3a04058f Refactor Hyperparameters management 2023-11-19 22:36:27 +01:00
89c4613591 Implement hyperparameters with json file 2023-11-18 11:56:10 +01:00
28f3d87e32 Add Python Classifiers
Add STree, Odte, SVC & RandomForest Classifiers
Remove using namespace ... in project
2023-11-17 11:11:05 +01:00
e8d2c9fc0b Set intolerant convergence 2023-11-17 10:26:25 +01:00
d3cb580387 Remove n_jobs from STree 2023-11-17 10:10:31 +01:00
f088df14fd Restore the Creation model position in experiment 2023-11-17 01:10:46 +01:00
e2249eace7 Disable Warning messages in python clfs
Disable removing Python env
2023-11-16 22:38:46 +01:00
64f5a7f14a Fix header in example 2023-11-16 17:03:40 +01:00
408db2aad5 Mark override fit function 2023-11-14 18:59:41 +01:00
e03efb5f63 set tolerance=0 if feature selection in BoostAODE 2023-11-14 10:12:02 +01:00
f617886133 Add new models to example 2023-11-14 09:12:25 +01:00
69ad660040 Refactor version method in PyClassifier 2023-11-13 13:59:06 +01:00
431b3a3aa5 Fit PyWrap into BayesNet 2023-11-13 11:13:32 +01:00
6a23e2cc26 Add CMakelist integration 2023-11-12 22:14:29 +01:00
f6e00530be Add Pywrap sources 2023-11-12 21:43:07 +01:00
f9258e43b9 Remove using namespace from Library 2023-11-08 18:45:35 +01:00
92820555da Simple fix 2023-10-28 10:56:47 +02:00
5a3af51826 Activate best score in odte 2023-10-25 10:23:42 +02:00
a8f9800631 Fix mistake when no results in manage 2023-10-24 19:44:23 +02:00
84cec0c1e0 Add results files affected in best results excel 2023-10-24 16:18:52 +02:00
130139f644 Update formulas to use letters in ranges in excel 2023-10-24 13:06:31 +02:00
651f84b562 Fix mistake in conditional format in bestresults 2023-10-24 11:18:19 +02:00
553ab0fa22 Add conditional format to BestResults Excel 2023-10-24 10:56:41 +02:00
4975feabff Fix mistake in node count 2023-10-23 22:46:10 +02:00
32293af69f Fix header in manage 2023-10-23 17:04:59 +02:00
858664be2d Add total number of results in manage 2023-10-23 16:22:15 +02:00
1f705f6018 Refactor BestScore and add experiment to .env 2023-10-23 16:12:52 +02:00
7bcd2eed06 Add variable width of dataset name in reports 2023-10-22 22:58:52 +02:00
833acefbb3 Fix index limits mistake in manage 2023-10-22 20:21:50 +02:00
26b649ebae Refactor ManageResults and CommandParser 2023-10-22 20:03:34 +02:00
080eddf9cd Fix hyperparameters output in b_best 2023-10-20 22:52:48 +02:00
04e754b2f5 Adjust filename and hyperparameters in reports 2023-10-20 11:12:46 +02:00
38423048bd Add excel to best report of model 2023-10-19 18:12:55 +02:00
64fc97b892 Rename utilities sources to match final names 2023-10-19 09:57:04 +02:00
2c2159f192 Add quiet mode to b_main
Reduce output when --quiet is set, not showing fold info
2023-10-17 21:51:53 +02:00
6765552a7c Update submodule versions 2023-10-16 19:21:57 +02:00
f72aa5b9a6 Merge pull request 'Create Boost_CFS' (#11) from Boost_CFS into main
Add hyperparameter to BoostAODE. This hyperparameter decides whether we select features with cfs/fcbf/iwss before starting to build models, building a Spode with the selected features.
The hyperparameter is select_features (a sketch follows this commit list).
2023-10-15 09:22:14 +00:00
fa7fe081ad Fix xlsx library finding 2023-10-15 11:19:58 +02:00
660e783517 Update validation for feature selection 2023-10-14 13:32:09 +02:00
b35532dd9e Implement IWSS and FCBF too for BoostAODE 2023-10-14 13:12:04 +02:00
6ef49385ea Remove unneeded method declaration FeatureSelect 2023-10-14 11:30:32 +02:00
6d5a25cdc8 Refactor CFS class creating abstract base class 2023-10-14 11:27:46 +02:00
d00b08cbe8 Fix Header for Linux 2023-10-13 14:26:47 +02:00
977ff6fddb Update CMakeLists for Linux 2023-10-13 14:01:52 +02:00
54b8939f35 Prepare BoostAODE first try 2023-10-13 13:46:22 +02:00
5022a4dc90 Complete CFS tested with Python mufs 2023-10-13 12:29:25 +02:00
40d1dad5d8 Begin CFS implementation 2023-10-11 21:17:26 +02:00
47e2b138c5 Complete first working cfs 2023-10-11 11:33:29 +02:00
e7ded68267 First cfs working version 2023-10-10 23:00:38 +02:00
ca833a34f5 try openssl sha256 2023-10-10 18:16:43 +02:00
df9b4c48d2 Begin CFS initialization 2023-10-10 13:39:11 +02:00
f288bbd6fa Begin adding cfs to BoostAODE 2023-10-10 11:52:39 +02:00
7d8aca4f59 Add Locale shared config to reports 2023-10-09 19:41:29 +02:00
8fdad78a8c Continue Test Network 2023-10-09 11:25:30 +02:00
e3ae073333 Continue test Network 2023-10-08 15:54:58 +02:00
4b732e76c2 MST change unordered_set to list 2023-10-07 19:08:13 +02:00
fe5fead27e Begin Fix Test MST 2023-10-07 01:43:26 +02:00
8c3864f3c8 Complete Folding Test 2023-10-07 01:23:36 +02:00
1287160c47 Refactor makefile to use variables 2023-10-07 00:16:25 +02:00
2f58807322 Begin refactor CMakeLists debug/release paths 2023-10-06 19:32:29 +02:00
17e079edd5 Begin Test Folding 2023-10-06 17:08:54 +02:00
b9e0028e9d Refactor Makefile 2023-10-06 01:28:27 +02:00
e0d39fe631 Fix BayesMetrics Test 2023-10-06 01:14:55 +02:00
36b0277576 Add Maximum Spanning Tree test 2023-10-05 15:45:36 +02:00
da8d018ec4 Refactor Makefile 2023-10-05 11:45:00 +02:00
5f0676691c Add First BayesMetrics Tests 2023-10-05 01:14:16 +02:00
3448fb1299 Refactor Tests and add BayesMetrics test 2023-10-04 23:19:23 +02:00
5e938d5cca Add ranks sheet to excel best results 2023-10-04 16:26:57 +02:00
55e742438f Add constant references to Statistics 2023-10-04 13:40:45 +02:00
c4ae3fe429 Add Control model rank info to report 2023-10-04 12:42:35 +02:00
93e4ff94db Add significance level as parameter in best 2023-10-02 15:46:40 +02:00
57c27f739c Remove unused code in BestResults 2023-10-02 15:31:02 +02:00
a434d7f1ae Add a Linux config in launch.json 2023-09-30 18:44:21 +02:00
294666c516 Fix a Linux problem in Datasets 2023-09-30 18:43:47 +02:00
fd04e78ad9 Restore sample.cc 2023-09-29 18:50:25 +02:00
66ec1b343b Remove platformUtils and split Datasets & Dataset 2023-09-29 18:20:46 +02:00
bb423da42f Add csv and R_dat files to platform 2023-09-29 13:52:50 +02:00
db17c14042 Change names of executables to b_... 2023-09-29 09:17:50 +02:00
a4401cb78f Linux CMakeLists.txt adjustment 2023-09-29 00:30:47 +02:00
9d3d9cc6c6 Complete Excel output for bestResults with Friedman test 2023-09-28 18:52:37 +02:00
cfcf3c16df Add best results Excel 2023-09-28 17:12:04 +02:00
85202260f3 Separate specific Excel methods to ExcelFile 2023-09-28 13:07:11 +02:00
82acb3cab5 Enhance output of Best results reports 2023-09-28 12:08:56 +02:00
623ceed396 Merge pull request 'Add Friedman Test & post hoc tests to BestResults' (#10) from boost into main
Reviewed-on: #10
2023-09-28 07:44:55 +00:00
926de2bebd Add boost info to README 2023-09-28 09:44:33 +02:00
71704e3547 Enhance output info in Statistics 2023-09-28 01:27:18 +02:00
3b06534327 Remove duplicated code in BestResults 2023-09-28 00:59:34 +02:00
ac89a451e3 Duplicate statistics tests in class 2023-09-28 00:45:15 +02:00
00c6cf663b Fix order of output in posthoc 2023-09-27 19:11:47 +02:00
5043c12be8 Complete posthoc with Holm adjust 2023-09-27 18:34:16 +02:00
11320e2cc7 Complete friedman test as in exreport 2023-09-27 12:36:03 +02:00
ce66483b65 Update boost version requirement for Linux 2023-09-26 14:12:53 +02:00
cab8e14b2d Add friedman hyperparameter 2023-09-26 11:26:59 +02:00
f0d0abe891 Add boost library link to linux build 2023-09-26 01:07:50 +02:00
dcba146e12 Begin adding Friedman test to BestResults 2023-09-26 01:04:59 +02:00
3ea0285119 Fix ranks to match friedman test ranks 2023-09-25 18:38:12 +02:00
e3888e1503 Merge pull request 'bestResults' (#9) from bestResults into main
Reviewed-on: https://gitea.rmontanana.es:3000/rmontanana/BayesNet/pulls/9

Add best results management, build, report, build all & report all
2023-09-25 12:02:17 +00:00
06de13df98 Add date/time to header of report best 2023-09-25 10:04:53 +02:00
de4fa6a04f Add color to totals 2023-09-23 10:30:39 +02:00
3a7bf4e672 Fix ranking order mistake 2023-09-23 01:33:23 +02:00
cd0bc02a74 Add report/build all with totals and ranks 2023-09-23 01:14:02 +02:00
c8597a794e Begin report all models 2023-09-22 18:13:32 +02:00
b30416364d Fix mistake in best results file name 2023-09-22 14:14:39 +02:00
3a16589220 Add best config for debug in vscode 2023-09-22 01:04:36 +02:00
c4f9187e2a Complete best build and report 2023-09-22 01:03:55 +02:00
c4d0a5b4e6 Split Result from Results 2023-09-21 23:30:17 +02:00
7bfafe555f Begin BestResults build 2023-09-21 23:04:11 +02:00
337b6f7e79 Rename BestResult to BestScore 2023-09-21 19:30:07 +02:00
5fa0b957dd Fix mistake in idx range in manage 2023-09-20 19:12:07 +02:00
67252fc41d Fix CMakeLists libxlsxwriter for Linux 2023-09-20 19:02:53 +02:00
94ae9456a0 Fix libxslxwriter linking problem 2023-09-20 18:50:11 +02:00
781993e326 Resolve some warnings 2023-09-20 17:54:15 +02:00
8257a6ae39 Add message when Best Results file does not exist 2023-09-20 13:50:34 +02:00
fc81730dfc Merge pull request 'Exchange OpenXLSX to libxlsxwriter' (#8) from libxlsxwriter into main
Add multiple sheets to excel file
Add format and color to sheets
Add comparison with ZeroR
Add comparison with Best Results
Separate contextual menu from general in manage
2023-09-20 11:17:16 +00:00
d8734ff082 Separate contextual menu from general 2023-09-20 13:15:33 +02:00
03533461c8 Add compare to best results in manage 2023-09-20 12:51:19 +02:00
68f22a673d Add comparison to report console 2023-09-20 11:40:01 +02:00
b9bc0088f3 Add format to unique dataset results summary 2023-09-20 10:30:45 +02:00
c280e254ca Remove OpenXLSX submodule 2023-09-20 01:09:58 +02:00
3d0f29fda3 Remove .vscode/settings.json from repository 2023-09-20 01:01:40 +02:00
20a6ebab7c Support adding any number of sheets to excel 2023-09-20 00:58:01 +02:00
925f71166c Fix mistake in comparison 2023-09-19 23:46:49 +02:00
f69f415b92 Complete comparison with ZeroR 2023-09-19 17:55:03 +02:00
1bdfbd1620 Complete adding color to format 2023-09-19 14:07:41 +02:00
06fb135526 First approach 2023-09-18 23:26:22 +02:00
501ea0ab4e Fix CMakeList manage build with Linux 2023-09-18 19:27:40 +02:00
847c6761d7 Add Linux specific link library to cmake 2023-09-17 10:42:19 +02:00
6030885fc3 Add partial result filter to manage 2023-09-16 17:27:18 +02:00
89df7f4db0 Add library to manage link 2023-09-14 01:41:49 +02:00
41257ed566 If ! convergence don't predict test 2023-09-10 19:50:36 +02:00
506369e46b Add Convergence hyperparameter 2023-09-07 11:27:35 +02:00
d908f389f5 Begin using validation as finish condition 2023-09-06 10:51:07 +02:00
5a7c8f1818 Add status to classifier and Experiment 2023-09-05 13:39:43 +02:00
64fc7bd9dd Add show dataset detail in report 2023-09-05 09:26:49 +02:00
0b7beda78c Add threads without limit to network fit 2023-09-04 21:24:11 +02:00
05b670dfc0 Add detail to fold progress in main 2023-09-03 16:33:48 +02:00
de62d42b74 Fix make debug command 2023-09-03 14:13:10 +02:00
edb957d22e Add filter complete results to manage 2023-09-03 14:07:11 +02:00
4de5cb4c6c Merge pull request 'Solve Ensemble models exceptions on certain datasets' (#7) from solveexceptions into main
Reviewed-on: #7
2023-09-02 15:29:33 +00:00
c35030f137 Upgrade models version and Add class diagram 2023-09-02 14:39:43 +02:00
182b07ed90 Solve voting vector error 2023-09-02 13:58:12 +02:00
7806f961e2 Remove threads 2023-08-31 20:30:28 +02:00
7c3e315ae7 Add Linux specific options to compile 2023-08-29 18:20:55 +02:00
284ef6dfd1 Add significanceModels to AODELd 2023-08-24 12:58:53 +02:00
1c6af619b5 Exception if hyperparameters not valid 2023-08-24 12:09:35 +02:00
86ffdfd6f3 Add const feature and className to fit models 2023-08-23 23:15:39 +02:00
d82148079d Add KDB hyperparameters K and theta 2023-08-23 00:44:10 +02:00
067430fd1b Add xlsxopen submodule 2023-08-22 23:45:11 +02:00
f5d0d16365 Merge pull request 'Add excel report to manage results' (#6) from xlsx into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/6
2023-08-22 21:40:11 +00:00
97ca8ac084 Move check valid hyperparameters to Classifier 2023-08-22 22:12:20 +02:00
1c1385b768 Fix maxModels mistake in BoostAODE if !repeatSp
Throw exception if wrong hyperparameter is supplied
2023-08-22 21:55:17 +02:00
35432b6294 Fix time std was not saved in experiment 2023-08-22 12:30:27 +02:00
c59dd30e53 Complete Excel Report with data 2023-08-22 11:55:15 +02:00
d2da0ddb88 Create ReportExcel eq to ReportConsole 2023-08-21 17:51:49 +02:00
8066701c3c Refactor Report class into ReportBase & ReportCons 2023-08-21 17:16:29 +02:00
0f66ac73d0 Revert "Refactor Report into ReportBase & ReportConsole"
This reverts commit 4370bf51d7.
2023-08-21 17:15:14 +02:00
4370bf51d7 Refactor Report into ReportBase & ReportConsole 2023-08-21 17:14:23 +02:00
2b7353b9e0 Add default sorting by date in manage 2023-08-21 16:30:10 +02:00
b686b3c9c3 Enhance copy in Makefile 2023-08-21 12:18:23 +02:00
2dd04a6c44 enhance saving results and add Makefile copy 2023-08-21 11:57:45 +02:00
1da83662d0 Always save results 2023-08-21 10:55:20 +02:00
3ac9593c65 Fix mistake in sample 2023-08-20 20:36:46 +02:00
6b317accf1 Add hyperparameters and processing order to Boost 2023-08-20 20:31:23 +02:00
4964aab722 Add hyperparameters management in experiments 2023-08-20 17:57:38 +02:00
7a6ec73d63 Merge pull request 'boostAode' (#5) from boostAode into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/5
Implement boostAODE
add list datasets
add manage results
2023-08-20 09:02:07 +00:00
1a534888d6 Fix report format 2023-08-19 23:30:44 +02:00
59ffd179f4 Fix report format 2023-08-19 21:26:48 +02:00
9972738deb Add list datasets and add locale format 2023-08-19 19:05:16 +02:00
bafcb26bb6 Add manage to build target 2023-08-18 13:43:53 +02:00
2d7999d5f2 Add manage to release targets 2023-08-18 13:43:13 +02:00
a6bb22dfb5 Complete first BoostAODE 2023-08-18 11:50:34 +02:00
704dc937be Remove FeatureSel, add SelectKBest to BayesMetrics 2023-08-16 19:05:18 +02:00
a3e665eed6 make weights double 2023-08-16 12:46:09 +02:00
918a7b4180 Remove unneeded output 2023-08-16 12:36:38 +02:00
80b20f35b4 Fix weights mistakes in computation 2023-08-16 12:32:51 +02:00
4d4780c1d5 Add BoostAODE model based on AODE 2023-08-15 16:16:04 +02:00
fa612c531e Complete Adding weights to Models 2023-08-15 15:59:56 +02:00
24b68f9ae2 Add weigths as parameter 2023-08-15 15:04:56 +02:00
a062ebf445 Merge pull request 'reports' (#4) from reports into boostAode
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/4
2023-08-14 16:58:48 +00:00
2a3fc9aa45 Add colors and enhance input control 2023-08-14 17:03:06 +02:00
55d21294d5 Add class Paths and enhance input 2023-08-14 00:40:31 +02:00
3691cb4a61 Add totals and filter by scoreName and model 2023-08-13 18:13:00 +02:00
054567c65a Add sorting capacity 2023-08-13 17:10:18 +02:00
2729b92f06 Summary list 2023-08-13 16:19:17 +02:00
f26ea1f0ac Add weights to BayesMetrics 2023-08-13 12:56:06 +02:00
af0419c9da First approx with const 1 weights 2023-08-13 00:59:02 +02:00
90c92e5c56 Merge pull request 'Add states as result in Proposal methods' (#3) from optimize_memory into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/3
2023-08-12 14:16:55 +00:00
182b52a887 Add states as result in Proposal methods 2023-08-12 16:16:17 +02:00
6679b90a82 Merge pull request 'optimize_memory' (#2) from optimize_memory into main
Reviewed-on: https://gitea.rmontanana.es:11000/rmontanana/BayesNet/pulls/2
2023-08-12 14:15:03 +00:00
405887f833 Solved Ld poor results 2023-08-12 11:49:18 +02:00
3a85481a5a Redo pass states to Network Fit needed in crossval
Fix mistake in header line (report)
2023-08-12 11:10:53 +02:00
0ad5505c16 Spodeld working with poor accuracy 2023-08-10 02:06:18 +02:00
323444b74a const functions 2023-08-08 01:53:41 +02:00
ef1bffcac3 Fixed normal classifiers 2023-08-07 13:50:11 +02:00
06db8f51ce Refactor library and models to lighten data stored
Refactor Ensemble to inherit from Classifier instead of BaseClassifier
2023-08-07 12:49:37 +02:00
e74565ba01 update clang-tidy 2023-08-07 00:44:12 +02:00
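
As noted above for PR #15, a usage sketch of the producer-consumer MPI grid search. This is an assumption pieced together from the commit log and the b_grid flags visible in the .vscode/launch.json diff below; the binary path, process count, and flag combination are illustrative, not confirmed.

```bash
# Hypothetical MPI run of the grid search; -m, --discretize and --compute appear
# in .vscode/launch.json below, while the path and -np 8 are illustrative.
mpirun -np 8 build_release/src/Platform/b_grid -m KDB --discretize --compute
```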
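Likewise for PR #11, a minimal sketch of enabling feature selection in BoostAODE, assuming b_main accepts the same --hyperparameters JSON string shown in the .vscode/launch.json diff; the select_features key and the cfs value come from the PR description above.

```bash
# Hypothetical run with CFS feature selection; the --hyperparameters JSON form
# matches .vscode/launch.json, the select_features key comes from PR #11.
build_debug/src/Platform/b_main -m BoostAODE --stratified -d iris \
  --hyperparameters '{"select_features": "cfs"}'
```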
106 changed files with 2216 additions and 4927 deletions

.clang-tidy

@@ -13,5 +13,4 @@ HeaderFilterRegex: 'src/*'
AnalyzeTemporaryDtors: false
WarningsAsErrors: ''
FormatStyle: file
FormatStyleOptions: ''
...

.gitignore vendored (5 changed lines)

@@ -31,7 +31,10 @@
*.exe
*.out
*.app
build/
build/**
build_*/**
*.dSYM/**
cmake-build*/**
.idea
puml/**
.vscode/settings.json

.gitmodules vendored (12 changed lines)

@@ -1,12 +1,18 @@
[submodule "lib/mdlp"]
path = lib/mdlp
url = https://github.com/rmontanana/mdlp
main = main
update = merge
[submodule "lib/catch2"]
path = lib/catch2
main = v2.x
update = merge
url = https://github.com/catchorg/Catch2.git
[submodule "lib/argparse"]
path = lib/argparse
url = https://github.com/p-ranav/argparse
[submodule "lib/json"]
path = lib/json
url = https://github.com/nlohmann/json.git
master = master
update = merge
[submodule "lib/folding"]
path = lib/folding
url = https://github.com/rmontanana/folding
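
For reference, the standard git commands to fetch the submodules declared above (the project's CMake also pulls them at configure time via add_git_submodule, per the CMakeLists.txt diff below); the clone URL is the HOMEPAGE_URL from CMakeLists.txt:

```bash
# Fetch mdlp, catch2, argparse, json and folding in one go
git clone --recurse-submodules https://github.com/rmontanana/bayesnet
# or, inside an existing checkout:
git submodule update --init --recursive
```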

.vscode/c_cpp_properties.json vendored, new file (18 lines)

@@ -0,0 +1,18 @@
{
"configurations": [
{
"name": "Mac",
"includePath": [
"${workspaceFolder}/**"
],
"defines": [],
"macFrameworkPath": [
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks"
],
"cStandard": "c17",
"cppStandard": "c++17",
"compileCommands": "${workspaceFolder}/cmake-build-release/compile_commands.json"
}
],
"version": 4
}

.vscode/launch.json vendored (99 changed lines)

@@ -5,12 +5,12 @@
"type": "lldb",
"request": "launch",
"name": "sample",
"program": "${workspaceFolder}/build/sample/BayesNetSample",
"program": "${workspaceFolder}/build_debug/sample/BayesNetSample",
"args": [
"-d",
"iris",
"-m",
"KDB",
"TANLd",
"-s",
"271",
"-p",
@@ -21,24 +21,103 @@
{
"type": "lldb",
"request": "launch",
"name": "experiment",
"program": "${workspaceFolder}/build/src/Platform/main",
"name": "experimentPy",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"AODELd",
"-p",
"/Users/rmontanana/Code/discretizbench/datasets",
"STree",
"--stratified",
"-d",
"iris"
"iris",
//"--discretize"
// "--hyperparameters",
// "{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "gridsearch",
"program": "${workspaceFolder}/build_debug/src/Platform/b_grid",
"args": [
"-m",
"KDB",
"--discretize",
"--continue",
"glass",
"--only",
"--compute"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "experimentBayes",
"program": "${workspaceFolder}/build_debug/src/Platform/b_main",
"args": [
"-m",
"TAN",
"--stratified",
"--discretize",
"-d",
"iris",
"--hyperparameters",
"{\"repeatSparent\": true, \"maxModels\": 12}"
],
"cwd": "/home/rmontanana/Code/discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "best",
"program": "${workspaceFolder}/build_debug/src/Platform/b_best",
"args": [
"-m",
"BoostAODE",
"-s",
"accuracy",
"--build",
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "manage",
"program": "${workspaceFolder}/build_debug/src/Platform/b_manage",
"args": [
"-n",
"20"
],
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "list",
"program": "${workspaceFolder}/build_debug/src/Platform/b_list",
"args": [],
//"cwd": "/Users/rmontanana/Code/discretizbench",
"cwd": "${workspaceFolder}/../discretizbench",
},
{
"type": "lldb",
"request": "launch",
"name": "test",
"program": "${workspaceFolder}/build_debug/tests/unit_tests",
"args": [
"-c=\"Metrics Test\"",
// "-s",
],
"cwd": "${workspaceFolder}/build/tests",
},
{
"name": "Build & debug active file",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/bayesnet",
"program": "${workspaceFolder}/build_debug/bayesnet",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",

.vscode/settings.json vendored, deleted (109 lines)

@@ -1,109 +0,0 @@
{
"files.associations": {
"*.rmd": "markdown",
"*.py": "python",
"vector": "cpp",
"__bit_reference": "cpp",
"__bits": "cpp",
"__config": "cpp",
"__debug": "cpp",
"__errc": "cpp",
"__hash_table": "cpp",
"__locale": "cpp",
"__mutex_base": "cpp",
"__node_handle": "cpp",
"__nullptr": "cpp",
"__split_buffer": "cpp",
"__string": "cpp",
"__threading_support": "cpp",
"__tuple": "cpp",
"array": "cpp",
"atomic": "cpp",
"bitset": "cpp",
"cctype": "cpp",
"chrono": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"compare": "cpp",
"complex": "cpp",
"concepts": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"exception": "cpp",
"initializer_list": "cpp",
"ios": "cpp",
"iosfwd": "cpp",
"istream": "cpp",
"limits": "cpp",
"locale": "cpp",
"memory": "cpp",
"mutex": "cpp",
"new": "cpp",
"optional": "cpp",
"ostream": "cpp",
"ratio": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"string": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"typeinfo": "cpp",
"unordered_map": "cpp",
"variant": "cpp",
"algorithm": "cpp",
"iostream": "cpp",
"iomanip": "cpp",
"numeric": "cpp",
"set": "cpp",
"__tree": "cpp",
"deque": "cpp",
"list": "cpp",
"map": "cpp",
"unordered_set": "cpp",
"any": "cpp",
"condition_variable": "cpp",
"forward_list": "cpp",
"fstream": "cpp",
"stack": "cpp",
"thread": "cpp",
"__memory": "cpp",
"filesystem": "cpp",
"*.toml": "toml",
"utility": "cpp",
"__verbose_abort": "cpp",
"bit": "cpp",
"random": "cpp",
"*.tcc": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory_resource": "cpp",
"format": "cpp",
"valarray": "cpp",
"regex": "cpp",
"span": "cpp",
"cfenv": "cpp",
"cinttypes": "cpp",
"csetjmp": "cpp",
"future": "cpp",
"queue": "cpp",
"typeindex": "cpp",
"shared_mutex": "cpp",
"*.ipp": "cpp",
"cassert": "cpp",
"charconv": "cpp",
"source_location": "cpp",
"ranges": "cpp"
},
"cmake.configureOnOpen": false,
"C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools"
}

.vscode/tasks.json vendored (23 changed lines)

@@ -32,6 +32,29 @@
],
"group": "build",
"detail": "Task generated by Debugger."
},
{
"type": "cppbuild",
"label": "C/C++: g++ build active file",
"command": "/usr/bin/g++",
"args": [
"-fdiagnostics-color=always",
"-g",
"${file}",
"-o",
"${fileDirname}/${fileBasenameNoExtension}"
],
"options": {
"cwd": "${fileDirname}"
},
"problemMatcher": [
"$gcc"
],
"group": {
"kind": "build",
"isDefault": true
},
"detail": "Task generated by Debugger."
}
]
}

CHANGELOG.md, new file (23 lines)

@@ -0,0 +1,23 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.0.1] - 2024-02-12
### Added
- Notes in Classifier class
- BoostAODE: Add note with used features in initialization with feature selection
- BoostAODE: Add note with the number of models
- BoostAODE: Add note with the number of features used to create models if not all features are used
- Test version number in TestBayesModels
- Add tests with feature_select and notes on BoostAODE
### Fixed
- Network predict test
- Network predict_proba test
- Network score test

CMakeLists.txt

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.20)
project(BayesNet
VERSION 0.1.0
VERSION 1.0.1
DESCRIPTION "Bayesian Network and basic classifiers Library."
HOMEPAGE_URL "https://github.com/rmontanana/bayesnet"
LANGUAGES CXX
@@ -24,7 +24,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
# Options
# -------
option(ENABLE_CLANG_TIDY "Enable to add clang tidy." OFF)
@@ -34,14 +34,13 @@ option(CODE_COVERAGE "Collect coverage from test library" OFF)
# CMakes modules
# --------------
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_PATH})
include(AddGitSubmodule)
if (CODE_COVERAGE)
enable_testing()
include(CodeCoverage)
MESSAGE("Code coverage enabled")
set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0 -g")
SET(GCC_COVERAGE_LINK_FLAGS " ${GCC_COVERAGE_LINK_FLAGS} -lgcov --coverage")
endif (CODE_COVERAGE)
@@ -53,7 +52,6 @@ endif (ENABLE_CLANG_TIDY)
# ---------------------------------------------
# include(FetchContent)
add_git_submodule("lib/mdlp")
add_git_submodule("lib/argparse")
add_git_submodule("lib/json")
# Subdirectories
@@ -61,12 +59,9 @@ add_git_submodule("lib/json")
add_subdirectory(config)
add_subdirectory(lib/Files)
add_subdirectory(src/BayesNet)
add_subdirectory(src/Platform)
add_subdirectory(sample)
file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.hpp)
file(GLOB BayesNet_HEADERS CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.h ${BayesNet_SOURCE_DIR}/BayesNet/*.h)
file(GLOB BayesNet_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cc ${BayesNet_SOURCE_DIR}/src/BayesNet/*.cpp)
file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform/*.cc ${BayesNet_SOURCE_DIR}/src/Platform/*.cpp)
# Testing
# -------
@@ -74,7 +69,6 @@ file(GLOB Platform_SOURCES CONFIGURE_DEPENDS ${BayesNet_SOURCE_DIR}/src/Platform
if (ENABLE_TESTING)
MESSAGE("Testing enabled")
add_git_submodule("lib/catch2")
include(CTest)
add_subdirectory(tests)
endif (ENABLE_TESTING)

Makefile

@@ -1,6 +1,26 @@
SHELL := /bin/bash
.DEFAULT_GOAL := help
.PHONY: coverage setup help build test
.PHONY: coverage setup help buildr buildd test clean debug release
f_release = build_release
f_debug = build_debug
app_targets = BayesNet
test_targets = unit_tests_bayesnet
n_procs = -j 16
define ClearTests
@for t in $(test_targets); do \
if [ -f $(f_debug)/tests/$$t ]; then \
echo ">>> Cleaning $$t..." ; \
rm -f $(f_debug)/tests/$$t ; \
fi ; \
done
@nfiles="$(find . -name "*.gcda" -print0)" ; \
if test "${nfiles}" != "" ; then \
find . -name "*.gcda" -print0 | xargs -0 rm 2>/dev/null ;\
fi ;
endef
setup: ## Install dependencies for tests and coverage
@if [ "$(shell uname)" = "Darwin" ]; then \
@@ -12,48 +32,54 @@ setup: ## Install dependencies for tests and coverage
fi
dependency: ## Create a dependency graph diagram of the project (build/dependency.png)
cd build && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
@echo ">>> Creating dependency graph diagram of the project...";
$(MAKE) debug
cd $(f_debug) && cmake .. --graphviz=dependency.dot && dot -Tpng dependency.dot -o dependency.png
build: ## Build the main and BayesNetSample
cmake --build build -t main -t BayesNetSample -j 32
buildd: ## Build the debug targets
cmake --build $(f_debug) -t $(app_targets) $(n_procs)
clean: ## Clean the debug info
@echo ">>> Cleaning Debug BayesNet ...";
find . -name "*.gcda" -print0 | xargs -0 rm
buildr: ## Build the release targets
cmake --build $(f_release) -t $(app_targets) $(n_procs)
clean: ## Clean the tests info
@echo ">>> Cleaning Debug BayesNet tests...";
$(call ClearTests)
@echo ">>> Done";
debug: ## Build a debug version of the project
@echo ">>> Building Debug BayesNet...";
@if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON; \
cmake --build build -j 32;
@if [ -d ./$(f_debug) ]; then rm -rf ./$(f_debug); fi
@mkdir $(f_debug);
@cmake -S . -B $(f_debug) -D CMAKE_BUILD_TYPE=Debug -D ENABLE_TESTING=ON -D CODE_COVERAGE=ON
@echo ">>> Done";
release: ## Build a Release version of the project
@echo ">>> Building Release BayesNet...";
@if [ -d ./build ]; then rm -rf ./build; fi
@mkdir build;
cmake -S . -B build -D CMAKE_BUILD_TYPE=Release; \
cmake --build build -t main -t BayesNetSample -j 32;
@if [ -d ./$(f_release) ]; then rm -rf ./$(f_release); fi
@mkdir $(f_release);
@cmake -S . -B $(f_release) -D CMAKE_BUILD_TYPE=Release
@echo ">>> Done";
test: ## Run tests
@echo "* Running tests...";
find . -name "*.gcda" -print0 | xargs -0 rm
@cd build; \
cmake --build . --target unit_tests ;
@cd build/tests; \
./unit_tests;
opt = ""
test: ## Run tests (opt="-s") to verbose output the tests, (opt="-c='Test Maximum Spanning Tree'") to run only that section
@echo ">>> Running BayesNet & Platform tests...";
@$(MAKE) clean
@cmake --build $(f_debug) -t $(test_targets) $(n_procs)
@for t in $(test_targets); do \
if [ -f $(f_debug)/tests/$$t ]; then \
cd $(f_debug)/tests ; \
./$$t $(opt) ; \
fi ; \
done
@echo ">>> Done";
coverage: ## Run tests and generate coverage report (build/index.html)
@echo "*Building tests...";
find . -name "*.gcda" -print0 | xargs -0 rm
@cd build; \
cmake --build . --target unit_tests ;
@cd build/tests; \
./unit_tests;
gcovr ;
@echo ">>> Building tests with coverage..."
@$(MAKE) test
@gcovr $(f_debug)/tests
@echo ">>> Done";
help: ## Show help message
@IFS=$$'\n' ; \
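
Taken together, a typical cycle with the new targets looks like this; the opt forms are the ones documented in the test target above.

```bash
make debug      # configure a Debug tree in build_debug (testing and coverage on)
make buildd     # build the debug targets
make test opt="-s"                                # verbose test output
make test opt="-c='Test Maximum Spanning Tree'"   # run a single test section
make coverage   # run the tests and produce a gcovr report
```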

README.md

@@ -1,5 +1,22 @@
# BayesNet
Bayesian Network Classifier with libtorch from scratch
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
Bayesian Network Classifiers using libtorch from scratch
### Release
```bash
make release
make buildr
```
### Debug & Tests
```bash
make debug
make test
make coverage
```
## 1. Introduction

(deleted Graphviz .dot file: iris BayesNet graph)

@@ -1,12 +0,0 @@
digraph BayesNet {
label=<BayesNet >
fontsize=30
fontcolor=blue
labelloc=t
layout=circo
class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ]
class -> sepallength class -> sepalwidth class -> petallength class -> petalwidth petallength [shape=circle]
petallength -> sepallength petalwidth [shape=circle]
sepallength [shape=circle]
sepallength -> sepalwidth sepalwidth [shape=circle]
sepalwidth -> petalwidth }

(project config header template)

@@ -11,3 +11,4 @@ static constexpr std::string_view project_name = " @PROJECT_NAME@ ";
static constexpr std::string_view project_version = "@PROJECT_VERSION@";
static constexpr std::string_view project_description = "@PROJECT_DESCRIPTION@";
static constexpr std::string_view git_sha = "@GIT_SHA@";
static constexpr std::string_view data_path = "@BayesNet_SOURCE_DIR@/tests/data/";

(deleted one-line file)

@@ -1 +0,0 @@
null

(deleted graph file: glass dataset edge list)

@@ -1,25 +0,0 @@
Type Si
Type Fe
Type RI
Type Na
Type Ba
Type Ca
Type Al
Type K
Type Mg
Fe RI
Fe Ba
Fe Ca
RI Na
RI Ba
RI Ca
RI Al
RI K
RI Mg
Ba Ca
Ba Al
Ca Al
Ca K
Ca Mg
Al K
K Mg

@@ -1,645 +0,0 @@
(deleted 645-line graph file: per-attribute class declarations followed by attribute edge pairs over att1..att216)
@@ -1,859 +0,0 @@
(deleted 859-line graph file in the same class/edge-list format over att1..att216)
@@ -1,859 +0,0 @@
(second deleted 859-line graph file, identical in format to the previous one)
att160 att124
att23 att203
att23 att107
att23 att71
att23 att11
att162 att138
att162 att18
att162 att150
att162 att90
att162 att102
att162 att174
att162 att66
att203 att107
att203 att49
att203 att59
att203 att47
att203 att191
att203 att119
att164 att62
att164 att42
att164 att128
att164 att171
att164 att92
att164 att163
att164 att158
att164 att176
att164 att145
att164 att172
att164 att58
att164 att68
att164 att80
att164 att32
att164 att98
att164 att156
att164 att110
att164 att205
att164 att21
att164 att134
att164 att213
att164 att112
att164 att38
att164 att189
att164 att56
att164 att44
att164 att152
att164 att8
att107 att83
att107 att49
att107 att59
att107 att47
att107 att191
att42 att138
att42 att54
att42 att114
att71 att83
att71 att167
att71 att35
att71 att179
att128 att92
att128 att112
att138 att18
att138 att150
att83 att167
att83 att35
att171 att87
att171 att159
att171 att63
att171 att51
att171 att39
att171 att75
att92 att163
att92 att145
att92 att56
att163 att49
att163 att175
att163 att87
att163 att79
att163 att151
att163 att139
att163 att187
att163 att127
att163 att103
att163 att91
att49 att37
att161 att173
att161 att113
att176 att145
att176 att172
att176 att68
att176 att80
att176 att32
att176 att175
att176 att98
att176 att110
att176 att205
att176 att21
att176 att134
att176 att213
att176 att56
att4 att196
att4 att88
att4 att136
att4 att100
att4 att148
att4 att208
att172 att112
att172 att184
att196 att88
att196 att136
att196 att100
att196 att148
att196 att208
att58 att142
att58 att46
att58 att34
att68 att32
att80 att38
att32 att110
att32 att21
att32 att44
att32 att200
att175 att87
att175 att159
att175 att79
att175 att187
att175 att115
att87 att159
att87 att63
att87 att51
att87 att75
att87 att15
att87 att99
att159 att75
att159 att15
att159 att195
att18 att90
att18 att102
att18 att78
att18 att198
att52 att124
att52 att64
att98 att86
att136 att100
att136 att208
att150 att90
att150 att174
att150 att66
att156 att205
att156 att96
att156 att216
att156 att204
att156 att24
att156 att84
att156 att36
att156 att12
att156 att108
att100 att148
att63 att51
att63 att39
att63 att3
att63 att183
att63 att147
att90 att102
att90 att78
att167 att35
att167 att179
att35 att179
att51 att39
att51 att3
att51 att183
att21 att134
att21 att213
att21 att38
att21 att189
att21 att129
att21 att81
att21 att153
att21 att117
att21 att9
att142 att46
att142 att130
att142 att118
att142 att70
att142 att10
att142 att202
att142 att190
att142 att106
att46 att130
att46 att118
att46 att70
att46 att34
att46 att166
att46 att82
att134 att2
att39 att3
att102 att78
att102 att174
att102 att54
att102 att198
att130 att118
att130 att10
att130 att202
att130 att190
att130 att106
att149 att125
att96 att216
att96 att204
att96 att24
att75 att15
att75 att99
att118 att70
att118 att10
att118 att202
att78 att198
att213 att189
att213 att129
att213 att69
att213 att81
att38 att50
att38 att26
att174 att54
att174 att66
att174 att30
att189 att86
att189 att129
att189 att69
att189 att81
att189 att153
att189 att117
att189 att9
att189 att45
att189 att141
att189 att105
att70 att34
att70 att154
att179 att59
att59 att47
att59 att191
att59 att119
att79 att86
att79 att151
att79 att139
att79 att187
att79 att127
att79 att103
att79 att43
att79 att193
att79 att91
att79 att19
att124 att64
att54 att114
att54 att30
att54 att6
att191 att119
att86 att194
att56 att44
att56 att152
att56 att50
att56 att188
att56 att26
att56 att200
att56 att104
att56 att140
att56 att146
att56 att194
att56 att8
att56 att2
att56 att133
att56 att1
att151 att139
att66 att30
att173 att125
att173 att113
att173 att185
att44 att152
att44 att50
att44 att188
att44 att200
att44 att104
att44 att140
att44 att194
att44 att212
att44 att1
att139 att26
att139 att99
att139 att103
att139 att43
att139 att91
att139 att31
att139 att199
att139 att7
att216 att204
att216 att24
att216 att84
att216 att36
att216 att12
att216 att180
att216 att108
att129 att69
att152 att188
att152 att140
att69 att153
att69 att9
att69 att177
att81 att45
att81 att105
att153 att117
att153 att141
att41 att53
att204 att12
att204 att180
att188 att146
att188 att212
att13 att157
att114 att6
att114 att186
att10 att190
att64 att184
att200 att104
att9 att45
att9 att146
att9 att141
att9 att177
att9 att37
att9 att133
att9 att109
att9 att181
att3 att183
att3 att147
att3 att123
att3 att135
att3 att111
att45 att105
att45 att177
att45 att93
att45 att201
att45 att165
att45 att193
att45 att33
att45 att37
att45 att133
att45 att97
att140 att8
att30 att6
att30 att186
att183 att147
att183 att123
att183 att135
att146 att2
att202 att166
att202 att106
att202 att82
att24 att84
att24 att36
att24 att132
att147 att123
att147 att135
att147 att111
att147 att207
att8 att212
att166 att82
att166 att22
att166 att94
att187 att127
att187 att115
att127 att115
att105 att184
att105 att93
att105 att201
att106 att154
att82 att154
att82 att22
att135 att111
att135 att207
att154 att22
att154 att94
att111 att207
att99 att195
att22 att94
att84 att48
att177 att93
att177 att165
att177 att181
att103 att195
att103 att97
att103 att109
att93 att201
att93 att165
att93 att193
att93 att33
att93 att57
att201 att33
att201 att57
att43 att31
att36 att180
att36 att48
att36 att72
att36 att132
att36 att144
att125 att113
att125 att185
att125 att65
att125 att29
att180 att48
att180 att72
att180 att192
att180 att108
att48 att72
att6 att186
att113 att185
att113 att53
att113 att65
att193 att97
att91 att31
att91 att199
att91 att19
att72 att132
att72 att144
att72 att192
att72 att120
att31 att199
att31 att7
att31 att67
att31 att55
att31 att1
att132 att144
att132 att120
att33 att57
att144 att192
att144 att120
att185 att53
att185 att65
att185 att29
att199 att19
att199 att7
att199 att67
att199 att55
att199 att109
att65 att29
att7 att67
att67 att55
att109 att181

BIN diagrams/BayesNet.pdf Executable file (binary file not shown)


@@ -1,4 +1,4 @@
filter = src/
exclude-directories = build/lib/
exclude-directories = build_debug/lib/
print-summary = yes
sort-percentage = yes


@@ -4,11 +4,9 @@
#include <map>
#include <iostream>
using namespace std;
ArffFiles::ArffFiles() = default;
vector<string> ArffFiles::getLines() const
std::vector<std::string> ArffFiles::getLines() const
{
return lines;
}
@@ -18,48 +16,48 @@ unsigned long int ArffFiles::getSize() const
return lines.size();
}
vector<pair<string, string>> ArffFiles::getAttributes() const
std::vector<std::pair<std::string, std::string>> ArffFiles::getAttributes() const
{
return attributes;
}
string ArffFiles::getClassName() const
std::string ArffFiles::getClassName() const
{
return className;
}
string ArffFiles::getClassType() const
std::string ArffFiles::getClassType() const
{
return classType;
}
vector<vector<float>>& ArffFiles::getX()
std::vector<std::vector<float>>& ArffFiles::getX()
{
return X;
}
vector<int>& ArffFiles::getY()
std::vector<int>& ArffFiles::getY()
{
return y;
}
void ArffFiles::loadCommon(string fileName)
void ArffFiles::loadCommon(std::string fileName)
{
ifstream file(fileName);
std::ifstream file(fileName);
if (!file.is_open()) {
throw invalid_argument("Unable to open file");
throw std::invalid_argument("Unable to open file");
}
string line;
string keyword;
string attribute;
string type;
string type_w;
std::string line;
std::string keyword;
std::string attribute;
std::string type;
std::string type_w;
while (getline(file, line)) {
if (line.empty() || line[0] == '%' || line == "\r" || line == " ") {
continue;
}
if (line.find("@attribute") != string::npos || line.find("@ATTRIBUTE") != string::npos) {
stringstream ss(line);
if (line.find("@attribute") != std::string::npos || line.find("@ATTRIBUTE") != std::string::npos) {
std::stringstream ss(line);
ss >> keyword >> attribute;
type = "";
while (ss >> type_w)
@@ -74,35 +72,35 @@ void ArffFiles::loadCommon(string fileName)
}
file.close();
if (attributes.empty())
throw invalid_argument("No attributes found");
throw std::invalid_argument("No attributes found");
}
void ArffFiles::load(const string& fileName, bool classLast)
void ArffFiles::load(const std::string& fileName, bool classLast)
{
int labelIndex;
loadCommon(fileName);
if (classLast) {
className = get<0>(attributes.back());
classType = get<1>(attributes.back());
className = std::get<0>(attributes.back());
classType = std::get<1>(attributes.back());
attributes.pop_back();
labelIndex = static_cast<int>(attributes.size());
} else {
className = get<0>(attributes.front());
classType = get<1>(attributes.front());
className = std::get<0>(attributes.front());
classType = std::get<1>(attributes.front());
attributes.erase(attributes.begin());
labelIndex = 0;
}
generateDataset(labelIndex);
}
void ArffFiles::load(const string& fileName, const string& name)
void ArffFiles::load(const std::string& fileName, const std::string& name)
{
int labelIndex;
loadCommon(fileName);
bool found = false;
for (int i = 0; i < attributes.size(); ++i) {
if (attributes[i].first == name) {
className = get<0>(attributes[i]);
classType = get<1>(attributes[i]);
className = std::get<0>(attributes[i]);
classType = std::get<1>(attributes[i]);
attributes.erase(attributes.begin() + i);
labelIndex = i;
found = true;
@@ -110,19 +108,19 @@ void ArffFiles::load(const string& fileName, const string& name)
}
}
if (!found) {
throw invalid_argument("Class name not found");
throw std::invalid_argument("Class name not found");
}
generateDataset(labelIndex);
}
void ArffFiles::generateDataset(int labelIndex)
{
X = vector<vector<float>>(attributes.size(), vector<float>(lines.size()));
auto yy = vector<string>(lines.size(), "");
auto removeLines = vector<int>(); // Lines with missing values
X = std::vector<std::vector<float>>(attributes.size(), std::vector<float>(lines.size()));
auto yy = std::vector<std::string>(lines.size(), "");
auto removeLines = std::vector<int>(); // Lines with missing values
for (size_t i = 0; i < lines.size(); i++) {
stringstream ss(lines[i]);
string value;
std::stringstream ss(lines[i]);
std::string value;
int pos = 0;
int xIndex = 0;
while (getline(ss, value, ',')) {
@@ -146,21 +144,21 @@ void ArffFiles::generateDataset(int labelIndex)
y = factorize(yy);
}
string ArffFiles::trim(const string& source)
std::string ArffFiles::trim(const std::string& source)
{
string s(source);
std::string s(source);
s.erase(0, s.find_first_not_of(" '\n\r\t"));
s.erase(s.find_last_not_of(" '\n\r\t") + 1);
return s;
}
vector<int> ArffFiles::factorize(const vector<string>& labels_t)
std::vector<int> ArffFiles::factorize(const std::vector<std::string>& labels_t)
{
vector<int> yy;
std::vector<int> yy;
yy.reserve(labels_t.size());
map<string, int> labelMap;
std::map<std::string, int> labelMap;
int i = 0;
for (const string& label : labels_t) {
for (const std::string& label : labels_t) {
if (labelMap.find(label) == labelMap.end()) {
labelMap[label] = i++;
}

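Note: the factorize change above only switches to explicit std:: qualification; the behaviour is unchanged. As a standalone sketch (not part of the diff), the encoding it performs assigns consecutive integer codes to labels in order of first appearance:

#include <map>
#include <string>
#include <vector>

// Sketch of ArffFiles::factorize's encoding: each distinct label gets
// the next integer code the first time it is seen.
std::vector<int> factorize_sketch(const std::vector<std::string>& labels)
{
    std::map<std::string, int> codes;
    std::vector<int> result;
    result.reserve(labels.size());
    for (const auto& label : labels) {
        // insert is a no-op if the label already has a code
        auto [it, inserted] = codes.insert({ label, static_cast<int>(codes.size()) });
        result.push_back(it->second);
    }
    return result;
}

// factorize_sketch({"b", "a", "b", "c"}) yields {0, 1, 0, 2}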

@@ -4,31 +4,29 @@
#include <string>
#include <vector>
using namespace std;
class ArffFiles {
private:
vector<string> lines;
vector<pair<string, string>> attributes;
string className;
string classType;
vector<vector<float>> X;
vector<int> y;
std::vector<std::string> lines;
std::vector<std::pair<std::string, std::string>> attributes;
std::string className;
std::string classType;
std::vector<std::vector<float>> X;
std::vector<int> y;
void generateDataset(int);
void loadCommon(string);
void loadCommon(std::string);
public:
ArffFiles();
void load(const string&, bool = true);
void load(const string&, const string&);
vector<string> getLines() const;
void load(const std::string&, bool = true);
void load(const std::string&, const std::string&);
std::vector<std::string> getLines() const;
unsigned long int getSize() const;
string getClassName() const;
string getClassType() const;
static string trim(const string&);
vector<vector<float>>& getX();
vector<int>& getY();
vector<pair<string, string>> getAttributes() const;
static vector<int> factorize(const vector<string>& labels_t);
std::string getClassName() const;
std::string getClassType() const;
static std::string trim(const std::string&);
std::vector<std::vector<float>>& getX();
std::vector<int>& getY();
std::vector<std::pair<std::string, std::string>> getAttributes() const;
static std::vector<int> factorize(const std::vector<std::string>& labels_t);
};
#endif


@@ -1,2 +1 @@
add_library(ArffFiles ArffFiles.cc)
#target_link_libraries(BayesNet "${TORCH_LIBRARIES}")

Submodule lib/argparse deleted from b0930ab028

lib/folding Submodule

Submodule lib/folding added at 37316a54e0


@@ -1,7 +0,0 @@
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
add_executable(BayesNetSample sample.cc ${BayesNet_SOURCE_DIR}/src/Platform/Folding.cc ${BayesNet_SOURCE_DIR}/src/Platform/Models.cc)
target_link_libraries(BayesNetSample BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")


@@ -1,236 +0,0 @@
#include <iostream>
#include <torch/torch.h>
#include <string>
#include <map>
#include <argparse/argparse.hpp>
#include "ArffFiles.h"
#include "BayesMetrics.h"
#include "CPPFImdlp.h"
#include "Folding.h"
#include "Models.h"
#include "modelRegister.h"
using namespace std;
const string PATH = "../../data/";
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features)
{
vector<mdlp::labels_t>Xd;
map<string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
Xd.push_back(xd);
}
return { Xd, maxes };
}
bool file_exists(const std::string& name)
{
if (FILE* file = fopen(name.c_str(), "r")) {
fclose(file);
return true;
} else {
return false;
}
}
pair<vector<vector<int>>, vector<int>> extract_indices(vector<int> indices, vector<vector<int>> X, vector<int> y)
{
vector<vector<int>> Xr; // nxm
vector<int> yr;
for (int col = 0; col < X.size(); ++col) {
Xr.push_back(vector<int>());
}
for (auto index : indices) {
for (int col = 0; col < X.size(); ++col) {
Xr[col].push_back(X[col][index]);
}
yr.push_back(y[index]);
}
return { Xr, yr };
}
int main(int argc, char** argv)
{
map<string, bool> datasets = {
{"diabetes", true},
{"ecoli", true},
{"glass", true},
{"iris", true},
{"kdd_JapaneseVowels", false},
{"letter", true},
{"liver-disorders", true},
{"mfeat-factors", true},
};
auto valid_datasets = vector<string>();
transform(datasets.begin(), datasets.end(), back_inserter(valid_datasets),
[](const pair<string, bool>& pair) { return pair.first; });
argparse::ArgumentParser program("BayesNetSample");
program.add_argument("-d", "--dataset")
.help("Dataset file name")
.action([valid_datasets](const std::string& value) {
if (find(valid_datasets.begin(), valid_datasets.end(), value) != valid_datasets.end()) {
return value;
}
throw runtime_error("file must be one of {diabetes, ecoli, glass, iris, kdd_JapaneseVowels, letter, liver-disorders, mfeat-factors}");
}
);
program.add_argument("-p", "--path")
.help(" folder where the data files are located, default")
.default_value(string{ PATH }
);
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->toString())
.action([](const std::string& value) {
static const vector<string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
}
);
program.add_argument("--discretize").help("Discretize input dataset").default_value(false).implicit_value(true);
program.add_argument("--dumpcpt").help("Dump CPT Tables").default_value(false).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value(false).implicit_value(true);
program.add_argument("--tensors").help("Use tensors to store samples").default_value(false).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(5).scan<'i', int>().action([](const string& value) {
try {
auto k = stoi(value);
if (k < 2) {
throw runtime_error("Number of folds must be greater than 1");
}
return k;
}
catch (const runtime_error& err) {
throw runtime_error(err.what());
}
catch (...) {
throw runtime_error("Number of folds must be an integer");
}});
program.add_argument("-s", "--seed").help("Random seed").default_value(-1).scan<'i', int>();
bool class_last, stratified, tensors, dump_cpt;
string model_name, file_name, path, complete_file_name;
int nFolds, seed;
try {
program.parse_args(argc, argv);
file_name = program.get<string>("dataset");
path = program.get<string>("path");
model_name = program.get<string>("model");
complete_file_name = path + file_name + ".arff";
stratified = program.get<bool>("stratified");
tensors = program.get<bool>("tensors");
nFolds = program.get<int>("folds");
seed = program.get<int>("seed");
dump_cpt = program.get<bool>("dumpcpt");
class_last = datasets[file_name];
if (!file_exists(complete_file_name)) {
throw runtime_error("Data File " + path + file_name + ".arff" + " does not exist");
}
}
catch (const exception& err) {
cerr << err.what() << endl;
cerr << program;
exit(1);
}
/*
* Begin Processing
*/
auto handler = ArffFiles();
handler.load(complete_file_name, class_last);
// Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
vector<string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features),
[](const pair<string, string>& item) { return item.first; });
// Discretize Dataset
auto [Xd, maxes] = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1;
map<string, vector<int>> states;
for (auto feature : features) {
states[feature] = vector<int>(maxes[feature]);
}
states[className] = vector<int>(maxes[className]);
auto clf = platform::Models::instance()->create(model_name);
clf->fit(Xd, y, features, className, states);
if (dump_cpt) {
cout << "--- CPT Tables ---" << endl;
clf->dump_cpt();
}
auto lines = clf->show();
for (auto line : lines) {
cout << line << endl;
}
cout << "--- Topological Order ---" << endl;
auto order = clf->topological_order();
for (auto name : order) {
cout << name << ", ";
}
cout << "end." << endl;
auto score = clf->score(Xd, y);
cout << "Score: " << score << endl;
// auto graph = clf->graph();
// auto dot_file = model_name + "_" + file_name;
// ofstream file(dot_file + ".dot");
// file << graph;
// file.close();
// cout << "Graph saved in " << model_name << "_" << file_name << ".dot" << endl;
// cout << "dot -Tpng -o " + dot_file + ".png " + dot_file + ".dot " << endl;
// string stratified_string = stratified ? " Stratified" : "";
// cout << nFolds << " Folds" << stratified_string << " Cross validation" << endl;
// cout << "==========================================" << endl;
// torch::Tensor Xt = torch::zeros({ static_cast<int>(Xd.size()), static_cast<int>(Xd[0].size()) }, torch::kInt32);
// torch::Tensor yt = torch::tensor(y, torch::kInt32);
// for (int i = 0; i < features.size(); ++i) {
// Xt.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
// }
// float total_score = 0, total_score_train = 0, score_train, score_test;
// Fold* fold;
// if (stratified)
// fold = new StratifiedKFold(nFolds, y, seed);
// else
// fold = new KFold(nFolds, y.size(), seed);
// for (auto i = 0; i < nFolds; ++i) {
// auto [train, test] = fold->getFold(i);
// cout << "Fold: " << i + 1 << endl;
// if (tensors) {
// auto ttrain = torch::tensor(train, torch::kInt64);
// auto ttest = torch::tensor(test, torch::kInt64);
// torch::Tensor Xtraint = torch::index_select(Xt, 1, ttrain);
// torch::Tensor ytraint = yt.index({ ttrain });
// torch::Tensor Xtestt = torch::index_select(Xt, 1, ttest);
// torch::Tensor ytestt = yt.index({ ttest });
// clf->fit(Xtraint, ytraint, features, className, states);
// auto temp = clf->predict(Xtraint);
// score_train = clf->score(Xtraint, ytraint);
// score_test = clf->score(Xtestt, ytestt);
// } else {
// auto [Xtrain, ytrain] = extract_indices(train, Xd, y);
// auto [Xtest, ytest] = extract_indices(test, Xd, y);
// clf->fit(Xtrain, ytrain, features, className, states);
// score_train = clf->score(Xtrain, ytrain);
// score_test = clf->score(Xtest, ytest);
// }
// if (dump_cpt) {
// cout << "--- CPT Tables ---" << endl;
// clf->dump_cpt();
// }
// total_score_train += score_train;
// total_score += score_test;
// cout << "Score Train: " << score_train << endl;
// cout << "Score Test : " << score_test << endl;
// cout << "-------------------------------------------------------------------------------" << endl;
// }
// cout << "**********************************************************************************" << endl;
// cout << "Average Score Train: " << total_score_train / nFolds << endl;
// cout << "Average Score Test : " << total_score / nFolds << endl;return 0;
}


@@ -2,14 +2,16 @@
namespace bayesnet {
AODE::AODE() : Ensemble() {}
void AODE::train()
void AODE::buildModel(const torch::Tensor& weights)
{
models.clear();
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODE>(i));
}
n_models = models.size();
significanceModels = std::vector<double>(n_models, 1.0);
}
vector<string> AODE::graph(const string& title)
std::vector<std::string> AODE::graph(const std::string& title) const
{
return Ensemble::graph(title);
}


@@ -5,11 +5,11 @@
namespace bayesnet {
class AODE : public Ensemble {
protected:
void train() override;
void buildModel(const torch::Tensor& weights) override;
public:
AODE();
virtual ~AODE() {};
vector<string> graph(const string& title = "AODE") override;
std::vector<std::string> graph(const std::string& title = "AODE") const override;
};
}
#endif


@@ -1,33 +1,38 @@
#include "AODELd.h"
namespace bayesnet {
using namespace std;
AODELd::AODELd() : Ensemble(), Proposal(Ensemble::Xv, Ensemble::yv, features, className) {}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
AODELd::AODELd() : Ensemble(), Proposal(dataset, features, className) {}
AODELd& AODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
checkInput(X_, y_);
features = features_;
className = className_;
states = states_;
train();
for (const auto& model : models) {
model->fit(X_, y_, features_, className_, states_);
}
n_models = models.size();
fitted = true;
Xf = X_;
y = y_;
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
Ensemble::fit(dataset, features, className, states);
return *this;
}
void AODELd::train()
void AODELd::buildModel(const torch::Tensor& weights)
{
models.clear();
for (int i = 0; i < features.size(); ++i) {
models.push_back(std::make_unique<SPODELd>(i));
}
n_models = models.size();
significanceModels = std::vector<double>(n_models, 1.0);
}
Tensor AODELd::predict(Tensor& X)
void AODELd::trainModel(const torch::Tensor& weights)
{
return Ensemble::predict(X);
for (const auto& model : models) {
model->fit(Xf, y, features, className, states);
}
vector<string> AODELd::graph(const string& name)
}
std::vector<std::string> AODELd::graph(const std::string& name) const
{
return Ensemble::graph(name);
}


@@ -5,16 +5,16 @@
#include "SPODELd.h"
namespace bayesnet {
using namespace std;
class AODELd : public Ensemble, public Proposal {
protected:
void trainModel(const torch::Tensor& weights) override;
void buildModel(const torch::Tensor& weights) override;
public:
AODELd();
AODELd& fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_) override;
virtual ~AODELd() = default;
AODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "AODE") override;
Tensor predict(Tensor& X) override;
void train() override;
static inline string version() { return "0.0.1"; };
std::vector<std::string> graph(const std::string& name = "AODELd") const override;
static inline std::string version() { return "0.0.1"; };
};
}
#endif // !AODELD_H


@@ -1,28 +1,38 @@
#ifndef BASE_H
#define BASE_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <vector>
namespace bayesnet {
using namespace std;
enum status_t { NORMAL, WARNING, ERROR };
class BaseClassifier {
public:
// X is nxm vector, y is nx1 vector
virtual BaseClassifier& fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
// X is nxm std::vector, y is nx1 std::vector
virtual BaseClassifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
// X is nxm tensor, y is nx1 tensor
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) = 0;
virtual BaseClassifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) = 0;
virtual ~BaseClassifier() = default;
torch::Tensor virtual predict(torch::Tensor& X) = 0;
vector<int> virtual predict(vector<vector<int>>& X) = 0;
float virtual score(vector<vector<int>>& X, vector<int>& y) = 0;
std::vector<int> virtual predict(std::vector<std::vector<int >>& X) = 0;
status_t virtual getStatus() const = 0;
float virtual score(std::vector<std::vector<int>>& X, std::vector<int>& y) = 0;
float virtual score(torch::Tensor& X, torch::Tensor& y) = 0;
int virtual getNumberOfNodes() = 0;
int virtual getNumberOfEdges() = 0;
int virtual getNumberOfStates() = 0;
vector<string> virtual show() = 0;
vector<string> virtual graph(const string& title = "") = 0;
const string inline getVersion() const { return "0.1.0"; };
vector<string> virtual topological_order() = 0;
void virtual dump_cpt() = 0;
int virtual getNumberOfNodes()const = 0;
int virtual getNumberOfEdges()const = 0;
int virtual getNumberOfStates() const = 0;
std::vector<std::string> virtual show() const = 0;
std::vector<std::string> virtual graph(const std::string& title = "") const = 0;
virtual std::string getVersion() = 0;
std::vector<std::string> virtual topological_order() = 0;
std::vector<std::string> virtual getNotes() const = 0;
void virtual dump_cpt()const = 0;
virtual void setHyperparameters(const nlohmann::json& hyperparameters) = 0;
std::vector<std::string>& getValidHyperparameters() { return validHyperparameters; }
protected:
virtual void trainModel(const torch::Tensor& weights) = 0;
std::vector<std::string> validHyperparameters;
};
}
#endif

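Note: Base.h now exposes weighted fit overloads, getNotes, and a JSON-based setHyperparameters. A minimal usage sketch, assuming the BoostAODE classifier added later in this diff (the keys mirror the validHyperparameters list in its constructor; include path and instantiation are illustrative, not taken from the repo):

#include <memory>
#include <nlohmann/json.hpp>
#include "BoostAODE.h"

int main()
{
    std::unique_ptr<bayesnet::BaseClassifier> clf = std::make_unique<bayesnet::BoostAODE>();
    nlohmann::json hyperparameters = {
        { "select_features", "CFS" }, // one of CFS, IWSS, FCBF
        { "convergence", true },
        { "tolerance", 3 }
    };
    // Unknown keys throw std::invalid_argument, as BoostAODE::setHyperparameters below shows.
    clf->setHyperparameters(hyperparameters);
    return 0;
}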

@@ -1,16 +1,16 @@
#include "BayesMetrics.h"
#include "Mst.h"
namespace bayesnet {
//samples is nxm tensor used to fit the model
Metrics::Metrics(torch::Tensor& samples, vector<string>& features, string& className, int classNumStates)
//samples is n+1xm tensor used to fit the model
Metrics::Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
: samples(samples)
, features(features)
, className(className)
, classNumStates(classNumStates)
{
}
//samples is nxm vector used to fit the model
Metrics::Metrics(const vector<vector<int>>& vsamples, const vector<int>& labels, const vector<string>& features, const string& className, const int classNumStates)
//samples is nxm std::vector used to fit the model
Metrics::Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates)
: features(features)
, className(className)
, classNumStates(classNumStates)
@@ -21,28 +21,57 @@ namespace bayesnet {
}
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
}
vector<pair<string, string>> Metrics::doCombinations(const vector<string>& source)
std::vector<int> Metrics::SelectKBestWeighted(const torch::Tensor& weights, bool ascending, unsigned k)
{
vector<pair<string, string>> result;
for (int i = 0; i < source.size(); ++i) {
string temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
// Return the K Best features
auto n = samples.size(0) - 1;
if (k == 0) {
k = n;
}
// compute scores
scoresKBest.clear();
featuresKBest.clear();
auto label = samples.index({ -1, "..." });
for (int i = 0; i < n; ++i) {
scoresKBest.push_back(mutualInformation(label, samples.index({ i, "..." }), weights));
featuresKBest.push_back(i);
}
// sort & reduce scores and features
if (ascending) {
sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
{ return scoresKBest[i] < scoresKBest[j]; });
sort(scoresKBest.begin(), scoresKBest.end(), std::less<double>());
if (k < n) {
for (int i = 0; i < n - k; ++i) {
featuresKBest.erase(featuresKBest.begin());
scoresKBest.erase(scoresKBest.begin());
}
}
return result;
} else {
sort(featuresKBest.begin(), featuresKBest.end(), [&](int i, int j)
{ return scoresKBest[i] > scoresKBest[j]; });
sort(scoresKBest.begin(), scoresKBest.end(), std::greater<double>());
featuresKBest.resize(k);
scoresKBest.resize(k);
}
torch::Tensor Metrics::conditionalEdge()
return featuresKBest;
}
std::vector<double> Metrics::getScoresKBest() const
{
auto result = vector<double>();
auto source = vector<string>(features);
return scoresKBest;
}
torch::Tensor Metrics::conditionalEdge(const torch::Tensor& weights)
{
auto result = std::vector<double>();
auto source = std::vector<std::string>(features);
source.push_back(className);
auto combinations = doCombinations(source);
// Compute class prior
auto margin = torch::zeros({ classNumStates });
auto margin = torch::zeros({ classNumStates }, torch::kFloat);
for (int value = 0; value < classNumStates; ++value) {
auto mask = samples.index({ -1, "..." }) == value;
margin[value] = mask.sum().item<float>() / samples.size(1);
margin[value] = mask.sum().item<double>() / samples.size(1);
}
for (auto [first, second] : combinations) {
int index_first = find(features.begin(), features.end(), first) - features.begin();
@@ -52,8 +81,9 @@ namespace bayesnet {
auto mask = samples.index({ -1, "..." }) == value;
auto first_dataset = samples.index({ index_first, mask });
auto second_dataset = samples.index({ index_second, mask });
auto mi = mutualInformation(first_dataset, second_dataset);
auto pb = margin[value].item<float>();
auto weights_dataset = weights.index({ mask });
auto mi = mutualInformation(first_dataset, second_dataset, weights_dataset);
auto pb = margin[value].item<double>();
accumulated += pb * mi;
}
result.push_back(accumulated);
@@ -70,31 +100,32 @@ namespace bayesnet {
return matrix;
}
// To use in Python
vector<float> Metrics::conditionalEdgeWeights()
std::vector<float> Metrics::conditionalEdgeWeights(std::vector<float>& weights_)
{
auto matrix = conditionalEdge();
const torch::Tensor weights = torch::tensor(weights_);
auto matrix = conditionalEdge(weights);
std::vector<float> v(matrix.data_ptr<float>(), matrix.data_ptr<float>() + matrix.numel());
return v;
}
double Metrics::entropy(torch::Tensor& feature)
double Metrics::entropy(const torch::Tensor& feature, const torch::Tensor& weights)
{
torch::Tensor counts = feature.bincount();
int totalWeight = counts.sum().item<int>();
torch::Tensor counts = feature.bincount(weights);
double totalWeight = counts.sum().item<double>();
torch::Tensor probs = counts.to(torch::kFloat) / totalWeight;
torch::Tensor logProbs = torch::log(probs);
torch::Tensor entropy = -probs * logProbs;
return entropy.nansum().item<double>();
}
// H(Y|X) = sum_{x in X} p(x) H(Y|X=x)
double Metrics::conditionalEntropy(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
double Metrics::conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
{
int numSamples = firstFeature.sizes()[0];
torch::Tensor featureCounts = secondFeature.bincount();
unordered_map<int, unordered_map<int, double>> jointCounts;
torch::Tensor featureCounts = secondFeature.bincount(weights);
std::unordered_map<int, std::unordered_map<int, double>> jointCounts;
double totalWeight = 0;
for (auto i = 0; i < numSamples; i++) {
jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += 1;
totalWeight += 1;
jointCounts[secondFeature[i].item<int>()][firstFeature[i].item<int>()] += weights[i].item<double>();
totalWeight += weights[i].item<float>();
}
if (totalWeight == 0)
return 0;
@@ -115,16 +146,16 @@ namespace bayesnet {
return entropyValue;
}
// I(X;Y) = H(Y) - H(Y|X)
double Metrics::mutualInformation(torch::Tensor& firstFeature, torch::Tensor& secondFeature)
double Metrics::mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights)
{
return entropy(firstFeature) - conditionalEntropy(firstFeature, secondFeature);
return entropy(firstFeature, weights) - conditionalEntropy(firstFeature, secondFeature, weights);
}
/*
Compute the maximum spanning tree considering the weights as distances
and the indices of the weights as nodes of this square matrix using
Kruskal algorithm
*/
vector<pair<int, int>> Metrics::maximumSpanningTree(vector<string> features, Tensor& weights, int root)
std::vector<std::pair<int, int>> Metrics::maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root)
{
auto mst = MST(features, weights, root);
return mst.maximumSpanningTree();

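Note: this refactor threads a sample-weight tensor through every information measure. Independent of the torch implementation above, the quantities in the comments, H(Y|X) = sum_x p(x) H(Y|X=x) and I(X;Y) = H(Y) - H(Y|X), generalise to weights by replacing counts with weight mass. A self-contained sketch of that math (not the project's code):

#include <cmath>
#include <cstddef>
#include <map>
#include <vector>

// Weighted entropy: H_w(X) = -sum_x p_w(x) log p_w(x),
// where p_w(x) is the weight mass of samples with value x.
double weighted_entropy(const std::vector<int>& x, const std::vector<double>& w)
{
    std::map<int, double> mass;
    double total = 0.0;
    for (std::size_t i = 0; i < x.size(); ++i) {
        mass[x[i]] += w[i];
        total += w[i];
    }
    double h = 0.0;
    for (const auto& [value, m] : mass) {
        const double p = m / total;
        if (p > 0) h -= p * std::log(p);
    }
    return h;
}

// H_w(X|Y): entropy of X within each group of Y, weighted by the
// group's share of the total weight.
double weighted_conditional_entropy(const std::vector<int>& x, const std::vector<int>& y, const std::vector<double>& w)
{
    std::map<int, std::vector<std::size_t>> groups;
    for (std::size_t i = 0; i < y.size(); ++i) {
        groups[y[i]].push_back(i);
    }
    double total = 0.0;
    for (double wi : w) total += wi;
    double h = 0.0;
    for (const auto& [label, indices] : groups) {
        std::vector<int> xs;
        std::vector<double> ws;
        double groupWeight = 0.0;
        for (auto i : indices) {
            xs.push_back(x[i]);
            ws.push_back(w[i]);
            groupWeight += w[i];
        }
        h += (groupWeight / total) * weighted_entropy(xs, ws);
    }
    return h;
}

// I_w(X;Y) = H_w(X) - H_w(X|Y), matching the comment in the diff.
double weighted_mutual_information(const std::vector<int>& x, const std::vector<int>& y, const std::vector<double>& w)
{
    return weighted_entropy(x, w) - weighted_conditional_entropy(x, y, w);
}

With uniform weights these reduce to the unweighted definitions the previous code used.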

@@ -4,25 +4,46 @@
#include <vector>
#include <string>
namespace bayesnet {
using namespace std;
using namespace torch;
class Metrics {
private:
Tensor samples; // nxm tensor used to fit the model
vector<string> features;
string className;
int classNumStates = 0;
std::vector<double> scoresKBest;
std::vector<int> featuresKBest; // sorted indices of the features
double conditionalEntropy(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
protected:
torch::Tensor samples; // n+1xm torch::Tensor used to fit the model where samples[-1] is the y std::vector
std::string className;
double entropy(const torch::Tensor& feature, const torch::Tensor& weights);
std::vector<std::string> features;
template <class T>
std::vector<std::pair<T, T>> doCombinations(const std::vector<T>& source)
{
std::vector<std::pair<T, T>> result;
for (int i = 0; i < source.size(); ++i) {
T temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
}
}
return result;
}
template <class T>
T pop_first(std::vector<T>& v)
{
T temp = v[0];
v.erase(v.begin());
return temp;
}
public:
Metrics() = default;
Metrics(Tensor&, vector<string>&, string&, int);
Metrics(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&, const int);
double entropy(Tensor&);
double conditionalEntropy(Tensor&, Tensor&);
double mutualInformation(Tensor&, Tensor&);
vector<float> conditionalEdgeWeights(); // To use in Python
Tensor conditionalEdge();
vector<pair<string, string>> doCombinations(const vector<string>&);
vector<pair<int, int>> maximumSpanningTree(vector<string> features, Tensor& weights, int root);
Metrics(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
Metrics(const std::vector<std::vector<int>>& vsamples, const std::vector<int>& labels, const std::vector<std::string>& features, const std::string& className, const int classNumStates);
std::vector<int> SelectKBestWeighted(const torch::Tensor& weights, bool ascending = false, unsigned k = 0);
std::vector<double> getScoresKBest() const;
double mutualInformation(const torch::Tensor& firstFeature, const torch::Tensor& secondFeature, const torch::Tensor& weights);
std::vector<float> conditionalEdgeWeights(std::vector<float>& weights); // To use in Python
torch::Tensor conditionalEdge(const torch::Tensor& weights);
std::vector<std::pair<int, int>> maximumSpanningTree(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
};
}
#endif

src/BayesNet/BoostAODE.cc Normal file

@@ -0,0 +1,210 @@
#include <set>
#include <functional>
#include <limits.h>
#include "BoostAODE.h"
#include "CFS.h"
#include "FCBF.h"
#include "IWSS.h"
#include "folding.hpp"
namespace bayesnet {
BoostAODE::BoostAODE() : Ensemble()
{
validHyperparameters = { "repeatSparent", "maxModels", "ascending", "convergence", "threshold", "select_features", "tolerance" };
}
void BoostAODE::buildModel(const torch::Tensor& weights)
{
// Models shall be built in trainModel
models.clear();
n_models = 0;
// Prepare the validation dataset
auto y_ = dataset.index({ -1, "..." });
if (convergence) {
// Prepare train & validation sets from train data
auto fold = folding::StratifiedKFold(5, y_, 271);
dataset_ = torch::clone(dataset);
// save input dataset
auto [train, test] = fold.getFold(0);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
// Get train and validation sets
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), train_t });
y_train = dataset.index({ -1, train_t });
X_test = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), test_t });
y_test = dataset.index({ -1, test_t });
dataset = X_train;
m = X_train.size(1);
auto n_classes = states.at(className).size();
metrics = Metrics(dataset, features, className, n_classes);
// Build dataset with train data
buildDataset(y_train);
} else {
// Use all data to train
X_train = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." });
y_train = y_;
}
}
void BoostAODE::setHyperparameters(const nlohmann::json& hyperparameters_)
{
auto hyperparameters = hyperparameters_;
if (hyperparameters.contains("repeatSparent")) {
repeatSparent = hyperparameters["repeatSparent"];
hyperparameters.erase("repeatSparent");
}
if (hyperparameters.contains("maxModels")) {
maxModels = hyperparameters["maxModels"];
hyperparameters.erase("maxModels");
}
if (hyperparameters.contains("ascending")) {
ascending = hyperparameters["ascending"];
hyperparameters.erase("ascending");
}
if (hyperparameters.contains("convergence")) {
convergence = hyperparameters["convergence"];
hyperparameters.erase("convergence");
}
if (hyperparameters.contains("threshold")) {
threshold = hyperparameters["threshold"];
hyperparameters.erase("threshold");
}
if (hyperparameters.contains("tolerance")) {
tolerance = hyperparameters["tolerance"];
hyperparameters.erase("tolerance");
}
if (hyperparameters.contains("select_features")) {
auto selectedAlgorithm = hyperparameters["select_features"];
std::vector<std::string> algos = { "IWSS", "FCBF", "CFS" };
selectFeatures = true;
algorithm = selectedAlgorithm;
if (std::find(algos.begin(), algos.end(), selectedAlgorithm) == algos.end()) {
throw std::invalid_argument("Invalid selectFeatures value [IWSS, FCBF, CFS]");
}
hyperparameters.erase("select_features");
}
if (!hyperparameters.empty()) {
throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
}
}
std::unordered_set<int> BoostAODE::initializeModels()
{
std::unordered_set<int> featuresUsed;
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
int maxFeatures = 0;
if (algorithm == "CFS") {
featureSelector = new CFS(dataset, features, className, maxFeatures, states.at(className).size(), weights_);
} else if (algorithm == "IWSS") {
if (threshold < 0 || threshold >0.5) {
throw std::invalid_argument("Invalid threshold value for IWSS [0, 0.5]");
}
featureSelector = new IWSS(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
} else if (algorithm == "FCBF") {
if (threshold < 1e-7 || threshold > 1) {
throw std::invalid_argument("Invalid threshold value [1e-7, 1]");
}
featureSelector = new FCBF(dataset, features, className, maxFeatures, states.at(className).size(), weights_, threshold);
}
featureSelector->fit();
auto cfsFeatures = featureSelector->getFeatures();
for (const int& feature : cfsFeatures) {
// std::cout << "Feature: [" << feature << "] " << feature << " " << features.at(feature) << std::endl;
featuresUsed.insert(feature);
std::unique_ptr<Classifier> model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_);
models.push_back(std::move(model));
significanceModels.push_back(1.0);
n_models++;
}
notes.push_back("Used features in initialization: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()) + " with " + algorithm);
delete featureSelector;
return featuresUsed;
}
void BoostAODE::trainModel(const torch::Tensor& weights)
{
std::unordered_set<int> featuresUsed;
if (selectFeatures) {
featuresUsed = initializeModels();
}
if (maxModels == 0)
maxModels = .1 * n > 10 ? .1 * n : n;
torch::Tensor weights_ = torch::full({ m }, 1.0 / m, torch::kFloat64);
bool exitCondition = false;
// Variables to control the accuracy finish condition
double priorAccuracy = 0.0;
double delta = 1.0;
double threshold = 1e-4;
int count = 0; // number of times the accuracy is lower than the threshold
fitted = true; // to enable predict
// Step 0: Set the finish conditions
// if not repeatSparent, one finish condition is running out of features
// n_models == maxModels
// epsilon_t > 0.5 => invert the weights policy
// validation error is not decreasing
while (!exitCondition) {
// Step 1: Build ranking with mutual information
auto featureSelection = metrics.SelectKBestWeighted(weights_, ascending, n); // Get all the features sorted
std::unique_ptr<Classifier> model;
auto feature = featureSelection[0];
if (!repeatSparent || featuresUsed.size() < featureSelection.size()) {
bool used = true;
for (const auto& feat : featureSelection) {
if (std::find(featuresUsed.begin(), featuresUsed.end(), feat) != featuresUsed.end()) {
continue;
}
used = false;
feature = feat;
break;
}
if (used) {
exitCondition = true;
continue;
}
}
featuresUsed.insert(feature);
model = std::make_unique<SPODE>(feature);
model->fit(dataset, features, className, states, weights_);
auto ypred = model->predict(X_train);
// Step 3.1: Compute the classifier amount of say
auto mask_wrong = ypred != y_train;
auto mask_right = ypred == y_train;
auto masked_weights = weights_ * mask_wrong.to(weights_.dtype());
double epsilon_t = masked_weights.sum().item<double>();
double wt = (1 - epsilon_t) / epsilon_t;
double alpha_t = epsilon_t == 0 ? 1 : 0.5 * log(wt);
// Step 3.2: Update weights for next classifier
// Step 3.2.1: Update weights of wrong samples
weights_ += mask_wrong.to(weights_.dtype()) * exp(alpha_t) * weights_;
// Step 3.2.2: Update weights of right samples
weights_ += mask_right.to(weights_.dtype()) * exp(-alpha_t) * weights_;
// Step 3.3: Normalise the weights
double totalWeights = torch::sum(weights_).item<double>();
weights_ = weights_ / totalWeights;
// Step 3.4: Store classifier and its accuracy to weigh its future vote
models.push_back(std::move(model));
significanceModels.push_back(alpha_t);
n_models++;
if (convergence) {
auto y_val_predict = predict(X_test);
double accuracy = (y_val_predict == y_test).sum().item<double>() / (double)y_test.size(0);
if (priorAccuracy == 0) {
priorAccuracy = accuracy;
} else {
delta = accuracy - priorAccuracy;
}
if (delta < threshold) {
count++;
}
}
exitCondition = n_models >= maxModels && repeatSparent || epsilon_t > 0.5 || count > tolerance;
}
if (featuresUsed.size() != features.size()) {
notes.push_back("Used features in train: " + std::to_string(featuresUsed.size()) + " of " + std::to_string(features.size()));
status = WARNING;
}
notes.push_back("Number of models: " + std::to_string(n_models));
}
std::vector<std::string> BoostAODE::graph(const std::string& title) const
{
return Ensemble::graph(title);
}
}

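Note: Steps 3.1-3.3 above follow the classic AdaBoost schema: epsilon_t is the weighted error of the new model, alpha_t = 0.5 * log((1 - epsilon_t) / epsilon_t) is its amount of say, and sample weights are re-scaled so misclassified samples gain mass before being normalised. A stripped-down sketch of that schema (the diff uses a variant that adds the exp(+/-alpha_t)-scaled term to the current weight instead of replacing it, then normalises; the direction of the update is the same):

#include <cmath>
#include <cstddef>
#include <vector>

// One AdaBoost weight update; assumes 0 < epsilon_t < 1
// (the code above special-cases epsilon_t == 0).
double adaboost_step(const std::vector<bool>& misclassified, std::vector<double>& weights)
{
    double epsilon_t = 0.0;
    for (std::size_t i = 0; i < weights.size(); ++i) {
        if (misclassified[i]) epsilon_t += weights[i];   // Step 3.1: weighted error
    }
    const double alpha_t = 0.5 * std::log((1.0 - epsilon_t) / epsilon_t);
    double total = 0.0;
    for (std::size_t i = 0; i < weights.size(); ++i) {
        // Step 3.2: raise weights of wrong samples, lower weights of right ones
        weights[i] *= std::exp(misclassified[i] ? alpha_t : -alpha_t);
        total += weights[i];
    }
    for (auto& w : weights) w /= total;                  // Step 3.3: normalise
    return alpha_t;                                      // Step 3.4: vote weight of this model
}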
src/BayesNet/BoostAODE.h Normal file

@@ -0,0 +1,33 @@
#ifndef BOOSTAODE_H
#define BOOSTAODE_H
#include "Ensemble.h"
#include <map>
#include "SPODE.h"
#include "FeatureSelect.h"
namespace bayesnet {
class BoostAODE : public Ensemble {
public:
BoostAODE();
virtual ~BoostAODE() = default;
std::vector<std::string> graph(const std::string& title = "BoostAODE") const override;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
protected:
void buildModel(const torch::Tensor& weights) override;
void trainModel(const torch::Tensor& weights) override;
private:
torch::Tensor dataset_;
torch::Tensor X_train, y_train, X_test, y_test;
std::unordered_set<int> initializeModels();
// Hyperparameters
bool repeatSparent = false; // if true, a feature can be selected more than once
int maxModels = 0;
int tolerance = 0;
bool ascending = false; //Process KBest features ascending or descending order
bool convergence = false; //if true, stop when the model does not improve
bool selectFeatures = false; // if true, use feature selection
std::string algorithm = ""; // Selected feature selection algorithm
FeatureSelect* featureSelector = nullptr;
double threshold = -1;
};
}
#endif

src/BayesNet/CFS.cc Normal file

@@ -0,0 +1,72 @@
#include "CFS.h"
#include <limits>
#include "bayesnetUtils.h"
namespace bayesnet {
void CFS::fit()
{
initialize();
computeSuLabels();
auto featureOrder = argsort(suLabels); // sort descending order
auto continueCondition = true;
auto feature = featureOrder[0];
selectedFeatures.push_back(feature);
selectedScores.push_back(suLabels[feature]);
selectedFeatures.erase(selectedFeatures.begin());
while (continueCondition) {
double merit = std::numeric_limits<double>::lowest();
int bestFeature = -1;
for (auto feature : featureOrder) {
selectedFeatures.push_back(feature);
// Compute merit with selectedFeatures
auto meritNew = computeMeritCFS();
if (meritNew > merit) {
merit = meritNew;
bestFeature = feature;
}
selectedFeatures.pop_back();
}
if (bestFeature == -1) {
// meritNew has to be nan due to constant features
break;
}
selectedFeatures.push_back(bestFeature);
selectedScores.push_back(merit);
featureOrder.erase(remove(featureOrder.begin(), featureOrder.end(), bestFeature), featureOrder.end());
continueCondition = computeContinueCondition(featureOrder);
}
fitted = true;
}
bool CFS::computeContinueCondition(const std::vector<int>& featureOrder)
{
if (selectedFeatures.size() == maxFeatures || featureOrder.size() == 0) {
return false;
}
if (selectedScores.size() >= 5) {
/*
"To prevent the best first search from exploring the entire
feature subset search space, a stopping criterion is imposed.
The search will terminate if five consecutive fully expanded
subsets show no improvement over the current best subset."
as stated in Mark A.Hall Thesis
*/
double item_ant = std::numeric_limits<double>::lowest();
int num = 0;
std::vector<double> lastFive(selectedScores.end() - 5, selectedScores.end());
for (auto item : lastFive) {
if (item_ant == std::numeric_limits<double>::lowest()) {
item_ant = item;
}
if (item > item_ant) {
break;
} else {
num++;
item_ant = item;
}
}
if (num == 5) {
return false;
}
}
return true;
}
}

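Note: computeMeritCFS is not shown in this hunk; the score the selection loop above maximises is Hall's CFS merit. For a subset of k features with mean feature-class correlation r_cf and mean feature-feature correlation r_ff (both measured with symmetrical uncertainty in CFS), merit_S = (k * r_cf) / sqrt(k + k * (k - 1) * r_ff). A one-line sketch of that formula, with the two averages assumed precomputed:

#include <cmath>

// Hall's CFS merit: rewards subsets whose features correlate with the
// class (r_cf) but not with each other (r_ff). Hypothetical helper,
// not the project's implementation.
double cfs_merit(int k, double r_cf, double r_ff)
{
    return (k * r_cf) / std::sqrt(k + k * (k - 1) * r_ff);
}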
src/BayesNet/CFS.h Normal file

@@ -0,0 +1,20 @@
#ifndef CFS_H
#define CFS_H
#include <torch/torch.h>
#include <vector>
#include "FeatureSelect.h"
namespace bayesnet {
class CFS : public FeatureSelect {
public:
// dataset is a n+1xm tensor of integers where dataset[-1] is the y std::vector
CFS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights)
{
}
virtual ~CFS() {};
void fit() override;
private:
bool computeContinueCondition(const std::vector<int>& featureOrder);
};
}
#endif


@@ -1,5 +1,13 @@
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(
${BayesNet_SOURCE_DIR}/lib/mdlp
${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/json/include
${BayesNet_SOURCE_DIR}/src/BayesNet
${CMAKE_BINARY_DIR}/configured_files/include
)
add_library(BayesNet bayesnetUtils.cc Network.cc Node.cc BayesMetrics.cc Classifier.cc
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc Mst.cc Proposal.cc)
target_link_libraries(BayesNet mdlp ArffFiles "${TORCH_LIBRARIES}")
KDB.cc TAN.cc SPODE.cc Ensemble.cc AODE.cc TANLd.cc KDBLd.cc SPODELd.cc AODELd.cc BoostAODE.cc
Mst.cc Proposal.cc CFS.cc FCBF.cc IWSS.cc FeatureSelect.cc )
target_link_libraries(BayesNet mdlp "${TORCH_LIBRARIES}")


@@ -2,113 +2,125 @@
#include "bayesnetUtils.h"
namespace bayesnet {
using namespace torch;
Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
Classifier& Classifier::build(vector<string>& features, string className, map<string, vector<int>>& states)
Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X, ytmp }, 0);
this->features = features;
this->className = className;
this->states = states;
m = dataset.size(1);
n = dataset.size(0) - 1;
checkFitParameters();
auto n_classes = states[className].size();
metrics = Metrics(samples, features, className, n_classes);
auto n_classes = states.at(className).size();
metrics = Metrics(dataset, features, className, n_classes);
model.initialize();
train();
if (Xv.empty()) {
// fit with tensors
model.fit(X, y, features, className);
} else {
// fit with vectors
model.fit(Xv, yv, features, className);
}
buildModel(weights);
trainModel(weights);
fitted = true;
return *this;
}
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states)
void Classifier::buildDataset(torch::Tensor& ytmp)
{
this->X = X;
this->y = y;
Xv = vector<vector<int>>();
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
return build(features, className, states);
try {
auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1);
dataset = torch::cat({ dataset, yresized }, 0);
}
void Classifier::generateTensorXFromVector()
catch (const std::exception& e) {
std::cerr << e.what() << '\n';
std::cout << "X dimensions: " << dataset.sizes() << "\n";
std::cout << "y dimensions: " << ytmp.sizes() << "\n";
exit(1);
}
}
void Classifier::trainModel(const torch::Tensor& weights)
{
X = torch::zeros({ static_cast<int>(Xv.size()), static_cast<int>(Xv[0].size()) }, kInt32);
for (int i = 0; i < Xv.size(); ++i) {
X.index_put_({ i, "..." }, torch::tensor(Xv[i], kInt32));
}
model.fit(dataset, weights, features, className, states);
}
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(vector<vector<int>>& X, vector<int>& y, vector<string>& features, string className, map<string, vector<int>>& states)
Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
Xv = X;
generateTensorXFromVector();
this->y = torch::tensor(y, kInt32);
yv = y;
return build(features, className, states);
dataset = X;
buildDataset(y);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
// X is nxm where n is the number of features and m the number of samples
Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
for (int i = 0; i < X.size(); ++i) {
dataset.index_put_({ i, "..." }, torch::tensor(X[i], torch::kInt32));
}
auto ytmp = torch::tensor(y, torch::kInt32);
buildDataset(ytmp);
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states)
{
this->dataset = dataset;
const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
return build(features, className, states, weights);
}
Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
this->dataset = dataset;
return build(features, className, states, weights);
}
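A usage sketch of the vector overload (hypothetical data and names; assumes a concrete subclass such as TAN, since Classifier itself is abstract):
// Sketch only: a discretized 2-features x 4-samples toy problem.
std::vector<std::vector<int>> X = { { 0, 1, 1, 0 }, { 1, 0, 1, 0 } };
std::vector<int> y = { 0, 1, 1, 0 };
std::vector<std::string> features = { "f0", "f1" };
std::map<std::string, std::vector<int>> states = { { "f0", { 0, 1 } }, { "f1", { 0, 1 } }, { "class", { 0, 1 } } };
auto clf = bayesnet::TAN();
clf.fit(X, y, features, "class", states); // uniform weights of 1/m per sample are built internally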
void Classifier::checkFitParameters()
{
if (torch::is_floating_point(dataset)) {
throw std::invalid_argument("dataset (X, y) must be of type Integer");
}
if (n != features.size()) {
throw std::invalid_argument("Classifier: X " + std::to_string(n) + " and features " + std::to_string(features.size()) + " must have the same number of features");
}
if (states.find(className) == states.end()) {
throw invalid_argument("className not found in states");
throw std::invalid_argument("className not found in states");
}
for (auto feature : features) {
if (states.find(feature) == states.end()) {
throw invalid_argument("feature [" + feature + "] not found in states");
throw std::invalid_argument("feature [" + feature + "] not found in states");
}
}
}
torch::Tensor Classifier::predict(torch::Tensor& X)
{
if (!fitted) {
throw std::logic_error("Classifier has not been fitted");
}
return model.predict(X);
}
std::vector<int> Classifier::predict(std::vector<std::vector<int>>& X)
{
if (!fitted) {
throw std::logic_error("Classifier has not been fitted");
}
auto m_ = X[0].size();
auto n_ = X.size();
std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
}
auto yp = model.predict(Xd);
return yp;
}
float Classifier::score(torch::Tensor& X, torch::Tensor& y)
{
if (!fitted) {
throw std::logic_error("Classifier has not been fitted");
}
torch::Tensor y_pred = predict(X);
return (y_pred == y).sum().item<float>() / y.size(0);
}
float Classifier::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{
if (!fitted) {
throw std::logic_error("Classifier has not been fitted");
}
return model.score(X, y);
}
std::vector<std::string> Classifier::show() const
{
return model.show();
}
@@ -120,26 +132,29 @@ namespace bayesnet {
}
model.addNode(className);
}
int Classifier::getNumberOfNodes() const
{
// Features does not include class
return fitted ? model.getFeatures().size() : 0;
}
int Classifier::getNumberOfEdges() const
{
return fitted ? model.getNumEdges() : 0;
}
int Classifier::getNumberOfStates() const
{
return fitted ? model.getStates() : 0;
}
std::vector<std::string> Classifier::topological_order()
{
return model.topological_sort();
}
void Classifier::dump_cpt() const
{
model.dump_cpt();
}
void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
{
// For classifiers that don't have hyperparameters
}
}


@@ -4,45 +4,48 @@
#include "BaseClassifier.h"
#include "Network.h"
#include "BayesMetrics.h"
namespace bayesnet {
class Classifier : public BaseClassifier {
private:
Classifier& build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
protected:
bool fitted;
int m, n; // m: number of samples, n: number of features
Network model;
Metrics metrics;
std::vector<std::string> features;
std::string className;
std::map<std::string, std::vector<int>> states;
torch::Tensor dataset; // (n+1)xm tensor
status_t status = NORMAL;
std::vector<std::string> notes; // Used to store messages generated during the fit process
void checkFitParameters();
virtual void buildModel(const torch::Tensor& weights) = 0;
void trainModel(const torch::Tensor& weights) override;
void buildDataset(torch::Tensor& y);
public:
Classifier(Network model);
virtual ~Classifier() = default;
Classifier& fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
Classifier& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights) override;
void addNodes();
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
torch::Tensor predict(torch::Tensor& X) override;
status_t getStatus() const override { return status; }
std::string getVersion() override { return { project_version.begin(), project_version.end() }; };
std::vector<int> predict(std::vector<std::vector<int>>& X) override;
float score(torch::Tensor& X, torch::Tensor& y) override;
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
std::vector<std::string> show() const override;
std::vector<std::string> topological_order() override;
std::vector<std::string> getNotes() const override { return notes; }
void dump_cpt() const override;
void setHyperparameters(const nlohmann::json& hyperparameters) override; //For classifiers that don't have hyperparameters
};
}
#endif


@@ -1,65 +1,29 @@
#include "Ensemble.h"
namespace bayesnet {
Ensemble::Ensemble() : Classifier(Network()), n_models(0) {}
void Ensemble::trainModel(const torch::Tensor& weights)
{
n_models = models.size();
for (auto i = 0; i < n_models; ++i) {
// fit each model with the shared dataset tensor
models[i]->fit(dataset, features, className, states);
}
}
std::vector<int> Ensemble::voting(torch::Tensor& y_pred)
{
auto y_pred_ = y_pred.accessor<int, 2>();
std::vector<int> y_pred_final;
int numClasses = states.at(className).size();
// y_pred is m x n_models with the prediction of every model for each sample
for (int i = 0; i < y_pred.size(0); ++i) {
// votes stores, at each index (class value), the total significance
// contributed by the models that predicted that class for this sample
std::vector<double> votes(numClasses, 0.0);
for (int j = 0; j < n_models; ++j) {
votes[y_pred_[i][j]] += significanceModels.at(j);
}
// argsort in descending order
auto indices = argsort(votes);
@@ -67,19 +31,18 @@ namespace bayesnet {
}
return y_pred_final;
}
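A worked example of the weighted vote (illustrative numbers, not from the source): three models with significances 0.5, 0.3 and 0.2 predicting classes 1, 0 and 1 for one sample give votes = {0.3, 0.7}, so class 1 wins.
// Sketch of the voting arithmetic for a single sample.
std::vector<double> significance = { 0.5, 0.3, 0.2 };
std::vector<int> predictions = { 1, 0, 1 };
std::vector<double> votes(2, 0.0);
for (int j = 0; j < 3; ++j) {
votes[predictions[j]] += significance[j];
}
// votes == {0.3, 0.7} -> argmax selects class 1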
torch::Tensor Ensemble::predict(torch::Tensor& X)
{
if (!fitted) {
throw logic_error("Ensemble has not been fitted");
throw std::logic_error("Ensemble has not been fitted");
}
torch::Tensor y_pred = torch::zeros({ X.size(1), n_models }, torch::kInt32);
auto threads{ std::vector<std::thread>() };
std::mutex mtx;
for (auto i = 0; i < n_models; ++i) {
threads.push_back(std::thread([&, i]() {
auto ypredict = models[i]->predict(X);
std::lock_guard<std::mutex> lock(mtx);
y_pred.index_put_({ "...", i }, ypredict);
}));
}
@@ -88,27 +51,27 @@ namespace bayesnet {
}
return torch::tensor(voting(y_pred));
}
std::vector<int> Ensemble::predict(std::vector<std::vector<int>>& X)
{
if (!fitted) {
throw std::logic_error("Ensemble has not been fitted");
}
long m_ = X[0].size();
long n_ = X.size();
std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
for (auto i = 0; i < n_; i++) {
Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
}
torch::Tensor y_pred = torch::zeros({ m_, n_models }, torch::kInt32);
for (auto i = 0; i < n_models; ++i) {
y_pred.index_put_({ "...", i }, torch::tensor(models[i]->predict(Xd), torch::kInt32));
}
return voting(y_pred);
}
float Ensemble::score(torch::Tensor& X, torch::Tensor& y)
{
if (!fitted) {
throw std::logic_error("Ensemble has not been fitted");
}
auto y_pred = predict(X);
int correct = 0;
@@ -119,10 +82,10 @@ namespace bayesnet {
}
return (double)correct / y_pred.size(0);
}
float Ensemble::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
{
if (!fitted) {
throw std::logic_error("Ensemble has not been fitted");
}
auto y_pred = predict(X);
int correct = 0;
@@ -132,27 +95,26 @@ namespace bayesnet {
}
}
return (double)correct / y_pred.size();
}
std::vector<std::string> Ensemble::show() const
{
auto result = std::vector<std::string>();
for (auto i = 0; i < n_models; ++i) {
auto res = models[i]->show();
result.insert(result.end(), res.begin(), res.end());
}
return result;
}
std::vector<std::string> Ensemble::graph(const std::string& title) const
{
auto result = std::vector<std::string>();
for (auto i = 0; i < n_models; ++i) {
auto res = models[i]->graph(title + "_" + std::to_string(i));
result.insert(result.end(), res.begin(), res.end());
}
return result;
}
int Ensemble::getNumberOfNodes() const
{
int nodes = 0;
for (auto i = 0; i < n_models; ++i) {
@@ -160,7 +122,7 @@ namespace bayesnet {
}
return nodes;
}
int Ensemble::getNumberOfEdges() const
{
int edges = 0;
for (auto i = 0; i < n_models; ++i) {
@@ -168,7 +130,7 @@ namespace bayesnet {
}
return edges;
}
int Ensemble::getNumberOfStates() const
{
int nstates = 0;
for (auto i = 0; i < n_models; ++i) {


@@ -4,48 +4,34 @@
#include "Classifier.h"
#include "BayesMetrics.h"
#include "bayesnetUtils.h"
namespace bayesnet {
class Ensemble : public Classifier {
private:
Ensemble& build(std::vector<std::string>& features, std::string className, std::map<std::string, std::vector<int>>& states);
protected:
unsigned n_models;
std::vector<std::unique_ptr<Classifier>> models;
std::vector<double> significanceModels;
void trainModel(const torch::Tensor& weights) override;
std::vector<int> voting(torch::Tensor& y_pred);
public:
Ensemble();
virtual ~Ensemble() = default;
torch::Tensor predict(torch::Tensor& X) override;
std::vector<int> predict(std::vector<std::vector<int>>& X) override;
float score(torch::Tensor& X, torch::Tensor& y) override;
float score(std::vector<std::vector<int>>& X, std::vector<int>& y) override;
int getNumberOfNodes() const override;
int getNumberOfEdges() const override;
int getNumberOfStates() const override;
std::vector<std::string> show() const override;
std::vector<std::string> graph(const std::string& title) const override;
std::vector<std::string> topological_order() override
{
return std::vector<std::string>();
}
void dump_cpt() const override
{
}
};

src/BayesNet/FCBF.cc Normal file

@@ -0,0 +1,44 @@
#include "bayesnetUtils.h"
#include "FCBF.h"
namespace bayesnet {
FCBF::FCBF(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) :
FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold)
{
if (threshold < 1e-7) {
throw std::invalid_argument("Threshold cannot be less than 1e-7");
}
}
void FCBF::fit()
{
initialize();
computeSuLabels();
auto featureOrder = argsort(suLabels); // sort descending order
auto featureOrderCopy = featureOrder;
for (const auto& feature : featureOrder) {
// Don't self compare
featureOrderCopy.erase(featureOrderCopy.begin());
if (suLabels.at(feature) == 0.0) {
// The feature has been removed from the list
continue;
}
if (suLabels.at(feature) < threshold) {
break;
}
// Remove redundant features
for (const auto& featureCopy : featureOrderCopy) {
double value = computeSuFeatures(feature, featureCopy);
if (value >= suLabels.at(featureCopy)) {
// Remove feature from list
suLabels[featureCopy] = 0.0;
}
}
selectedFeatures.push_back(feature);
selectedScores.push_back(suLabels[feature]);
if (selectedFeatures.size() == maxFeatures) {
break;
}
}
fitted = true;
}
}
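A minimal usage sketch (hypothetical tensors and names; samples is the (n+1)xm integer tensor described in FCBF.h below):
// Sketch: select up to 5 features with uniform sample weights.
auto weights = torch::full({ samples.size(1) }, 1.0 / samples.size(1), torch::kDouble);
auto fcbf = bayesnet::FCBF(samples, features, "class", 5, classNumStates, weights, 1e-5);
fcbf.fit();
auto selected = fcbf.getFeatures(); // feature indices, highest symmetrical uncertainty first
auto scores = fcbf.getScores(); // their SU scores against the label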

src/BayesNet/FCBF.h Normal file

@@ -0,0 +1,17 @@
#ifndef FCBF_H
#define FCBF_H
#include <torch/torch.h>
#include <vector>
#include "FeatureSelect.h"
namespace bayesnet {
class FCBF : public FeatureSelect {
public:
// dataset is a (n+1)xm tensor of integers where dataset[-1] is the y vector
FCBF(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
virtual ~FCBF() {};
void fit() override;
private:
double threshold = -1;
};
}
#endif


@@ -0,0 +1,79 @@
#include "FeatureSelect.h"
#include <limits>
#include "bayesnetUtils.h"
namespace bayesnet {
FeatureSelect::FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights) :
Metrics(samples, features, className, classNumStates), maxFeatures(maxFeatures == 0 ? samples.size(0) - 1 : maxFeatures), weights(weights)
{
}
void FeatureSelect::initialize()
{
selectedFeatures.clear();
selectedScores.clear();
}
double FeatureSelect::symmetricalUncertainty(int a, int b)
{
/*
Compute symmetrical uncertainty: information gain (mutual information)
normalized by the entropies of the two variables, which compensates for
the bias toward high-cardinality features. Range [0, 1].
(https://www.sciencedirect.com/science/article/pii/S0020025519303603)
*/
auto x = samples.index({ a, "..." });
auto y = samples.index({ b, "..." });
auto mu = mutualInformation(x, y, weights);
auto hx = entropy(x, weights);
auto hy = entropy(y, weights);
return 2.0 * mu / (hx + hy);
}
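In formula form (the standard definition, which the code above evaluates with weighted entropies): $SU(X, Y) = \frac{2\,I(X;Y)}{H(X) + H(Y)}$, so $SU = 0$ for independent variables and $SU = 1$ when either variable fully determines the other.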
void FeatureSelect::computeSuLabels()
{
// Compute Symmetrical Uncertainty between features and labels
// https://en.wikipedia.org/wiki/Symmetric_uncertainty
for (int i = 0; i < features.size(); ++i) {
suLabels.push_back(symmetricalUncertainty(i, -1));
}
}
double FeatureSelect::computeSuFeatures(const int firstFeature, const int secondFeature)
{
// Compute Symmetrical Uncertainty between features
// https://en.wikipedia.org/wiki/Symmetric_uncertainty
try {
return suFeatures.at({ firstFeature, secondFeature });
}
catch (const std::out_of_range& e) {
double result = symmetricalUncertainty(firstFeature, secondFeature);
suFeatures[{firstFeature, secondFeature}] = result;
return result;
}
}
double FeatureSelect::computeMeritCFS()
{
double rcf = 0;
for (auto feature : selectedFeatures) {
rcf += suLabels[feature];
}
double rff = 0;
int n = selectedFeatures.size();
for (const auto& item : doCombinations(selectedFeatures)) {
rff += computeSuFeatures(item.first, item.second);
}
return rcf / sqrt(n + (n * n - n) * rff);
}
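For orientation (the textbook statement, not a claim about this exact implementation): the CFS merit of a subset $S$ of $k$ features is $M_S = \frac{k\,\overline{r_{cf}}}{\sqrt{k + k(k-1)\,\overline{r_{ff}}}}$, where $\overline{r_{cf}}$ is the mean feature-class correlation and $\overline{r_{ff}}$ the mean feature-feature correlation; the code above builds its ratio from the summed symmetrical uncertainties instead of the means.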
std::vector<int> FeatureSelect::getFeatures() const
{
if (!fitted) {
throw std::runtime_error("FeatureSelect not fitted");
}
return selectedFeatures;
}
std::vector<double> FeatureSelect::getScores() const
{
if (!fitted) {
throw std::runtime_error("FeatureSelect not fitted");
}
return selectedScores;
}
}


@@ -0,0 +1,30 @@
#ifndef FEATURE_SELECT_H
#define FEATURE_SELECT_H
#include <torch/torch.h>
#include <vector>
#include "BayesMetrics.h"
namespace bayesnet {
class FeatureSelect : public Metrics {
public:
// dataset is a (n+1)xm tensor of integers where dataset[-1] is the y vector
FeatureSelect(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights);
virtual ~FeatureSelect() {};
virtual void fit() = 0;
std::vector<int> getFeatures() const;
std::vector<double> getScores() const;
protected:
void initialize();
void computeSuLabels();
double computeSuFeatures(const int a, const int b);
double symmetricalUncertainty(int a, int b);
double computeMeritCFS();
const torch::Tensor& weights;
int maxFeatures;
std::vector<int> selectedFeatures;
std::vector<double> selectedScores;
std::vector<double> suLabels;
std::map<std::pair<int, int>, double> suFeatures;
bool fitted = false;
};
}
#endif

src/BayesNet/IWSS.cc Normal file

@@ -0,0 +1,47 @@
#include "IWSS.h"
#include <limits>
#include "bayesnetUtils.h"
namespace bayesnet {
IWSS::IWSS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold) :
FeatureSelect(samples, features, className, maxFeatures, classNumStates, weights), threshold(threshold)
{
if (threshold < 0 || threshold > .5) {
throw std::invalid_argument("Threshold has to be in [0, 0.5]");
}
}
void IWSS::fit()
{
initialize();
computeSuLabels();
auto featureOrder = argsort(suLabels); // sort descending order
auto featureOrderCopy = featureOrder;
// Add first and second features to result
// First with its own score
auto first_feature = pop_first(featureOrderCopy);
selectedFeatures.push_back(first_feature);
selectedScores.push_back(suLabels.at(first_feature));
// Second with the score of the candidates
selectedFeatures.push_back(pop_first(featureOrderCopy));
auto merit = computeMeritCFS();
selectedScores.push_back(merit);
for (const auto feature : featureOrderCopy) {
selectedFeatures.push_back(feature);
// Compute merit with selectedFeatures
auto meritNew = computeMeritCFS();
double delta = merit != 0.0 ? std::abs(merit - meritNew) / merit : 0.0;
if (meritNew > merit || delta < threshold) {
if (meritNew > merit) {
merit = meritNew;
}
selectedScores.push_back(meritNew);
} else {
selectedFeatures.pop_back();
break;
}
if (selectedFeatures.size() == maxFeatures) {
break;
}
}
fitted = true;
}
}
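The acceptance rule above can be stated compactly (same quantities as in the code): a candidate feature is kept while $merit_{new} > merit$ or $|merit - merit_{new}| / merit < threshold$; the first candidate that fails both conditions is dropped and the incremental search stops.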

src/BayesNet/IWSS.h Normal file

@@ -0,0 +1,17 @@
#ifndef IWSS_H
#define IWSS_H
#include <torch/torch.h>
#include <vector>
#include "FeatureSelect.h"
namespace bayesnet {
class IWSS : public FeatureSelect {
public:
// dataset is a (n+1)xm tensor of integers where dataset[-1] is the y vector
IWSS(const torch::Tensor& samples, const std::vector<std::string>& features, const std::string& className, const int maxFeatures, const int classNumStates, const torch::Tensor& weights, const double threshold);
virtual ~IWSS() {};
void fit() override;
private:
double threshold = -1;
};
}
#endif


@@ -1,10 +1,21 @@
#include "KDB.h"
namespace bayesnet {
KDB::KDB(int k, float theta) : Classifier(Network()), k(k), theta(theta)
{
validHyperparameters = { "k", "theta" };
}
void KDB::setHyperparameters(const nlohmann::json& hyperparameters)
{
if (hyperparameters.contains("k")) {
k = hyperparameters["k"];
}
if (hyperparameters.contains("theta")) {
theta = hyperparameters["theta"];
}
}
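A usage sketch of the hyperparameter hook (the JSON literal is illustrative; nlohmann::json is already part of the interface):
auto kdb = bayesnet::KDB(2);
nlohmann::json hyperparameters = { { "k", 3 }, { "theta", 0.1 } };
kdb.setHyperparameters(hyperparameters); // k and theta are overridden before fit()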
void KDB::buildModel(const torch::Tensor& weights)
{
/*
1. For each feature Xi, compute mutual information, I(X;C),
@@ -28,15 +39,16 @@ namespace bayesnet {
// 1. For each feature Xi, compute mutual information, I(X;C),
// where C is the class.
addNodes();
const torch::Tensor& y = dataset.index({ -1, "..." });
std::vector<double> mi;
for (auto i = 0; i < features.size(); i++) {
torch::Tensor firstFeature = dataset.index({ i, "..." });
mi.push_back(metrics.mutualInformation(firstFeature, y, weights));
}
// 2. Compute class conditional mutual information I(Xi;Xj|C), for each
auto conditionalEdgeWeights = metrics.conditionalEdge(weights);
// 3. Let the used variable list, S, be empty.
std::vector<int> S;
// 4. Let the DAG network being constructed, BN, begin with a single
// class node, C.
// 5. Repeat until S includes all domain features
@@ -54,9 +66,9 @@ namespace bayesnet {
S.push_back(idx);
}
}
void KDB::add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights)
{
auto n_edges = std::min(k, static_cast<int>(S.size()));
auto cond_w = clone(weights);
bool exit_cond = k == 0;
int num = 0;
@@ -68,7 +80,7 @@ namespace bayesnet {
model.addEdge(features[max_minfo], features[idx]);
num++;
}
catch (const std::invalid_argument& e) {
// Loops are not allowed
}
}
@@ -78,11 +90,11 @@ namespace bayesnet {
exit_cond = num == n_edges || candidates.size(0) == 0;
}
}
std::vector<std::string> KDB::graph(const std::string& title) const
{
std::string header{ title };
if (title == "KDB") {
header += " (k=" + to_string(k) + ", theta=" + to_string(theta) + ")";
header += " (k=" + std::to_string(k) + ", theta=" + std::to_string(theta) + ")";
}
return model.graph(header);
}


@@ -1,21 +1,21 @@
#ifndef KDB_H
#define KDB_H
#include <torch/torch.h>
#include "Classifier.h"
#include "bayesnetUtils.h"
namespace bayesnet {
class KDB : public Classifier {
private:
int k;
float theta;
void add_m_edges(int idx, std::vector<int>& S, torch::Tensor& weights);
protected:
void buildModel(const torch::Tensor& weights) override;
public:
explicit KDB(int k, float theta = 0.03);
virtual ~KDB() = default;
void setHyperparameters(const nlohmann::json& hyperparameters) override;
std::vector<std::string> graph(const std::string& name = "KDB") const override;
};
}
#endif


@@ -1,34 +1,28 @@
#include "KDBLd.h"
namespace bayesnet {
KDBLd::KDBLd(int k) : KDB(k), Proposal(dataset, features, className) {}
KDBLd& KDBLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, std::map<std::string, std::vector<int>>& states_)
{
// This first part should go in a Classifier method called fit_local_discretization or fit_float...
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Discretize the input data and build the states map from the discretized features
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal KDB structure, KDB::fit initializes the base Bayesian network
KDB::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
torch::Tensor KDBLd::predict(torch::Tensor& X)
{
auto Xt = prepareX(X);
return KDB::predict(Xt);
}
std::vector<std::string> KDBLd::graph(const std::string& name) const
{
return KDB::graph(name);
}


@@ -4,16 +4,15 @@
#include "Proposal.h"
namespace bayesnet {
class KDBLd : public KDB, public Proposal {
private:
public:
explicit KDBLd(int k);
virtual ~KDBLd() = default;
KDBLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states) override;
std::vector<std::string> graph(const std::string& name = "KDB") const override;
torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; };
};
}
#endif // !KDBLD_H


@@ -1,13 +1,13 @@
#include "Mst.h"
#include <vector>
#include <list>
/*
Based on the code from https://www.softwaretestinghelp.com/minimum-spanning-tree-tutorial/
*/
namespace bayesnet {
Graph::Graph(int V) : V(V), parent(std::vector<int>(V))
{
for (int i = 0; i < V; i++)
parent[i] = i;
@@ -34,36 +34,45 @@ namespace bayesnet {
void Graph::kruskal_algorithm()
{
// sort the edges ordered on decreasing weight
stable_sort(G.begin(), G.end(), [](const auto& left, const auto& right) {return left.first > right.first;});
for (int i = 0; i < G.size(); i++) {
int uSt, vEd;
uSt = find_set(G[i].second.first);
vEd = find_set(G[i].second.second);
if (uSt != vEd) {
T.push_back(G[i]); // add to the MST vector
union_set(uSt, vEd);
}
}
}
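A minimal sketch of the Graph API (hypothetical weights): because the edges are sorted by decreasing weight before the union-find pass, Kruskal here yields a maximum spanning tree.
bayesnet::Graph g(3);
g.addEdge(0, 1, 0.9f);
g.addEdge(1, 2, 0.7f);
g.addEdge(0, 2, 0.1f);
g.kruskal_algorithm();
auto mst = g.get_mst(); // keeps (0.9, (0,1)) and (0.7, (1,2)); edge (0,2) would close a cycle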
void Graph::display_mst()
{
cout << "Edge :" << " Weight" << endl;
std::cout << "Edge :" << " Weight" << std::endl;
for (int i = 0; i < T.size(); i++) {
std::cout << T[i].second.first << " - " << T[i].second.second << " : "
<< T[i].first;
std::cout << std::endl;
}
}
void insertElement(std::list<int>& variables, int variable)
{
if (std::find(variables.begin(), variables.end(), variable) == variables.end()) {
variables.push_front(variable);
}
}
std::vector<std::pair<int, int>> reorder(std::vector<std::pair<float, std::pair<int, int>>> T, int root_original)
{
// Create the edges of a DAG from the MST
// replacing unordered_set with list because unordered_set cannot guarantee the order of the elements inserted
auto result = std::vector<std::pair<int, int>>();
auto visited = std::vector<int>();
auto nextVariables = std::list<int>();
nextVariables.push_front(root_original);
while (nextVariables.size() > 0) {
int root = nextVariables.front();
nextVariables.pop_front();
for (int i = 0; i < T.size(); ++i) {
auto [weight, edge] = T[i];
auto [from, to] = edge;
@@ -71,10 +80,10 @@ namespace bayesnet {
visited.insert(visited.begin(), i);
if (from == root) {
result.push_back({ from, to });
insertElement(nextVariables, to);
} else {
result.push_back({ to, from });
insertElement(nextVariables, from);
}
}
}
@@ -94,12 +103,11 @@ namespace bayesnet {
return result;
}
MST::MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root) : features(features), weights(weights), root(root) {}
std::vector<std::pair<int, int>> MST::maximumSpanningTree()
{
auto num_features = features.size();
Graph g(num_features);
// Make a complete graph
for (int i = 0; i < num_features - 1; ++i) {
for (int j = i + 1; j < num_features; ++j) {


@@ -4,24 +4,22 @@
#include <vector>
#include <string>
namespace bayesnet {
class MST {
private:
torch::Tensor weights;
std::vector<std::string> features;
int root = 0;
public:
MST() = default;
MST(const std::vector<std::string>& features, const torch::Tensor& weights, const int root);
std::vector<std::pair<int, int>> maximumSpanningTree();
};
class Graph {
private:
int V; // number of nodes in graph
std::vector<std::pair<float, std::pair<int, int>>> G; // vector for graph
std::vector<std::pair<float, std::pair<int, int>>> T; // vector for mst
std::vector<int> parent;
public:
explicit Graph(int V);
void addEdge(int u, int v, float wt);
@@ -29,7 +27,7 @@ namespace bayesnet {
void union_set(int u, int v);
void kruskal_algorithm();
void display_mst();
std::vector<std::pair<float, std::pair<int, int>>> get_mst() { return T; }
};
}
#endif


@@ -3,24 +3,22 @@
#include "Network.h"
#include "bayesnetUtils.h"
namespace bayesnet {
Network::Network() : features(std::vector<std::string>()), className(""), classNumStates(0), fitted(false), laplaceSmoothing(0) {}
Network::Network(float maxT) : features(std::vector<std::string>()), className(""), classNumStates(0), maxThreads(maxT), fitted(false), laplaceSmoothing(0) {}
Network::Network(Network& other) : laplaceSmoothing(other.laplaceSmoothing), features(other.features), className(other.className), classNumStates(other.getClassNumStates()), maxThreads(other.getmaxThreads()), fitted(other.fitted)
{
for (const auto& node : other.nodes) {
nodes[node.first] = std::make_unique<Node>(*node.second);
}
}
void Network::initialize()
{
features = std::vector<std::string>();
className = "";
classNumStates = 0;
fitted = false;
nodes.clear();
samples = torch::Tensor();
}
float Network::getmaxThreads()
@@ -31,10 +29,10 @@ namespace bayesnet {
{
return samples;
}
void Network::addNode(const std::string& name)
{
if (name == "") {
throw invalid_argument("Node name cannot be empty");
throw std::invalid_argument("Node name cannot be empty");
}
if (nodes.find(name) != nodes.end()) {
return;
@@ -44,15 +42,15 @@ namespace bayesnet {
}
nodes[name] = std::make_unique<Node>(name);
}
std::vector<std::string> Network::getFeatures() const
{
return features;
}
int Network::getClassNumStates() const
{
return classNumStates;
}
int Network::getStates() const
{
int result = 0;
for (auto& node : nodes) {
@@ -60,11 +58,11 @@ namespace bayesnet {
}
return result;
}
std::string Network::getClassName() const
{
return className;
}
bool Network::isCyclic(const std::string& nodeId, std::unordered_set<std::string>& visited, std::unordered_set<std::string>& recStack)
{
if (visited.find(nodeId) == visited.end()) // if node hasn't been visited yet
{
@@ -80,124 +78,107 @@ namespace bayesnet {
recStack.erase(nodeId); // remove node from recursion stack before function ends
return false;
}
void Network::addEdge(const std::string& parent, const std::string& child)
{
if (nodes.find(parent) == nodes.end()) {
throw invalid_argument("Parent node " + parent + " does not exist");
throw std::invalid_argument("Parent node " + parent + " does not exist");
}
if (nodes.find(child) == nodes.end()) {
throw invalid_argument("Child node " + child + " does not exist");
throw std::invalid_argument("Child node " + child + " does not exist");
}
// Temporarily add edge to check for cycles
nodes[parent]->addChild(nodes[child].get());
nodes[child]->addParent(nodes[parent].get());
std::unordered_set<std::string> visited;
std::unordered_set<std::string> recStack;
if (isCyclic(nodes[child]->getName(), visited, recStack)) // if adding this edge forms a cycle
{
// remove problematic edge
nodes[parent]->removeChild(nodes[child].get());
nodes[child]->removeParent(nodes[parent].get());
throw invalid_argument("Adding this edge forms a cycle in the graph.");
throw std::invalid_argument("Adding this edge forms a cycle in the graph.");
}
}
std::map<std::string, std::unique_ptr<Node>>& Network::getNodes()
{
return nodes;
}
void Network::checkFitData(int n_samples, int n_features, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
if (weights.size(0) != n_samples) {
throw std::invalid_argument("Weights (" + std::to_string(weights.size(0)) + ") must have the same number of elements as samples (" + std::to_string(n_samples) + ") in Network::fit");
}
if (n_samples != n_samples_y) {
throw invalid_argument("X and y must have the same number of samples in Network::fit (" + to_string(n_samples) + " != " + to_string(n_samples_y) + ")");
throw std::invalid_argument("X and y must have the same number of samples in Network::fit (" + std::to_string(n_samples) + " != " + std::to_string(n_samples_y) + ")");
}
if (n_features != featureNames.size()) {
throw invalid_argument("X and features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(featureNames.size()) + ")");
throw std::invalid_argument("X and features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(featureNames.size()) + ")");
}
if (n_features != features.size() - 1) {
throw invalid_argument("X and local features must have the same number of features in Network::fit (" + to_string(n_features) + " != " + to_string(features.size() - 1) + ")");
throw std::invalid_argument("X and local features must have the same number of features in Network::fit (" + std::to_string(n_features) + " != " + std::to_string(features.size() - 1) + ")");
}
if (find(features.begin(), features.end(), className) == features.end()) {
throw invalid_argument("className not found in Network::features");
throw std::invalid_argument("className not found in Network::features");
}
for (auto& feature : featureNames) {
if (find(features.begin(), features.end(), feature) == features.end()) {
throw invalid_argument("Feature " + feature + " not found in Network::features");
throw std::invalid_argument("Feature " + feature + " not found in Network::features");
}
if (states.find(feature) == states.end()) {
throw std::invalid_argument("Feature " + feature + " not found in states");
}
}
}
void Network::setStates(const std::map<std::string, std::vector<int>>& states)
{
// Set states to every Node in the network
for_each(features.begin(), features.end(), [this, &states](const std::string& feature) {
nodes.at(feature)->setNumStates(states.at(feature).size());
});
classNumStates = nodes.at(className)->getNumStates();
}
// X comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{
checkFitData(X.size(1), X.size(0), y.size(0), featureNames, className, states, weights);
this->className = className;
torch::Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X , ytmp }, 0);
completeFit(states, weights);
}
void Network::fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{
checkFitData(samples.size(1), samples.size(0) - 1, samples.size(1), featureNames, className, states, weights);
this->className = className;
this->samples = samples;
completeFit(states, weights);
}
// input_data comes in nxm, where n is the number of features and m the number of samples
void Network::fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights_, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states)
{
const torch::Tensor weights = torch::tensor(weights_, torch::kFloat64);
checkFitData(input_data[0].size(), input_data.size(), labels.size(), featureNames, className, states, weights);
this->className = className;
// Build the tensor of samples: (n+1)xm, with the class appended as the last row
samples = torch::zeros({ static_cast<int>(input_data.size() + 1), static_cast<int>(input_data[0].size()) }, torch::kInt32);
for (int i = 0; i < featureNames.size(); ++i) {
samples.index_put_({ i, "..." }, torch::tensor(input_data[i], torch::kInt32));
}
samples.index_put_({ -1, "..." }, torch::tensor(labels, torch::kInt32));
completeFit(states, weights);
}
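A usage sketch of this overload (hypothetical toy problem; note the node insertion-order requirement documented in Network.h below):
bayesnet::Network net;
net.addNode("f0"); // nodes must follow the dataset column order
net.addNode("f1");
net.addNode("class");
net.addEdge("class", "f0");
net.addEdge("class", "f1");
std::vector<std::vector<int>> X = { { 0, 1, 1, 0 }, { 1, 0, 1, 0 } };
std::vector<int> y = { 0, 1, 1, 0 };
std::vector<double> w(4, 0.25); // uniform sample weights
std::map<std::string, std::vector<int>> states = { { "f0", { 0, 1 } }, { "f1", { 0, 1 } }, { "class", { 0, 1 } } };
net.fit(X, y, w, { "f0", "f1" }, "class", states);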
void Network::completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights)
{
setStates(states);
laplaceSmoothing = 1.0 / samples.size(1); // To use in CPT computation
std::vector<std::thread> threads;
for (auto& node : nodes) {
threads.emplace_back([this, &node, &weights]() {
node.second->computeCPT(samples, features, laplaceSmoothing, weights);
});
}
for (auto& thread : threads) {
thread.join();
@@ -207,12 +188,12 @@ namespace bayesnet {
torch::Tensor Network::predict_tensor(const torch::Tensor& samples, const bool proba)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict()");
throw std::logic_error("You must call fit() before calling predict()");
}
torch::Tensor result;
result = torch::zeros({ samples.size(1), classNumStates }, torch::kFloat64);
for (int i = 0; i < samples.size(1); ++i) {
const torch::Tensor sample = samples.index({ "...", i });
auto psample = predict_sample(sample);
auto temp = torch::tensor(psample, torch::kFloat64);
// result.index_put_({ i, "..." }, torch::tensor(predict_sample(sample), torch::kFloat64));
@@ -220,36 +201,35 @@ namespace bayesnet {
}
if (proba)
return result;
else
return result.argmax(1);
}
// Return mxn tensor of probabilities
torch::Tensor Network::predict_proba(const torch::Tensor& samples)
{
return predict_tensor(samples, true);
}
// Return mx1 tensor of predictions
torch::Tensor Network::predict(const torch::Tensor& samples)
{
return predict_tensor(samples, false);
}
// Return mx1 vector of predictions
// tsamples is an nxm vector of samples
std::vector<int> Network::predict(const std::vector<std::vector<int>>& tsamples)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict()");
throw std::logic_error("You must call fit() before calling predict()");
}
std::vector<int> predictions;
std::vector<int> sample;
for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear();
for (int col = 0; col < tsamples.size(); ++col) {
sample.push_back(tsamples[col][row]);
}
std::vector<double> classProbabilities = predict_sample(sample);
// Find the class with the maximum posterior probability
auto maxElem = max_element(classProbabilities.begin(), classProbabilities.end());
int predictedClass = distance(classProbabilities.begin(), maxElem);
@@ -257,14 +237,14 @@ namespace bayesnet {
}
return predictions;
}
// Return mxn vector of probabilities
std::vector<std::vector<double>> Network::predict_proba(const std::vector<std::vector<int>>& tsamples)
{
if (!fitted) {
throw logic_error("You must call fit() before calling predict_proba()");
throw std::logic_error("You must call fit() before calling predict_proba()");
}
std::vector<std::vector<double>> predictions;
std::vector<int> sample;
for (int row = 0; row < tsamples[0].size(); ++row) {
sample.clear();
for (int col = 0; col < tsamples.size(); ++col) {
@@ -274,9 +254,9 @@ namespace bayesnet {
}
return predictions;
}
double Network::score(const std::vector<std::vector<int>>& tsamples, const std::vector<int>& labels)
{
std::vector<int> y_pred = predict(tsamples);
int correct = 0;
for (int i = 0; i < y_pred.size(); ++i) {
if (y_pred[i] == labels[i]) {
@@ -285,35 +265,35 @@ namespace bayesnet {
}
return (double)correct / y_pred.size();
}
// Return 1xn vector of probabilities
std::vector<double> Network::predict_sample(const std::vector<int>& sample)
{
// Ensure the sample size is equal to the number of features
if (sample.size() != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size()) +
") does not match the number of features (" + to_string(features.size() - 1) + ")");
throw std::invalid_argument("Sample size (" + std::to_string(sample.size()) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(); ++i) {
evidence[features[i]] = sample[i];
}
return exactInference(evidence);
}
// Return 1xn vector of probabilities
std::vector<double> Network::predict_sample(const torch::Tensor& sample)
{
// Ensure the sample size is equal to the number of features
if (sample.size(0) != features.size() - 1) {
throw invalid_argument("Sample size (" + to_string(sample.size(0)) +
") does not match the number of features (" + to_string(features.size() - 1) + ")");
throw std::invalid_argument("Sample size (" + std::to_string(sample.size(0)) +
") does not match the number of features (" + std::to_string(features.size() - 1) + ")");
}
std::map<std::string, int> evidence;
for (int i = 0; i < sample.size(0); ++i) {
evidence[features[i]] = sample[i].item<int>();
}
return exactInference(evidence);
}
double Network::computeFactor(std::map<std::string, int>& completeEvidence)
{
double result = 1.0;
for (auto& node : getNodes()) {
@@ -321,17 +301,17 @@ namespace bayesnet {
}
return result;
}
std::vector<double> Network::exactInference(std::map<std::string, int>& evidence)
{
std::vector<double> result(classNumStates, 0.0);
std::vector<std::thread> threads;
std::mutex mtx;
for (int i = 0; i < classNumStates; ++i) {
threads.emplace_back([this, &result, &evidence, i, &mtx]() {
auto completeEvidence = std::map<std::string, int>(evidence);
completeEvidence[getClassName()] = i;
double factor = computeFactor(completeEvidence);
std::lock_guard<std::mutex> lock(mtx);
result[i] = factor;
});
}
@@ -340,15 +320,15 @@ namespace bayesnet {
}
// Normalize result
double sum = accumulate(result.begin(), result.end(), 0.0);
transform(result.begin(), result.end(), result.begin(), [sum](const double& value) { return value / sum; });
return result;
}
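In formula form, exactInference is plain enumeration over the class variable (the standard chain-rule computation, stated here for orientation): $P(c \mid e) \propto \prod_{n \in nodes} P(x_n \mid pa(n))$, evaluated with the evidence extended by class = c via computeFactor, then normalized across the class values; one thread evaluates each class value and the accumulate/transform pair above performs the normalization.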
std::vector<std::string> Network::show() const
{
std::vector<std::string> result;
// Draw the network
for (auto& node : nodes) {
std::string line = node.first + " -> ";
for (auto child : node.second->getChildren()) {
line += child->getName() + ", ";
}
@@ -356,12 +336,12 @@ namespace bayesnet {
}
return result;
}
std::vector<std::string> Network::graph(const std::string& title) const
{
auto output = std::vector<std::string>();
auto prefix = "digraph BayesNet {\nlabel=<BayesNet ";
auto suffix = ">\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n";
std::string header = prefix + title + suffix;
output.push_back(header);
for (auto& node : nodes) {
auto result = node.second->graph(className);
@@ -370,9 +350,9 @@ namespace bayesnet {
output.push_back("}\n");
return output;
}
std::vector<std::pair<std::string, std::string>> Network::getEdges() const
{
auto edges = std::vector<std::pair<std::string, std::string>>();
for (const auto& node : nodes) {
auto head = node.first;
for (const auto& child : node.second->getChildren()) {
@@ -382,13 +362,16 @@ namespace bayesnet {
}
return edges;
}
int Network::getNumEdges() const
{
return getEdges().size();
}
std::vector<std::string> Network::topological_sort()
{
/* Check that all the fathers of every node precede the node */
auto result = features;
result.erase(remove(result.begin(), result.end(), className), result.end());
bool ending{ false };
int idx = 0;
while (!ending) {
ending = true;
for (auto feature : features) {
@@ -410,20 +393,21 @@ namespace bayesnet {
ending = false;
}
} else {
throw logic_error("Error in topological sort because of node " + feature + " is not in result");
throw std::logic_error("Error in topological sort because of node " + feature + " is not in result");
}
} else {
throw logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
throw std::logic_error("Error in topological sort because of node father " + fatherName + " is not in result");
}
}
}
}
return result;
}
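A sketch of the guarantee (hypothetical structure, reusing the toy network from the fit() sketch above plus an extra edge f0 -> f1):
net.addEdge("f0", "f1");
auto order = net.topological_sort(); // {"f0", "f1"}: the class is removed and every father precedes its children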
void Network::dump_cpt() const
{
for (auto& node : nodes) {
cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << endl;
std::cout << "* " << node.first << ": (" << node.second->getNumStates() << ") : " << node.second->getCPT().sizes() << std::endl;
std::cout << node.second->getCPT() << std::endl;
}
}
}


@@ -3,62 +3,61 @@
#include "Node.h"
#include <map>
#include <vector>
#include "config.h"
namespace bayesnet {
class Network {
private:
std::map<std::string, std::unique_ptr<Node>> nodes;
bool fitted;
float maxThreads = 0.95;
int classNumStates;
std::vector<std::string> features; // Including classname
std::string className;
double laplaceSmoothing;
torch::Tensor samples; // nxm tensor used to fit the model
bool isCyclic(const std::string&, std::unordered_set<std::string>&, std::unordered_set<std::string>&);
std::vector<double> predict_sample(const std::vector<int>&);
std::vector<double> predict_sample(const torch::Tensor&);
std::vector<double> exactInference(std::map<std::string, int>&);
double computeFactor(std::map<std::string, int>&);
void completeFit(const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void checkFitData(int n_features, int n_samples, int n_samples_y, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights);
void setStates(const std::map<std::string, std::vector<int>>&);
public:
Network();
explicit Network(float, int);
explicit Network(float);
explicit Network(Network&);
~Network() = default;
torch::Tensor& getSamples();
float getmaxThreads();
void addNode(const string&);
void addEdge(const string&, const string&);
map<string, std::unique_ptr<Node>>& getNodes();
vector<string> getFeatures();
int getStates();
vector<pair<string, string>> getEdges();
int getClassNumStates();
string getClassName();
void fit(const vector<vector<int>>&, const vector<int>&, const vector<string>&, const string&);
void fit(torch::Tensor&, torch::Tensor&, const vector<string>&, const string&);
vector<int> predict(const vector<vector<int>>&); // Return mx1 vector of predictions
void addNode(const std::string&);
void addEdge(const std::string&, const std::string&);
std::map<std::string, std::unique_ptr<Node>>& getNodes();
std::vector<std::string> getFeatures() const;
int getStates() const;
std::vector<std::pair<std::string, std::string>> getEdges() const;
int getNumEdges() const;
int getClassNumStates() const;
std::string getClassName() const;
/*
        Notice: Nodes have to be inserted in the same order as they appear in the dataset, i.e., the first node is the first column, and so on.
*/
void fit(const std::vector<std::vector<int>>& input_data, const std::vector<int>& labels, const std::vector<double>& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
void fit(const torch::Tensor& X, const torch::Tensor& y, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
void fit(const torch::Tensor& samples, const torch::Tensor& weights, const std::vector<std::string>& featureNames, const std::string& className, const std::map<std::string, std::vector<int>>& states);
std::vector<int> predict(const std::vector<std::vector<int>>&); // Return mx1 std::vector of predictions
torch::Tensor predict(const torch::Tensor&); // Return mx1 tensor of predictions
//Computes the conditional edge weight of variable index u and v conditioned on class_node
torch::Tensor conditionalEdgeWeight();
torch::Tensor predict_tensor(const torch::Tensor& samples, const bool proba);
vector<vector<double>> predict_proba(const vector<vector<int>>&); // Return mxn vector of probabilities
std::vector<std::vector<double>> predict_proba(const std::vector<std::vector<int>>&); // Return mxn std::vector of probabilities
torch::Tensor predict_proba(const torch::Tensor&); // Return mxn tensor of probabilities
double score(const vector<vector<int>>&, const vector<int>&);
vector<string> topological_sort();
vector<string> show();
vector<string> graph(const string& title); // Returns a vector of strings representing the graph in graphviz format
double score(const std::vector<std::vector<int>>&, const std::vector<int>&);
std::vector<std::string> topological_sort();
std::vector<std::string> show() const;
std::vector<std::string> graph(const std::string& title) const; // Returns a std::vector of std::strings representing the graph in graphviz format
void initialize();
void dump_cpt();
inline string version() { return "0.1.0"; }
void dump_cpt() const;
inline std::string version() { return { project_version.begin(), project_version.end() }; }
};
}
#endif
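
A minimal sketch of the edge-related API, including the getNumEdges() helper this changeset adds (not taken from the diff; the node names are made up):

#include "Network.h"
#include <iostream>

int main()
{
    bayesnet::Network net;
    net.addNode("A");
    net.addNode("B");
    net.addNode("class");
    net.addEdge("class", "A");
    net.addEdge("class", "B");
    net.addEdge("A", "B");
    // getNumEdges() is just getEdges().size(), so this prints 3
    std::cout << "Edges: " << net.getNumEdges() << std::endl;
    for (const auto& [from, to] : net.getEdges()) {
        std::cout << from << " -> " << to << std::endl;
    }
    return 0;
}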


@@ -3,7 +3,7 @@
namespace bayesnet {
Node::Node(const std::string& name)
: name(name), numStates(0), cpTable(torch::Tensor()), parents(vector<Node*>()), children(vector<Node*>())
: name(name), numStates(0), cpTable(torch::Tensor()), parents(std::vector<Node*>()), children(std::vector<Node*>())
{
}
void Node::clear()
@@ -14,7 +14,7 @@ namespace bayesnet {
dimensions.clear();
numStates = 0;
}
string Node::getName() const
std::string Node::getName() const
{
return name;
}
@@ -34,11 +34,11 @@ namespace bayesnet {
{
children.push_back(child);
}
vector<Node*>& Node::getParents()
std::vector<Node*>& Node::getParents()
{
return parents;
}
vector<Node*>& Node::getChildren()
std::vector<Node*>& Node::getChildren()
{
return children;
}
@@ -63,28 +63,28 @@ namespace bayesnet {
*/
unsigned Node::minFill()
{
unordered_set<string> neighbors;
std::unordered_set<std::string> neighbors;
for (auto child : children) {
neighbors.emplace(child->getName());
}
for (auto parent : parents) {
neighbors.emplace(parent->getName());
}
auto source = vector<string>(neighbors.begin(), neighbors.end());
auto source = std::vector<std::string>(neighbors.begin(), neighbors.end());
return combinations(source).size();
}
vector<pair<string, string>> Node::combinations(const vector<string>& source)
std::vector<std::pair<std::string, std::string>> Node::combinations(const std::vector<std::string>& source)
{
vector<pair<string, string>> result;
std::vector<std::pair<std::string, std::string>> result;
for (int i = 0; i < source.size(); ++i) {
string temp = source[i];
std::string temp = source[i];
for (int j = i + 1; j < source.size(); ++j) {
result.push_back({ temp, source[j] });
}
}
return result;
}
void Node::computeCPT(map<string, vector<int>>& dataset, const int laplaceSmoothing)
void Node::computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights)
{
dimensions.clear();
// Get dimensions of the CPT
@@ -94,27 +94,39 @@ namespace bayesnet {
// Create a tensor of zeros with the dimensions of the CPT
cpTable = torch::zeros(dimensions, torch::kFloat) + laplaceSmoothing;
// Fill table with counts
for (int n_sample = 0; n_sample < dataset[name].size(); ++n_sample) {
torch::List<c10::optional<torch::Tensor>> coordinates;
coordinates.push_back(torch::tensor(dataset[name][n_sample]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&dataset, &n_sample](const auto& parent) { return torch::tensor(dataset[parent->getName()][n_sample]); });
auto pos = find(features.begin(), features.end(), name);
if (pos == features.end()) {
throw std::logic_error("Feature " + name + " not found in dataset");
}
int name_index = pos - features.begin();
for (int n_sample = 0; n_sample < dataset.size(1); ++n_sample) {
c10::List<c10::optional<at::Tensor>> coordinates;
coordinates.push_back(dataset.index({ name_index, n_sample }));
for (auto parent : parents) {
pos = find(features.begin(), features.end(), parent->getName());
if (pos == features.end()) {
throw std::logic_error("Feature parent " + parent->getName() + " not found in dataset");
}
int parent_index = pos - features.begin();
coordinates.push_back(dataset.index({ parent_index, n_sample }));
}
// Increment the count of the corresponding coordinate
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + 1);
cpTable.index_put_({ coordinates }, cpTable.index({ coordinates }) + weights.index({ n_sample }).item<double>());
}
// Normalize the counts
cpTable = cpTable / cpTable.sum(0);
}
float Node::getFactorValue(map<string, int>& evidence)
float Node::getFactorValue(std::map<std::string, int>& evidence)
{
torch::List<c10::optional<torch::Tensor>> coordinates;
c10::List<c10::optional<at::Tensor>> coordinates;
// following predetermined order of indices in the cpTable (see Node.h)
coordinates.push_back(torch::tensor(evidence[name]));
transform(parents.begin(), parents.end(), back_inserter(coordinates), [&evidence](const auto& parent) { return torch::tensor(evidence[parent->getName()]); });
coordinates.push_back(at::tensor(evidence[name]));
transform(parents.begin(), parents.end(), std::back_inserter(coordinates), [&evidence](const auto& parent) { return at::tensor(evidence[parent->getName()]); });
return cpTable.index({ coordinates }).item<float>();
}
vector<string> Node::graph(const string& className)
std::vector<std::string> Node::graph(const std::string& className)
{
auto output = vector<string>();
auto output = std::vector<std::string>();
auto suffix = name == className ? ", fontcolor=red, fillcolor=lightblue, style=filled " : "";
output.push_back(name + " [shape=circle" + suffix + "] \n");
transform(children.begin(), children.end(), back_inserter(output), [this](const auto& child) { return name + " -> " + child->getName(); });


@@ -5,33 +5,32 @@
#include <vector>
#include <string>
namespace bayesnet {
using namespace std;
class Node {
private:
string name;
vector<Node*> parents;
vector<Node*> children;
std::string name;
std::vector<Node*> parents;
std::vector<Node*> children;
int numStates; // number of states of the variable
torch::Tensor cpTable; // Order of indices is 0-> node variable, 1-> 1st parent, 2-> 2nd parent, ...
vector<int64_t> dimensions; // dimensions of the cpTable
std::vector<int64_t> dimensions; // dimensions of the cpTable
std::vector<std::pair<std::string, std::string>> combinations(const std::vector<std::string>&);
public:
vector<pair<string, string>> combinations(const vector<string>&);
explicit Node(const string&);
explicit Node(const std::string&);
void clear();
void addParent(Node*);
void addChild(Node*);
void removeParent(Node*);
void removeChild(Node*);
string getName() const;
vector<Node*>& getParents();
vector<Node*>& getChildren();
std::string getName() const;
std::vector<Node*>& getParents();
std::vector<Node*>& getChildren();
torch::Tensor& getCPT();
void computeCPT(map<string, vector<int>>&, const int);
void computeCPT(const torch::Tensor& dataset, const std::vector<std::string>& features, const double laplaceSmoothing, const torch::Tensor& weights);
int getNumStates() const;
void setNumStates(int);
unsigned minFill();
        vector<string> graph(const string& className); // Returns a vector of strings representing the graph in graphviz format
float getFactorValue(map<string, int>&);
        std::vector<std::string> graph(const std::string& className); // Returns a std::vector of std::strings representing the graph in graphviz format
float getFactorValue(std::map<std::string, int>&);
};
}
#endif
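
A sketch of the new weighted computeCPT signature (not taken from the diff), assuming the CPT dimensions are derived from the numStates of the node and its parents, as the implementation above suggests:

#include "Node.h"
#include <torch/torch.h>
#include <iostream>
#include <vector>

int main()
{
    // Binary child C with one binary parent A
    bayesnet::Node child("C"), parent("A");
    child.setNumStates(2);
    parent.setNumStates(2);
    child.addParent(&parent);
    // Dataset rows follow the features vector: row 0 is C, row 1 is A
    std::vector<std::string> features = { "C", "A" };
    auto dataset = torch::tensor({ 0, 1, 1, 0, 0, 0, 1, 1 }, torch::kInt32).reshape({ 2, 4 });
    auto weights = torch::full({ 4 }, 0.25, torch::kDouble); // uniform sample weights
    child.computeCPT(dataset, features, 1.0, weights);
    // cpTable index order: 0 -> node variable, 1 -> 1st parent (see the comment above)
    std::cout << child.getCPT() << std::endl;
    return 0;
}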


@@ -2,21 +2,30 @@
#include "ArffFiles.h"
namespace bayesnet {
Proposal::Proposal(vector<vector<int>>& Xv_, vector<int>& yv_, vector<string>& features_, string& className_) : Xv(Xv_), yv(yv_), pFeatures(features_), pClassName(className_) {}
Proposal::Proposal(torch::Tensor& dataset_, std::vector<std::string>& features_, std::string& className_) : pDataset(dataset_), pFeatures(features_), pClassName(className_) {}
Proposal::~Proposal()
{
for (auto& [key, value] : discretizers) {
delete value;
}
}
void Proposal::localDiscretizationProposal(map<string, vector<int>>& states, Network& model)
void Proposal::checkInput(const torch::Tensor& X, const torch::Tensor& y)
{
if (!torch::is_floating_point(X)) {
throw std::invalid_argument("X must be a floating point tensor");
}
if (torch::is_floating_point(y)) {
throw std::invalid_argument("y must be an integer tensor");
}
}
map<std::string, std::vector<int>> Proposal::localDiscretizationProposal(const map<std::string, std::vector<int>>& oldStates, Network& model)
{
        // The order of local discretization matters; a plain 0, 1, 2... ordering is not valid,
        // although we re-discretize the features after the local discretization of every feature
auto order = model.topological_sort();
auto& nodes = model.getNodes();
vector<int> indicesToReDiscretize;
auto n_samples = Xf.size(1);
map<std::string, std::vector<int>> states = oldStates;
std::vector<int> indicesToReDiscretize;
bool upgrade = false; // Flag to check if we need to upgrade the model
for (auto feature : order) {
auto nodeParents = nodes[feature]->getParents();
@@ -24,76 +33,75 @@ namespace bayesnet {
upgrade = true;
int index = find(pFeatures.begin(), pFeatures.end(), feature) - pFeatures.begin();
indicesToReDiscretize.push_back(index); // We need to re-discretize this feature
vector<string> parents;
std::vector<std::string> parents;
transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); });
// Remove class as parent as it will be added later
parents.erase(remove(parents.begin(), parents.end(), pClassName), parents.end());
// Get the indices of the parents
vector<int> indices;
std::vector<int> indices;
indices.push_back(-1); // Add class index
transform(parents.begin(), parents.end(), back_inserter(indices), [&](const auto& p) {return find(pFeatures.begin(), pFeatures.end(), p) - pFeatures.begin(); });
// Now we fit the discretizer of the feature, conditioned on its parents and the class i.e. discretizer.fit(X[index], X[indices] + y)
vector<string> yJoinParents;
transform(yv.begin(), yv.end(), back_inserter(yJoinParents), [&](const auto& p) {return to_string(p); });
std::vector<std::string> yJoinParents(Xf.size(1));
for (auto idx : indices) {
for (int i = 0; i < n_samples; ++i) {
yJoinParents[i] += to_string(Xv[idx][i]);
for (int i = 0; i < Xf.size(1); ++i) {
yJoinParents[i] += to_string(pDataset.index({ idx, i }).item<int>());
}
}
auto arff = ArffFiles();
auto yxv = arff.factorize(yJoinParents);
auto xvf_ptr = Xf.index({ index }).data_ptr<float>();
auto xvf = vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
auto xvf = std::vector<mdlp::precision_t>(xvf_ptr, xvf_ptr + Xf.size(1));
discretizers[feature]->fit(xvf, yxv);
//
//
//
// auto tmp = discretizers[feature]->transform(xvf);
// Xv[index] = tmp;
// auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
// iota(xStates.begin(), xStates.end(), 0);
// //Update new states of the feature/node
// states[feature] = xStates;
}
if (upgrade) {
// Discretize again X (only the affected indices) with the new fitted discretizers
for (auto index : indicesToReDiscretize) {
auto Xt_ptr = Xf.index({ index }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
Xv[index] = discretizers[pFeatures[index]]->transform(Xt);
auto xStates = vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
pDataset.index_put_({ index, "..." }, torch::tensor(discretizers[pFeatures[index]]->transform(Xt)));
auto xStates = std::vector<int>(discretizers[pFeatures[index]]->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
//Update new states of the feature/node
states[pFeatures[index]] = xStates;
}
const torch::Tensor weights = torch::full({ pDataset.size(1) }, 1.0 / pDataset.size(1), torch::kDouble);
model.fit(pDataset, weights, pFeatures, pClassName, states);
}
return states;
}
void Proposal::fit_local_discretization(map<string, vector<int>>& states, torch::Tensor& y)
map<std::string, std::vector<int>> Proposal::fit_local_discretization(const torch::Tensor& y)
{
// Sharing Xv and yv with Classifier
Xv = vector<vector<int>>();
yv = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
// Discretize the continuous input data and build pDataset (Classifier::dataset)
int m = Xf.size(1);
int n = Xf.size(0);
map<std::string, std::vector<int>> states;
pDataset = torch::zeros({ n + 1, m }, torch::kInt32);
auto yv = std::vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + y.size(0));
// discretize input data by feature(row)
for (int i = 0; i < pFeatures.size(); ++i) {
for (auto i = 0; i < pFeatures.size(); ++i) {
auto* discretizer = new mdlp::CPPFImdlp();
auto Xt_ptr = Xf.index({ i }).data_ptr<float>();
auto Xt = vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
auto Xt = std::vector<float>(Xt_ptr, Xt_ptr + Xf.size(1));
discretizer->fit(Xt, yv);
Xv.push_back(discretizer->transform(Xt));
auto xStates = vector<int>(discretizer->getCutPoints().size() + 1);
pDataset.index_put_({ i, "..." }, torch::tensor(discretizer->transform(Xt)));
auto xStates = std::vector<int>(discretizer->getCutPoints().size() + 1);
iota(xStates.begin(), xStates.end(), 0);
states[pFeatures[i]] = xStates;
discretizers[pFeatures[i]] = discretizer;
}
int n_classes = torch::max(y).item<int>() + 1;
auto yStates = vector<int>(n_classes);
auto yStates = std::vector<int>(n_classes);
iota(yStates.begin(), yStates.end(), 0);
states[pClassName] = yStates;
pDataset.index_put_({ n, "..." }, y);
return states;
}
torch::Tensor Proposal::prepareX(torch::Tensor& X)
{
auto Xtd = torch::zeros_like(X, torch::kInt32);
for (int i = 0; i < X.size(0); ++i) {
auto Xt = vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
auto Xt = std::vector<float>(X[i].data_ptr<float>(), X[i].data_ptr<float>() + X.size(1));
auto Xd = discretizers[pFeatures[i]]->transform(Xt);
Xtd.index_put_({ i }, torch::tensor(Xd, torch::kInt32));
}


@@ -10,19 +10,20 @@
namespace bayesnet {
class Proposal {
public:
Proposal(vector<vector<int>>& Xv_, vector<int>& yv_, vector<string>& features_, string& className_);
Proposal(torch::Tensor& pDataset, std::vector<std::string>& features_, std::string& className_);
virtual ~Proposal();
protected:
void checkInput(const torch::Tensor& X, const torch::Tensor& y);
torch::Tensor prepareX(torch::Tensor& X);
void localDiscretizationProposal(map<string, vector<int>>& states, Network& model);
void fit_local_discretization(map<string, vector<int>>& states, torch::Tensor& y);
map<std::string, std::vector<int>> localDiscretizationProposal(const map<std::string, std::vector<int>>& states, Network& model);
map<std::string, std::vector<int>> fit_local_discretization(const torch::Tensor& y);
torch::Tensor Xf; // X continuous nxm tensor
map<string, mdlp::CPPFImdlp*> discretizers;
torch::Tensor y; // y discrete nx1 tensor
map<std::string, mdlp::CPPFImdlp*> discretizers;
private:
vector<string>& pFeatures;
string& pClassName;
vector<vector<int>>& Xv; // X discrete nxm vector
vector<int>& yv;
torch::Tensor& pDataset; // (n+1)xm tensor
std::vector<std::string>& pFeatures;
std::string& pClassName;
};
}
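
The discretization step that Proposal wraps can be exercised on its own. A sketch of one feature passing through the mdlp discretizer (not taken from the diff), assuming mdlp::precision_t is float, as the data_ptr<float> casts above imply:

#include "CPPFImdlp.h"
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    // One continuous feature and its labels, mirroring fit_local_discretization
    std::vector<float> Xt = { 0.1f, 0.2f, 1.7f, 1.9f, 0.3f, 1.8f };
    std::vector<int> yv = { 0, 0, 1, 1, 0, 1 };
    mdlp::CPPFImdlp discretizer;
    discretizer.fit(Xt, yv);
    // One state per interval: number of cut points + 1, labelled 0..n
    auto xStates = std::vector<int>(discretizer.getCutPoints().size() + 1);
    std::iota(xStates.begin(), xStates.end(), 0);
    for (auto v : discretizer.transform(Xt)) {
        std::cout << v << " "; // discrete bin index per sample
    }
    std::cout << std::endl;
    return 0;
}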


@@ -4,7 +4,7 @@ namespace bayesnet {
SPODE::SPODE(int root) : Classifier(Network()), root(root) {}
void SPODE::train()
void SPODE::buildModel(const torch::Tensor& weights)
{
// 0. Add all nodes to the model
addNodes();
@@ -17,7 +17,7 @@ namespace bayesnet {
}
}
}
vector<string> SPODE::graph(const string& name)
std::vector<std::string> SPODE::graph(const std::string& name) const
{
return model.graph(name);
}


@@ -7,11 +7,11 @@ namespace bayesnet {
private:
int root;
protected:
void train() override;
void buildModel(const torch::Tensor& weights) override;
public:
explicit SPODE(int root);
virtual ~SPODE() {};
vector<string> graph(const string& name = "SPODE") override;
virtual ~SPODE() = default;
std::vector<std::string> graph(const std::string& name = "SPODE") const override;
};
}
#endif


@@ -1,34 +1,46 @@
#include "SPODELd.h"
namespace bayesnet {
using namespace std;
SPODELd::SPODELd(int root) : SPODE(root), Proposal(SPODE::Xv, SPODE::yv, features, className) {}
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
SPODELd::SPODELd(int root) : SPODE(root), Proposal(dataset, features, className) {}
SPODELd& SPODELd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
        // This first part should go in a Classifier method called fit_local_discretization or fit_float...
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
fit_local_discretization(states, y);
generateTensorXFromVector();
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(SPODE::Xv, SPODE::yv, features, className, states);
localDiscretizationProposal(states, model);
generateTensorXFromVector();
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X, ytmp }, 0);
model.fit(SPODE::Xv, SPODE::yv, features, className);
SPODE::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor SPODELd::predict(Tensor& X)
SPODELd& SPODELd::fit(torch::Tensor& dataset, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
if (!torch::is_floating_point(dataset)) {
throw std::runtime_error("Dataset must be a floating point tensor");
}
Xf = dataset.index({ torch::indexing::Slice(0, dataset.size(0) - 1), "..." }).clone();
y = dataset.index({ -1, "..." }).clone();
features = features_;
className = className_;
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal SPODE structure, SPODE::fit initializes the base Bayesian network
SPODE::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
torch::Tensor SPODELd::predict(torch::Tensor& X)
{
auto Xt = prepareX(X);
return SPODE::predict(Xt);
}
vector<string> SPODELd::graph(const string& name)
std::vector<std::string> SPODELd::graph(const std::string& name) const
{
return SPODE::graph(name);
}


@@ -4,16 +4,15 @@
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class SPODELd : public SPODE, public Proposal {
private:
public:
explicit SPODELd(int root);
virtual ~SPODELd() = default;
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "SPODE") override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
SPODELd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
SPODELd& fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
std::vector<std::string> graph(const std::string& name = "SPODE") const override;
torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; };
};
}
#endif // !SPODELD_H


@@ -1,29 +1,27 @@
#include "TAN.h"
namespace bayesnet {
using namespace torch;
TAN::TAN() : Classifier(Network()) {}
void TAN::train()
void TAN::buildModel(const torch::Tensor& weights)
{
// 0. Add all nodes to the model
addNodes();
// 1. Compute mutual information between each feature and the class and set the root node
        // to the feature with the highest mutual information with the class
auto mi = vector <pair<int, float >>();
Tensor class_dataset = samples.index({ -1, "..." });
auto mi = std::vector <std::pair<int, float >>();
torch::Tensor class_dataset = dataset.index({ -1, "..." });
for (int i = 0; i < static_cast<int>(features.size()); ++i) {
Tensor feature_dataset = samples.index({ i, "..." });
auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset);
torch::Tensor feature_dataset = dataset.index({ i, "..." });
auto mi_value = metrics.mutualInformation(class_dataset, feature_dataset, weights);
mi.push_back({ i, mi_value });
}
sort(mi.begin(), mi.end(), [](const auto& left, const auto& right) {return left.second < right.second;});
auto root = mi[mi.size() - 1].first;
        // 2. Compute the conditional edge weights between every pair of features, given the class
auto weights = metrics.conditionalEdge();
auto weights_matrix = metrics.conditionalEdge(weights);
// 3. Compute the maximum spanning tree
auto mst = metrics.maximumSpanningTree(features, weights, root);
auto mst = metrics.maximumSpanningTree(features, weights_matrix, root);
// 4. Add edges from the maximum spanning tree to the model
for (auto i = 0; i < mst.size(); ++i) {
auto [from, to] = mst[i];
@@ -34,7 +32,7 @@ namespace bayesnet {
model.addEdge(className, feature);
}
}
vector<string> TAN::graph(const string& title)
std::vector<std::string> TAN::graph(const std::string& title) const
{
return model.graph(title);
}


@@ -2,16 +2,14 @@
#define TAN_H
#include "Classifier.h"
namespace bayesnet {
using namespace std;
using namespace torch;
class TAN : public Classifier {
private:
protected:
void train() override;
void buildModel(const torch::Tensor& weights) override;
public:
TAN();
virtual ~TAN() {};
vector<string> graph(const string& name = "TAN") override;
virtual ~TAN() = default;
std::vector<std::string> graph(const std::string& name = "TAN") const override;
};
}
#endif


@@ -1,34 +1,29 @@
#include "TANLd.h"
namespace bayesnet {
using namespace std;
TANLd::TANLd() : TAN(), Proposal(TAN::Xv, TAN::yv, features, className) {}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, vector<string>& features_, string className_, map<string, vector<int>>& states_)
TANLd::TANLd() : TAN(), Proposal(dataset, features, className) {}
TANLd& TANLd::fit(torch::Tensor& X_, torch::Tensor& y_, const std::vector<std::string>& features_, const std::string& className_, map<std::string, std::vector<int>>& states_)
{
        // This first part should go in a Classifier method called fit_local_discretization or fit_float...
checkInput(X_, y_);
features = features_;
className = className_;
Xf = X_;
y = y_;
// Fills vectors Xv & yv with the data from tensors X_ (discretized) & y
fit_local_discretization(states, y);
generateTensorXFromVector();
// Fills std::vectors Xv & yv with the data from tensors X_ (discretized) & y
states = fit_local_discretization(y);
// We have discretized the input data
// 1st we need to fit the model to build the normal TAN structure, TAN::fit initializes the base Bayesian network
TAN::fit(TAN::Xv, TAN::yv, features, className, states);
localDiscretizationProposal(states, model);
generateTensorXFromVector();
Tensor ytmp = torch::transpose(y.view({ y.size(0), 1 }), 0, 1);
samples = torch::cat({ X, ytmp }, 0);
model.fit(TAN::Xv, TAN::yv, features, className);
TAN::fit(dataset, features, className, states);
states = localDiscretizationProposal(states, model);
return *this;
}
Tensor TANLd::predict(Tensor& X)
torch::Tensor TANLd::predict(torch::Tensor& X)
{
auto Xt = prepareX(X);
return TAN::predict(Xt);
}
vector<string> TANLd::graph(const string& name)
std::vector<std::string> TANLd::graph(const std::string& name) const
{
return TAN::graph(name);
}


@@ -4,16 +4,15 @@
#include "Proposal.h"
namespace bayesnet {
using namespace std;
class TANLd : public TAN, public Proposal {
private:
public:
TANLd();
virtual ~TANLd() = default;
TANLd& fit(torch::Tensor& X, torch::Tensor& y, vector<string>& features, string className, map<string, vector<int>>& states) override;
vector<string> graph(const string& name = "TAN") override;
Tensor predict(Tensor& X) override;
static inline string version() { return "0.0.1"; };
TANLd& fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, map<std::string, std::vector<int>>& states) override;
std::vector<std::string> graph(const std::string& name = "TAN") const override;
torch::Tensor predict(torch::Tensor& X) override;
static inline std::string version() { return "0.0.1"; };
};
}
#endif // !TANLD_H
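
A sketch of driving the Ld variant end to end with synthetic continuous data (not taken from the diff; shapes and names are made up, and states is populated by fit itself):

#include "TANLd.h"
#include <torch/torch.h>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main()
{
    // 2 continuous features x 100 samples; y depends on feature 0 so MDLP finds a cut
    auto X = torch::rand({ 2, 100 }, torch::kFloat32);
    auto y = (X.index({ 0, "..." }) > 0.5).to(torch::kInt32);
    std::vector<std::string> features = { "f0", "f1" };
    std::string className = "class";
    std::map<std::string, std::vector<int>> states; // filled in during fit
    bayesnet::TANLd clf;
    clf.fit(X, y, features, className, states);
    auto Xt = X.clone();
    auto yhat = clf.predict(Xt); // prepareX() re-discretizes with the fitted cut points
    std::cout << yhat.sizes() << std::endl;
    return 0;
}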


@@ -1,25 +1,23 @@
#include "bayesnetUtils.h"
namespace bayesnet {
using namespace std;
using namespace torch;
// Return the indices in descending order
vector<int> argsort(vector<float>& nums)
std::vector<int> argsort(std::vector<double>& nums)
{
int n = nums.size();
vector<int> indices(n);
std::vector<int> indices(n);
iota(indices.begin(), indices.end(), 0);
sort(indices.begin(), indices.end(), [&nums](int i, int j) {return nums[i] > nums[j];});
return indices;
}
vector<vector<int>> tensorToVector(Tensor& tensor)
std::vector<std::vector<int>> tensorToVector(torch::Tensor& tensor)
{
// convert mxn tensor to nxm vector
vector<vector<int>> result;
// convert mxn tensor to nxm std::vector
std::vector<std::vector<int>> result;
// Iterate over cols
for (int i = 0; i < tensor.size(1); ++i) {
auto col_tensor = tensor.index({ "...", i });
auto col = vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + tensor.size(0));
auto col = std::vector<int>(col_tensor.data_ptr<int>(), col_tensor.data_ptr<int>() + tensor.size(0));
result.push_back(col);
}
return result;


@@ -3,9 +3,7 @@
#include <torch/torch.h>
#include <vector>
namespace bayesnet {
using namespace std;
using namespace torch;
vector<int> argsort(vector<float>& nums);
vector<vector<int>> tensorToVector(Tensor& tensor);
std::vector<int> argsort(std::vector<double>& nums);
std::vector<std::vector<int>> tensorToVector(torch::Tensor& tensor);
}
#endif //BAYESNET_UTILS_H
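
A quick sketch of argsort's descending-order contract after the float-to-double change (not taken from the diff):

#include "bayesnetUtils.h"
#include <iostream>

int main()
{
    std::vector<double> scores = { 0.2, 0.9, 0.5 };
    // Indices in descending order of score: 1 (0.9), 2 (0.5), 0 (0.2)
    for (int i : bayesnet::argsort(scores)) {
        std::cout << i << " ";
    }
    std::cout << std::endl;
    return 0;
}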


@@ -1,8 +0,0 @@
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
include_directories(${BayesNet_SOURCE_DIR}/lib/argparse/include)
include_directories(${BayesNet_SOURCE_DIR}/lib/json/include)
add_executable(main main.cc Folding.cc platformUtils.cc Experiment.cc Datasets.cc Models.cc Report.cc)
target_link_libraries(main BayesNet ArffFiles mdlp "${TORCH_LIBRARIES}")


@@ -1,231 +0,0 @@
#include "Datasets.h"
#include "platformUtils.h"
#include "ArffFiles.h"
namespace platform {
void Datasets::load()
{
ifstream catalog(path + "/all.txt");
if (catalog.is_open()) {
string line;
while (getline(catalog, line)) {
vector<string> tokens = split(line, ',');
string name = tokens[0];
string className = tokens[1];
datasets[name] = make_unique<Dataset>(path, name, className, discretize, fileType);
}
catalog.close();
} else {
throw invalid_argument("Unable to open catalog file. [" + path + "/all.txt" + "]");
}
}
vector<string> Datasets::getNames()
{
vector<string> result;
transform(datasets.begin(), datasets.end(), back_inserter(result), [](const auto& d) { return d.first; });
return result;
}
vector<string> Datasets::getFeatures(string name)
{
if (datasets[name]->isLoaded()) {
return datasets[name]->getFeatures();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
map<string, vector<int>> Datasets::getStates(string name)
{
if (datasets[name]->isLoaded()) {
return datasets[name]->getStates();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
string Datasets::getClassName(string name)
{
if (datasets[name]->isLoaded()) {
return datasets[name]->getClassName();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
int Datasets::getNSamples(string name)
{
if (datasets[name]->isLoaded()) {
return datasets[name]->getNSamples();
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<float>>&, vector<int>&> Datasets::getVectors(string name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectors();
}
pair<vector<vector<int>>&, vector<int>&> Datasets::getVectorsDiscretized(string name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getVectorsDiscretized();
}
pair<torch::Tensor&, torch::Tensor&> Datasets::getTensors(string name)
{
if (!datasets[name]->isLoaded()) {
datasets[name]->load();
}
return datasets[name]->getTensors();
}
bool Datasets::isDataset(const string& name)
{
return datasets.find(name) != datasets.end();
}
Dataset::Dataset(const Dataset& dataset) : path(dataset.path), name(dataset.name), className(dataset.className), n_samples(dataset.n_samples), n_features(dataset.n_features), features(dataset.features), states(dataset.states), loaded(dataset.loaded), discretize(dataset.discretize), X(dataset.X), y(dataset.y), Xv(dataset.Xv), Xd(dataset.Xd), yv(dataset.yv), fileType(dataset.fileType)
{
}
string Dataset::getName()
{
return name;
}
string Dataset::getClassName()
{
return className;
}
vector<string> Dataset::getFeatures()
{
if (loaded) {
return features;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNFeatures()
{
if (loaded) {
return n_features;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
int Dataset::getNSamples()
{
if (loaded) {
return n_samples;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
map<string, vector<int>> Dataset::getStates()
{
if (loaded) {
return states;
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<float>>&, vector<int>&> Dataset::getVectors()
{
if (loaded) {
return { Xv, yv };
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<vector<vector<int>>&, vector<int>&> Dataset::getVectorsDiscretized()
{
if (loaded) {
return { Xd, yv };
} else {
throw invalid_argument("Dataset not loaded.");
}
}
pair<torch::Tensor&, torch::Tensor&> Dataset::getTensors()
{
if (loaded) {
buildTensors();
return { X, y };
} else {
throw invalid_argument("Dataset not loaded.");
}
}
void Dataset::load_csv()
{
ifstream file(path + "/" + name + ".csv");
if (file.is_open()) {
string line;
getline(file, line);
vector<string> tokens = split(line, ',');
features = vector<string>(tokens.begin(), tokens.end() - 1);
className = tokens.back();
for (auto i = 0; i < features.size(); ++i) {
Xv.push_back(vector<float>());
}
while (getline(file, line)) {
tokens = split(line, ',');
for (auto i = 0; i < features.size(); ++i) {
Xv[i].push_back(stof(tokens[i]));
}
yv.push_back(stoi(tokens.back()));
}
file.close();
} else {
throw invalid_argument("Unable to open dataset file.");
}
}
void Dataset::computeStates()
{
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xd[i].begin(), Xd[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0);
}
states[className] = vector<int>(*max_element(yv.begin(), yv.end()) + 1);
iota(begin(states[className]), end(states[className]), 0);
}
void Dataset::load_arff()
{
auto arff = ArffFiles();
arff.load(path + "/" + name + ".arff", className);
// Get Dataset X, y
Xv = arff.getX();
yv = arff.getY();
// Get className & Features
className = arff.getClassName();
auto attributes = arff.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& attribute) { return attribute.first; });
}
void Dataset::load()
{
if (loaded) {
return;
}
if (fileType == CSV) {
load_csv();
} else if (fileType == ARFF) {
load_arff();
}
if (discretize) {
Xd = discretizeDataset(Xv, yv);
computeStates();
}
n_samples = Xv[0].size();
n_features = Xv.size();
loaded = true;
}
void Dataset::buildTensors()
{
if (discretize) {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kInt32);
} else {
X = torch::zeros({ static_cast<int>(n_features), static_cast<int>(n_samples) }, torch::kFloat32);
}
for (int i = 0; i < features.size(); ++i) {
if (discretize) {
X.index_put_({ i, "..." }, torch::tensor(Xd[i], torch::kInt32));
} else {
X.index_put_({ i, "..." }, torch::tensor(Xv[i], torch::kFloat32));
}
}
y = torch::tensor(yv, torch::kInt32);
}
}


@@ -1,65 +0,0 @@
#ifndef DATASETS_H
#define DATASETS_H
#include <torch/torch.h>
#include <map>
#include <vector>
#include <string>
namespace platform {
using namespace std;
enum fileType_t { CSV, ARFF };
class Dataset {
private:
string path;
string name;
fileType_t fileType;
string className;
int n_samples{ 0 }, n_features{ 0 };
vector<string> features;
map<string, vector<int>> states;
bool loaded;
bool discretize;
torch::Tensor X, y;
vector<vector<float>> Xv;
vector<vector<int>> Xd;
vector<int> yv;
void buildTensors();
void load_csv();
void load_arff();
void computeStates();
public:
Dataset(const string& path, const string& name, const string& className, bool discretize, fileType_t fileType) : path(path), name(name), className(className), discretize(discretize), loaded(false), fileType(fileType) {};
explicit Dataset(const Dataset&);
string getName();
string getClassName();
vector<string> getFeatures();
map<string, vector<int>> getStates();
pair<vector<vector<float>>&, vector<int>&> getVectors();
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized();
pair<torch::Tensor&, torch::Tensor&> getTensors();
int getNFeatures();
int getNSamples();
void load();
const bool inline isLoaded() const { return loaded; };
};
class Datasets {
private:
string path;
fileType_t fileType;
map<string, unique_ptr<Dataset>> datasets;
bool discretize;
void load(); // Loads the list of datasets
public:
explicit Datasets(const string& path, bool discretize = false, fileType_t fileType = ARFF) : path(path), discretize(discretize), fileType(fileType) { load(); };
vector<string> getNames();
vector<string> getFeatures(string name);
int getNSamples(string name);
string getClassName(string name);
map<string, vector<int>> getStates(string name);
pair<vector<vector<float>>&, vector<int>&> getVectors(string name);
pair<vector<vector<int>>&, vector<int>&> getVectorsDiscretized(string name);
pair<torch::Tensor&, torch::Tensor&> getTensors(string name);
bool isDataset(const string& name);
};
};
#endif
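
A usage sketch for the removed Datasets facade (not taken from the diff), assuming a datasets folder holding an all.txt catalog with one "name,className" pair per line:

#include "Datasets.h"
#include <iostream>

int main()
{
    auto datasets = platform::Datasets("datasets", true, platform::ARFF);
    for (const auto& name : datasets.getNames()) {
        auto [X, y] = datasets.getTensors(name); // lazy-loads on first access
        std::cout << name << ": " << y.size(0) << " samples, " << X.size(0)
            << " features, class " << datasets.getClassName(name) << std::endl;
    }
    return 0;
}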


@@ -1,62 +0,0 @@
#ifndef DOTENV_H
#define DOTENV_H
#include <string>
#include <map>
#include <fstream>
#include <sstream>
#include "platformUtils.h"
namespace platform {
class DotEnv {
private:
std::map<std::string, std::string> env;
std::string trim(const std::string& str)
{
std::string result = str;
result.erase(result.begin(), std::find_if(result.begin(), result.end(), [](int ch) {
return !std::isspace(ch);
}));
result.erase(std::find_if(result.rbegin(), result.rend(), [](int ch) {
return !std::isspace(ch);
}).base(), result.end());
return result;
}
public:
DotEnv()
{
std::ifstream file(".env");
if (!file.is_open()) {
std::cerr << "File .env not found" << std::endl;
exit(1);
}
std::string line;
while (std::getline(file, line)) {
line = trim(line);
if (line.empty() || line[0] == '#') {
continue;
}
std::istringstream iss(line);
std::string key, value;
if (std::getline(iss, key, '=') && std::getline(iss, value)) {
env[key] = value;
}
}
}
std::string get(const std::string& key)
{
return env[key];
}
std::vector<int> getSeeds()
{
auto seeds = std::vector<int>();
auto seeds_str = env["seeds"];
seeds_str = trim(seeds_str);
seeds_str = seeds_str.substr(1, seeds_str.size() - 2);
auto seeds_str_split = split(seeds_str, ',');
transform(seeds_str_split.begin(), seeds_str_split.end(), back_inserter(seeds), [](const std::string& str) {
return stoi(str);
});
return seeds;
}
};
}
#endif
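
A usage sketch (not taken from the diff); the .env contents below are hypothetical, and the program exits if the file is missing, as the constructor above shows:

// .env in the working directory:
//   platform=local
//   seeds=[271,314,42]
#include "DotEnv.h"
#include <iostream>

int main()
{
    platform::DotEnv env; // parses .env line by line, skipping comments
    std::cout << env.get("platform") << std::endl;
    for (int seed : env.getSeeds()) { // "[271,314,42]" -> {271, 314, 42}
        std::cout << seed << " ";
    }
    std::cout << std::endl;
    return 0;
}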


@@ -1,184 +0,0 @@
#include "Experiment.h"
#include "Datasets.h"
#include "Models.h"
#include "Report.h"
namespace platform {
using json = nlohmann::json;
string get_date()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%Y-%m-%d");
return oss.str();
}
string get_time()
{
time_t rawtime;
tm* timeinfo;
time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::ostringstream oss;
oss << std::put_time(timeinfo, "%H:%M:%S");
return oss.str();
}
string Experiment::get_file_name()
{
string result = "results_" + score_name + "_" + model + "_" + platform + "_" + get_date() + "_" + get_time() + "_" + (stratified ? "1" : "0") + ".json";
return result;
}
json Experiment::build_json()
{
json result;
result["title"] = title;
result["date"] = get_date();
result["time"] = get_time();
result["model"] = model;
result["version"] = model_version;
result["platform"] = platform;
result["score_name"] = score_name;
result["language"] = language;
result["language_version"] = language_version;
result["discretized"] = discretized;
result["stratified"] = stratified;
result["folds"] = nfolds;
result["seeds"] = randomSeeds;
result["duration"] = duration;
result["results"] = json::array();
for (const auto& r : results) {
json j;
j["dataset"] = r.getDataset();
j["hyperparameters"] = r.getHyperparameters();
j["samples"] = r.getSamples();
j["features"] = r.getFeatures();
j["classes"] = r.getClasses();
j["score_train"] = r.getScoreTrain();
j["score_test"] = r.getScoreTest();
j["score"] = r.getScoreTest();
j["score_std"] = r.getScoreTestStd();
j["score_train_std"] = r.getScoreTrainStd();
j["score_test_std"] = r.getScoreTestStd();
j["train_time"] = r.getTrainTime();
j["train_time_std"] = r.getTrainTimeStd();
j["test_time"] = r.getTestTime();
j["test_time_std"] = r.getTestTimeStd();
j["time"] = r.getTestTime() + r.getTrainTime();
j["time_std"] = r.getTestTimeStd() + r.getTrainTimeStd();
j["scores_train"] = r.getScoresTrain();
j["scores_test"] = r.getScoresTest();
j["times_train"] = r.getTimesTrain();
j["times_test"] = r.getTimesTest();
j["nodes"] = r.getNodes();
j["leaves"] = r.getLeaves();
j["depth"] = r.getDepth();
result["results"].push_back(j);
}
return result;
}
void Experiment::save(const string& path)
{
json data = build_json();
ofstream file(path + "/" + get_file_name());
file << data;
file.close();
}
void Experiment::report()
{
json data = build_json();
Report report(data);
report.show();
}
void Experiment::show()
{
json data = build_json();
cout << data.dump(4) << endl;
}
void Experiment::go(vector<string> filesToProcess, const string& path)
{
cout << "*** Starting experiment: " << title << " ***" << endl;
for (auto fileName : filesToProcess) {
cout << "- " << setw(20) << left << fileName << " " << right << flush;
cross_validation(path, fileName);
cout << endl;
}
}
void Experiment::cross_validation(const string& path, const string& fileName)
{
auto datasets = platform::Datasets(path, discretized, platform::ARFF);
// Get dataset
auto [X, y] = datasets.getTensors(fileName);
auto states = datasets.getStates(fileName);
auto features = datasets.getFeatures(fileName);
auto samples = datasets.getNSamples(fileName);
auto className = datasets.getClassName(fileName);
cout << " (" << setw(5) << samples << "," << setw(3) << features.size() << ") " << flush;
// Prepare Result
auto result = Result();
auto [values, counts] = at::_unique(y);
result.setSamples(X.size(1)).setFeatures(X.size(0)).setClasses(values.size(0));
int nResults = nfolds * static_cast<int>(randomSeeds.size());
auto accuracy_test = torch::zeros({ nResults }, torch::kFloat64);
auto accuracy_train = torch::zeros({ nResults }, torch::kFloat64);
auto train_time = torch::zeros({ nResults }, torch::kFloat64);
auto test_time = torch::zeros({ nResults }, torch::kFloat64);
auto nodes = torch::zeros({ nResults }, torch::kFloat64);
auto edges = torch::zeros({ nResults }, torch::kFloat64);
auto num_states = torch::zeros({ nResults }, torch::kFloat64);
Timer train_timer, test_timer;
int item = 0;
for (auto seed : randomSeeds) {
cout << "(" << seed << ") doing Fold: " << flush;
Fold* fold;
if (stratified)
fold = new StratifiedKFold(nfolds, y, seed);
else
fold = new KFold(nfolds, y.size(0), seed);
for (int nfold = 0; nfold < nfolds; nfold++) {
auto clf = Models::instance()->create(model);
setModelVersion(clf->getVersion());
train_timer.start();
auto [train, test] = fold->getFold(nfold);
auto train_t = torch::tensor(train);
auto test_t = torch::tensor(test);
auto X_train = X.index({ "...", train_t });
auto y_train = y.index({ train_t });
auto X_test = X.index({ "...", test_t });
auto y_test = y.index({ test_t });
cout << nfold + 1 << ", " << flush;
clf->fit(X_train, y_train, features, className, states);
nodes[item] = clf->getNumberOfNodes();
edges[item] = clf->getNumberOfEdges();
num_states[item] = clf->getNumberOfStates();
train_time[item] = train_timer.getDuration();
auto accuracy_train_value = clf->score(X_train, y_train);
test_timer.start();
auto accuracy_test_value = clf->score(X_test, y_test);
test_time[item] = test_timer.getDuration();
accuracy_train[item] = accuracy_train_value;
accuracy_test[item] = accuracy_test_value;
// Store results and times in vector
result.addScoreTrain(accuracy_train_value);
result.addScoreTest(accuracy_test_value);
result.addTimeTrain(train_time[item].item<double>());
result.addTimeTest(test_time[item].item<double>());
item++;
}
cout << "end. " << flush;
delete fold;
}
result.setScoreTest(torch::mean(accuracy_test).item<double>()).setScoreTrain(torch::mean(accuracy_train).item<double>());
result.setScoreTestStd(torch::std(accuracy_test).item<double>()).setScoreTrainStd(torch::std(accuracy_train).item<double>());
result.setTrainTime(torch::mean(train_time).item<double>()).setTestTime(torch::mean(test_time).item<double>());
result.setNodes(torch::mean(nodes).item<double>()).setLeaves(torch::mean(edges).item<double>()).setDepth(torch::mean(num_states).item<double>());
result.setDataset(fileName);
addResult(result);
}
}


@@ -1,114 +0,0 @@
#ifndef EXPERIMENT_H
#define EXPERIMENT_H
#include <torch/torch.h>
#include <nlohmann/json.hpp>
#include <string>
#include <chrono>
#include "Folding.h"
#include "BaseClassifier.h"
#include "TAN.h"
#include "KDB.h"
#include "AODE.h"
using namespace std;
namespace platform {
using json = nlohmann::json;
class Timer {
private:
chrono::high_resolution_clock::time_point begin;
public:
Timer() = default;
~Timer() = default;
void start() { begin = chrono::high_resolution_clock::now(); }
double getDuration()
{
chrono::high_resolution_clock::time_point end = chrono::high_resolution_clock::now();
chrono::duration<double> time_span = chrono::duration_cast<chrono::duration<double>>(end - begin);
return time_span.count();
}
};
class Result {
private:
string dataset, hyperparameters, model_version;
int samples{ 0 }, features{ 0 }, classes{ 0 };
double score_train{ 0 }, score_test{ 0 }, score_train_std{ 0 }, score_test_std{ 0 }, train_time{ 0 }, train_time_std{ 0 }, test_time{ 0 }, test_time_std{ 0 };
float nodes{ 0 }, leaves{ 0 }, depth{ 0 };
vector<double> scores_train, scores_test, times_train, times_test;
public:
Result() = default;
Result& setDataset(const string& dataset) { this->dataset = dataset; return *this; }
Result& setHyperparameters(const string& hyperparameters) { this->hyperparameters = hyperparameters; return *this; }
Result& setSamples(int samples) { this->samples = samples; return *this; }
Result& setFeatures(int features) { this->features = features; return *this; }
Result& setClasses(int classes) { this->classes = classes; return *this; }
Result& setScoreTrain(double score) { this->score_train = score; return *this; }
Result& setScoreTest(double score) { this->score_test = score; return *this; }
Result& setScoreTrainStd(double score_std) { this->score_train_std = score_std; return *this; }
Result& setScoreTestStd(double score_std) { this->score_test_std = score_std; return *this; }
Result& setTrainTime(double train_time) { this->train_time = train_time; return *this; }
Result& setTrainTimeStd(double train_time_std) { this->train_time_std = train_time_std; return *this; }
Result& setTestTime(double test_time) { this->test_time = test_time; return *this; }
Result& setTestTimeStd(double test_time_std) { this->test_time_std = test_time_std; return *this; }
Result& setNodes(float nodes) { this->nodes = nodes; return *this; }
Result& setLeaves(float leaves) { this->leaves = leaves; return *this; }
Result& setDepth(float depth) { this->depth = depth; return *this; }
Result& addScoreTrain(double score) { scores_train.push_back(score); return *this; }
Result& addScoreTest(double score) { scores_test.push_back(score); return *this; }
Result& addTimeTrain(double time) { times_train.push_back(time); return *this; }
Result& addTimeTest(double time) { times_test.push_back(time); return *this; }
const float get_score_train() const { return score_train; }
float get_score_test() { return score_test; }
const string& getDataset() const { return dataset; }
const string& getHyperparameters() const { return hyperparameters; }
const int getSamples() const { return samples; }
const int getFeatures() const { return features; }
const int getClasses() const { return classes; }
const double getScoreTrain() const { return score_train; }
const double getScoreTest() const { return score_test; }
const double getScoreTrainStd() const { return score_train_std; }
const double getScoreTestStd() const { return score_test_std; }
const double getTrainTime() const { return train_time; }
const double getTrainTimeStd() const { return train_time_std; }
const double getTestTime() const { return test_time; }
const double getTestTimeStd() const { return test_time_std; }
const float getNodes() const { return nodes; }
const float getLeaves() const { return leaves; }
const float getDepth() const { return depth; }
const vector<double>& getScoresTrain() const { return scores_train; }
const vector<double>& getScoresTest() const { return scores_test; }
const vector<double>& getTimesTrain() const { return times_train; }
const vector<double>& getTimesTest() const { return times_test; }
};
class Experiment {
private:
string title, model, platform, score_name, model_version, language_version, language;
bool discretized{ false }, stratified{ false };
vector<Result> results;
vector<int> randomSeeds;
int nfolds{ 0 };
float duration{ 0 };
json build_json();
public:
Experiment() = default;
Experiment& setTitle(const string& title) { this->title = title; return *this; }
Experiment& setModel(const string& model) { this->model = model; return *this; }
Experiment& setPlatform(const string& platform) { this->platform = platform; return *this; }
Experiment& setScoreName(const string& score_name) { this->score_name = score_name; return *this; }
Experiment& setModelVersion(const string& model_version) { this->model_version = model_version; return *this; }
Experiment& setLanguage(const string& language) { this->language = language; return *this; }
Experiment& setLanguageVersion(const string& language_version) { this->language_version = language_version; return *this; }
Experiment& setDiscretized(bool discretized) { this->discretized = discretized; return *this; }
Experiment& setStratified(bool stratified) { this->stratified = stratified; return *this; }
Experiment& setNFolds(int nfolds) { this->nfolds = nfolds; return *this; }
Experiment& addResult(Result result) { results.push_back(result); return *this; }
Experiment& addRandomSeed(int randomSeed) { randomSeeds.push_back(randomSeed); return *this; }
Experiment& setDuration(float duration) { this->duration = duration; return *this; }
string get_file_name();
void save(const string& path);
void cross_validation(const string& path, const string& fileName);
void go(vector<string> filesToProcess, const string& path);
void show();
void report();
};
}
#endif
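
A sketch of the fluent Experiment builder (not taken from the diff; every value here is made up):

#include "Experiment.h"

int main()
{
    platform::Experiment experiment;
    experiment.setTitle("smoke test").setModel("TAN").setPlatform("local")
        .setScoreName("accuracy").setDiscretized(true).setStratified(true)
        .setNFolds(5).addRandomSeed(271).setLanguage("c++");
    // experiment.go({ "iris" }, "datasets"); // would run cross-validation per dataset
    experiment.show(); // dumps the JSON built so far
    return 0;
}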


@@ -1,95 +0,0 @@
#include "Folding.h"
#include <algorithm>
#include <map>
Fold::Fold(int k, int n, int seed) : k(k), n(n), seed(seed)
{
random_device rd;
random_seed = default_random_engine(seed == -1 ? rd() : seed);
srand(seed == -1 ? time(0) : seed);
}
KFold::KFold(int k, int n, int seed) : Fold(k, n, seed), indices(vector<int>(n))
{
iota(begin(indices), end(indices), 0); // fill with 0, 1, ..., n - 1
shuffle(indices.begin(), indices.end(), random_seed);
}
pair<vector<int>, vector<int>> KFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
}
int nTest = n / k;
auto train = vector<int>();
auto test = vector<int>();
for (int i = 0; i < n; i++) {
if (i >= nTest * nFold && i < nTest * (nFold + 1)) {
test.push_back(indices[i]);
} else {
train.push_back(indices[i]);
}
}
return { train, test };
}
StratifiedKFold::StratifiedKFold(int k, torch::Tensor& y, int seed) : Fold(k, y.numel(), seed)
{
n = y.numel();
this->y = vector<int>(y.data_ptr<int>(), y.data_ptr<int>() + n);
build();
}
StratifiedKFold::StratifiedKFold(int k, const vector<int>& y, int seed)
: Fold(k, y.size(), seed)
{
this->y = y;
n = y.size();
build();
}
void StratifiedKFold::build()
{
stratified_indices = vector<vector<int>>(k);
int fold_size = n / k;
// Compute class counts and indices
auto class_indices = map<int, vector<int>>();
vector<int> class_counts(*max_element(y.begin(), y.end()) + 1, 0);
for (auto i = 0; i < n; ++i) {
class_counts[y[i]]++;
class_indices[y[i]].push_back(i);
}
// Shuffle class indices
for (auto& [cls, indices] : class_indices) {
shuffle(indices.begin(), indices.end(), random_seed);
}
// Assign indices to folds
for (auto label = 0; label < class_counts.size(); ++label) {
auto num_samples_to_take = class_counts[label] / k;
if (num_samples_to_take == 0)
continue;
auto remainder_samples_to_take = class_counts[label] % k;
for (auto fold = 0; fold < k; ++fold) {
auto it = next(class_indices[label].begin(), num_samples_to_take);
move(class_indices[label].begin(), it, back_inserter(stratified_indices[fold])); // ##
class_indices[label].erase(class_indices[label].begin(), it);
}
while (remainder_samples_to_take > 0) {
int fold = (rand() % static_cast<int>(k));
if (stratified_indices[fold].size() == fold_size + 1) {
continue;
}
auto it = next(class_indices[label].begin(), 1);
stratified_indices[fold].push_back(*class_indices[label].begin());
class_indices[label].erase(class_indices[label].begin(), it);
remainder_samples_to_take--;
}
}
}
pair<vector<int>, vector<int>> StratifiedKFold::getFold(int nFold)
{
if (nFold >= k || nFold < 0) {
throw out_of_range("nFold (" + to_string(nFold) + ") must be less than k (" + to_string(k) + ")");
}
vector<int> test_indices = stratified_indices[nFold];
vector<int> train_indices;
for (int i = 0; i < k; ++i) {
if (i == nFold) continue;
train_indices.insert(train_indices.end(), stratified_indices[i].begin(), stratified_indices[i].end());
}
return { train_indices, test_indices };
}


@@ -1,37 +0,0 @@
#ifndef FOLDING_H
#define FOLDING_H
#include <torch/torch.h>
#include <vector>
#include <random>
using namespace std;
class Fold {
protected:
int k;
int n;
int seed;
default_random_engine random_seed;
public:
Fold(int k, int n, int seed = -1);
virtual pair<vector<int>, vector<int>> getFold(int nFold) = 0;
virtual ~Fold() = default;
int getNumberOfFolds() { return k; }
};
class KFold : public Fold {
private:
vector<int> indices;
public:
KFold(int k, int n, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override;
};
class StratifiedKFold : public Fold {
private:
vector<int> y;
vector<vector<int>> stratified_indices;
void build();
public:
StratifiedKFold(int k, const vector<int>& y, int seed = -1);
StratifiedKFold(int k, torch::Tensor& y, int seed = -1);
pair<vector<int>, vector<int>> getFold(int nFold) override;
};
#endif
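
A sketch showing the stratified splitter keeping class proportions across folds (not taken from the diff; the labels are made up):

#include "Folding.h"
#include <iostream>

int main()
{
    std::vector<int> y = { 0, 0, 0, 1, 1, 1, 0, 1, 0, 1 };
    StratifiedKFold fold(5, y, 17); // k = 5, fixed seed for reproducibility
    for (int i = 0; i < fold.getNumberOfFolds(); ++i) {
        auto [train, test] = fold.getFold(i);
        std::cout << "fold " << i << ": " << train.size() << " train / "
            << test.size() << " test" << std::endl;
    }
    return 0;
}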


@@ -1,54 +0,0 @@
#include "Models.h"
namespace platform {
using namespace std;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
    Models* Models::factory = nullptr;
Models* Models::instance()
{
        // manages the singleton instance
if (factory == nullptr)
factory = new Models();
return factory;
}
void Models::registerFactoryFunction(const string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
// register the class factory function
functionRegistry[name] = classFactoryFunction;
}
shared_ptr<bayesnet::BaseClassifier> Models::create(const string& name)
{
bayesnet::BaseClassifier* instance = nullptr;
// find name in the registry and call factory method.
auto it = functionRegistry.find(name);
if (it != functionRegistry.end())
instance = it->second();
// wrap instance in a shared ptr and return
if (instance != nullptr)
return shared_ptr<bayesnet::BaseClassifier>(instance);
else
return nullptr;
}
vector<string> Models::getNames()
{
vector<string> names;
transform(functionRegistry.begin(), functionRegistry.end(), back_inserter(names),
[](const pair<string, function<bayesnet::BaseClassifier* (void)>>& pair) { return pair.first; });
return names;
}
string Models::toString()
{
string result = "";
for (const auto& pair : functionRegistry) {
result += pair.first + ", ";
}
return "{" + result.substr(0, result.size() - 2) + "}";
}
Registrar::Registrar(const string& name, function<bayesnet::BaseClassifier* (void)> classFactoryFunction)
{
// register the class factory function
Models::instance()->registerFactoryFunction(name, classFactoryFunction);
}
}


@@ -1,36 +0,0 @@
#ifndef MODELS_H
#define MODELS_H
#include <map>
#include "BaseClassifier.h"
#include "AODE.h"
#include "TAN.h"
#include "KDB.h"
#include "SPODE.h"
#include "TANLd.h"
#include "KDBLd.h"
#include "SPODELd.h"
#include "AODELd.h"
namespace platform {
class Models {
private:
map<string, function<bayesnet::BaseClassifier* (void)>> functionRegistry;
static Models* factory; //singleton
Models() {};
public:
Models(Models&) = delete;
void operator=(const Models&) = delete;
// Idea from: https://www.codeproject.com/Articles/567242/AplusC-2b-2bplusObjectplusFactory
static Models* instance();
shared_ptr<bayesnet::BaseClassifier> create(const string& name);
void registerFactoryFunction(const string& name,
function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
vector<string> getNames();
string toString();
};
class Registrar {
public:
Registrar(const string& className, function<bayesnet::BaseClassifier* (void)> classFactoryFunction);
};
}
#endif

Report.cc

@@ -1,66 +0,0 @@
#include "Report.h"
namespace platform {
string headerLine(const string& text)
{
int n = MAXL - static_cast<int>(text.length()) - 3;
if (n < 0) n = 0; // guard: text longer than the banner width would underflow
return "* " + text + string(n, ' ') + "*\n";
}
string Report::fromVector(const string& key)
{
string result = "";
for (auto& item : data[key]) {
result += to_string(item) + ", ";
}
return "[" + result.substr(0, result.length() - 2) + "]";
}
string fVector(const json& data)
{
string result = "";
for (const auto& item : data) {
result += to_string(item) + ", ";
}
return "[" + result.substr(0, result.length() - 2) + "]";
}
void Report::show()
{
header();
body();
}
void Report::header()
{
cout << string(MAXL, '*') << endl;
cout << headerLine("Report " + data["model"].get<string>() + " ver. " + data["version"].get<string>() + " with " + to_string(data["folds"].get<int>()) + " Folds cross validation and " + to_string(data["seeds"].size()) + " random seeds. " + data["date"].get<string>() + " " + data["time"].get<string>());
cout << headerLine(data["title"].get<string>());
cout << headerLine("Random seeds: " + fromVector("seeds") + " Stratified: " + (data["stratified"].get<bool>() ? "True" : "False"));
cout << headerLine("Execution took " + to_string(data["duration"].get<float>()) + " seconds, " + to_string(data["duration"].get<float>() / 3600) + " hours, on " + data["platform"].get<string>());
cout << headerLine("Score is " + data["score_name"].get<string>());
cout << string(MAXL, '*') << endl;
cout << endl;
}
void Report::body()
{
cout << "Dataset Sampl. Feat. Cls Nodes Edges States Score Time Hyperparameters" << endl;
cout << "============================== ====== ===== === ======= ======= ======= =============== ================= ===============" << endl;
for (const auto& r : data["results"]) {
cout << setw(30) << left << r["dataset"].get<string>() << " ";
cout << setw(6) << right << r["samples"].get<int>() << " ";
cout << setw(5) << right << r["features"].get<int>() << " ";
cout << setw(3) << right << r["classes"].get<int>() << " ";
cout << setw(7) << setprecision(2) << fixed << r["nodes"].get<float>() << " ";
cout << setw(7) << setprecision(2) << fixed << r["leaves"].get<float>() << " ";
cout << setw(7) << setprecision(2) << fixed << r["depth"].get<float>() << " ";
cout << setw(8) << right << setprecision(6) << fixed << r["score_test"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["score_test_std"].get<double>() << " ";
cout << setw(10) << right << setprecision(6) << fixed << r["test_time"].get<double>() << "±" << setw(6) << setprecision(4) << fixed << r["test_time_std"].get<double>() << " ";
cout << " " << r["hyperparameters"].get<string>();
cout << endl;
cout << string(MAXL, '*') << endl;
cout << headerLine("Train scores: " + fVector(r["scores_train"]));
cout << headerLine("Test scores: " + fVector(r["scores_test"]));
cout << headerLine("Train times: " + fVector(r["times_train"]));
cout << headerLine("Test times: " + fVector(r["times_test"]));
cout << string(MAXL, '*') << endl;
}
}
}
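For orientation, a hedged sketch of feeding this class (the JSON file name is hypothetical; any result file carrying the fields read above works):

#include <fstream>
#include "Report.h"
std::ifstream file("results/some_result.json"); // hypothetical result file
json data = json::parse(file);
platform::Report report(data);
report.show(); // prints the header block, then the per-dataset body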

Report.h

@@ -1,23 +0,0 @@
#ifndef REPORT_H
#define REPORT_H
#include <string>
#include <iostream>
#include <nlohmann/json.hpp>
using json = nlohmann::json;
const int MAXL = 121;
namespace platform {
using namespace std;
class Report {
public:
explicit Report(json data_) { data = data_; };
virtual ~Report() = default;
void show();
private:
void header();
void body();
string fromVector(const string& key);
json data;
};
};
#endif


@@ -1,123 +0,0 @@
#include <iostream>
#include <argparse/argparse.hpp>
#include "platformUtils.h"
#include "Experiment.h"
#include "Datasets.h"
#include "DotEnv.h"
#include "Models.h"
#include "modelRegister.h"
using namespace std;
const string PATH_RESULTS = "results";
const string PATH_DATASETS = "datasets";
argparse::ArgumentParser manageArguments(int argc, char** argv)
{
auto env = platform::DotEnv();
argparse::ArgumentParser program("BayesNetSample");
program.add_argument("-d", "--dataset").default_value("").help("Dataset file name");
program.add_argument("-p", "--path")
.help("folder where the data files are located, default")
.default_value(string{ PATH_DATASETS }
);
program.add_argument("-m", "--model")
.help("Model to use " + platform::Models::instance()->toString())
.action([](const std::string& value) {
static const vector<string> choices = platform::Models::instance()->getNames();
if (find(choices.begin(), choices.end(), value) != choices.end()) {
return value;
}
throw runtime_error("Model must be one of " + platform::Models::instance()->toString());
}
);
program.add_argument("--title").default_value("").help("Experiment title");
program.add_argument("--discretize").help("Discretize input dataset").default_value((bool)stoi(env.get("discretize"))).implicit_value(true);
program.add_argument("--stratified").help("If Stratified KFold is to be done").default_value((bool)stoi(env.get("stratified"))).implicit_value(true);
program.add_argument("-f", "--folds").help("Number of folds").default_value(stoi(env.get("n_folds"))).scan<'i', int>().action([](const string& value) {
try {
auto k = stoi(value);
if (k < 2) {
throw runtime_error("Number of folds must be greater than 1");
}
return k;
}
catch (const runtime_error& err) {
throw runtime_error(err.what());
}
catch (...) {
throw runtime_error("Number of folds must be an integer");
}});
auto seed_values = env.getSeeds();
program.add_argument("-s", "--seeds").nargs(1, 10).help("Random seeds. Set to -1 to have pseudo random").scan<'i', int>().default_value(seed_values);
try {
program.parse_args(argc, argv);
auto file_name = program.get<string>("dataset");
auto path = program.get<string>("path");
auto model_name = program.get<string>("model");
auto discretize_dataset = program.get<bool>("discretize");
auto stratified = program.get<bool>("stratified");
auto n_folds = program.get<int>("folds");
auto seeds = program.get<vector<int>>("seeds");
auto complete_file_name = path + file_name + ".arff";
auto title = program.get<string>("title");
if (title == "" && file_name == "") {
throw runtime_error("title is mandatory if dataset is not provided");
}
}
catch (const exception& err) {
cerr << err.what() << endl;
cerr << program;
exit(1);
}
return program;
}
int main(int argc, char** argv)
{
auto program = manageArguments(argc, argv);
bool saveResults = false;
auto file_name = program.get<string>("dataset");
auto path = program.get<string>("path");
auto model_name = program.get<string>("model");
auto discretize_dataset = program.get<bool>("discretize");
auto stratified = program.get<bool>("stratified");
auto n_folds = program.get<int>("folds");
auto seeds = program.get<vector<int>>("seeds");
vector<string> filesToTest;
auto datasets = platform::Datasets(path, true, platform::ARFF);
auto title = program.get<string>("title");
if (file_name != "") {
if (!datasets.isDataset(file_name)) {
cerr << "Dataset " << file_name << " not found" << endl;
exit(1);
}
if (title == "") {
title = "Test " + file_name + " " + model_name + " " + to_string(n_folds) + " folds";
}
filesToTest.push_back(file_name);
} else {
filesToTest = datasets.getNames();
saveResults = true;
}
/*
* Begin Processing
*/
auto env = platform::DotEnv();
auto experiment = platform::Experiment();
experiment.setTitle(title).setLanguage("cpp").setLanguageVersion("1.0.0");
experiment.setDiscretized(discretize_dataset).setModel(model_name).setPlatform(env.get("platform"));
experiment.setStratified(stratified).setNFolds(n_folds).setScoreName("accuracy");
for (auto seed : seeds) {
experiment.addRandomSeed(seed);
}
platform::Timer timer;
timer.start();
experiment.go(filesToTest, path);
experiment.setDuration(timer.getDuration());
if (saveResults)
experiment.save(PATH_RESULTS);
else
experiment.report();
cout << "Done!" << endl;
return 0;
}

modelRegister.h

@@ -1,19 +0,0 @@
#ifndef MODEL_REGISTER_H
#define MODEL_REGISTER_H
static platform::Registrar registrarT("TAN",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TAN();});
static platform::Registrar registrarTLD("TANLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::TANLd();});
static platform::Registrar registrarS("SPODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODE(2);});
static platform::Registrar registrarSLD("SPODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::SPODELd(2);});
static platform::Registrar registrarK("KDB",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDB(2);});
static platform::Registrar registrarKLD("KDBLd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::KDBLd(2);});
static platform::Registrar registrarA("AODE",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODE();});
static platform::Registrar registrarALD("AODELd",
[](void) -> bayesnet::BaseClassifier* { return new bayesnet::AODELd();});
#endif
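Because these Registrar objects are static, their constructors run during static initialization, so the factory registry is populated before main() starts. Adding a classifier is one more line of the same shape (MyModel is a hypothetical class here):

static platform::Registrar registrarMyModel("MyModel",
    [](void) -> bayesnet::BaseClassifier* { return new bayesnet::MyModel(); });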

platformUtils.cc

@@ -1,108 +0,0 @@
#include "platformUtils.h"
using namespace torch;
vector<string> split(const string& text, char delimiter)
{
vector<string> result;
stringstream ss(text);
string token;
while (getline(ss, token, delimiter)) {
result.push_back(token);
}
return result;
}
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features)
{
vector<mdlp::labels_t> Xd;
map<string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
Xd.push_back(xd);
}
return { Xd, maxes };
}
vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
vector<mdlp::labels_t> Xd;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
Xd.push_back(xd);
}
return Xd;
}
bool file_exists(const string& name)
{
if (FILE* file = fopen(name.c_str(), "r")) {
fclose(file);
return true;
} else {
return false;
}
}
tuple<Tensor, Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(const string& path, const string& name, bool class_last, bool discretize_dataset)
{
auto handler = ArffFiles();
handler.load(path + static_cast<string>(name) + ".arff", class_last);
// Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
vector<string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
Tensor Xd;
auto states = map<string, vector<int>>();
if (discretize_dataset) {
auto Xr = discretizeDataset(X, y);
Xd = torch::zeros({ static_cast<int>(Xr[0].size()), static_cast<int>(Xr.size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
iota(begin(states[features[i]]), end(states[features[i]]), 0);
Xd.index_put_({ "...", i }, torch::tensor(Xr[i], torch::kInt32));
}
states[className] = vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states[className]), end(states[className]), 0);
} else {
Xd = torch::zeros({ static_cast<int>(X[0].size()), static_cast<int>(X.size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) {
Xd.index_put_({ "...", i }, torch::tensor(X[i]));
}
}
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
}
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name)
{
auto handler = ArffFiles();
handler.load(PATH + static_cast<string>(name) + ".arff");
// Get Dataset X, y
vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
vector<string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
// Discretize Dataset
vector<mdlp::labels_t> Xd;
map<string, int> maxes;
tie(Xd, maxes) = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1;
map<string, vector<int>> states;
for (auto feature : features) {
states[feature] = vector<int>(maxes[feature]);
}
states[className] = vector<int>(maxes[className]);
return { Xd, y, features, className, states };
}
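A minimal calling sketch for the loaders above (path and dataset name are illustrative; loadDataset expects <path><name>.arff, so the path should end with a separator):

auto [Xt, yt, features, className, states] = loadDataset("datasets/", "iris", true, true);
// Xt is a samples x features tensor here; yt holds the class labels
auto [Xv, yv, featuresv, classNamev, statesv] = loadFile("iris"); // uses the fixed PATH constant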

platformUtils.h

@@ -1,21 +0,0 @@
#ifndef PLATFORM_UTILS_H
#define PLATFORM_UTILS_H
#include <torch/torch.h>
#include <string>
#include <vector>
#include <map>
#include <tuple>
#include "ArffFiles.h"
#include "CPPFImdlp.h"
using namespace std;
const string PATH = "../../data/";
bool file_exists(const std::string& name);
vector<string> split(const string& text, char delimiter);
pair<vector<mdlp::labels_t>, map<string, int>> discretize(vector<mdlp::samples_t>& X, mdlp::labels_t& y, vector<string> features);
vector<mdlp::labels_t> discretizeDataset(vector<mdlp::samples_t>& X, mdlp::labels_t& y);
pair<torch::Tensor, map<string, vector<int>>> discretizeTorch(torch::Tensor& X, torch::Tensor& y, vector<string>& features, const string& className);
tuple<vector<vector<int>>, vector<int>, vector<string>, string, map<string, vector<int>>> loadFile(const string& name);
tuple<torch::Tensor, torch::Tensor, vector<string>, string, map<string, vector<int>>> loadDataset(const string& path, const string& name, bool class_last, bool discretize_dataset);
map<string, vector<int>> get_states(vector<string>& features, string className, map<string, int>& maxes);
#endif //PLATFORM_UTILS_H

tests/BayesModels.cc

@@ -1,88 +0,0 @@
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do this in one cpp file
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <vector>
#include <map>
#include <string>
#include "KDB.h"
#include "TAN.h"
#include "SPODE.h"
#include "AODE.h"
#include "platformUtils.h"
TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
{
map <pair<string, string>, float> scores = {
{{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
{{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
{{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333}
};
string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
auto [Xd, y, features, className, states] = loadFile(file_name);
SECTION("Test TAN classifier (" + file_name + ")")
{
auto clf = bayesnet::TAN();
clf.fit(Xd, y, features, className, states);
auto score = clf.score(Xd, y);
//scores[{file_name, "TAN"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(1e-6));
}
SECTION("Test KDB classifier (" + file_name + ")")
{
auto clf = bayesnet::KDB(2);
clf.fit(Xd, y, features, className, states);
auto score = clf.score(Xd, y);
//scores[{file_name, "KDB"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDB"}]).epsilon(1e-6));
}
SECTION("Test SPODE classifier (" + file_name + ")")
{
auto clf = bayesnet::SPODE(1);
clf.fit(Xd, y, features, className, states);
auto score = clf.score(Xd, y);
// scores[{file_name, "SPODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(1e-6));
}
SECTION("Test AODE classifier (" + file_name + ")")
{
auto clf = bayesnet::AODE();
clf.fit(Xd, y, features, className, states);
auto score = clf.score(Xd, y);
// scores[{file_name, "AODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(1e-6));
}
// for (auto scores : scores) {
// cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, ";
// }
}
TEST_CASE("Models features")
{
auto graph = vector<string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
"class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
"class -> sepallength", "class -> sepalwidth", "class -> petallength", "class -> petalwidth", "petallength [shape=circle] \n",
"petallength -> sepallength", "petalwidth [shape=circle] \n", "sepallength [shape=circle] \n",
"sepallength -> sepalwidth", "sepalwidth [shape=circle] \n", "sepalwidth -> petalwidth", "}\n"
}
);
auto clf = bayesnet::TAN();
auto [Xd, y, features, className, states] = loadFile("iris");
clf.fit(Xd, y, features, className, states);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.show() == vector<string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
REQUIRE(clf.graph("Test") == graph);
}
TEST_CASE("Get num features & num edges")
{
auto [Xd, y, features, className, states] = loadFile("iris");
auto clf = bayesnet::KDB(2);
clf.fit(Xd, y, features, className, states);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 8);
}

tests/BayesNetwork.cc

@@ -1,33 +0,0 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <string>
#include "KDB.h"
#include "platformUtils.h"
TEST_CASE("Test Bayesian Network")
{
auto [Xd, y, features, className, states] = loadFile("iris");
SECTION("Test get features")
{
auto net = bayesnet::Network();
net.addNode("A");
net.addNode("B");
REQUIRE(net.getFeatures() == vector<string>{"A", "B"});
net.addNode("C");
REQUIRE(net.getFeatures() == vector<string>{"A", "B", "C"});
}
SECTION("Test get edges")
{
auto net = bayesnet::Network();
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("B", "C");
REQUIRE(net.getEdges() == vector<pair<string, string>>{ {"A", "B"}, { "B", "C" } });
net.addEdge("A", "C");
REQUIRE(net.getEdges() == vector<pair<string, string>>{ {"A", "B"}, { "A", "C" }, { "B", "C" } });
}
}

tests/CMakeLists.txt

@@ -1,11 +1,16 @@
if(ENABLE_TESTING)
set(TEST_MAIN "unit_tests")
include_directories(${BayesNet_SOURCE_DIR}/src/BayesNet)
include_directories(${BayesNet_SOURCE_DIR}/src/Platform)
include_directories(${BayesNet_SOURCE_DIR}/lib/Files)
include_directories(${BayesNet_SOURCE_DIR}/lib/mdlp)
set(TEST_SOURCES BayesModels.cc BayesNetwork.cc ${BayesNet_SOURCE_DIR}/src/Platform/platformUtils.cc ${BayesNet_SOURCES})
add_executable(${TEST_MAIN} ${TEST_SOURCES})
target_link_libraries(${TEST_MAIN} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain)
add_test(NAME ${TEST_MAIN} COMMAND ${TEST_MAIN})
set(TEST_BAYESNET "unit_tests_bayesnet")
include_directories(
${BayesNet_SOURCE_DIR}/src/BayesNet
${BayesNet_SOURCE_DIR}/src/Platform
${BayesNet_SOURCE_DIR}/lib/Files
${BayesNet_SOURCE_DIR}/lib/mdlp
${BayesNet_SOURCE_DIR}/lib/folding
${BayesNet_SOURCE_DIR}/lib/json/include
${CMAKE_BINARY_DIR}/configured_files/include
)
set(TEST_SOURCES_BAYESNET TestBayesModels.cc TestBayesNetwork.cc TestBayesMetrics.cc TestUtils.cc ${BayesNet_SOURCES})
add_executable(${TEST_BAYESNET} ${TEST_SOURCES_BAYESNET})
target_link_libraries(${TEST_BAYESNET} PUBLIC "${TORCH_LIBRARIES}" ArffFiles mdlp Catch2::Catch2WithMain)
add_test(NAME ${TEST_BAYESNET} COMMAND ${TEST_BAYESNET})
endif(ENABLE_TESTING)

tests/TestBayesMetrics.cc

@@ -0,0 +1,62 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include "BayesMetrics.h"
#include "TestUtils.h"
TEST_CASE("Metrics Test", "[BayesNet]")
{
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
map<std::string, pair<int, std::vector<int>>> resultsKBest = {
{"glass", {7, { 0, 1, 7, 6, 3, 5, 2 }}},
{"iris", {3, { 0, 3, 2 }} },
{"ecoli", {6, { 2, 4, 1, 0, 6, 5 }}},
{"diabetes", {2, { 7, 1 }}}
};
map<std::string, double> resultsMI = {
{"glass", 0.12805398},
{"iris", 0.3158139948},
{"ecoli", 0.0089431099},
{"diabetes", 0.0345470614}
};
map<pair<std::string, int>, std::vector<pair<int, int>>> resultsMST = {
{ {"glass", 0}, { {0, 6}, {0, 5}, {0, 3}, {5, 1}, {5, 8}, {5, 4}, {6, 2}, {6, 7} } },
{ {"glass", 1}, { {1, 5}, {5, 0}, {5, 8}, {5, 4}, {0, 6}, {0, 3}, {6, 2}, {6, 7} } },
{ {"iris", 0}, { {0, 1}, {0, 2}, {1, 3} } },
{ {"iris", 1}, { {1, 0}, {1, 3}, {0, 2} } },
{ {"ecoli", 0}, { {0, 1}, {0, 2}, {1, 5}, {1, 3}, {5, 6}, {5, 4} } },
{ {"ecoli", 1}, { {1, 0}, {1, 5}, {1, 3}, {5, 6}, {5, 4}, {0, 2} } },
{ {"diabetes", 0}, { {0, 7}, {0, 2}, {0, 6}, {2, 3}, {3, 4}, {3, 5}, {4, 1} } },
{ {"diabetes", 1}, { {1, 4}, {4, 3}, {3, 2}, {3, 5}, {2, 0}, {0, 7}, {0, 6} } }
};
auto raw = RawDatasets(file_name, true);
bayesnet::Metrics metrics(raw.dataset, raw.featurest, raw.classNamet, raw.classNumStates);
SECTION("Test Constructor")
{
REQUIRE(metrics.getScoresKBest().size() == 0);
}
SECTION("Test SelectKBestWeighted")
{
std::vector<int> kBest = metrics.SelectKBestWeighted(raw.weights, true, resultsKBest.at(file_name).first);
REQUIRE(kBest.size() == resultsKBest.at(file_name).first);
REQUIRE(kBest == resultsKBest.at(file_name).second);
}
SECTION("Test Mutual Information")
{
auto result = metrics.mutualInformation(raw.dataset.index({ 1, "..." }), raw.dataset.index({ 2, "..." }), raw.weights);
REQUIRE(result == Catch::Approx(resultsMI.at(file_name)).epsilon(raw.epsilon));
}
SECTION("Test Maximum Spanning Tree")
{
auto weights_matrix = metrics.conditionalEdge(raw.weights);
for (int i = 0; i < 2; ++i) {
auto result = metrics.maximumSpanningTree(raw.featurest, weights_matrix, i);
REQUIRE(result == resultsMST.at({ file_name, i }));
}
}
}

tests/TestBayesModels.cc

@@ -0,0 +1,176 @@
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do this in one cpp file
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <vector>
#include <map>
#include <string>
#include "KDB.h"
#include "TAN.h"
#include "SPODE.h"
#include "AODE.h"
#include "BoostAODE.h"
#include "TANLd.h"
#include "KDBLd.h"
#include "SPODELd.h"
#include "AODELd.h"
#include "TestUtils.h"
TEST_CASE("Library check version", "[BayesNet]")
{
auto clf = bayesnet::KDB(2);
REQUIRE(clf.getVersion() == "1.0.1");
}
TEST_CASE("Test Bayesian Classifiers score", "[BayesNet]")
{
map <pair<std::string, std::string>, float> scores = {
// Diabetes
{{"diabetes", "AODE"}, 0.811198}, {{"diabetes", "KDB"}, 0.852865}, {{"diabetes", "SPODE"}, 0.802083}, {{"diabetes", "TAN"}, 0.821615},
{{"diabetes", "AODELd"}, 0.8138f}, {{"diabetes", "KDBLd"}, 0.80208f}, {{"diabetes", "SPODELd"}, 0.78646f}, {{"diabetes", "TANLd"}, 0.8099f}, {{"diabetes", "BoostAODE"}, 0.83984f},
// Ecoli
{{"ecoli", "AODE"}, 0.889881}, {{"ecoli", "KDB"}, 0.889881}, {{"ecoli", "SPODE"}, 0.880952}, {{"ecoli", "TAN"}, 0.892857},
{{"ecoli", "AODELd"}, 0.8869f}, {{"ecoli", "KDBLd"}, 0.875f}, {{"ecoli", "SPODELd"}, 0.84226f}, {{"ecoli", "TANLd"}, 0.86905f}, {{"ecoli", "BoostAODE"}, 0.89583f},
// Glass
{{"glass", "AODE"}, 0.78972}, {{"glass", "KDB"}, 0.827103}, {{"glass", "SPODE"}, 0.775701}, {{"glass", "TAN"}, 0.827103},
{{"glass", "AODELd"}, 0.79439f}, {{"glass", "KDBLd"}, 0.85047f}, {{"glass", "SPODELd"}, 0.79439f}, {{"glass", "TANLd"}, 0.86449f}, {{"glass", "BoostAODE"}, 0.84579f},
// Iris
{{"iris", "AODE"}, 0.973333}, {{"iris", "KDB"}, 0.973333}, {{"iris", "SPODE"}, 0.973333}, {{"iris", "TAN"}, 0.973333},
{{"iris", "AODELd"}, 0.973333}, {{"iris", "KDBLd"}, 0.973333}, {{"iris", "SPODELd"}, 0.96f}, {{"iris", "TANLd"}, 0.97333f}, {{"iris", "BoostAODE"}, 0.98f}
};
std::string file_name = GENERATE("glass", "iris", "ecoli", "diabetes");
auto raw = RawDatasets(file_name, false);
SECTION("Test TAN classifier (" + file_name + ")")
{
auto clf = bayesnet::TAN();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
//scores[{file_name, "TAN"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "TAN"}]).epsilon(raw.epsilon));
}
SECTION("Test TANLd classifier (" + file_name + ")")
{
auto clf = bayesnet::TANLd();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
//scores[{file_name, "TANLd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "TANLd"}]).epsilon(raw.epsilon));
}
SECTION("Test KDB classifier (" + file_name + ")")
{
auto clf = bayesnet::KDB(2);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
//scores[{file_name, "KDB"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDB"}]).epsilon(raw.epsilon));
}
SECTION("Test KDBLd classifier (" + file_name + ")")
{
auto clf = bayesnet::KDBLd(2);
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
//scores[{file_name, "KDBLd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "KDBLd"}]).epsilon(raw.epsilon));
}
SECTION("Test SPODE classifier (" + file_name + ")")
{
auto clf = bayesnet::SPODE(1);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
// scores[{file_name, "SPODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODE"}]).epsilon(raw.epsilon));
}
SECTION("Test SPODELd classifier (" + file_name + ")")
{
auto clf = bayesnet::SPODELd(1);
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
// scores[{file_name, "SPODELd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "SPODELd"}]).epsilon(raw.epsilon));
}
SECTION("Test AODE classifier (" + file_name + ")")
{
auto clf = bayesnet::AODE();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
// scores[{file_name, "AODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODE"}]).epsilon(raw.epsilon));
}
SECTION("Test AODELd classifier (" + file_name + ")")
{
auto clf = bayesnet::AODELd();
clf.fit(raw.Xt, raw.yt, raw.featurest, raw.classNamet, raw.statest);
auto score = clf.score(raw.Xt, raw.yt);
// scores[{file_name, "AODELd"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "AODELd"}]).epsilon(raw.epsilon));
}
SECTION("Test BoostAODE classifier (" + file_name + ")")
{
auto clf = bayesnet::BoostAODE();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = clf.score(raw.Xv, raw.yv);
// scores[{file_name, "BoostAODE"}] = score;
REQUIRE(score == Catch::Approx(scores[{file_name, "BoostAODE"}]).epsilon(raw.epsilon));
}
// for (auto scores : scores) {
// std::cout << "{{\"" << scores.first.first << "\", \"" << scores.first.second << "\"}, " << scores.second << "}, ";
// }
}
TEST_CASE("Models features", "[BayesNet]")
{
auto graph = std::vector<std::string>({ "digraph BayesNet {\nlabel=<BayesNet Test>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n",
"class [shape=circle, fontcolor=red, fillcolor=lightblue, style=filled ] \n",
"class -> sepallength", "class -> sepalwidth", "class -> petallength", "class -> petalwidth", "petallength [shape=circle] \n",
"petallength -> sepallength", "petalwidth [shape=circle] \n", "sepallength [shape=circle] \n",
"sepallength -> sepalwidth", "sepalwidth [shape=circle] \n", "sepalwidth -> petalwidth", "}\n"
}
);
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::TAN();
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 7);
REQUIRE(clf.show() == std::vector<std::string>{"class -> sepallength, sepalwidth, petallength, petalwidth, ", "petallength -> sepallength, ", "petalwidth -> ", "sepallength -> sepalwidth, ", "sepalwidth -> petalwidth, "});
REQUIRE(clf.graph("Test") == graph);
}
TEST_CASE("Get num features & num edges", "[BayesNet]")
{
auto raw = RawDatasets("iris", true);
auto clf = bayesnet::KDB(2);
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 5);
REQUIRE(clf.getNumberOfEdges() == 8);
}
TEST_CASE("BoostAODE feature_select CFS", "[BayesNet]")
{
auto raw = RawDatasets("glass", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({ {"select_features", "CFS"} });
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 90);
REQUIRE(clf.getNumberOfEdges() == 153);
REQUIRE(clf.getNotes().size() == 2);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 9 with CFS");
REQUIRE(clf.getNotes()[1] == "Number of models: 9");
}
TEST_CASE("BoostAODE test used features in train note", "[BayesNet]")
{
auto raw = RawDatasets("diabetes", true);
auto clf = bayesnet::BoostAODE();
clf.setHyperparameters({
{"ascending",true},
{"convergence", true},
{"repeatSparent",true},
{"select_features","CFS"}
});
clf.fit(raw.Xv, raw.yv, raw.featuresv, raw.classNamev, raw.statesv);
REQUIRE(clf.getNumberOfNodes() == 72);
REQUIRE(clf.getNumberOfEdges() == 120);
REQUIRE(clf.getNotes().size() == 3);
REQUIRE(clf.getNotes()[0] == "Used features in initialization: 6 of 8 with CFS");
REQUIRE(clf.getNotes()[1] == "Used features in train: 7 of 8");
REQUIRE(clf.getNotes()[2] == "Number of models: 8");
}

tests/TestBayesNetwork.cc

@@ -0,0 +1,211 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <string>
#include "TestUtils.h"
#include "Network.h"
void buildModel(bayesnet::Network& net, const std::vector<std::string>& features, const std::string& className)
{
std::vector<pair<int, int>> network = { {0, 1}, {0, 2}, {1, 3} };
for (const auto& feature : features) {
net.addNode(feature);
}
net.addNode(className);
for (const auto& edge : network) {
net.addEdge(features.at(edge.first), features.at(edge.second));
}
for (const auto& feature : features) {
net.addEdge(className, feature);
}
}
TEST_CASE("Test Bayesian Network", "[BayesNet]")
{
auto raw = RawDatasets("iris", true);
auto net = bayesnet::Network();
double threshold = 1e-4;
SECTION("Test get features")
{
net.addNode("A");
net.addNode("B");
REQUIRE(net.getFeatures() == std::vector<std::string>{"A", "B"});
net.addNode("C");
REQUIRE(net.getFeatures() == std::vector<std::string>{"A", "B", "C"});
}
SECTION("Test get edges")
{
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("B", "C");
REQUIRE(net.getEdges() == std::vector<pair<std::string, std::string>>{ {"A", "B"}, { "B", "C" } });
REQUIRE(net.getNumEdges() == 2);
net.addEdge("A", "C");
REQUIRE(net.getEdges() == std::vector<pair<std::string, std::string>>{ {"A", "B"}, { "A", "C" }, { "B", "C" } });
REQUIRE(net.getNumEdges() == 3);
}
SECTION("Test getNodes")
{
net.addNode("A");
net.addNode("B");
auto& nodes = net.getNodes();
REQUIRE(nodes.count("A") == 1);
REQUIRE(nodes.count("B") == 1);
}
SECTION("Test fit Network")
{
auto net2 = bayesnet::Network();
auto net3 = bayesnet::Network();
net3.initialize();
net2.initialize();
net.initialize();
buildModel(net, raw.featuresv, raw.classNamev);
buildModel(net2, raw.featurest, raw.classNamet);
buildModel(net3, raw.featurest, raw.classNamet);
std::vector<pair<std::string, std::string>> edges = {
{"class", "sepallength"}, {"class", "sepalwidth"}, {"class", "petallength"},
{"class", "petalwidth" }, {"sepallength", "sepalwidth"}, {"sepallength", "petallength"},
{"sepalwidth", "petalwidth"}
};
REQUIRE(net.getEdges() == edges);
REQUIRE(net2.getEdges() == edges);
REQUIRE(net3.getEdges() == edges);
std::vector<std::string> features = { "sepallength", "sepalwidth", "petallength", "petalwidth", "class" };
REQUIRE(net.getFeatures() == features);
REQUIRE(net2.getFeatures() == features);
REQUIRE(net3.getFeatures() == features);
auto& nodes = net.getNodes();
auto& nodes2 = net2.getNodes();
auto& nodes3 = net3.getNodes();
// Check Nodes parents & children
for (const auto& feature : features) {
// Parents
std::vector<std::string> parents, parents2, parents3, children, children2, children3;
auto nodeParents = nodes[feature]->getParents();
auto nodeParents2 = nodes2[feature]->getParents();
auto nodeParents3 = nodes3[feature]->getParents();
transform(nodeParents.begin(), nodeParents.end(), back_inserter(parents), [](const auto& p) { return p->getName(); });
transform(nodeParents2.begin(), nodeParents2.end(), back_inserter(parents2), [](const auto& p) { return p->getName(); });
transform(nodeParents3.begin(), nodeParents3.end(), back_inserter(parents3), [](const auto& p) { return p->getName(); });
REQUIRE(parents == parents2);
REQUIRE(parents == parents3);
// Children
auto nodeChildren = nodes[feature]->getChildren();
auto nodeChildren2 = nodes2[feature]->getChildren();
auto nodeChildren3 = nodes3[feature]->getChildren();
transform(nodeChildren.begin(), nodeChildren.end(), back_inserter(children), [](const auto& p) { return p->getName(); });
transform(nodeChildren2.begin(), nodeChildren2.end(), back_inserter(children2), [](const auto& p) { return p->getName(); });
transform(nodeChildren3.begin(), nodeChildren3.end(), back_inserter(children3), [](const auto& p) { return p->getName(); });
REQUIRE(children == children2);
REQUIRE(children == children3);
}
// Fit networks
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
net2.fit(raw.dataset, raw.weights, raw.featurest, raw.classNamet, raw.statest);
net3.fit(raw.Xt, raw.yt, raw.weights, raw.featurest, raw.classNamet, raw.statest);
REQUIRE(net.getStates() == net2.getStates());
REQUIRE(net.getStates() == net3.getStates());
// Check Conditional Probabilities tables
for (const auto& feature : features) {
auto cpt = nodes[feature]->getCPT();
auto cpt2 = nodes2[feature]->getCPT();
auto cpt3 = nodes3[feature]->getCPT();
REQUIRE(cpt.equal(cpt2));
REQUIRE(cpt.equal(cpt3));
}
}
SECTION("Test show")
{
auto net = bayesnet::Network();
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("A", "C");
auto str = net.show();
REQUIRE(str.size() == 3);
REQUIRE(str[0] == "A -> B, C, ");
REQUIRE(str[1] == "B -> ");
REQUIRE(str[2] == "C -> ");
}
SECTION("Test topological_sort")
{
auto net = bayesnet::Network();
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("A", "C");
auto sorted = net.topological_sort();
REQUIRE(sorted.size() == 3);
REQUIRE(sorted[0] == "A");
bool result = sorted[1] == "B" && sorted[2] == "C";
REQUIRE(result);
}
SECTION("Test graph")
{
auto net = bayesnet::Network();
net.addNode("A");
net.addNode("B");
net.addNode("C");
net.addEdge("A", "B");
net.addEdge("A", "C");
auto str = net.graph("Test Graph");
REQUIRE(str.size() == 7);
REQUIRE(str[0] == "digraph BayesNet {\nlabel=<BayesNet Test Graph>\nfontsize=30\nfontcolor=blue\nlabelloc=t\nlayout=circo\n");
REQUIRE(str[1] == "A [shape=circle] \n");
REQUIRE(str[2] == "A -> B");
REQUIRE(str[3] == "A -> C");
REQUIRE(str[4] == "B [shape=circle] \n");
REQUIRE(str[5] == "C [shape=circle] \n");
REQUIRE(str[6] == "}\n");
}
SECTION("Test predict")
{
auto net = bayesnet::Network();
buildModel(net, raw.featuresv, raw.classNamev);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
std::vector<int> y_test = { 2, 2, 0, 2, 1 };
auto y_pred = net.predict(test);
REQUIRE(y_pred == y_test);
}
SECTION("Test predict_proba")
{
auto net = bayesnet::Network();
buildModel(net, raw.featuresv, raw.classNamev);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
std::vector<std::vector<int>> test = { {1, 2, 0, 1, 1}, {0, 1, 2, 0, 1}, {0, 0, 0, 0, 1}, {2, 2, 2, 2, 1} };
std::vector<std::vector<double>> y_test = {
{0.450237, 0.0866621, 0.463101},
{0.244443, 0.0925922, 0.662964},
{0.913441, 0.0125857, 0.0739732},
{0.450237, 0.0866621, 0.463101},
{0.0135226, 0.971726, 0.0147519}
};
auto y_pred = net.predict_proba(test);
REQUIRE(y_pred.size() == 5);
REQUIRE(y_pred[0].size() == 3);
for (int i = 0; i < y_pred.size(); ++i) {
for (int j = 0; j < y_pred[i].size(); ++j) {
REQUIRE(y_pred[i][j] == Catch::Approx(y_test[i][j]).margin(threshold));
}
}
}
SECTION("Test score")
{
auto net = bayesnet::Network();
buildModel(net, raw.featuresv, raw.classNamev);
net.fit(raw.Xv, raw.yv, raw.weightsv, raw.featuresv, raw.classNamev, raw.statesv);
auto score = net.score(raw.Xv, raw.yv);
REQUIRE(score == Catch::Approx(0.97333333).margin(threshold));
}
}

tests/TestUtils.cc

@@ -0,0 +1,105 @@
#include "TestUtils.h"
#include "config.h"
class Paths {
public:
static std::string datasets()
{
return { data_path.begin(), data_path.end() };
}
};
pair<std::vector<mdlp::labels_t>, map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<std::string> features)
{
std::vector<mdlp::labels_t> Xd;
map<std::string, int> maxes;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
maxes[features[i]] = *max_element(xd.begin(), xd.end()) + 1;
Xd.push_back(xd);
}
return { Xd, maxes };
}
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y)
{
std::vector<mdlp::labels_t> Xd;
auto fimdlp = mdlp::CPPFImdlp();
for (int i = 0; i < X.size(); i++) {
fimdlp.fit(X[i], y);
mdlp::labels_t& xd = fimdlp.transform(X[i]);
Xd.push_back(xd);
}
return Xd;
}
bool file_exists(const std::string& name)
{
if (FILE* file = fopen(name.c_str(), "r")) {
fclose(file);
return true;
} else {
return false;
}
}
tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last, bool discretize_dataset)
{
auto handler = ArffFiles();
handler.load(Paths::datasets() + static_cast<std::string>(name) + ".arff", class_last);
// Get Dataset X, y
std::vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
std::vector<std::string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
torch::Tensor Xd;
auto states = map<std::string, std::vector<int>>();
if (discretize_dataset) {
auto Xr = discretizeDataset(X, y);
Xd = torch::zeros({ static_cast<int>(Xr.size()), static_cast<int>(Xr[0].size()) }, torch::kInt32);
for (int i = 0; i < features.size(); ++i) {
states[features[i]] = std::vector<int>(*max_element(Xr[i].begin(), Xr[i].end()) + 1);
auto item = states.at(features[i]);
iota(begin(item), end(item), 0);
Xd.index_put_({ i, "..." }, torch::tensor(Xr[i], torch::kInt32));
}
states[className] = std::vector<int>(*max_element(y.begin(), y.end()) + 1);
iota(begin(states.at(className)), end(states.at(className)), 0);
} else {
Xd = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kFloat32);
for (int i = 0; i < features.size(); ++i) {
Xd.index_put_({ i, "..." }, torch::tensor(X[i]));
}
}
return { Xd, torch::tensor(y, torch::kInt32), features, className, states };
}
tuple<std::vector<std::vector<int>>, std::vector<int>, std::vector<std::string>, std::string, map<std::string, std::vector<int>>> loadFile(const std::string& name)
{
auto handler = ArffFiles();
handler.load(Paths::datasets() + static_cast<std::string>(name) + ".arff");
// Get Dataset X, y
std::vector<mdlp::samples_t>& X = handler.getX();
mdlp::labels_t& y = handler.getY();
// Get className & Features
auto className = handler.getClassName();
std::vector<std::string> features;
auto attributes = handler.getAttributes();
transform(attributes.begin(), attributes.end(), back_inserter(features), [](const auto& pair) { return pair.first; });
// Discretize Dataset
std::vector<mdlp::labels_t> Xd;
map<std::string, int> maxes;
tie(Xd, maxes) = discretize(X, y, features);
maxes[className] = *max_element(y.begin(), y.end()) + 1;
map<std::string, std::vector<int>> states;
for (auto feature : features) {
states[feature] = std::vector<int>(maxes[feature]);
}
states[className] = std::vector<int>(maxes[className]);
return { Xd, y, features, className, states };
}

tests/TestUtils.h

@@ -0,0 +1,43 @@
#ifndef TEST_UTILS_H
#define TEST_UTILS_H
#include <torch/torch.h>
#include <string>
#include <vector>
#include <map>
#include <tuple>
#include "ArffFiles.h"
#include "CPPFImdlp.h"
bool file_exists(const std::string& name);
std::pair<std::vector<mdlp::labels_t>, std::map<std::string, int>> discretize(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y, std::vector<std::string> features);
std::vector<mdlp::labels_t> discretizeDataset(std::vector<mdlp::samples_t>& X, mdlp::labels_t& y);
std::tuple<std::vector<std::vector<int>>, std::vector<int>, std::vector<std::string>, std::string, std::map<std::string, std::vector<int>>> loadFile(const std::string& name);
std::tuple<torch::Tensor, torch::Tensor, std::vector<std::string>, std::string, std::map<std::string, std::vector<int>>> loadDataset(const std::string& name, bool class_last, bool discretize_dataset);
class RawDatasets {
public:
RawDatasets(const std::string& file_name, bool discretize)
{
// Xt can be either discretized or not
tie(Xt, yt, featurest, classNamet, statest) = loadDataset(file_name, true, discretize);
// Xv is always discretized
tie(Xv, yv, featuresv, classNamev, statesv) = loadFile(file_name);
auto yresized = torch::transpose(yt.view({ yt.size(0), 1 }), 0, 1);
dataset = torch::cat({ Xt, yresized }, 0);
nSamples = dataset.size(1);
weights = torch::full({ nSamples }, 1.0 / nSamples, torch::kDouble);
weightsv = std::vector<double>(nSamples, 1.0 / nSamples);
classNumStates = discretize ? statest.at(classNamet).size() : 0;
}
torch::Tensor Xt, yt, dataset, weights;
std::vector<std::vector<int>> Xv;
std::vector<double> weightsv;
std::vector<int> yv;
std::vector<std::string> featurest, featuresv;
std::map<std::string, std::vector<int>> statest, statesv;
std::string classNamet, classNamev;
int nSamples, classNumStates;
double epsilon = 1e-5;
};
#endif //TEST_UTILS_H
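All the test cases above start from this helper; as a sketch (the name must match an .arff file in the configured data path):

auto raw = RawDatasets("iris", true);  // discretize = true
// raw.Xt/raw.yt: tensor views, used by the *Ld and Network tests
// raw.Xv/raw.yv: vector views, used by the discrete classifiers
// raw.weights / raw.weightsv: uniform sample weights in both formats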

Some files were not shown because too many files have changed in this diff.